Skip to content

Commit e809158

Browse files
authored
Merge pull request RustPython#3709 from youknowone/pystr-interned
introduce PyStrInterned
2 parents 44ccc40 + 6e57194 commit e809158

File tree

7 files changed: 210 additions and 59 deletions.

stdlib/src/array.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -681,8 +681,8 @@ mod array {
681681
#[pyproperty]
682682
fn typecode(&self, vm: &VirtualMachine) -> PyStrRef {
683683
vm.ctx
684-
.intern_string(self.read().typecode().to_string())
685-
.into_pyref()
684+
.intern_str(self.read().typecode().to_string())
685+
.to_str()
686686
}
687687

688688
#[pyproperty]

vm/src/builtins/bool.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,12 @@ impl PyBool {
112112
#[pymethod(magic)]
113113
fn repr(zelf: bool, vm: &VirtualMachine) -> PyStrRef {
114114
if zelf {
115-
vm.ctx.true_str.clone()
115+
vm.ctx.true_str
116116
} else {
117-
vm.ctx.false_str.clone()
117+
vm.ctx.false_str
118118
}
119+
.to_owned()
120+
.into_pyref()
119121
}
120122

121123
#[pymethod(magic)]

vm/src/builtins/code.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ impl ConstantBag for PyObjBag<'_> {
8282
bytecode::BorrowedConstant::Float { value } => ctx.new_float(value).into(),
8383
bytecode::BorrowedConstant::Complex { value } => ctx.new_complex(value).into(),
8484
bytecode::BorrowedConstant::Str { value } if value.len() <= 20 => {
85-
ctx.intern_string(value).into_pyref().into()
85+
ctx.intern_str(value).to_object()
8686
}
8787
bytecode::BorrowedConstant::Str { value } => ctx.new_str(value).into(),
8888
bytecode::BorrowedConstant::Bytes { value } => ctx.new_bytes(value.to_vec()).into(),
@@ -104,7 +104,7 @@ impl ConstantBag for PyObjBag<'_> {
104104
}
105105

106106
fn make_name(&self, name: &str) -> PyStrRef {
107-
self.0.intern_string(name).into_pyref()
107+
self.0.intern_str(name).to_str()
108108
}
109109
}
110110

vm/src/builtins/str.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,13 @@ impl IntoPyStrRef for &str {
227227
}
228228
}
229229

230+
impl IntoPyStrRef for &'static crate::intern::PyStrInterned {
231+
#[inline]
232+
fn into_pystr_ref(self, _vm: &VirtualMachine) -> PyRef<PyStr> {
233+
self.to_str()
234+
}
235+
}
236+
230237
#[pyclass(module = false, name = "str_iterator")]
231238
#[derive(Debug)]
232239
pub struct PyStrIterator {

vm/src/intern.rs

Lines changed: 182 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
use crate::{
22
builtins::{PyStr, PyTypeRef},
33
common::lock::PyRwLock,
4-
Py, PyRef, PyRefExact,
4+
convert::ToPyObject,
5+
Py, PyObject, PyObjectRef, PyRef, PyRefExact,
6+
};
7+
use std::{
8+
borrow::{Borrow, ToOwned},
9+
ops::Deref,
510
};
6-
use std::ops::Deref;
711

812
#[derive(Debug)]
913
pub struct StringPool {
@@ -28,24 +32,40 @@ impl Clone for StringPool {
2832

2933
impl StringPool {
3034
#[inline]
31-
pub unsafe fn intern<S: Internable>(&self, s: S, typ: PyTypeRef) -> PyRefExact<PyStr> {
32-
if let Some(found) = self.inner.read().get(s.as_str()) {
33-
return found.clone().inner;
35+
pub unsafe fn intern<S: Internable>(&self, s: S, typ: PyTypeRef) -> &'static PyStrInterned {
36+
if let Some(found) = self.interned(s.as_ref()) {
37+
return found;
38+
}
39+
40+
#[cold]
41+
fn miss(zelf: &StringPool, s: PyRefExact<PyStr>) -> &'static PyStrInterned {
42+
let cache = CachedPyStrRef { inner: s };
43+
let inserted = zelf.inner.write().insert(cache.clone());
44+
if inserted {
45+
let interned = unsafe { PyStrInterned::borrow_cache(&cache) };
46+
// unsafe { interned.as_object().mark_intern() };
47+
interned
48+
} else {
49+
zelf.inner
50+
.read()
51+
.get(cache.as_str())
52+
.map(|cached| unsafe { PyStrInterned::borrow_cache(cached) })
53+
.expect("")
54+
}
3455
}
35-
let cache = CachedPyStrRef {
36-
inner: s.into_pyref(typ),
37-
};
38-
let inserted = self.inner.write().insert(cache.clone());
39-
if inserted {
40-
cache.inner
41-
} else {
42-
self.inner
43-
.read()
44-
.get(cache.inner.as_str())
45-
.unwrap()
46-
.clone()
47-
.inner
56+
let str_ref = s.into_pyref_exact(typ);
57+
miss(self, str_ref)
58+
}
59+
60+
#[inline]
61+
pub fn interned<S: MaybeInterned + ?Sized>(&self, s: &S) -> Option<&'static PyStrInterned> {
62+
if let Some(interned) = s.as_interned() {
63+
return Some(interned);
4864
}
65+
self.inner
66+
.read()
67+
.get(s.as_ref())
68+
.map(|cached| unsafe { PyStrInterned::borrow_cache(cached) })
4969
}
5070
}
5171

@@ -70,57 +90,178 @@ impl PartialEq for CachedPyStrRef {
7090
// Marker impl: promotes the `PartialEq` impl for `CachedPyStrRef` (declared just above) to `Eq`.
impl Eq for CachedPyStrRef {}
7191

7292
impl std::borrow::Borrow<str> for CachedPyStrRef {
93+
#[inline]
7394
fn borrow(&self) -> &str {
7495
self.inner.as_str()
7596
}
7697
}
7798

99+
impl AsRef<str> for CachedPyStrRef {
100+
#[inline]
101+
fn as_ref(&self) -> &str {
102+
self.as_str()
103+
}
104+
}
105+
106+
impl CachedPyStrRef {
107+
#[inline]
108+
fn as_str(&self) -> &str {
109+
self.inner.as_str()
110+
}
111+
}
112+
113+
/// The unique reference of interned PyStr
114+
/// Always intended to be used as a static reference
115+
pub struct PyStrInterned {
116+
inner: Py<PyStr>,
117+
}
118+
119+
impl PyStrInterned {
120+
/// # Safety
121+
/// the given cache must be alive while returned reference is alive
122+
#[inline]
123+
unsafe fn borrow_cache(cache: &CachedPyStrRef) -> &'static Self {
124+
std::mem::transmute_copy(cache)
125+
}
126+
127+
#[inline]
128+
fn as_ptr(&self) -> *const Py<PyStr> {
129+
self as *const _ as *const _
130+
}
131+
132+
#[inline]
133+
pub fn to_owned(&'static self) -> PyRefExact<PyStr> {
134+
unsafe { (*(&self as *const _ as *const PyRefExact<PyStr>)).clone() }
135+
}
136+
137+
#[inline]
138+
pub fn to_str(&'static self) -> PyRef<PyStr> {
139+
self.to_owned().into_pyref()
140+
}
141+
142+
#[inline]
143+
pub fn to_object(&'static self) -> PyObjectRef {
144+
self.to_str().into()
145+
}
146+
}
147+
148+
impl Borrow<PyObject> for PyStrInterned {
149+
#[inline(always)]
150+
fn borrow(&self) -> &PyObject {
151+
self.inner.borrow()
152+
}
153+
}
154+
155+
impl Deref for PyStrInterned {
156+
type Target = Py<PyStr>;
157+
#[inline(always)]
158+
fn deref(&self) -> &Self::Target {
159+
&self.inner
160+
}
161+
}
162+
163+
impl std::hash::Hash for PyStrInterned {
164+
#[inline(always)]
165+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
166+
std::hash::Hash::hash(&(self as *const _), state)
167+
}
168+
}
169+
170+
impl PartialEq for PyStrInterned {
171+
#[inline(always)]
172+
fn eq(&self, other: &Self) -> bool {
173+
std::ptr::eq(self, other)
174+
}
175+
}
176+
177+
// Marker impl: the address comparison used by `PartialEq` is a full equivalence relation.
impl Eq for PyStrInterned {}
178+
179+
impl AsRef<str> for PyStrInterned {
180+
#[inline]
181+
fn as_ref(&self) -> &str {
182+
self.as_str()
183+
}
184+
}
185+
186+
impl std::fmt::Debug for PyStrInterned {
187+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
188+
std::fmt::Debug::fmt(self.as_str(), f)?;
189+
write!(f, "@{:p}", self.as_ptr())
190+
}
191+
}
192+
193+
impl std::fmt::Display for PyStrInterned {
194+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
195+
std::fmt::Display::fmt(self.as_str(), f)
196+
}
197+
}
198+
78199
mod sealed {
79-
use crate::{builtins::PyStr, object::PyRefExact};
200+
use crate::{
201+
builtins::PyStr,
202+
object::{Py, PyRefExact},
203+
};
80204

81205
pub trait SealedInternable {}
82206

83207
impl SealedInternable for String {}
84-
85208
impl SealedInternable for &str {}
86-
87209
impl SealedInternable for PyRefExact<PyStr> {}
210+
211+
pub trait SealedMaybeInterned {}
212+
213+
impl SealedMaybeInterned for str {}
214+
impl SealedMaybeInterned for PyRefExact<PyStr> {}
215+
impl SealedMaybeInterned for Py<PyStr> {}
88216
}
89217

90218
/// A sealed marker trait for `DictKey` types that always become an exact instance of `str`
91-
pub trait Internable: sealed::SealedInternable + AsRef<Self::Key> {
92-
type Key: crate::dictdatatype::DictKey + ?Sized;
93-
fn as_str(&self) -> &str;
94-
fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact<PyStr>;
219+
pub trait Internable: sealed::SealedInternable + ToPyObject + AsRef<Self::Interned> {
220+
type Interned: ?Sized + MaybeInterned;
221+
fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact<PyStr>;
95222
}
96223

97224
impl Internable for String {
98-
type Key = str;
99-
fn as_str(&self) -> &str {
100-
String::as_str(self)
101-
}
102-
fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
225+
type Interned = str;
226+
#[inline]
227+
fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
103228
let obj = PyRef::new_ref(PyStr::from(self), str_type, None);
104229
unsafe { PyRefExact::new_unchecked(obj) }
105230
}
106231
}
107232

108233
impl Internable for &str {
109-
type Key = str;
110-
fn as_str(&self) -> &str {
111-
self
112-
}
113-
fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
114-
self.to_owned().into_pyref(str_type)
234+
type Interned = str;
235+
#[inline]
236+
fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
237+
self.to_owned().into_pyref_exact(str_type)
115238
}
116239
}
117240

118241
impl Internable for PyRefExact<PyStr> {
119-
type Key = Py<PyStr>;
120-
fn as_str(&self) -> &str {
121-
self.deref().as_str()
122-
}
123-
fn into_pyref(self, _str_type: PyTypeRef) -> PyRefExact<PyStr> {
242+
type Interned = Py<PyStr>;
243+
#[inline]
244+
fn into_pyref_exact(self, _str_type: PyTypeRef) -> PyRefExact<PyStr> {
124245
self
125246
}
126247
}
248+
249+
pub trait MaybeInterned:
250+
AsRef<str> + crate::dictdatatype::DictKey + sealed::SealedMaybeInterned
251+
{
252+
fn as_interned(&self) -> Option<&'static PyStrInterned>;
253+
}
254+
255+
impl MaybeInterned for str {
256+
#[inline(always)]
257+
fn as_interned(&self) -> Option<&'static PyStrInterned> {
258+
None
259+
}
260+
}
261+
262+
impl MaybeInterned for Py<PyStr> {
263+
#[inline(always)]
264+
fn as_interned(&self) -> Option<&'static PyStrInterned> {
265+
None
266+
}
267+
}

vm/src/stdlib/sys.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,14 @@ mod sys {
125125
fn byteorder(vm: &VirtualMachine) -> PyStrRef {
126126
// https://doc.rust-lang.org/reference/conditional-compilation.html#target_endian
127127
vm.ctx
128-
.intern_string(if cfg!(target_endian = "little") {
128+
.intern_str(if cfg!(target_endian = "little") {
129129
"little"
130130
} else if cfg!(target_endian = "big") {
131131
"big"
132132
} else {
133133
"unknown"
134134
})
135-
.into_pyref()
135+
.to_str()
136136
}
137137

138138
#[pyattr]
@@ -513,7 +513,7 @@ mod sys {
513513

514514
#[pyfunction]
515515
fn intern(s: PyRefExact<PyStr>, vm: &VirtualMachine) -> PyRefExact<PyStr> {
516-
vm.ctx.intern_string(s)
516+
vm.ctx.intern_str(s).to_owned()
517517
}
518518

519519
#[pyattr]

0 commit comments

Comments
 (0)