Skip to content

Commit 2c74716

Browse files
authored
Merge pull request RustPython#926 from youknowone/number-hashes
Fix number hashes for small numbers
2 parents 0e56bb4 + 7a64f3e commit 2c74716

18 files changed

+148
-45
lines changed

tests/snippets/dict.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,3 +198,5 @@ def __eq__(self, other):
198198

199199
w = {1: 1, **x, 2: 2, **y, 3: 3, **z, 4: 4}
200200
assert w == {1: 1, 'a': 1, 'b': 2, 'c': 3, 2: 2, 'd': 3, 3: 3, 'e': 3, 4: 4}
201+
202+
assert str({True: True, 1.0: 1.0}) == str({True: 1.0})

tests/snippets/floats.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,21 @@
9898
assert_raises(ValueError, lambda: float('foo'))
9999
assert_raises(OverflowError, lambda: float(2**10000))
100100

101+
# check eq and hash for small numbers
102+
103+
assert 1.0 == 1
104+
assert 1.0 == True
105+
assert 0.0 == 0
106+
assert 0.0 == False
107+
assert hash(1.0) == hash(1)
108+
assert hash(1.0) == hash(True)
109+
assert hash(0.0) == hash(0)
110+
assert hash(0.0) == hash(False)
111+
assert hash(1.0) != hash(1.0000000001)
112+
113+
assert 5.0 in {3, 4, 5}
114+
assert {-1: 2}[-1.0] == 2
115+
101116
# check that magic methods are implemented for ints and floats
102117

103118
assert 1.0.__add__(1.0) == 2.0

vm/src/builtins.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,7 @@ fn builtin_hasattr(obj: PyObjectRef, attr: PyStringRef, vm: &VirtualMachine) ->
303303

304304
fn builtin_hash(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult {
305305
arg_check!(vm, args, required = [(obj, None)]);
306-
307-
vm.call_method(obj, "__hash__", vec![])
306+
vm._hash(obj).and_then(|v| Ok(vm.new_int(v)))
308307
}
309308

310309
// builtin_help

vm/src/dictdatatype.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
use crate::obj::objbool;
2-
use crate::obj::objint;
32
use crate::pyobject::{IdProtocol, PyObjectRef, PyResult};
43
use crate::vm::VirtualMachine;
5-
use num_traits::ToPrimitive;
64
/// Ordered dictionary implementation.
75
/// Inspired by: https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
86
/// And: https://www.youtube.com/watch?v=p33CVV29OG8
@@ -218,8 +216,7 @@ enum LookupResult {
218216
}
219217

220218
fn calc_hash(vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<usize> {
221-
let hash = vm.call_method(key, "__hash__", vec![])?;
222-
Ok(objint::get_value(&hash).to_usize().unwrap())
219+
Ok(vm._hash(key)? as usize)
223220
}
224221

225222
/// Invoke __eq__ on two keys

vm/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ pub mod frame;
5555
pub mod function;
5656
pub mod import;
5757
pub mod obj;
58+
mod pyhash;
5859
pub mod pyobject;
5960
pub mod stdlib;
6061
mod symboltable;

vm/src/obj/objbytearray.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ impl PyByteArrayRef {
135135
}
136136

137137
#[pymethod(name = "__hash__")]
138-
fn hash(self, vm: &VirtualMachine) -> PyResult {
138+
fn hash(self, vm: &VirtualMachine) -> PyResult<()> {
139139
Err(vm.new_type_error("unhashable type: bytearray".to_string()))
140140
}
141141

vm/src/obj/objbyteinner.rs

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::obj::objint::PyIntRef;
22
use crate::obj::objnone::PyNoneRef;
33
use crate::obj::objslice::PySliceRef;
44
use crate::obj::objtuple::PyTupleRef;
5+
use crate::pyhash;
56
use crate::pyobject::Either;
67
use crate::pyobject::PyRef;
78
use crate::pyobject::PyValue;
@@ -12,17 +13,12 @@ use core::ops::Range;
1213
use num_bigint::BigInt;
1314

1415
use crate::function::OptionalArg;
15-
16-
use crate::vm::VirtualMachine;
17-
1816
use crate::pyobject::{PyResult, TypeProtocol};
19-
20-
use crate::obj::objstr::{PyString, PyStringRef};
21-
use std::collections::hash_map::DefaultHasher;
22-
use std::hash::{Hash, Hasher};
17+
use crate::vm::VirtualMachine;
2318

2419
use super::objint;
2520
use super::objsequence::{is_valid_slice_arg, PySliceableSequence};
21+
use super::objstr::{PyString, PyStringRef};
2622

2723
use crate::obj::objint::PyInt;
2824
use num_integer::Integer;
@@ -379,10 +375,8 @@ impl PyByteInner {
379375
}
380376
}
381377

382-
pub fn hash(&self) -> usize {
383-
let mut hasher = DefaultHasher::new();
384-
self.elements.hash(&mut hasher);
385-
hasher.finish() as usize
378+
pub fn hash(&self) -> pyhash::PyHash {
379+
pyhash::hash_value(&self.elements)
386380
}
387381

388382
pub fn add(&self, other: PyByteInner) -> Vec<u8> {

vm/src/obj/objbytes.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
use crate::obj::objint::PyIntRef;
2-
32
use crate::obj::objslice::PySliceRef;
43
use crate::obj::objstr::PyStringRef;
54
use crate::obj::objtuple::PyTupleRef;
5+
use crate::pyhash;
66

77
use crate::pyobject::Either;
88
use crate::vm::VirtualMachine;
@@ -125,7 +125,7 @@ impl PyBytesRef {
125125
}
126126

127127
#[pymethod(name = "__hash__")]
128-
fn hash(self, _vm: &VirtualMachine) -> usize {
128+
fn hash(self, _vm: &VirtualMachine) -> pyhash::PyHash {
129129
self.inner.hash()
130130
}
131131

vm/src/obj/objdict.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ impl PyDictRef {
298298
Ok(PyDict { entries }.into_ref(vm))
299299
}
300300

301-
fn hash(self, vm: &VirtualMachine) -> PyResult {
301+
fn hash(self, vm: &VirtualMachine) -> PyResult<()> {
302302
Err(vm.new_type_error("unhashable type".to_string()))
303303
}
304304

vm/src/obj/objfloat.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use super::objstr;
44
use super::objtype;
55
use crate::function::OptionalArg;
66
use crate::obj::objtype::PyClassRef;
7+
use crate::pyhash;
78
use crate::pyobject::{
89
IdProtocol, IntoPyObject, PyClassImpl, PyContext, PyObjectRef, PyRef, PyResult, PyValue,
910
TypeProtocol,
@@ -429,6 +430,11 @@ impl PyFloat {
429430
zelf
430431
}
431432

433+
#[pymethod(name = "__hash__")]
434+
fn hash(&self, _vm: &VirtualMachine) -> pyhash::PyHash {
435+
pyhash::hash_float(self.value)
436+
}
437+
432438
#[pyproperty(name = "real")]
433439
fn real(zelf: PyRef<Self>, _vm: &VirtualMachine) -> PyFloatRef {
434440
zelf

vm/src/obj/objint.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
use std::fmt;
2-
use std::hash::{Hash, Hasher};
32

43
use num_bigint::{BigInt, Sign};
54
use num_integer::Integer;
65
use num_traits::{One, Pow, Signed, ToPrimitive, Zero};
76

87
use crate::format::FormatSpec;
98
use crate::function::{KwArgs, OptionalArg, PyFuncArgs};
9+
use crate::pyhash;
1010
use crate::pyobject::{
1111
IntoPyObject, PyClassImpl, PyContext, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject,
1212
TypeProtocol,
@@ -400,10 +400,11 @@ impl PyInt {
400400
}
401401

402402
#[pymethod(name = "__hash__")]
403-
fn hash(&self, _vm: &VirtualMachine) -> u64 {
404-
let mut hasher = std::collections::hash_map::DefaultHasher::new();
405-
self.value.hash(&mut hasher);
406-
hasher.finish()
403+
pub fn hash(&self, _vm: &VirtualMachine) -> pyhash::PyHash {
404+
match self.value.to_i64() {
405+
Some(value) => (value % pyhash::MODULUS as i64),
406+
None => (&self.value % pyhash::MODULUS).to_i64().unwrap(),
407+
}
407408
}
408409

409410
#[pymethod(name = "__abs__")]

vm/src/obj/objlist.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ impl PyListRef {
387387
Ok(s)
388388
}
389389

390-
fn hash(self, vm: &VirtualMachine) -> PyResult {
390+
fn hash(self, vm: &VirtualMachine) -> PyResult<()> {
391391
Err(vm.new_type_error("unhashable type".to_string()))
392392
}
393393

vm/src/obj/objobject.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use super::objtype;
55
use crate::function::PyFuncArgs;
66
use crate::obj::objproperty::PropertyBuilder;
77
use crate::obj::objtype::PyClassRef;
8+
use crate::pyhash;
89
use crate::pyobject::{
910
IdProtocol, ItemProtocol, PyAttributes, PyContext, PyObject, PyObjectRef, PyResult, PyValue,
1011
TryFromObject, TypeProtocol,
@@ -55,8 +56,8 @@ fn object_ge(_zelf: PyObjectRef, _other: PyObjectRef, vm: &VirtualMachine) -> Py
5556
vm.ctx.not_implemented()
5657
}
5758

58-
fn object_hash(zelf: PyObjectRef, _vm: &VirtualMachine) -> u64 {
59-
zelf.get_id() as u64
59+
fn object_hash(zelf: PyObjectRef, _vm: &VirtualMachine) -> pyhash::PyHash {
60+
zelf.get_id() as pyhash::PyHash
6061
}
6162

6263
fn object_setattr(

vm/src/obj/objset.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,7 @@ impl TryFromObject for SetIterable {
759759
}
760760
}
761761

762-
fn set_hash(_zelf: PySetRef, vm: &VirtualMachine) -> PyResult {
762+
fn set_hash(_zelf: PySetRef, vm: &VirtualMachine) -> PyResult<()> {
763763
Err(vm.new_type_error("unhashable type".to_string()))
764764
}
765765

vm/src/obj/objstr.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
extern crate unicode_xid;
22

33
use std::fmt;
4-
use std::hash::{Hash, Hasher};
54
use std::ops::Range;
65
use std::str::FromStr;
76
use std::string::ToString;
@@ -13,6 +12,7 @@ use unicode_xid::UnicodeXID;
1312

1413
use crate::format::{FormatParseError, FormatPart, FormatString};
1514
use crate::function::{single_or_tuple_any, OptionalArg, PyFuncArgs};
15+
use crate::pyhash;
1616
use crate::pyobject::{
1717
IdProtocol, IntoPyObject, ItemProtocol, PyClassImpl, PyContext, PyIterable, PyObjectRef, PyRef,
1818
PyResult, PyValue, TryFromObject, TryIntoRef, TypeProtocol,
@@ -172,10 +172,8 @@ impl PyString {
172172
}
173173

174174
#[pymethod(name = "__hash__")]
175-
fn hash(&self, _vm: &VirtualMachine) -> usize {
176-
let mut hasher = std::collections::hash_map::DefaultHasher::new();
177-
self.value.hash(&mut hasher);
178-
hasher.finish() as usize
175+
fn hash(&self, _vm: &VirtualMachine) -> pyhash::PyHash {
176+
pyhash::hash_value(&self.value)
179177
}
180178

181179
#[pymethod(name = "__len__")]

vm/src/obj/objtuple.rs

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
use std::cell::{Cell, RefCell};
22
use std::fmt;
3-
use std::hash::{Hash, Hasher};
43

54
use crate::function::OptionalArg;
5+
use crate::pyhash;
66
use crate::pyobject::{IdProtocol, PyClassImpl, PyContext, PyObjectRef, PyRef, PyResult, PyValue};
77
use crate::vm::{ReprGuard, VirtualMachine};
88

99
use super::objbool;
10-
use super::objint;
1110
use super::objiter;
1211
use super::objsequence::{
1312
get_elements, get_item, seq_equal, seq_ge, seq_gt, seq_le, seq_lt, seq_mul,
@@ -129,14 +128,8 @@ impl PyTupleRef {
129128
}
130129
}
131130

132-
fn hash(self, vm: &VirtualMachine) -> PyResult<u64> {
133-
let mut hasher = std::collections::hash_map::DefaultHasher::new();
134-
for element in self.elements.borrow().iter() {
135-
let hash_result = vm.call_method(element, "__hash__", vec![])?;
136-
let element_hash = objint::get_value(&hash_result);
137-
element_hash.hash(&mut hasher);
138-
}
139-
Ok(hasher.finish())
131+
fn hash(self, vm: &VirtualMachine) -> PyResult<pyhash::PyHash> {
132+
pyhash::hash_iter(self.elements.borrow().iter(), vm)
140133
}
141134

142135
fn iter(self, _vm: &VirtualMachine) -> PyTupleIterator {

vm/src/pyhash.rs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
use std::hash::{Hash, Hasher};
2+
3+
use crate::pyobject::PyObjectRef;
4+
use crate::pyobject::PyResult;
5+
use crate::vm::VirtualMachine;
6+
7+
/// Signed hash value, as handed back to Python code by `__hash__`.
pub type PyHash = i64;
/// Unsigned counterpart of `PyHash`, used for the modular arithmetic below.
pub type PyUHash = u64;

/// Bit width of the modulus used when hashing numbers.
pub const BITS: usize = 61;
/// `2**BITS - 1`; numeric hashes are reduced modulo this value.
pub const MODULUS: PyUHash = (1 << BITS) - 1;
// pub const CUTOFF: usize = 7;

/// Hash of positive infinity (negative infinity hashes to `-INF`).
pub const INF: PyHash = 314159;
/// Hash of NaN.
pub const NAN: PyHash = 0;

/// Splits a finite float into `(m, e)` such that `|value| == m * 2**e`,
/// with `m` in `[0.5, 1)`; zero yields `(0.0, 0)`.
///
/// The sign of `value` is discarded. Subnormal inputs have no implicit
/// leading mantissa bit, so they are first scaled into the normal range and
/// the exponent is corrected afterwards.
fn frexp_magnitude(value: f64) -> (f64, i32) {
    const EXPONENT_MASK: u64 = 0x7ff;
    const FRACTION_MASK: u64 = 0x000f_ffff_ffff_ffff;
    // Biased exponent that places the rebuilt mantissa in [0.5, 1).
    const HALF_EXPONENT: u64 = 1022;
    // Scaling by 2**54 is exact and lifts every subnormal into the normal range.
    const SUBNORMAL_SHIFT: i32 = 54;
    const SUBNORMAL_SCALE: f64 = (1u64 << SUBNORMAL_SHIFT) as f64;

    if value == 0.0 {
        return (0.0, 0);
    }

    let mut bits = value.to_bits();
    let mut correction = 0;
    if (bits >> 52) & EXPONENT_MASK == 0 {
        bits = (value * SUBNORMAL_SCALE).to_bits();
        correction = SUBNORMAL_SHIFT;
    }

    let exponent = ((bits >> 52) & EXPONENT_MASK) as i32 - HALF_EXPONENT as i32 - correction;
    let mantissa = f64::from_bits((bits & FRACTION_MASK) | (HALF_EXPONENT << 52));
    (mantissa, exponent)
}

/// Rotates the low `BITS` bits of `x` left by `shift` positions, which
/// multiplies `x` by `2**shift` modulo `MODULUS`.
///
/// Expects `x < 2**BITS` and `shift < BITS`.
fn rotate_left_mod(x: PyUHash, shift: u32) -> PyUHash {
    ((x << shift) & MODULUS) | (x >> (BITS as u32 - shift))
}

/// Hashes a float so that it agrees with the hash of an equal integer
/// (modelled on CPython's `_Py_HashDouble`).
///
/// * NaN hashes to `NAN`, positive/negative infinity to `INF` / `-INF`.
/// * Any other value is reduced modulo `MODULUS`; the result carries the
///   sign of `value`, and both zeros hash to `0`.
pub fn hash_float(value: f64) -> PyHash {
    if value.is_nan() {
        return NAN;
    }
    if value.is_infinite() {
        return if value > 0.0 { INF } else { -INF };
    }

    let (mut m, mut e) = frexp_magnitude(value);

    // process 28 bits at a time; this should work well both for binary
    // and hexadecimal floating point.
    let mut x: PyUHash = 0;
    while m != 0.0 {
        x = rotate_left_mod(x, 28);
        m *= 268_435_456.0; // 2**28
        e -= 28;
        let y = m as PyUHash; // pull out integer part
        m -= y as f64;
        x += y;
        if x >= MODULUS {
            x -= MODULUS;
        }
    }

    // adjust for the exponent; first reduce it modulo BITS
    const BITS32: i32 = BITS as i32;
    let shift = if e >= 0 {
        e % BITS32
    } else {
        BITS32 - 1 - ((-1 - e) % BITS32)
    };
    x = rotate_left_mod(x, shift as u32);

    // NOTE(review): CPython additionally remaps a result of -1 to -2. That is
    // deliberately not done here, so hash(-1.0) stays equal to the integer
    // hash of -1 -- confirm before changing either side.
    let magnitude = x as PyHash;
    if value < 0.0 {
        -magnitude
    } else {
        magnitude
    }
}
68+
69+
pub fn hash_value<T: Hash>(data: &T) -> PyHash {
70+
let mut hasher = std::collections::hash_map::DefaultHasher::new();
71+
data.hash(&mut hasher);
72+
hasher.finish() as PyHash
73+
}
74+
75+
pub fn hash_iter<'a, I: std::iter::Iterator<Item = &'a PyObjectRef>>(
76+
iter: I,
77+
vm: &VirtualMachine,
78+
) -> PyResult<PyHash> {
79+
let mut hasher = std::collections::hash_map::DefaultHasher::new();
80+
for element in iter {
81+
let item_hash = vm._hash(&element)?;
82+
item_hash.hash(&mut hasher);
83+
}
84+
Ok(hasher.finish() as PyHash)
85+
}

0 commit comments

Comments
 (0)