Skip to content

Commit 58ae899

Browse files
committed
Fix str.isspace to support unicode space characters.
1 parent e11285a commit 58ae899

File tree

6 files changed

+12
-20
lines changed

6 files changed

+12
-20
lines changed

Cargo.lock

Lines changed: 1 addition & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/test/test_bool.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -204,8 +204,6 @@ def test_contains(self):
204204
self.assertIs(1 in {}, False)
205205
self.assertIs(1 in {1:1}, True)
206206

207-
# TODO: RUSTPYTHON
208-
@unittest.expectedFailure
209207
def test_string(self):
210208
self.assertIs("xyz".endswith("z"), True)
211209
self.assertIs("xyz".endswith("x"), False)

Lib/test/test_unicode.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,8 +620,6 @@ def test_istitle(self):
620620
for ch in ['\U00010429', '\U0001044E', '\U0001F40D', '\U0001F46F']:
621621
self.assertFalse(ch.istitle(), '{!a} is not title'.format(ch))
622622

623-
# TODO: RUSTPYTHON
624-
@unittest.expectedFailure
625623
def test_isspace(self):
626624
super().test_isspace()
627625
self.checkequalnofix(True, '\u2000', 'isspace')

vm/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,9 @@ unicode_names2 = "0.4"
8181
# https://github.com/RustPython/RustPython/pull/832#discussion_r275428939
8282
unicode-casing = "0.1"
8383
# update version all at the same time
84-
unic-bidi = "0.9"
8584
unic-char-property = "0.9"
8685
unic-normal = "0.9"
86+
unic-ucd-bidi = "0.9"
8787
unic-ucd-category = "0.9"
8888
unic-ucd-age = "0.9"
8989
unic-ucd-ident = "0.9"

vm/src/obj/objstr.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use std::string::ToString;
88
use crossbeam_utils::atomic::AtomicCell;
99
use itertools::Itertools;
1010
use num_traits::ToPrimitive;
11+
use unic_ucd_bidi::BidiClass;
1112
use unic_ucd_category::GeneralCategory;
1213
use unic_ucd_ident::{is_xid_continue, is_xid_start};
1314
use unicode_casing::CharExt;
@@ -751,11 +752,16 @@ impl PyString {
751752
.all(|c| c == '\u{0020}' || char_is_printable(c))
752753
}
753754

754-
// cpython's isspace ignores whitespace, including \t and \n, etc, unless the whole string is empty
755-
// which is why isspace is using is_ascii_whitespace. Same for isupper & islower
756755
#[pymethod]
757756
fn isspace(&self) -> bool {
758-
!self.value.is_empty() && self.value.chars().all(|c| c.is_ascii_whitespace())
757+
if self.value.is_empty() {
758+
return false;
759+
}
760+
use unic_ucd_bidi::bidi_class::abbr_names::*;
761+
self.value.chars().all(|c| {
762+
GeneralCategory::of(c) == GeneralCategory::SpaceSeparator
763+
|| matches!(BidiClass::of(c), WS | B | S)
764+
})
759765
}
760766

761767
// Return true if all cased characters in the string are lowercase and there is at least one cased character, false otherwise.

vm/src/stdlib/unicodedata.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ use crate::pyobject::{PyClassImpl, PyObject, PyObjectRef, PyResult, PyValue};
99
use crate::vm::VirtualMachine;
1010

1111
use itertools::Itertools;
12-
use unic_bidi::BidiClass;
1312
use unic_char_property::EnumeratedCharProperty;
1413
use unic_normal::StrNormalForm;
1514
use unic_ucd_age::{Age, UnicodeVersion, UNICODE_VERSION};
15+
use unic_ucd_bidi::BidiClass;
1616
use unic_ucd_category::GeneralCategory;
1717

1818
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {

0 commit comments

Comments
 (0)