Skip to content

Commit 062a543

Browse files
youknowoneyt2b
andcommitted
Fix f-string padding
Co-Authored-By: yt2b <[email protected]>
1 parent d3b587a commit 062a543

File tree

3 files changed

+147
-79
lines changed

3 files changed

+147
-79
lines changed

common/src/format.rs

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::float_ops;
1+
use crate::{float_ops, str::BorrowedStr};
22
use itertools::{Itertools, PeekingNext};
33
use num_bigint::{BigInt, Sign};
44
use num_traits::{cast::ToPrimitive, Signed};
@@ -517,8 +517,12 @@ impl FormatSpec {
517517
}
518518
};
519519
let magnitude_str = self.add_magnitude_separators(raw_magnitude_str?, sign_str);
520-
self.format_sign_and_align(&magnitude_str, sign_str, FormatAlign::Right)
521-
.map_err(|msg| msg.to_owned())
520+
self.format_sign_and_align(
521+
unsafe { &BorrowedStr::from_ascii_unchecked(magnitude_str.as_bytes()) },
522+
sign_str,
523+
FormatAlign::Right,
524+
)
525+
.map_err(|msg| msg.to_owned())
522526
}
523527

524528
#[inline]
@@ -596,11 +600,15 @@ impl FormatSpec {
596600
};
597601
let sign_prefix = format!("{}{}", sign_str, prefix);
598602
let magnitude_str = self.add_magnitude_separators(raw_magnitude_str?, &sign_prefix);
599-
self.format_sign_and_align(&magnitude_str, &sign_prefix, FormatAlign::Right)
600-
.map_err(|msg| msg.to_owned())
603+
self.format_sign_and_align(
604+
unsafe { &BorrowedStr::from_ascii_unchecked(magnitude_str.as_bytes()) },
605+
&sign_prefix,
606+
FormatAlign::Right,
607+
)
608+
.map_err(|msg| msg.to_owned())
601609
}
602610

603-
pub fn format_string(&self, s: &str) -> Result<String, &'static str> {
611+
pub fn format_string(&self, s: &BorrowedStr) -> Result<String, &'static str> {
604612
match self.format_type {
605613
Some(FormatType::String) | None => self
606614
.format_sign_and_align(s, "", FormatAlign::Left)
@@ -616,14 +624,13 @@ impl FormatSpec {
616624

617625
fn format_sign_and_align(
618626
&self,
619-
magnitude_string: &str,
627+
magnitude_str: &BorrowedStr,
620628
sign_str: &str,
621629
default_align: FormatAlign,
622630
) -> Result<String, &'static str> {
623631
let align = self.align.unwrap_or(default_align);
624632

625-
// Use the byte length as the string length since we're in ascii
626-
let num_chars = magnitude_string.len();
633+
let num_chars = magnitude_str.char_len();
627634
let fill_char = self.fill.unwrap_or(' ');
628635
let fill_chars_needed: i32 = self.width.map_or(0, |w| {
629636
cmp::max(0, (w as i32) - (num_chars as i32) - (sign_str.len() as i32))
@@ -632,20 +639,20 @@ impl FormatSpec {
632639
FormatAlign::Left => format!(
633640
"{}{}{}",
634641
sign_str,
635-
magnitude_string,
642+
magnitude_str,
636643
FormatSpec::compute_fill_string(fill_char, fill_chars_needed)
637644
),
638645
FormatAlign::Right => format!(
639646
"{}{}{}",
640647
FormatSpec::compute_fill_string(fill_char, fill_chars_needed),
641648
sign_str,
642-
magnitude_string
649+
magnitude_str
643650
),
644651
FormatAlign::AfterSign => format!(
645652
"{}{}{}",
646653
sign_str,
647654
FormatSpec::compute_fill_string(fill_char, fill_chars_needed),
648-
magnitude_string
655+
magnitude_str
649656
),
650657
FormatAlign::Center => {
651658
let left_fill_chars_needed = fill_chars_needed / 2;
@@ -654,7 +661,7 @@ impl FormatSpec {
654661
FormatSpec::compute_fill_string(fill_char, left_fill_chars_needed);
655662
let right_fill_string =
656663
FormatSpec::compute_fill_string(fill_char, right_fill_chars_needed);
657-
format!("{left_fill_string}{sign_str}{magnitude_string}{right_fill_string}")
664+
format!("{left_fill_string}{sign_str}{magnitude_str}{right_fill_string}")
658665
}
659666
})
660667
}

common/src/str.rs

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
use crate::{
2+
atomic::{PyAtomic, Radium},
3+
hash::PyHash,
4+
};
15
use ascii::AsciiString;
26
use once_cell::unsync::OnceCell;
37
use std::{
@@ -12,6 +16,119 @@ pub type wchar_t = libc::wchar_t;
1216
#[allow(non_camel_case_types)]
1317
pub type wchar_t = u32;
1418

19+
/// Utf8 + state.ascii (+ PyUnicode_Kind in future)
20+
#[derive(Debug, Copy, Clone, PartialEq)]
21+
pub enum PyStrKind {
22+
Ascii,
23+
Utf8,
24+
}
25+
26+
impl std::ops::BitOr for PyStrKind {
27+
type Output = Self;
28+
fn bitor(self, other: Self) -> Self {
29+
match (self, other) {
30+
(Self::Ascii, Self::Ascii) => Self::Ascii,
31+
_ => Self::Utf8,
32+
}
33+
}
34+
}
35+
36+
impl PyStrKind {
37+
#[inline]
38+
pub fn new_data(self) -> PyStrKindData {
39+
match self {
40+
PyStrKind::Ascii => PyStrKindData::Ascii,
41+
PyStrKind::Utf8 => PyStrKindData::Utf8(Radium::new(usize::MAX)),
42+
}
43+
}
44+
}
45+
46+
#[derive(Debug)]
47+
pub enum PyStrKindData {
48+
Ascii,
49+
// uses usize::MAX as a sentinel for "uncomputed"
50+
Utf8(PyAtomic<usize>),
51+
}
52+
53+
impl PyStrKindData {
54+
#[inline]
55+
pub fn kind(&self) -> PyStrKind {
56+
match self {
57+
PyStrKindData::Ascii => PyStrKind::Ascii,
58+
PyStrKindData::Utf8(_) => PyStrKind::Utf8,
59+
}
60+
}
61+
}
62+
63+
pub struct BorrowedStr<'a> {
64+
bytes: &'a [u8],
65+
kind: PyStrKindData,
66+
#[allow(dead_code)]
67+
hash: PyAtomic<PyHash>,
68+
}
69+
70+
impl<'a> BorrowedStr<'a> {
71+
/// # Safety
72+
/// `s` have to be an ascii string
73+
#[inline]
74+
pub unsafe fn from_ascii_unchecked(s: &'a [u8]) -> Self {
75+
debug_assert!(s.is_ascii());
76+
Self {
77+
bytes: s,
78+
kind: PyStrKind::Ascii.new_data(),
79+
hash: PyAtomic::<PyHash>::new(0),
80+
}
81+
}
82+
83+
#[inline]
84+
pub fn as_str(&self) -> &str {
85+
unsafe {
86+
// SAFETY: Both PyStrKind::{Ascii, Utf8} are valid utf8 string
87+
std::str::from_utf8_unchecked(self.bytes)
88+
}
89+
}
90+
91+
#[inline]
92+
pub fn char_len(&self) -> usize {
93+
match self.kind {
94+
PyStrKindData::Ascii => self.bytes.len(),
95+
PyStrKindData::Utf8(ref len) => match len.load(core::sync::atomic::Ordering::Relaxed) {
96+
usize::MAX => self._compute_char_len(),
97+
len => len,
98+
},
99+
}
100+
}
101+
102+
#[cold]
103+
fn _compute_char_len(&self) -> usize {
104+
match self.kind {
105+
PyStrKindData::Utf8(ref char_len) => {
106+
let len = self.as_str().chars().count();
107+
// len cannot be usize::MAX, since vec.capacity() < sys.maxsize
108+
char_len.store(len, core::sync::atomic::Ordering::Relaxed);
109+
len
110+
}
111+
_ => unsafe {
112+
debug_assert!(false); // invalid for non-utf8 strings
113+
std::hint::unreachable_unchecked()
114+
},
115+
}
116+
}
117+
}
118+
119+
impl std::ops::Deref for BorrowedStr<'_> {
120+
type Target = str;
121+
fn deref(&self) -> &str {
122+
self.as_str()
123+
}
124+
}
125+
126+
impl std::fmt::Display for BorrowedStr<'_> {
127+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
128+
self.as_str().fmt(f)
129+
}
130+
}
131+
15132
pub fn try_get_chars(s: &str, range: impl RangeBounds<usize>) -> Option<&str> {
16133
let mut chars = s.chars();
17134
let start = match range.start_bound() {

vm/src/builtins/str.rs

Lines changed: 10 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@ use crate::{
77
anystr::{self, adjust_indices, AnyStr, AnyStrContainer, AnyStrWrapper},
88
atomic_func,
99
class::PyClassImpl,
10-
common::format::{FormatSpec, FormatString, FromTemplate},
10+
common::{
11+
format::{FormatSpec, FormatString, FromTemplate},
12+
str::{BorrowedStr, PyStrKind, PyStrKindData},
13+
},
1114
convert::{ToPyException, ToPyObject},
1215
format::{format, format_map},
1316
function::{ArgIterable, FuncArgs, OptionalArg, OptionalOption, PyComparisonValue},
@@ -39,48 +42,6 @@ use unic_ucd_category::GeneralCategory;
3942
use unic_ucd_ident::{is_xid_continue, is_xid_start};
4043
use unicode_casing::CharExt;
4144

42-
/// Utf8 + state.ascii (+ PyUnicode_Kind in future)
43-
#[derive(Debug, Copy, Clone, PartialEq)]
44-
pub(crate) enum PyStrKind {
45-
Ascii,
46-
Utf8,
47-
}
48-
49-
impl std::ops::BitOr for PyStrKind {
50-
type Output = Self;
51-
fn bitor(self, other: Self) -> Self {
52-
match (self, other) {
53-
(Self::Ascii, Self::Ascii) => Self::Ascii,
54-
_ => Self::Utf8,
55-
}
56-
}
57-
}
58-
59-
impl PyStrKind {
60-
fn new_data(self) -> PyStrKindData {
61-
match self {
62-
PyStrKind::Ascii => PyStrKindData::Ascii,
63-
PyStrKind::Utf8 => PyStrKindData::Utf8(Radium::new(usize::MAX)),
64-
}
65-
}
66-
}
67-
68-
#[derive(Debug)]
69-
enum PyStrKindData {
70-
Ascii,
71-
// uses usize::MAX as a sentinel for "uncomputed"
72-
Utf8(PyAtomic<usize>),
73-
}
74-
75-
impl PyStrKindData {
76-
fn kind(&self) -> PyStrKind {
77-
match self {
78-
PyStrKindData::Ascii => PyStrKind::Ascii,
79-
PyStrKindData::Utf8(_) => PyStrKind::Utf8,
80-
}
81-
}
82-
}
83-
8445
impl TryFromBorrowedObject for String {
8546
fn try_from_borrowed_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult<Self> {
8647
obj.try_value_with(|pystr: &PyStr| Ok(pystr.as_str().to_owned()), vm)
@@ -385,6 +346,10 @@ impl PyStr {
385346
PyStrKind::Utf8 => self.as_str().chars().all(test),
386347
}
387348
}
349+
350+
fn borrow(&self) -> &BorrowedStr {
351+
unsafe { std::mem::transmute(self) }
352+
}
388353
}
389354

390355
#[pyclass(
@@ -475,28 +440,7 @@ impl PyStr {
475440
#[pymethod(name = "__len__")]
476441
#[inline]
477442
pub fn char_len(&self) -> usize {
478-
match self.kind {
479-
PyStrKindData::Ascii => self.bytes.len(),
480-
PyStrKindData::Utf8(ref len) => match len.load(atomic::Ordering::Relaxed) {
481-
usize::MAX => self._compute_char_len(),
482-
len => len,
483-
},
484-
}
485-
}
486-
#[cold]
487-
fn _compute_char_len(&self) -> usize {
488-
match self.kind {
489-
PyStrKindData::Utf8(ref char_len) => {
490-
let len = self.as_str().chars().count();
491-
// len cannot be usize::MAX, since vec.capacity() < sys.maxsize
492-
char_len.store(len, atomic::Ordering::Relaxed);
493-
len
494-
}
495-
_ => unsafe {
496-
debug_assert!(false); // invalid for non-utf8 strings
497-
std::hint::unreachable_unchecked()
498-
},
499-
}
443+
self.borrow().char_len()
500444
}
501445

502446
#[pymethod(name = "isascii")]
@@ -787,7 +731,7 @@ impl PyStr {
787731
#[pymethod(name = "__format__")]
788732
fn format_str(&self, spec: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
789733
FormatSpec::parse(spec.as_str())
790-
.and_then(|format_spec| format_spec.format_string(self.as_str()))
734+
.and_then(|format_spec| format_spec.format_string(self.borrow()))
791735
.map_err(|err| vm.new_value_error(err.to_string()))
792736
}
793737

0 commit comments

Comments
 (0)