Skip to content

Commit ed5b182

Browse files
authored
Merge pull request RustPython#4420 from harupy/simplify-string-check
Simplify string check in `Lexer.lex_identifier`
2 parents 134355e + c640312 commit ed5b182

File tree

3 files changed

+54
-50
lines changed

3 files changed

+54
-50
lines changed

compiler/parser/src/lexer.rs

Lines changed: 16 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -241,59 +241,23 @@ where
241241

242242
// Lexer helper functions:
243243
fn lex_identifier(&mut self) -> LexResult {
244-
let mut name = String::new();
245-
let start_pos = self.get_pos();
246-
247244
// Detect potential string like rb'' b'' f'' u'' r''
248-
let mut saw_b = false;
249-
let mut saw_r = false;
250-
let mut saw_u = false;
251-
let mut saw_f = false;
252-
loop {
253-
// Detect r"", f"", b"" and u""
254-
if !(saw_b || saw_u || saw_f) && matches!(self.window[0], Some('b' | 'B')) {
255-
saw_b = true;
256-
} else if !(saw_b || saw_r || saw_u || saw_f)
257-
&& matches!(self.window[0], Some('u' | 'U'))
258-
{
259-
saw_u = true;
260-
} else if !(saw_r || saw_u) && matches!(self.window[0], Some('r' | 'R')) {
261-
saw_r = true;
262-
} else if !(saw_b || saw_u || saw_f) && matches!(self.window[0], Some('f' | 'F')) {
263-
saw_f = true;
264-
} else {
265-
break;
245+
match self.window[..3] {
246+
[Some(c), Some('"' | '\''), ..] => {
247+
if let Ok(kind) = StringKind::try_from(c) {
248+
return self.lex_string(kind);
249+
}
266250
}
267-
268-
// Take up char into name:
269-
name.push(self.next_char().unwrap());
270-
271-
// Check if we have a string:
272-
if matches!(self.window[0], Some('"' | '\'')) {
273-
let kind = if saw_r {
274-
if saw_b {
275-
StringKind::RawBytes
276-
} else if saw_f {
277-
StringKind::RawFString
278-
} else {
279-
StringKind::RawString
280-
}
281-
} else if saw_b {
282-
StringKind::Bytes
283-
} else if saw_u {
284-
StringKind::Unicode
285-
} else if saw_f {
286-
StringKind::FString
287-
} else {
288-
StringKind::String
289-
};
290-
291-
return self
292-
.lex_string(kind)
293-
.map(|(_, tok, end_pos)| (start_pos, tok, end_pos));
251+
[Some(c1), Some(c2), Some('"' | '\'')] => {
252+
if let Ok(kind) = StringKind::try_from([c1, c2]) {
253+
return self.lex_string(kind);
254+
}
294255
}
295-
}
256+
_ => {}
257+
};
296258

259+
let start_pos = self.get_pos();
260+
let mut name = String::new();
297261
while self.is_identifier_continuation() {
298262
name.push(self.next_char().unwrap());
299263
}
@@ -495,6 +459,9 @@ where
495459

496460
fn lex_string(&mut self, kind: StringKind) -> LexResult {
497461
let start_pos = self.get_pos();
462+
for _ in 0..kind.prefix_len() {
463+
self.next_char();
464+
}
498465
let quote_char = self.next_char().unwrap();
499466
let mut string_content = String::new();
500467

compiler/parser/src/string_parser.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ impl<'a> StringParser<'a> {
2626
str_start: Location,
2727
str_end: Location,
2828
) -> Self {
29-
let offset = kind.to_string().len() + if triple_quoted { 3 } else { 1 };
29+
let offset = kind.prefix_len() + if triple_quoted { 3 } else { 1 };
3030
Self {
3131
chars: source.chars().peekable(),
3232
kind,

compiler/parser/src/token.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,34 @@ pub enum StringKind {
240240
Unicode,
241241
}
242242

243+
impl TryFrom<char> for StringKind {
244+
type Error = String;
245+
246+
fn try_from(ch: char) -> Result<Self, String> {
247+
match ch {
248+
'r' | 'R' => Ok(StringKind::RawString),
249+
'f' | 'F' => Ok(StringKind::FString),
250+
'u' | 'U' => Ok(StringKind::Unicode),
251+
'b' | 'B' => Ok(StringKind::Bytes),
252+
c => Err(format!("Unexpected string prefix: {c}")),
253+
}
254+
}
255+
}
256+
257+
impl TryFrom<[char; 2]> for StringKind {
258+
type Error = String;
259+
260+
fn try_from(chars: [char; 2]) -> Result<Self, String> {
261+
match chars {
262+
['r' | 'R', 'f' | 'F'] => Ok(StringKind::RawFString),
263+
['f' | 'F', 'r' | 'R'] => Ok(StringKind::RawFString),
264+
['r' | 'R', 'b' | 'B'] => Ok(StringKind::RawBytes),
265+
['b' | 'B', 'r' | 'R'] => Ok(StringKind::RawBytes),
266+
[c1, c2] => Err(format!("Unexpected string prefix: {c1}{c2}")),
267+
}
268+
}
269+
}
270+
243271
impl fmt::Display for StringKind {
244272
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
245273
use StringKind::*;
@@ -274,4 +302,13 @@ impl StringKind {
274302
pub fn is_unicode(&self) -> bool {
275303
matches!(self, StringKind::Unicode)
276304
}
305+
306+
pub fn prefix_len(&self) -> usize {
307+
use StringKind::*;
308+
match self {
309+
String => 0,
310+
RawString | FString | Unicode | Bytes => 1,
311+
RawFString | RawBytes => 2,
312+
}
313+
}
277314
}

0 commit comments

Comments
 (0)