Skip to content

Commit 5053d6d

Browse files
committed
Add bytes literal support.
1 parent 7e10671 commit 5053d6d

File tree

9 files changed: 46 additions and 15 deletions.

parser/src/ast.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ pub enum Expression {
191191
String {
192192
value: String,
193193
},
194+
Bytes {
195+
value: Vec<u8>,
196+
},
194197
Identifier {
195198
name: String,
196199
},

parser/src/lexer.rs

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ where
232232
let mut saw_b = false;
233233
let mut saw_r = false;
234234
let mut saw_u = false;
235-
let saw_f = false;
235+
let mut saw_f = false;
236236
loop {
237237
// Detect r"", f"", b"" and u""
238238
// TODO: handle f-strings
@@ -242,10 +242,12 @@ where
242242
&& (self.chr0 == Some('u') || self.chr0 == Some('U'))
243243
{
244244
saw_u = true;
245-
} else if !(saw_r || saw_u || saw_f)
246-
&& (self.chr0 == Some('r') || self.chr0 == Some('R'))
247-
{
245+
} else if !(saw_r || saw_u) && (self.chr0 == Some('r') || self.chr0 == Some('R')) {
248246
saw_r = true;
247+
} else if !(saw_b || saw_u || saw_f)
248+
&& (self.chr0 == Some('f') || self.chr0 == Some('F'))
249+
{
250+
saw_f = true;
249251
} else {
250252
break;
251253
}
@@ -313,7 +315,7 @@ where
313315

314316
fn lex_string(
315317
&mut self,
316-
_is_bytes: bool,
318+
is_bytes: bool,
317319
is_raw: bool,
318320
_is_unicode: bool,
319321
_is_fstring: bool,
@@ -400,13 +402,17 @@ where
400402
}
401403
let end_pos = self.get_pos();
402404

403-
return Ok((
404-
start_pos,
405+
let tok = if is_bytes {
406+
Tok::Bytes {
407+
value: string_content.as_bytes().to_vec(),
408+
}
409+
} else {
405410
Tok::String {
406411
value: string_content,
407-
},
408-
end_pos,
409-
));
412+
}
413+
};
414+
415+
return Ok((start_pos, tok, end_pos));
410416
}
411417

412418
fn is_char(&self) -> bool {
@@ -921,10 +927,10 @@ mod tests {
921927
tokens,
922928
vec![
923929
Tok::String {
924-
value: "\\\\".to_string()
930+
value: "\\\\".to_string(),
925931
},
926932
Tok::String {
927-
value: "\\".to_string()
933+
value: "\\".to_string(),
928934
}
929935
]
930936
);

parser/src/python.lalrpop

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,7 @@ SliceOp: ast::Expression = {
768768
}
769769

770770
Atom: ast::Expression = {
771-
<s:String> => ast::Expression::String { value: s },
771+
StringConstant,
772772
<n:Number> => ast::Expression::Number { value: n },
773773
<i:Identifier> => ast::Expression::Identifier { name: i },
774774
"[" <e:TestListComp?> "]" => {
@@ -972,9 +972,14 @@ Number: ast::Number = {
972972
}
973973
};
974974

975-
String: String = {
975+
StringConstant: ast::Expression = {
976976
<s:string+> => {
977-
s.join("")
977+
let glued = s.join("");
978+
ast::Expression::String { value: glued }
979+
},
980+
<s:bytes+> => {
981+
let glued = s.into_iter().flatten().collect::<Vec<u8>>();
982+
ast::Expression::Bytes { value: glued }
978983
},
979984
};
980985

@@ -1067,6 +1072,7 @@ extern {
10671072
"None" => lexer::Tok::None,
10681073
number => lexer::Tok::Number { value: <String> },
10691074
string => lexer::Tok::String { value: <String> },
1075+
bytes => lexer::Tok::Bytes { value: <Vec<u8>> },
10701076
name => lexer::Tok::Name { name: <String> },
10711077
"\n" => lexer::Tok::Newline,
10721078
";" => lexer::Tok::Semi,

parser/src/token.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ pub enum Tok {
44
Name { name: String },
55
Number { value: String },
66
String { value: String },
7+
Bytes { value: Vec<u8> },
78
Newline,
89
Indent,
910
Dedent,

tests/snippets/strings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
assert len(""" " \" """) == 5
1111

1212
assert type("") is str
13+
assert type(b"") is bytes
1314

1415
assert str(1) == "1"
1516
assert str(2.1) == "2.1"

vm/src/bytecode.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ pub enum Constant {
199199
Float { value: f64 },
200200
Boolean { value: bool },
201201
String { value: String },
202+
Bytes { value: Vec<u8> },
202203
Code { code: CodeObject },
203204
Tuple { elements: Vec<Constant> },
204205
None,

vm/src/compile.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,13 @@ impl Compiler {
932932
},
933933
});
934934
}
935+
ast::Expression::Bytes { value } => {
936+
self.emit(Instruction::LoadConst {
937+
value: bytecode::Constant::Bytes {
938+
value: value.clone(),
939+
},
940+
});
941+
}
935942
ast::Expression::Identifier { name } => {
936943
self.emit(Instruction::LoadName {
937944
name: name.to_string(),

vm/src/frame.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,7 @@ impl Frame {
10751075
bytecode::Constant::Integer { ref value } => vm.ctx.new_int(*value),
10761076
bytecode::Constant::Float { ref value } => vm.ctx.new_float(*value),
10771077
bytecode::Constant::String { ref value } => vm.new_str(value.clone()),
1078+
bytecode::Constant::Bytes { ref value } => vm.ctx.new_bytes(value.clone()),
10781079
bytecode::Constant::Boolean { ref value } => vm.new_bool(value.clone()),
10791080
bytecode::Constant::Code { ref code } => {
10801081
PyObject::new(PyObjectKind::Code { code: code.clone() }, vm.get_type())

vm/src/stdlib/ast.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,11 @@ fn expression_to_ast(ctx: &PyContext, expression: &ast::Expression) -> PyObjectR
543543
node.set_attr("s", ctx.new_str(value.clone()));
544544
node
545545
}
546+
ast::Expression::Bytes { value } => {
547+
let node = create_node(ctx, "Bytes");
548+
node.set_attr("s", ctx.new_bytes(value.clone()));
549+
node
550+
}
546551
};
547552

548553
// TODO: retrieve correct lineno:

0 commit comments

Comments
 (0)