Skip to content

Commit 4187a6e

Browse files
committed
Add support for star-expanded arguments in list and tuple construction.
1 parent 1974cbe commit 4187a6e

File tree

6 files changed

+174
-79
lines changed

6 files changed

+174
-79
lines changed

parser/src/lexer.rs

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,38 @@ where
227227
fn lex_identifier(&mut self) -> Spanned<Tok> {
228228
let mut name = String::new();
229229
let start_pos = self.get_pos();
230+
231+
// Detect a potential string prefix such as rb'', b'', f'', u'' or r''
232+
let mut saw_b = false;
233+
let mut saw_r = false;
234+
let mut saw_u = false;
235+
let saw_f = false;
236+
loop {
237+
// Detect r"", f"", b"" and u""
238+
// TODO: handle f-strings
239+
if !(saw_b || saw_u || saw_f) && (self.chr0 == Some('b') || self.chr0 == Some('B')) {
240+
saw_b = true;
241+
} else if !(saw_b || saw_r || saw_u || saw_f)
242+
&& (self.chr0 == Some('u') || self.chr0 == Some('U'))
243+
{
244+
saw_u = true;
245+
} else if !(saw_r || saw_u || saw_f)
246+
&& (self.chr0 == Some('r') || self.chr0 == Some('R'))
247+
{
248+
saw_r = true;
249+
} else {
250+
break;
251+
}
252+
253+
// Consume the prefix character into name:
254+
name.push(self.next_char().unwrap());
255+
256+
// Check if we have a string:
257+
if self.chr0 == Some('"') || self.chr0 == Some('\'') {
258+
return self.lex_string(saw_b, saw_r, saw_u, saw_f);
259+
}
260+
}
261+
230262
while self.is_char() {
231263
name.push(self.next_char().unwrap());
232264
}
@@ -279,14 +311,13 @@ where
279311
}
280312
}
281313

282-
fn lex_string(&mut self) -> Spanned<Tok> {
283-
let type_char = match self.chr0 {
284-
Some('u') | Some('f') | Some('r') => self.next_char(),
285-
_ => None,
286-
};
287-
288-
let is_raw = type_char == Some('r');
289-
314+
fn lex_string(
315+
&mut self,
316+
_is_bytes: bool,
317+
is_raw: bool,
318+
_is_unicode: bool,
319+
_is_fstring: bool,
320+
) -> Spanned<Tok> {
290321
let quote_char = self.next_char().unwrap();
291322
let mut string_content = String::new();
292323
let start_pos = self.get_pos();
@@ -480,27 +511,16 @@ where
480511

481512
match self.chr0 {
482513
Some('0'...'9') => return Some(self.lex_number()),
483-
Some('_') | Some('a'...'z') | Some('A'...'Z') => {
484-
// Detect r"", f"" and u""
485-
match self.chr0 {
486-
Some('r') | Some('u') | Some('f') => match self.chr1 {
487-
Some('\'') | Some('\"') => {
488-
return Some(self.lex_string());
489-
}
490-
_ => return Some(self.lex_identifier()),
491-
},
492-
_ => return Some(self.lex_identifier()),
493-
}
494-
}
514+
Some('_') | Some('a'...'z') | Some('A'...'Z') => return Some(self.lex_identifier()),
495515
Some('#') => {
496516
self.lex_comment();
497517
continue;
498518
}
499519
Some('"') => {
500-
return Some(self.lex_string());
520+
return Some(self.lex_string(false, false, false, false));
501521
}
502522
Some('\'') => {
503-
return Some(self.lex_string());
523+
return Some(self.lex_string(false, false, false, false));
504524
}
505525
Some('=') => {
506526
let tok_start = self.get_pos();

parser/src/python.lalrpop

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -808,14 +808,15 @@ Atom: ast::Expression = {
808808
};
809809

810810
TestListComp: Vec<ast::Expression> = {
811-
<e:TestList> <_trailing_comma:","?> => {
812-
e
811+
<e:TestOrStarExpr> <e2:("," TestOrStarExpr)*> <_trailing_comma:","?> => {
812+
let mut res = vec![e];
813+
res.extend(e2.into_iter().map(|x| x.1));
814+
res
813815
},
814816
};
815817

816818
TestListComp2: ast::Expression = {
817-
<e:Test> <c:CompFor> => {
818-
// vec![e]
819+
<e:TestOrStarExpr> <c:CompFor> => {
819820
ast::Expression::Comprehension {
820821
kind: Box::new(ast::ComprehensionKind::List { element: e }),
821822
generators: c,

tests/snippets/list.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
x = [1,2,3]
1+
x = [1, 2, 3]
22
assert x[0] == 1
33
assert x[1] == 2
44
# assert x[7]
5+
6+
y = [2, *x]
7+
assert y == [2, 1, 2, 3]
8+

vm/src/bytecode.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,11 @@ pub enum Instruction {
150150
},
151151
BuildTuple {
152152
size: usize,
153+
unpack: bool,
153154
},
154155
BuildList {
155156
size: usize,
157+
unpack: bool,
156158
},
157159
BuildSet {
158160
size: usize,

vm/src/compile.rs

Lines changed: 94 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,10 @@ impl Compiler {
523523

524524
// If we have more than 1 return value, make it a tuple:
525525
if size > 1 {
526-
self.emit(Instruction::BuildTuple { size });
526+
self.emit(Instruction::BuildTuple {
527+
size,
528+
unpack: false,
529+
});
527530
}
528531
}
529532
None => {
@@ -597,7 +600,10 @@ impl Compiler {
597600
for element in &args.defaults {
598601
self.compile_expression(element)?;
599602
}
600-
self.emit(Instruction::BuildTuple { size });
603+
self.emit(Instruction::BuildTuple {
604+
size,
605+
unpack: false,
606+
});
601607
}
602608

603609
self.code_object_stack.push(CodeObject::new(
@@ -774,40 +780,7 @@ impl Compiler {
774780
function,
775781
args,
776782
keywords,
777-
} => {
778-
self.compile_expression(&*function)?;
779-
let count = args.len() + keywords.len();
780-
781-
// Normal arguments:
782-
for value in args {
783-
self.compile_expression(value)?;
784-
}
785-
786-
// Keyword arguments:
787-
if keywords.len() > 0 {
788-
let mut kwarg_names = vec![];
789-
for keyword in keywords {
790-
if let Some(name) = &keyword.name {
791-
kwarg_names.push(bytecode::Constant::String {
792-
value: name.to_string(),
793-
});
794-
} else {
795-
// This means **kwargs!
796-
panic!("name must be set");
797-
}
798-
self.compile_expression(&keyword.value)?;
799-
}
800-
801-
self.emit(Instruction::LoadConst {
802-
value: bytecode::Constant::Tuple {
803-
elements: kwarg_names,
804-
},
805-
});
806-
self.emit(Instruction::CallFunctionKw { count });
807-
} else {
808-
self.emit(Instruction::CallFunction { count });
809-
}
810-
}
783+
} => self.compile_call(function, args, keywords)?,
811784
ast::Expression::BoolOp { .. } => {
812785
self.compile_test(expression, None, None, EvalContext::Expression)?
813786
}
@@ -870,17 +843,19 @@ impl Compiler {
870843
}
871844
ast::Expression::List { elements } => {
872845
let size = elements.len();
873-
for element in elements {
874-
self.compile_expression(element)?;
875-
}
876-
self.emit(Instruction::BuildList { size: size });
846+
let must_unpack = self.gather_elements(elements)?;
847+
self.emit(Instruction::BuildList {
848+
size: size,
849+
unpack: must_unpack,
850+
});
877851
}
878852
ast::Expression::Tuple { elements } => {
879853
let size = elements.len();
880-
for element in elements {
881-
self.compile_expression(element)?;
882-
}
883-
self.emit(Instruction::BuildTuple { size: size });
854+
let must_unpack = self.gather_elements(elements)?;
855+
self.emit(Instruction::BuildTuple {
856+
size: size,
857+
unpack: must_unpack,
858+
});
884859
}
885860
ast::Expression::Set { elements } => {
886861
let size = elements.len();
@@ -971,6 +946,7 @@ impl Compiler {
971946
ast::Expression::Starred { value } => {
972947
self.compile_expression(value)?;
973948
self.emit(Instruction::Unpack);
949+
panic!("We should not just unpack a starred args, since the size is unknown.");
974950
}
975951
ast::Expression::IfExpression { test, body, orelse } => {
976952
let no_label = self.new_label();
@@ -986,6 +962,76 @@ impl Compiler {
986962
Ok(())
987963
}
988964

965+
// Emit the bytecode for a call expression.
//
// The callee expression is compiled first, then every positional argument
// in source order, then the value of every keyword argument in source order.
// `count` is the number of positional plus keyword arguments.
//
// When keywords are present, their names are loaded as a single constant
// tuple of strings after the keyword values and `CallFunctionKw` is emitted;
// otherwise a plain `CallFunction` is emitted.
//
// Returns the first error produced by `compile_expression`, if any.
// Panics when a keyword has no name (the `**kwargs` form is not handled here).
fn compile_call(
966+
&mut self,
967+
function: &ast::Expression,
968+
args: &Vec<ast::Expression>,
969+
keywords: &Vec<ast::Keyword>,
970+
) -> Result<(), String> {
971+
// The callee is compiled before any of its arguments.
self.compile_expression(&*function)?;
972+
let count = args.len() + keywords.len();
973+
974+
// Normal arguments:
975+
for value in args {
976+
self.compile_expression(value)?;
977+
}
978+
979+
// Keyword arguments:
980+
if keywords.len() > 0 {
981+
// Names are collected in the same order as the keyword values are compiled.
let mut kwarg_names = vec![];
982+
for keyword in keywords {
983+
if let Some(name) = &keyword.name {
984+
kwarg_names.push(bytecode::Constant::String {
985+
value: name.to_string(),
986+
});
987+
} else {
988+
// This means **kwargs!
989+
panic!("name must be set");
990+
}
991+
self.compile_expression(&keyword.value)?;
992+
}
993+
994+
// The tuple of keyword names is loaded after all argument values.
self.emit(Instruction::LoadConst {
995+
value: bytecode::Constant::Tuple {
996+
elements: kwarg_names,
997+
},
998+
});
999+
self.emit(Instruction::CallFunctionKw { count });
1000+
} else {
1001+
self.emit(Instruction::CallFunction { count });
1002+
}
1003+
Ok(())
1004+
}
1005+
1006+
// Given a slice of plain and starred expressions, emit code that leaves one
1007+
// value per element on the stack and report whether unpacking is required.
//
// Without any starred element, each expression is compiled as-is and
// `Ok(false)` is returned.
//
// With at least one starred element, `Ok(true)` is returned and every stack
// entry is something to be expanded: the inner value of a starred element is
// compiled unwrapped, while every plain element is wrapped in a 1-tuple.
// The List and Tuple arms that call this pass the returned flag on as the
// `unpack` field of `BuildList` / `BuildTuple`.
//
// Errors from `compile_expression` are propagated to the caller.
1008+
fn gather_elements(&mut self, elements: &Vec<ast::Expression>) -> Result<bool, String> {
1009+
// First determine if we have starred elements:
1010+
let has_stars = elements.iter().any(|e| {
1011+
if let ast::Expression::Starred { .. } = e {
1012+
true
1013+
} else {
1014+
false
1015+
}
1016+
});
1017+
1018+
for element in elements {
1019+
if let ast::Expression::Starred { value } = element {
1020+
// Compile the starred operand itself; it is expanded later.
self.compile_expression(value)?;
1021+
} else {
1022+
self.compile_expression(element)?;
1023+
if has_stars {
1024+
// Wrap the single value in a 1-tuple so that it can be expanded
// in the same way as the starred operands.
self.emit(Instruction::BuildTuple {
1025+
size: 1,
1026+
unpack: false,
1027+
});
1028+
}
1029+
}
1030+
}
1031+
1032+
Ok(has_stars)
1033+
}
1034+
9891035
fn compile_comprehension(
9901036
&mut self,
9911037
kind: &ast::ComprehensionKind,
@@ -1016,7 +1062,10 @@ impl Compiler {
10161062
match kind {
10171063
ast::ComprehensionKind::GeneratorExpression { .. } => {}
10181064
ast::ComprehensionKind::List { .. } => {
1019-
self.emit(Instruction::BuildList { size: 0 });
1065+
self.emit(Instruction::BuildList {
1066+
size: 0,
1067+
unpack: false,
1068+
});
10201069
}
10211070
ast::ComprehensionKind::Set { .. } => {
10221071
self.emit(Instruction::BuildSet { size: 0 });

vm/src/frame.rs

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,8 @@ impl Frame {
227227
}
228228
Ok(None)
229229
}
230-
bytecode::Instruction::BuildList { size } => {
231-
let elements = self.pop_multiple(*size);
230+
bytecode::Instruction::BuildList { size, unpack } => {
231+
let elements = self.get_elements(vm, *size, *unpack)?;
232232
let list_obj = vm.ctx.new_list(elements);
233233
self.push_value(list_obj);
234234
Ok(None)
@@ -239,8 +239,8 @@ impl Frame {
239239
self.push_value(py_obj);
240240
Ok(None)
241241
}
242-
bytecode::Instruction::BuildTuple { size } => {
243-
let elements = self.pop_multiple(*size);
242+
bytecode::Instruction::BuildTuple { size, unpack } => {
243+
let elements = self.get_elements(vm, *size, *unpack)?;
244244
let list_obj = vm.ctx.new_tuple(elements);
245245
self.push_value(list_obj);
246246
Ok(None)
@@ -625,9 +625,7 @@ impl Frame {
625625
}
626626
bytecode::Instruction::Unpack => {
627627
let value = self.pop_value();
628-
629-
let elements = objtuple::get_elements(&value);
630-
628+
let elements = self.extract_elements(vm, &value)?;
631629
for element in elements.into_iter().rev() {
632630
self.push_value(element);
633631
}
@@ -653,6 +651,27 @@ impl Frame {
653651
Ok(elements)
654652
}
655653

654+
// Pop `size` values from the stack and return the elements for a new
// list or tuple.
//
// When `unpack` is false the popped values are returned unchanged.
// When `unpack` is true each popped value is expanded with
// `extract_elements` and the results are concatenated, keeping the order in
// which `pop_multiple` returned the values.
//
// An error from `extract_elements` is returned to the caller as-is.
fn get_elements(
655+
&mut self,
656+
vm: &mut VirtualMachine,
657+
size: usize,
658+
unpack: bool,
659+
) -> Result<Vec<PyObjectRef>, PyObjectRef> {
660+
let elements = self.pop_multiple(size);
661+
if unpack {
662+
let mut result: Vec<PyObjectRef> = vec![];
663+
for element in elements {
664+
// Expand one popped value into its items; `?` stops at the first failure.
let expanded = self.extract_elements(vm, &element)?;
665+
for inner in expanded {
666+
result.push(inner);
667+
}
668+
}
669+
Ok(result)
670+
} else {
671+
Ok(elements)
672+
}
673+
}
674+
656675
fn import(
657676
&mut self,
658677
vm: &mut VirtualMachine,

0 commit comments

Comments
 (0)