From 064919348dc344682ef16d374ac56022a92f5932 Mon Sep 17 00:00:00 2001 From: coolreader18 <33094578+coolreader18@users.noreply.github.com> Date: Wed, 12 Jun 2019 21:43:43 -0500 Subject: [PATCH 001/459] Split off bytecode compilation into a separate crate --- Cargo.toml | 13 + src/bytecode.rs | 446 +++++++++++ src/compile.rs | 1874 ++++++++++++++++++++++++++++++++++++++++++++ src/error.rs | 67 ++ src/lib.rs | 7 + src/symboltable.rs | 588 ++++++++++++++ 6 files changed, 2995 insertions(+) create mode 100644 Cargo.toml create mode 100644 src/bytecode.rs create mode 100644 src/compile.rs create mode 100644 src/error.rs create mode 100644 src/lib.rs create mode 100644 src/symboltable.rs diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..fc0d1959 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "rustpython_compiler" +version = "0.1.0" +authors = ["coolreader18 <33094578+coolreader18@users.noreply.github.com>"] +edition = "2018" + +[dependencies] +bitflags = "1.1" +rustpython_parser = { path = "../parser" } +serde = { version = "1.0", features = ["derive"] } +num-complex = { version = "0.2", features = ["serde"] } +num-bigint = { version = "0.2", features = ["serde"] } +log = "0.3" diff --git a/src/bytecode.rs b/src/bytecode.rs new file mode 100644 index 00000000..0fc226b5 --- /dev/null +++ b/src/bytecode.rs @@ -0,0 +1,446 @@ +//! Implement python as a virtual machine with bytecodes. This module +//! implements bytecode structure. + +/* + * Primitive instruction type, which can be encoded and decoded. + */ + +use bitflags::bitflags; +use num_bigint::BigInt; +use num_complex::Complex64; +use rustpython_parser::ast; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; +use std::fmt; + +/// Primary container of a single code object. Each python function has +/// a codeobject. Also a module has a codeobject. 
+#[derive(Clone, PartialEq, Serialize, Deserialize)] +pub struct CodeObject { + pub instructions: Vec, + /// Jump targets. + pub label_map: HashMap, + pub locations: Vec, + pub arg_names: Vec, // Names of positional arguments + pub varargs: Varargs, // *args or * + pub kwonlyarg_names: Vec, + pub varkeywords: Varargs, // **kwargs or ** + pub source_path: String, + pub first_line_number: usize, + pub obj_name: String, // Name of the object that created this code object + pub is_generator: bool, +} + +bitflags! { + #[derive(Serialize, Deserialize)] + pub struct FunctionOpArg: u8 { + const HAS_DEFAULTS = 0x01; + const HAS_KW_ONLY_DEFAULTS = 0x02; + const HAS_ANNOTATIONS = 0x04; + } +} + +pub type Label = usize; + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum NameScope { + Local, + NonLocal, + Global, +} + +/// A Single bytecode instruction. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum Instruction { + Import { + name: String, + symbol: Option, + }, + ImportStar { + name: String, + }, + LoadName { + name: String, + scope: NameScope, + }, + StoreName { + name: String, + scope: NameScope, + }, + DeleteName { + name: String, + }, + StoreSubscript, + DeleteSubscript, + StoreAttr { + name: String, + }, + DeleteAttr { + name: String, + }, + LoadConst { + value: Constant, + }, + UnaryOperation { + op: UnaryOperator, + }, + BinaryOperation { + op: BinaryOperator, + inplace: bool, + }, + LoadAttr { + name: String, + }, + CompareOperation { + op: ComparisonOperator, + }, + Pop, + Rotate { + amount: usize, + }, + Duplicate, + GetIter, + Pass, + Continue, + Break, + Jump { + target: Label, + }, + JumpIf { + target: Label, + }, + JumpIfFalse { + target: Label, + }, + MakeFunction { + flags: FunctionOpArg, + }, + CallFunction { + typ: CallType, + }, + ForIter { + target: Label, + }, + ReturnValue, + YieldValue, + YieldFrom, + SetupLoop { + start: Label, + end: Label, + }, + SetupExcept { + handler: Label, + }, + SetupWith { + end: 
Label, + }, + CleanupWith { + end: Label, + }, + PopBlock, + Raise { + argc: usize, + }, + BuildString { + size: usize, + }, + BuildTuple { + size: usize, + unpack: bool, + }, + BuildList { + size: usize, + unpack: bool, + }, + BuildSet { + size: usize, + unpack: bool, + }, + BuildMap { + size: usize, + unpack: bool, + }, + BuildSlice { + size: usize, + }, + ListAppend { + i: usize, + }, + SetAdd { + i: usize, + }, + MapAdd { + i: usize, + }, + PrintExpr, + LoadBuildClass, + UnpackSequence { + size: usize, + }, + UnpackEx { + before: usize, + after: usize, + }, + Unpack, + FormatValue { + conversion: Option, + spec: String, + }, + PopException, +} + +use self::Instruction::*; + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum CallType { + Positional(usize), + Keyword(usize), + Ex(bool), +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum Constant { + Integer { value: BigInt }, + Float { value: f64 }, + Complex { value: Complex64 }, + Boolean { value: bool }, + String { value: String }, + Bytes { value: Vec }, + Code { code: Box }, + Tuple { elements: Vec }, + None, + Ellipsis, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum ComparisonOperator { + Greater, + GreaterOrEqual, + Less, + LessOrEqual, + Equal, + NotEqual, + In, + NotIn, + Is, + IsNot, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum BinaryOperator { + Power, + Multiply, + MatrixMultiply, + Divide, + FloorDivide, + Modulo, + Add, + Subtract, + Subscript, + Lshift, + Rshift, + And, + Xor, + Or, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum UnaryOperator { + Not, + Invert, + Minus, + Plus, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum Varargs { + None, + Unnamed, + Named(String), +} + +/* +Maintain a stack of blocks on the VM. 
+pub enum BlockType { + Loop, + Except, +} +*/ + +impl CodeObject { + pub fn new( + arg_names: Vec, + varargs: Varargs, + kwonlyarg_names: Vec, + varkeywords: Varargs, + source_path: String, + first_line_number: usize, + obj_name: String, + ) -> CodeObject { + CodeObject { + instructions: Vec::new(), + label_map: HashMap::new(), + locations: Vec::new(), + arg_names, + varargs, + kwonlyarg_names, + varkeywords, + source_path, + first_line_number, + obj_name, + is_generator: false, + } + } + + pub fn get_constants(&self) -> impl Iterator { + self.instructions.iter().filter_map(|x| { + if let Instruction::LoadConst { value } = x { + Some(value) + } else { + None + } + }) + } +} + +impl fmt::Display for CodeObject { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let label_targets: HashSet<&usize> = self.label_map.values().collect(); + for (offset, instruction) in self.instructions.iter().enumerate() { + let arrow = if label_targets.contains(&offset) { + ">>" + } else { + " " + }; + write!(f, " {} {:5} ", arrow, offset)?; + instruction.fmt_dis(f, &self.label_map)?; + } + Ok(()) + } +} + +impl Instruction { + fn fmt_dis(&self, f: &mut fmt::Formatter, label_map: &HashMap) -> fmt::Result { + macro_rules! 
w { + ($variant:ident) => { + write!(f, "{:20}\n", stringify!($variant)) + }; + ($variant:ident, $var:expr) => { + write!(f, "{:20} ({})\n", stringify!($variant), $var) + }; + ($variant:ident, $var1:expr, $var2:expr) => { + write!(f, "{:20} ({}, {})\n", stringify!($variant), $var1, $var2) + }; + } + + match self { + Import { name, symbol } => w!(Import, name, format!("{:?}", symbol)), + ImportStar { name } => w!(ImportStar, name), + LoadName { name, scope } => w!(LoadName, name, format!("{:?}", scope)), + StoreName { name, scope } => w!(StoreName, name, format!("{:?}", scope)), + DeleteName { name } => w!(DeleteName, name), + StoreSubscript => w!(StoreSubscript), + DeleteSubscript => w!(DeleteSubscript), + StoreAttr { name } => w!(StoreAttr, name), + DeleteAttr { name } => w!(DeleteAttr, name), + LoadConst { value } => w!(LoadConst, value), + UnaryOperation { op } => w!(UnaryOperation, format!("{:?}", op)), + BinaryOperation { op, inplace } => w!(BinaryOperation, format!("{:?}", op), inplace), + LoadAttr { name } => w!(LoadAttr, name), + CompareOperation { op } => w!(CompareOperation, format!("{:?}", op)), + Pop => w!(Pop), + Rotate { amount } => w!(Rotate, amount), + Duplicate => w!(Duplicate), + GetIter => w!(GetIter), + Pass => w!(Pass), + Continue => w!(Continue), + Break => w!(Break), + Jump { target } => w!(Jump, label_map[target]), + JumpIf { target } => w!(JumpIf, label_map[target]), + JumpIfFalse { target } => w!(JumpIfFalse, label_map[target]), + MakeFunction { flags } => w!(MakeFunction, format!("{:?}", flags)), + CallFunction { typ } => w!(CallFunction, format!("{:?}", typ)), + ForIter { target } => w!(ForIter, label_map[target]), + ReturnValue => w!(ReturnValue), + YieldValue => w!(YieldValue), + YieldFrom => w!(YieldFrom), + SetupLoop { start, end } => w!(SetupLoop, label_map[start], label_map[end]), + SetupExcept { handler } => w!(SetupExcept, handler), + SetupWith { end } => w!(SetupWith, end), + CleanupWith { end } => w!(CleanupWith, end), + 
PopBlock => w!(PopBlock), + Raise { argc } => w!(Raise, argc), + BuildString { size } => w!(BuildString, size), + BuildTuple { size, unpack } => w!(BuildTuple, size, unpack), + BuildList { size, unpack } => w!(BuildList, size, unpack), + BuildSet { size, unpack } => w!(BuildSet, size, unpack), + BuildMap { size, unpack } => w!(BuildMap, size, unpack), + BuildSlice { size } => w!(BuildSlice, size), + ListAppend { i } => w!(ListAppend, i), + SetAdd { i } => w!(SetAdd, i), + MapAdd { i } => w!(MapAdd, i), + PrintExpr => w!(PrintExpr), + LoadBuildClass => w!(LoadBuildClass), + UnpackSequence { size } => w!(UnpackSequence, size), + UnpackEx { before, after } => w!(UnpackEx, before, after), + Unpack => w!(Unpack), + FormatValue { spec, .. } => w!(FormatValue, spec), // TODO: write conversion + PopException => w!(PopException), + } + } +} + +impl fmt::Display for Constant { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Constant::Integer { value } => write!(f, "{}", value), + Constant::Float { value } => write!(f, "{}", value), + Constant::Complex { value } => write!(f, "{}", value), + Constant::Boolean { value } => write!(f, "{}", value), + Constant::String { value } => write!(f, "{:?}", value), + Constant::Bytes { value } => write!(f, "{:?}", value), + Constant::Code { code } => write!(f, "{:?}", code), + Constant::Tuple { elements } => write!( + f, + "({})", + elements + .iter() + .map(|e| format!("{}", e)) + .collect::>() + .join(", ") + ), + Constant::None => write!(f, "None"), + Constant::Ellipsis => write!(f, "Ellipsis"), + } + } +} + +impl fmt::Debug for CodeObject { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "", + self.obj_name, self.source_path, self.first_line_number + ) + } +} + +impl From for Varargs { + fn from(varargs: ast::Varargs) -> Varargs { + match varargs { + ast::Varargs::None => Varargs::None, + ast::Varargs::Unnamed => Varargs::Unnamed, + ast::Varargs::Named(param) => 
Varargs::Named(param.arg), + } + } +} + +impl<'a> From<&'a ast::Varargs> for Varargs { + fn from(varargs: &'a ast::Varargs) -> Varargs { + match varargs { + ast::Varargs::None => Varargs::None, + ast::Varargs::Unnamed => Varargs::Unnamed, + ast::Varargs::Named(ref param) => Varargs::Named(param.arg.clone()), + } + } +} diff --git a/src/compile.rs b/src/compile.rs new file mode 100644 index 00000000..dec158e9 --- /dev/null +++ b/src/compile.rs @@ -0,0 +1,1874 @@ +//! +//! Take an AST and transform it into bytecode +//! +//! Inspirational code: +//! https://github.com/python/cpython/blob/master/Python/compile.c +//! https://github.com/micropython/micropython/blob/master/py/compile.c + +use crate::bytecode::{self, CallType, CodeObject, Instruction, Varargs}; +use crate::error::{CompileError, CompileErrorType}; +use crate::symboltable::{make_symbol_table, statements_to_symbol_table, SymbolRole, SymbolScope}; +use num_complex::Complex64; +use rustpython_parser::{ast, parser}; + +struct Compiler { + code_object_stack: Vec, + scope_stack: Vec, + nxt_label: usize, + source_path: Option, + current_source_location: ast::Location, + current_qualified_path: Option, + in_loop: bool, + in_function_def: bool, +} + +/// Compile a given sourcecode into a bytecode object. 
+pub fn compile(source: &str, mode: &Mode, source_path: String) -> Result { + let mut compiler = Compiler::new(); + compiler.source_path = Some(source_path); + compiler.push_new_code_object("".to_string()); + + match mode { + Mode::Exec => { + let ast = parser::parse_program(source)?; + let symbol_table = make_symbol_table(&ast)?; + compiler.compile_program(&ast, symbol_table) + } + Mode::Eval => { + let statement = parser::parse_statement(source)?; + let symbol_table = statements_to_symbol_table(&statement)?; + compiler.compile_statement_eval(&statement, symbol_table) + } + Mode::Single => { + let ast = parser::parse_program(source)?; + let symbol_table = make_symbol_table(&ast)?; + compiler.compile_program_single(&ast, symbol_table) + } + }?; + + let code = compiler.pop_code_object(); + trace!("Compilation completed: {:?}", code); + Ok(code) +} + +pub enum Mode { + Exec, + Eval, + Single, +} + +#[derive(Clone, Copy)] +enum EvalContext { + Statement, + Expression, +} + +type Label = usize; + +impl Compiler { + fn new() -> Self { + Compiler { + code_object_stack: Vec::new(), + scope_stack: Vec::new(), + nxt_label: 0, + source_path: None, + current_source_location: ast::Location::default(), + current_qualified_path: None, + in_loop: false, + in_function_def: false, + } + } + + fn push_new_code_object(&mut self, obj_name: String) { + let line_number = self.get_source_line_number(); + self.code_object_stack.push(CodeObject::new( + Vec::new(), + Varargs::None, + Vec::new(), + Varargs::None, + self.source_path.clone().unwrap(), + line_number, + obj_name, + )); + } + + fn pop_code_object(&mut self) -> CodeObject { + // self.scope_stack.pop().unwrap(); + self.code_object_stack.pop().unwrap() + } + + fn compile_program( + &mut self, + program: &ast::Program, + symbol_scope: SymbolScope, + ) -> Result<(), CompileError> { + let size_before = self.code_object_stack.len(); + self.scope_stack.push(symbol_scope); + self.compile_statements(&program.statements)?; + 
assert!(self.code_object_stack.len() == size_before); + + // Emit None at end: + self.emit(Instruction::LoadConst { + value: bytecode::Constant::None, + }); + self.emit(Instruction::ReturnValue); + Ok(()) + } + + fn compile_program_single( + &mut self, + program: &ast::Program, + symbol_scope: SymbolScope, + ) -> Result<(), CompileError> { + self.scope_stack.push(symbol_scope); + + let mut emitted_return = false; + + for (i, statement) in program.statements.iter().enumerate() { + let is_last = i == program.statements.len() - 1; + + if let ast::Statement::Expression { ref expression } = statement.node { + self.compile_expression(expression)?; + + if is_last { + self.emit(Instruction::Duplicate); + self.emit(Instruction::PrintExpr); + self.emit(Instruction::ReturnValue); + emitted_return = true; + } else { + self.emit(Instruction::PrintExpr); + } + } else { + self.compile_statement(&statement)?; + } + } + + if !emitted_return { + self.emit(Instruction::LoadConst { + value: bytecode::Constant::None, + }); + self.emit(Instruction::ReturnValue); + } + + Ok(()) + } + + // Compile statement in eval mode: + fn compile_statement_eval( + &mut self, + statements: &[ast::LocatedStatement], + symbol_table: SymbolScope, + ) -> Result<(), CompileError> { + self.scope_stack.push(symbol_table); + for statement in statements { + if let ast::Statement::Expression { ref expression } = statement.node { + self.compile_expression(expression)?; + } else { + return Err(CompileError { + error: CompileErrorType::ExpectExpr, + location: statement.location.clone(), + }); + } + } + self.emit(Instruction::ReturnValue); + Ok(()) + } + + fn compile_statements( + &mut self, + statements: &[ast::LocatedStatement], + ) -> Result<(), CompileError> { + for statement in statements { + self.compile_statement(statement)? 
+ } + Ok(()) + } + + fn scope_for_name(&self, name: &str) -> bytecode::NameScope { + let role = self.lookup_name(name); + match role { + SymbolRole::Global => bytecode::NameScope::Global, + SymbolRole::Nonlocal => bytecode::NameScope::NonLocal, + _ => bytecode::NameScope::Local, + } + } + + fn load_name(&mut self, name: &str) { + let scope = self.scope_for_name(name); + self.emit(Instruction::LoadName { + name: name.to_string(), + scope, + }); + } + + fn store_name(&mut self, name: &str) { + let scope = self.scope_for_name(name); + self.emit(Instruction::StoreName { + name: name.to_string(), + scope, + }); + } + + fn compile_statement(&mut self, statement: &ast::LocatedStatement) -> Result<(), CompileError> { + trace!("Compiling {:?}", statement); + self.set_source_location(&statement.location); + + match &statement.node { + ast::Statement::Import { import_parts } => { + for ast::SingleImport { + module, + symbol, + alias, + } in import_parts + { + match symbol { + Some(name) if name == "*" => { + self.emit(Instruction::ImportStar { + name: module.clone(), + }); + } + _ => { + self.emit(Instruction::Import { + name: module.clone(), + symbol: symbol.clone(), + }); + let name = match alias { + Some(alias) => alias.clone(), + None => match symbol { + Some(symbol) => symbol.clone(), + None => module.clone(), + }, + }; + self.store_name(&name); + } + } + } + } + ast::Statement::Expression { expression } => { + self.compile_expression(expression)?; + + // Pop result of stack, since we not use it: + self.emit(Instruction::Pop); + } + ast::Statement::Global { .. } | ast::Statement::Nonlocal { .. } => { + // Handled during symbol table construction. 
+ } + ast::Statement::If { test, body, orelse } => { + let end_label = self.new_label(); + match orelse { + None => { + // Only if: + self.compile_test(test, None, Some(end_label), EvalContext::Statement)?; + self.compile_statements(body)?; + self.set_label(end_label); + } + Some(statements) => { + // if - else: + let else_label = self.new_label(); + self.compile_test(test, None, Some(else_label), EvalContext::Statement)?; + self.compile_statements(body)?; + self.emit(Instruction::Jump { target: end_label }); + + // else: + self.set_label(else_label); + self.compile_statements(statements)?; + } + } + self.set_label(end_label); + } + ast::Statement::While { test, body, orelse } => { + let start_label = self.new_label(); + let else_label = self.new_label(); + let end_label = self.new_label(); + self.emit(Instruction::SetupLoop { + start: start_label, + end: end_label, + }); + + self.set_label(start_label); + + self.compile_test(test, None, Some(else_label), EvalContext::Statement)?; + + let was_in_loop = self.in_loop; + self.in_loop = true; + self.compile_statements(body)?; + self.in_loop = was_in_loop; + self.emit(Instruction::Jump { + target: start_label, + }); + self.set_label(else_label); + self.emit(Instruction::PopBlock); + if let Some(orelse) = orelse { + self.compile_statements(orelse)?; + } + self.set_label(end_label); + } + ast::Statement::With { items, body } => { + let end_label = self.new_label(); + for item in items { + self.compile_expression(&item.context_expr)?; + self.emit(Instruction::SetupWith { end: end_label }); + match &item.optional_vars { + Some(var) => { + self.compile_store(var)?; + } + None => { + self.emit(Instruction::Pop); + } + } + } + + self.compile_statements(body)?; + for _ in 0..items.len() { + self.emit(Instruction::CleanupWith { end: end_label }); + } + self.set_label(end_label); + } + ast::Statement::For { + target, + iter, + body, + orelse, + } => self.compile_for(target, iter, body, orelse)?, + ast::Statement::AsyncFor { .. 
} => { + unimplemented!("async for"); + } + ast::Statement::Raise { exception, cause } => match exception { + Some(value) => { + self.compile_expression(value)?; + match cause { + Some(cause) => { + self.compile_expression(cause)?; + self.emit(Instruction::Raise { argc: 2 }); + } + None => { + self.emit(Instruction::Raise { argc: 1 }); + } + } + } + None => { + self.emit(Instruction::Raise { argc: 0 }); + } + }, + ast::Statement::Try { + body, + handlers, + orelse, + finalbody, + } => self.compile_try_statement(body, handlers, orelse, finalbody)?, + ast::Statement::FunctionDef { + name, + args, + body, + decorator_list, + returns, + } => self.compile_function_def(name, args, body, decorator_list, returns)?, + ast::Statement::AsyncFunctionDef { .. } => { + unimplemented!("async def"); + } + ast::Statement::ClassDef { + name, + body, + bases, + keywords, + decorator_list, + } => self.compile_class_def(name, body, bases, keywords, decorator_list)?, + ast::Statement::Assert { test, msg } => { + // TODO: if some flag, ignore all assert statements! 
+ + let end_label = self.new_label(); + self.compile_test(test, Some(end_label), None, EvalContext::Statement)?; + self.emit(Instruction::LoadName { + name: String::from("AssertionError"), + scope: bytecode::NameScope::Local, + }); + match msg { + Some(e) => { + self.compile_expression(e)?; + self.emit(Instruction::CallFunction { + typ: CallType::Positional(1), + }); + } + None => { + self.emit(Instruction::CallFunction { + typ: CallType::Positional(0), + }); + } + } + self.emit(Instruction::Raise { argc: 1 }); + self.set_label(end_label); + } + ast::Statement::Break => { + if !self.in_loop { + return Err(CompileError { + error: CompileErrorType::InvalidBreak, + location: statement.location.clone(), + }); + } + self.emit(Instruction::Break); + } + ast::Statement::Continue => { + if !self.in_loop { + return Err(CompileError { + error: CompileErrorType::InvalidContinue, + location: statement.location.clone(), + }); + } + self.emit(Instruction::Continue); + } + ast::Statement::Return { value } => { + if !self.in_function_def { + return Err(CompileError { + error: CompileErrorType::InvalidReturn, + location: statement.location.clone(), + }); + } + match value { + Some(v) => { + self.compile_expression(v)?; + } + None => { + self.emit(Instruction::LoadConst { + value: bytecode::Constant::None, + }); + } + } + + self.emit(Instruction::ReturnValue); + } + ast::Statement::Assign { targets, value } => { + self.compile_expression(value)?; + + for (i, target) in targets.iter().enumerate() { + if i + 1 != targets.len() { + self.emit(Instruction::Duplicate); + } + self.compile_store(target)?; + } + } + ast::Statement::AugAssign { target, op, value } => { + self.compile_expression(target)?; + self.compile_expression(value)?; + + // Perform operation: + self.compile_op(op, true); + self.compile_store(target)?; + } + ast::Statement::Delete { targets } => { + for target in targets { + self.compile_delete(target)?; + } + } + ast::Statement::Pass => { + self.emit(Instruction::Pass); 
+ } + } + Ok(()) + } + + fn compile_delete(&mut self, expression: &ast::Expression) -> Result<(), CompileError> { + match expression { + ast::Expression::Identifier { name } => { + self.emit(Instruction::DeleteName { + name: name.to_string(), + }); + } + ast::Expression::Attribute { value, name } => { + self.compile_expression(value)?; + self.emit(Instruction::DeleteAttr { + name: name.to_string(), + }); + } + ast::Expression::Subscript { a, b } => { + self.compile_expression(a)?; + self.compile_expression(b)?; + self.emit(Instruction::DeleteSubscript); + } + ast::Expression::Tuple { elements } => { + for element in elements { + self.compile_delete(element)?; + } + } + _ => { + return Err(CompileError { + error: CompileErrorType::Delete(expression.name()), + location: self.current_source_location.clone(), + }); + } + } + Ok(()) + } + + fn enter_function( + &mut self, + name: &str, + args: &ast::Parameters, + ) -> Result { + let have_defaults = !args.defaults.is_empty(); + if have_defaults { + // Construct a tuple: + let size = args.defaults.len(); + for element in &args.defaults { + self.compile_expression(element)?; + } + self.emit(Instruction::BuildTuple { + size, + unpack: false, + }); + } + + let mut num_kw_only_defaults = 0; + for (kw, default) in args.kwonlyargs.iter().zip(&args.kw_defaults) { + if let Some(default) = default { + self.emit(Instruction::LoadConst { + value: bytecode::Constant::String { + value: kw.arg.clone(), + }, + }); + self.compile_expression(default)?; + num_kw_only_defaults += 1; + } + } + if num_kw_only_defaults > 0 { + self.emit(Instruction::BuildMap { + size: num_kw_only_defaults, + unpack: false, + }); + } + + let line_number = self.get_source_line_number(); + self.code_object_stack.push(CodeObject::new( + args.args.iter().map(|a| a.arg.clone()).collect(), + Varargs::from(&args.vararg), + args.kwonlyargs.iter().map(|a| a.arg.clone()).collect(), + Varargs::from(&args.kwarg), + self.source_path.clone().unwrap(), + line_number, + 
name.to_string(), + )); + self.enter_scope(); + + let mut flags = bytecode::FunctionOpArg::empty(); + if have_defaults { + flags |= bytecode::FunctionOpArg::HAS_DEFAULTS; + } + if num_kw_only_defaults > 0 { + flags |= bytecode::FunctionOpArg::HAS_KW_ONLY_DEFAULTS; + } + + Ok(flags) + } + + fn prepare_decorators( + &mut self, + decorator_list: &[ast::Expression], + ) -> Result<(), CompileError> { + for decorator in decorator_list { + self.compile_expression(decorator)?; + } + Ok(()) + } + + fn apply_decorators(&mut self, decorator_list: &[ast::Expression]) { + // Apply decorators: + for _ in decorator_list { + self.emit(Instruction::CallFunction { + typ: CallType::Positional(1), + }); + } + } + + fn compile_try_statement( + &mut self, + body: &[ast::LocatedStatement], + handlers: &[ast::ExceptHandler], + orelse: &Option>, + finalbody: &Option>, + ) -> Result<(), CompileError> { + let mut handler_label = self.new_label(); + let finally_label = self.new_label(); + let else_label = self.new_label(); + // try: + self.emit(Instruction::SetupExcept { + handler: handler_label, + }); + self.compile_statements(body)?; + self.emit(Instruction::PopBlock); + self.emit(Instruction::Jump { target: else_label }); + + // except handlers: + self.set_label(handler_label); + // Exception is on top of stack now + handler_label = self.new_label(); + for handler in handlers { + // If we gave a typ, + // check if this handler can handle the exception: + if let Some(exc_type) = &handler.typ { + // Duplicate exception for test: + self.emit(Instruction::Duplicate); + + // Check exception type: + self.emit(Instruction::LoadName { + name: String::from("isinstance"), + scope: bytecode::NameScope::Local, + }); + self.emit(Instruction::Rotate { amount: 2 }); + self.compile_expression(exc_type)?; + self.emit(Instruction::CallFunction { + typ: CallType::Positional(2), + }); + + // We cannot handle this exception type: + self.emit(Instruction::JumpIfFalse { + target: handler_label, + }); + + // We 
have a match, store in name (except x as y) + if let Some(alias) = &handler.name { + self.store_name(alias); + } else { + // Drop exception from top of stack: + self.emit(Instruction::Pop); + } + } else { + // Catch all! + // Drop exception from top of stack: + self.emit(Instruction::Pop); + } + + // Handler code: + self.compile_statements(&handler.body)?; + self.emit(Instruction::PopException); + self.emit(Instruction::Jump { + target: finally_label, + }); + + // Emit a new label for the next handler + self.set_label(handler_label); + handler_label = self.new_label(); + } + self.emit(Instruction::Jump { + target: handler_label, + }); + self.set_label(handler_label); + // If code flows here, we have an unhandled exception, + // emit finally code and raise again! + // Duplicate finally code here: + // TODO: this bytecode is now duplicate, could this be + // improved? + if let Some(statements) = finalbody { + self.compile_statements(statements)?; + } + self.emit(Instruction::Raise { argc: 0 }); + + // We successfully ran the try block: + // else: + self.set_label(else_label); + if let Some(statements) = orelse { + self.compile_statements(statements)?; + } + + // finally: + self.set_label(finally_label); + if let Some(statements) = finalbody { + self.compile_statements(statements)?; + } + // unimplemented!(); + Ok(()) + } + + fn compile_function_def( + &mut self, + name: &str, + args: &ast::Parameters, + body: &[ast::LocatedStatement], + decorator_list: &[ast::Expression], + returns: &Option, // TODO: use type hint somehow.. 
+ ) -> Result<(), CompileError> { + // Create bytecode for this function: + // remember to restore self.in_loop to the original after the function is compiled + let was_in_loop = self.in_loop; + let was_in_function_def = self.in_function_def; + self.in_loop = false; + self.in_function_def = true; + + let old_qualified_path = self.current_qualified_path.clone(); + let qualified_name = self.create_qualified_name(name, ""); + self.current_qualified_path = Some(self.create_qualified_name(name, ".")); + + self.prepare_decorators(decorator_list)?; + + let mut flags = self.enter_function(name, args)?; + + let (new_body, doc_str) = get_doc(body); + + self.compile_statements(new_body)?; + + // Emit None at end: + self.emit(Instruction::LoadConst { + value: bytecode::Constant::None, + }); + self.emit(Instruction::ReturnValue); + let code = self.pop_code_object(); + self.leave_scope(); + + // Prepare type annotations: + let mut num_annotations = 0; + + // Return annotation: + if let Some(annotation) = returns { + // key: + self.emit(Instruction::LoadConst { + value: bytecode::Constant::String { + value: "return".to_string(), + }, + }); + // value: + self.compile_expression(annotation)?; + num_annotations += 1; + } + + for arg in args.args.iter() { + if let Some(annotation) = &arg.annotation { + self.emit(Instruction::LoadConst { + value: bytecode::Constant::String { + value: arg.arg.to_string(), + }, + }); + self.compile_expression(&annotation)?; + num_annotations += 1; + } + } + + if num_annotations > 0 { + flags |= bytecode::FunctionOpArg::HAS_ANNOTATIONS; + self.emit(Instruction::BuildMap { + size: num_annotations, + unpack: false, + }); + } + + self.emit(Instruction::LoadConst { + value: bytecode::Constant::Code { + code: Box::new(code), + }, + }); + self.emit(Instruction::LoadConst { + value: bytecode::Constant::String { + value: qualified_name, + }, + }); + + // Turn code object into function object: + self.emit(Instruction::MakeFunction { flags }); + 
self.store_docstring(doc_str);
        self.apply_decorators(decorator_list);

        self.store_name(name);

        self.current_qualified_path = old_qualified_path;
        self.in_loop = was_in_loop;
        self.in_function_def = was_in_function_def;
        Ok(())
    }

    /// Compile a `class` statement.
    ///
    /// The class body is compiled into its own code object, wrapped into a
    /// function and passed, together with the qualified class name, the bases
    /// and any keyword arguments, to the object loaded by `LoadBuildClass`.
    /// The result is then given its docstring, decorated and stored under `name`.
    ///
    /// `in_loop` and `current_qualified_path` are saved on entry and restored
    /// before a successful return.
    ///
    /// # Errors
    ///
    /// Propagates any `CompileError` raised while compiling the decorators,
    /// the body, the bases or the keyword values.
    ///
    /// # Panics
    ///
    /// Panics if `self.source_path` is `None`, or if a keyword has no name.
    fn compile_class_def(
        &mut self,
        name: &str,
        body: &[ast::LocatedStatement],
        bases: &[ast::Expression],
        keywords: &[ast::Keyword],
        decorator_list: &[ast::Expression],
    ) -> Result<(), CompileError> {
        // A class body is never "inside" the loop that surrounds the class statement.
        let was_in_loop = self.in_loop;
        self.in_loop = false;

        let old_qualified_path = self.current_qualified_path.clone();
        let qualified_name = self.create_qualified_name(name, "");
        self.current_qualified_path = Some(qualified_name.clone());

        self.prepare_decorators(decorator_list)?;
        self.emit(Instruction::LoadBuildClass);

        // Open a fresh code object that receives the instructions of the class body.
        let line_number = self.get_source_line_number();
        self.code_object_stack.push(CodeObject::new(
            vec![],
            Varargs::None,
            vec![],
            Varargs::None,
            self.source_path.clone().unwrap(),
            line_number,
            name.to_string(),
        ));
        self.enter_scope();

        let (new_body, doc_str) = get_doc(body);

        // __module__ = __name__
        self.emit(Instruction::LoadName {
            name: "__name__".to_string(),
            scope: bytecode::NameScope::Local,
        });
        self.emit(Instruction::StoreName {
            name: "__module__".to_string(),
            scope: bytecode::NameScope::Local,
        });
        self.compile_statements(new_body)?;

        // The body code object always ends by returning None.
        self.emit(Instruction::LoadConst {
            value: bytecode::Constant::None,
        });
        self.emit(Instruction::ReturnValue);

        let code = self.pop_code_object();
        self.leave_scope();

        self.emit(Instruction::LoadConst {
            value: bytecode::Constant::Code {
                code: Box::new(code),
            },
        });
        self.emit(Instruction::LoadConst {
            value: bytecode::Constant::String {
                value: name.to_string(),
            },
        });

        // Turn code object into function object:
        self.emit(Instruction::MakeFunction {
            flags: bytecode::FunctionOpArg::empty(),
        });

        self.emit(Instruction::LoadConst {
            value: bytecode::Constant::String {
                value: qualified_name,
            },
        });

        for base in bases {
            self.compile_expression(base)?;
        }

        // The constant 2 in the argument counts below stands for the class-body
        // function and the qualified-name string loaded above.
        let call_type = if keywords.is_empty() {
            CallType::Positional(2 + bases.len())
        } else {
            let mut kwarg_names = Vec::with_capacity(keywords.len());
            for keyword in keywords {
                match &keyword.name {
                    Some(kw_name) => kwarg_names.push(bytecode::Constant::String {
                        value: kw_name.to_string(),
                    }),
                    // This means **kwargs!
                    // NOTE(review): this still aborts compilation with a panic;
                    // reporting it as a `CompileError` needs an error variant
                    // that is not visible from this block.
                    None => panic!("name must be set"),
                }
                self.compile_expression(&keyword.value)?;
            }

            // The keyword names travel as one tuple constant after the values.
            self.emit(Instruction::LoadConst {
                value: bytecode::Constant::Tuple {
                    elements: kwarg_names,
                },
            });
            CallType::Keyword(2 + keywords.len() + bases.len())
        };
        self.emit(Instruction::CallFunction { typ: call_type });

        self.store_docstring(doc_str);
        self.apply_decorators(decorator_list);

        self.store_name(name);
        self.current_qualified_path = old_qualified_path;
        self.in_loop = was_in_loop;
        Ok(())
    }

    /// Emit code that stores `doc_str` as the `__doc__` attribute of the object
    /// currently on top of the stack, leaving that object on the stack.
    ///
    /// Emits nothing when `doc_str` is `None`.
    fn store_docstring(&mut self, doc_str: Option<String>) {
        if let Some(doc_string) = doc_str {
            // Duplicate top of stack (the function or class object)
            self.emit(Instruction::Duplicate);

            // Doc string value (the owned string is moved, not cloned):
            self.emit(Instruction::LoadConst {
                value: bytecode::Constant::String { value: doc_string },
            });

            // Swap the two topmost entries before the attribute store.
            self.emit(Instruction::Rotate { amount: 2 });
            self.emit(Instruction::StoreAttr {
                name: "__doc__".to_string(),
            });
        }
    }

    /// Compile a `for` loop, including its optional `else` suite.
    ///
    /// Emitted layout:
    ///
    /// ```text
    ///         SetupLoop { start, end }
    ///         <iter>
    ///         GetIter
    /// start:  ForIter { target: else }
    ///         <store target>
    ///         <body>
    ///         Jump { target: start }
    /// else:   PopBlock
    ///         <orelse>
    /// end:
    /// ```
    ///
    /// # Errors
    ///
    /// Propagates any `CompileError` from the iterable, the target, the body
    /// or the `else` suite.
    fn compile_for(
        &mut self,
        target: &ast::Expression,
        iter: &ast::Expression,
        body: &[ast::LocatedStatement],
        orelse: &Option<Vec<ast::LocatedStatement>>,
    ) -> Result<(), CompileError> {
        // Start loop
        let start_label = self.new_label();
        let else_label = self.new_label();
        let end_label = self.new_label();
        self.emit(Instruction::SetupLoop {
            start: start_label,
            end: end_label,
        });

        // The thing iterated:
        self.compile_expression(iter)?;

        // Retrieve Iterator
        self.emit(Instruction::GetIter);

        self.set_label(start_label);
        self.emit(Instruction::ForIter { target: else_label });

        // Start of loop iteration, set targets:
        self.compile_store(target)?;

        // Only the loop body itself is compiled with `in_loop` set.
        let was_in_loop = self.in_loop;
        self.in_loop = true;
        self.compile_statements(body)?;
        self.in_loop = was_in_loop;

        self.emit(Instruction::Jump {
            target: start_label,
        });
        self.set_label(else_label);
        self.emit(Instruction::PopBlock);
        if let Some(orelse) = orelse {
            self.compile_statements(orelse)?;
        }
        self.set_label(end_label);
        Ok(())
    }

    /// Compile a (possibly chained) comparison such as `a < b <= c`.
    ///
    /// `vals` holds the operands and `ops` the operators between them, so
    /// `vals` must be exactly one element longer than `ops`. Every comparison
    /// but the last is followed by a conditional jump that leaves the chain
    /// with the false result.
    ///
    /// # Errors
    ///
    /// Propagates any `CompileError` from compiling an operand.
    ///
    /// # Panics
    ///
    /// Panics if `ops` is empty or if `vals.len() != ops.len() + 1`.
    fn compile_chained_comparison(
        &mut self,
        vals: &[ast::Expression],
        ops: &[ast::Comparison],
    ) -> Result<(), CompileError> {
        assert!(!ops.is_empty());
        assert_eq!(vals.len(), ops.len() + 1);

        let to_operator = |op: &ast::Comparison| match op {
            ast::Comparison::Equal => bytecode::ComparisonOperator::Equal,
            ast::Comparison::NotEqual => bytecode::ComparisonOperator::NotEqual,
            ast::Comparison::Less => bytecode::ComparisonOperator::Less,
            ast::Comparison::LessOrEqual => bytecode::ComparisonOperator::LessOrEqual,
            ast::Comparison::Greater => bytecode::ComparisonOperator::Greater,
            ast::Comparison::GreaterOrEqual => bytecode::ComparisonOperator::GreaterOrEqual,
            ast::Comparison::In => bytecode::ComparisonOperator::In,
            ast::Comparison::NotIn => bytecode::ComparisonOperator::NotIn,
            ast::Comparison::Is => bytecode::ComparisonOperator::Is,
            ast::Comparison::IsNot => bytecode::ComparisonOperator::IsNot,
        };

        // a == b == c == d
        // compile into (pseudocode):
        // result = a == b
        // if result:
        //   result = b == c
        //   if result:
        //     result = c == d

        // Separate the final (operator, operand) pair from the leading ones.
        // The asserts above guarantee both slices are non-empty, and the two
        // leading slices have the same length (`ops.len() - 1`).
        let (last_op, leading_ops) = ops
            .split_last()
            .expect("ops is non-empty (asserted above)");
        let (last_val, middle_vals) = vals[1..]
            .split_last()
            .expect("vals has at least two elements (asserted above)");

        // initialize lhs outside of loop
        self.compile_expression(&vals[0])?;

        let break_label = self.new_label();
        let last_label = self.new_label();

        // for all comparisons except the last (as the last one doesn't need a conditional jump)
        for (op, val) in leading_ops.iter().zip(middle_vals) {
            self.compile_expression(val)?;
            // store rhs for the next comparison in chain
            self.emit(Instruction::Duplicate);
            self.emit(Instruction::Rotate { amount: 3 });

            self.emit(Instruction::CompareOperation {
                op: to_operator(op),
            });

            // if comparison result is false, we break with this value; if true, try the next one.
            // (CPython compresses these three opcodes into JUMP_IF_FALSE_OR_POP)
            self.emit(Instruction::Duplicate);
            self.emit(Instruction::JumpIfFalse {
                target: break_label,
            });
            self.emit(Instruction::Pop);
        }

        // handle the last comparison
        self.compile_expression(last_val)?;
        self.emit(Instruction::CompareOperation {
            op: to_operator(last_op),
        });
        self.emit(Instruction::Jump { target: last_label });

        // early exit left us with stack: `rhs, comparison_result`. We need to clean up rhs.
        self.set_label(break_label);
        self.emit(Instruction::Rotate { amount: 2 });
        self.emit(Instruction::Pop);

        self.set_label(last_label);
        Ok(())
    }

    /// Emit the code that stores the value on top of the stack into `target`.
    ///
    /// Supported targets are identifiers, subscripts, attributes and
    /// list/tuple patterns, which are unpacked and stored element by element,
    /// with at most one starred element.
    ///
    /// # Errors
    ///
    /// Returns `CompileErrorType::StarArgs` when a list/tuple pattern holds
    /// more than one starred element, and `CompileErrorType::Assign` for any
    /// other kind of expression. Errors from nested expressions are propagated.
    fn compile_store(&mut self, target: &ast::Expression) -> Result<(), CompileError> {
        match target {
            ast::Expression::Identifier { name } => {
                self.store_name(name);
            }
            ast::Expression::Subscript { a, b } => {
                self.compile_expression(a)?;
                self.compile_expression(b)?;
                self.emit(Instruction::StoreSubscript);
            }
            ast::Expression::Attribute { value, name } => {
                self.compile_expression(value)?;
                self.emit(Instruction::StoreAttr {
                    name: name.to_string(),
                });
            }
            ast::Expression::List { elements } | ast::Expression::Tuple { elements } => {
                let mut seen_star = false;

                // Scan for star args: the first one selects UnpackEx, a second
                // one is an error.
                for (i, element) in elements.iter().enumerate() {
                    if let ast::Expression::Starred { .. } = element {
                        if seen_star {
                            return Err(CompileError {
                                error: CompileErrorType::StarArgs,
                                location: self.current_source_location.clone(),
                            });
                        } else {
                            seen_star = true;
                            self.emit(Instruction::UnpackEx {
                                before: i,
                                after: elements.len() - i - 1,
                            });
                        }
                    }
                }

                if !seen_star {
                    self.emit(Instruction::UnpackSequence {
                        size: elements.len(),
                    });
                }

                // Store each unpacked value; a starred element stores into the
                // expression it wraps.
                for element in elements {
                    if let ast::Expression::Starred { value } = element {
                        self.compile_store(value)?;
                    } else {
                        self.compile_store(element)?;
                    }
                }
            }
            _ => {
                return Err(CompileError {
                    error: CompileErrorType::Assign(target.name()),
                    location: self.current_source_location.clone(),
                });
            }
        }

        Ok(())
    }

    /// Emit the `BinaryOperation` instruction matching the AST operator `op`.
    ///
    /// `inplace` is forwarded unchanged into the emitted instruction.
    fn compile_op(&mut self, op: &ast::Operator, inplace: bool) {
        let binary_op = match op {
            ast::Operator::Add => bytecode::BinaryOperator::Add,
            ast::Operator::Sub => bytecode::BinaryOperator::Subtract,
            ast::Operator::Mult => bytecode::BinaryOperator::Multiply,
            ast::Operator::MatMult => bytecode::BinaryOperator::MatrixMultiply,
            ast::Operator::Div => bytecode::BinaryOperator::Divide,
            ast::Operator::FloorDiv => bytecode::BinaryOperator::FloorDivide,
            ast::Operator::Mod => bytecode::BinaryOperator::Modulo,
            ast::Operator::Pow => bytecode::BinaryOperator::Power,
            ast::Operator::LShift => bytecode::BinaryOperator::Lshift,
            ast::Operator::RShift => bytecode::BinaryOperator::Rshift,
            ast::Operator::BitOr => bytecode::BinaryOperator::Or,
            ast::Operator::BitXor => bytecode::BinaryOperator::Xor,
            ast::Operator::BitAnd => bytecode::BinaryOperator::And,
        };
        self.emit(Instruction::BinaryOperation {
            op: binary_op,
            inplace,
        });
    }

    fn compile_test(
        &mut self,
        expression: &ast::Expression,
        true_label: Option