Skip to content

Commit e4322f5

Browse files
authored
Merge pull request RustPython#1112 from RustPython/parser-error-cleaning
parser error handling improvements.
2 parents dbc263c + 97f9825 commit e4322f5

File tree

11 files changed

+163
-114
lines changed

11 files changed

+163
-114
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

compiler/src/error.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use rustpython_parser::error::{ParseError, ParseErrorType};
2-
use rustpython_parser::lexer::Location;
2+
use rustpython_parser::location::Location;
33

44
use std::error::Error;
55
use std::fmt;

compiler/src/symboltable.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ Inspirational file: https://github.com/python/cpython/blob/master/Python/symtabl
99

1010
use crate::error::{CompileError, CompileErrorType};
1111
use rustpython_parser::ast;
12-
use rustpython_parser::lexer::Location;
12+
use rustpython_parser::location::Location;
1313
use std::collections::HashMap;
1414

1515
pub fn make_symbol_table(program: &ast::Program) -> Result<SymbolScope, SymbolTableError> {

parser/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,4 @@ num-bigint = "0.2"
1919
num-traits = "0.2"
2020
unicode-xid = "0.1.0"
2121
unic-emoji-char = "0.9.0"
22-
serde = { version = "1.0.66", features = ["derive"] }
2322
wtf8 = "0.0.3"

parser/src/ast.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
//!
33
//! Roughly equivalent to this: https://docs.python.org/3/library/ast.html
44
5-
pub use super::lexer::Location;
5+
pub use crate::location::Location;
66
use num_bigint::BigInt;
7-
use serde::{Deserialize, Serialize};
87

98
/*
109
#[derive(Debug)]
@@ -390,7 +389,7 @@ pub enum Number {
390389
}
391390

392391
/// Transforms a value prior to formatting it.
393-
#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
392+
#[derive(Copy, Clone, Debug, PartialEq)]
394393
pub enum ConversionFlag {
395394
/// Converts by calling `str(<value>)`.
396395
Str,

parser/src/error.rs

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,89 @@
11
//! Define internal parse error types
22
//! The goal is to provide a matching and a safe error API, masking errors from LALR
3-
extern crate lalrpop_util;
4-
use self::lalrpop_util::ParseError as InnerError;
3+
use lalrpop_util::ParseError as InnerError;
4+
use lalrpop_util::ParseError as LalrpopError;
55

6-
use crate::lexer::{LexicalError, LexicalErrorType, Location};
6+
use crate::location::Location;
77
use crate::token::Tok;
88

99
use std::error::Error;
1010
use std::fmt;
1111

12+
/// Represents an error during lexical scanning.
13+
#[derive(Debug, PartialEq)]
14+
pub struct LexicalError {
15+
pub error: LexicalErrorType,
16+
pub location: Location,
17+
}
18+
19+
#[derive(Debug, PartialEq)]
20+
pub enum LexicalErrorType {
21+
StringError,
22+
UnicodeError,
23+
NestingError,
24+
UnrecognizedToken { tok: char },
25+
FStringError(FStringErrorType),
26+
OtherError(String),
27+
}
28+
29+
impl fmt::Display for LexicalErrorType {
30+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
31+
match self {
32+
LexicalErrorType::StringError => write!(f, "Got unexpected string"),
33+
LexicalErrorType::FStringError(error) => write!(f, "Got error in f-string: {}", error),
34+
LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
35+
LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
36+
LexicalErrorType::UnrecognizedToken { tok } => {
37+
write!(f, "Got unexpected token {}", tok)
38+
}
39+
LexicalErrorType::OtherError(msg) => write!(f, "{}", msg),
40+
}
41+
}
42+
}
43+
44+
// TODO: consolidate these with ParseError
45+
#[derive(Debug, PartialEq)]
46+
pub struct FStringError {
47+
pub error: FStringErrorType,
48+
pub location: Location,
49+
}
50+
51+
#[derive(Debug, PartialEq)]
52+
pub enum FStringErrorType {
53+
UnclosedLbrace,
54+
UnopenedRbrace,
55+
InvalidExpression(Box<ParseErrorType>),
56+
InvalidConversionFlag,
57+
EmptyExpression,
58+
MismatchedDelimiter,
59+
}
60+
61+
impl fmt::Display for FStringErrorType {
62+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
63+
match self {
64+
FStringErrorType::UnclosedLbrace => write!(f, "Unclosed '{{'"),
65+
FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
66+
FStringErrorType::InvalidExpression(error) => {
67+
write!(f, "Invalid expression: {}", error)
68+
}
69+
FStringErrorType::InvalidConversionFlag => write!(f, "Invalid conversion flag"),
70+
FStringErrorType::EmptyExpression => write!(f, "Empty expression"),
71+
FStringErrorType::MismatchedDelimiter => write!(f, "Mismatched delimiter"),
72+
}
73+
}
74+
}
75+
76+
impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
77+
fn from(err: FStringError) -> Self {
78+
lalrpop_util::ParseError::User {
79+
error: LexicalError {
80+
error: LexicalErrorType::FStringError(err.error),
81+
location: err.location,
82+
},
83+
}
84+
}
85+
}
86+
1287
/// Represents an error during parsing
1388
#[derive(Debug, PartialEq)]
1489
pub struct ParseError {

parser/src/fstring.rs

Lines changed: 21 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,37 +2,14 @@ use std::iter;
22
use std::mem;
33
use std::str;
44

5-
use lalrpop_util::ParseError as LalrpopError;
6-
75
use crate::ast::{ConversionFlag, StringGroup};
8-
use crate::lexer::{LexicalError, LexicalErrorType, Location, Tok};
6+
use crate::error::{FStringError, FStringErrorType};
7+
use crate::location::Location;
98
use crate::parser::parse_expression;
109

11-
use self::FStringError::*;
10+
use self::FStringErrorType::*;
1211
use self::StringGroup::*;
1312

14-
// TODO: consolidate these with ParseError
15-
#[derive(Debug, PartialEq)]
16-
pub enum FStringError {
17-
UnclosedLbrace,
18-
UnopenedRbrace,
19-
InvalidExpression,
20-
InvalidConversionFlag,
21-
EmptyExpression,
22-
MismatchedDelimiter,
23-
}
24-
25-
impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
26-
fn from(_err: FStringError) -> Self {
27-
lalrpop_util::ParseError::User {
28-
error: LexicalError {
29-
error: LexicalErrorType::StringError,
30-
location: Default::default(),
31-
},
32-
}
33-
}
34-
}
35-
3613
struct FStringParser<'a> {
3714
chars: iter::Peekable<str::Chars<'a>>,
3815
}
@@ -44,7 +21,7 @@ impl<'a> FStringParser<'a> {
4421
}
4522
}
4623

47-
fn parse_formatted_value(&mut self) -> Result<StringGroup, FStringError> {
24+
fn parse_formatted_value(&mut self) -> Result<StringGroup, FStringErrorType> {
4825
let mut expression = String::new();
4926
let mut spec = String::new();
5027
let mut delims = Vec::new();
@@ -103,7 +80,8 @@ impl<'a> FStringParser<'a> {
10380
}
10481
return Ok(FormattedValue {
10582
value: Box::new(
106-
parse_expression(expression.trim()).map_err(|_| InvalidExpression)?,
83+
parse_expression(expression.trim())
84+
.map_err(|e| InvalidExpression(Box::new(e.error)))?,
10785
),
10886
conversion,
10987
spec,
@@ -127,7 +105,7 @@ impl<'a> FStringParser<'a> {
127105
Err(UnclosedLbrace)
128106
}
129107

130-
fn parse(mut self) -> Result<StringGroup, FStringError> {
108+
fn parse(mut self) -> Result<StringGroup, FStringErrorType> {
131109
let mut content = String::new();
132110
let mut values = vec![];
133111

@@ -175,10 +153,20 @@ impl<'a> FStringParser<'a> {
175153
}
176154
}
177155

178-
pub fn parse_fstring(source: &str) -> Result<StringGroup, FStringError> {
156+
/// Parse an f-string into a string group.
157+
fn parse_fstring(source: &str) -> Result<StringGroup, FStringErrorType> {
179158
FStringParser::new(source).parse()
180159
}
181160

161+
/// Parse an f-string from a string, located at a certain position in the source code.
162+
/// In case of errors, we will get the location and the error returned.
163+
pub fn parse_located_fstring(
164+
source: &str,
165+
location: Location,
166+
) -> Result<StringGroup, FStringError> {
167+
parse_fstring(source).map_err(|error| FStringError { error, location })
168+
}
169+
182170
#[cfg(test)]
183171
mod tests {
184172
use crate::ast;
@@ -232,6 +220,8 @@ mod tests {
232220
fn test_parse_invalid_fstring() {
233221
assert_eq!(parse_fstring("{"), Err(UnclosedLbrace));
234222
assert_eq!(parse_fstring("}"), Err(UnopenedRbrace));
235-
assert_eq!(parse_fstring("{class}"), Err(InvalidExpression));
223+
224+
// TODO: check for InvalidExpression enum?
225+
assert!(parse_fstring("{class}").is_err());
236226
}
237227
}

parser/src/lexer.rs

Lines changed: 11 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ extern crate unic_emoji_char;
55
extern crate unicode_xid;
66

77
pub use super::token::Tok;
8+
use crate::error::{LexicalError, LexicalErrorType};
9+
use crate::location::Location;
810
use num_bigint::BigInt;
911
use num_traits::Num;
10-
use serde::{Deserialize, Serialize};
1112
use std::cmp::Ordering;
1213
use std::collections::HashMap;
13-
use std::fmt;
1414
use std::str::FromStr;
1515
use unic_emoji_char::is_emoji_presentation;
1616
use unicode_xid::UnicodeXID;
@@ -60,61 +60,6 @@ pub struct Lexer<T: Iterator<Item = char>> {
6060
keywords: HashMap<String, Tok>,
6161
}
6262

63-
#[derive(Debug, PartialEq)]
64-
pub struct LexicalError {
65-
pub error: LexicalErrorType,
66-
pub location: Location,
67-
}
68-
69-
#[derive(Debug, PartialEq)]
70-
pub enum LexicalErrorType {
71-
StringError,
72-
UnicodeError,
73-
NestingError,
74-
UnrecognizedToken { tok: char },
75-
OtherError(String),
76-
}
77-
78-
impl fmt::Display for LexicalErrorType {
79-
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
80-
match *self {
81-
LexicalErrorType::StringError => write!(f, "Got unexpected string"),
82-
LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
83-
LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
84-
LexicalErrorType::UnrecognizedToken { tok } => {
85-
write!(f, "Got unexpected token {}", tok)
86-
}
87-
LexicalErrorType::OtherError(ref msg) => write!(f, "{}", msg),
88-
}
89-
}
90-
}
91-
92-
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
93-
pub struct Location {
94-
row: usize,
95-
column: usize,
96-
}
97-
98-
impl fmt::Display for Location {
99-
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
100-
write!(f, "line {} column {}", self.row, self.column)
101-
}
102-
}
103-
104-
impl Location {
105-
pub fn new(row: usize, column: usize) -> Self {
106-
Location { row, column }
107-
}
108-
109-
pub fn row(&self) -> usize {
110-
self.row
111-
}
112-
113-
pub fn column(&self) -> usize {
114-
self.column
115-
}
116-
}
117-
11863
pub fn get_keywords() -> HashMap<String, Tok> {
11964
let mut keywords: HashMap<String, Tok> = HashMap::new();
12065

@@ -299,8 +244,7 @@ where
299244
lxr.next_char();
300245
lxr.next_char();
301246
// Start at top row (=1) left column (=1)
302-
lxr.location.row = 1;
303-
lxr.location.column = 1;
247+
lxr.location.reset();
304248
lxr
305249
}
306250

@@ -615,7 +559,10 @@ where
615559
let tok = if is_bytes {
616560
if string_content.is_ascii() {
617561
Tok::Bytes {
618-
value: lex_byte(string_content)?,
562+
value: lex_byte(string_content).map_err(|error| LexicalError {
563+
error,
564+
location: self.get_pos(),
565+
})?,
619566
}
620567
} else {
621568
return Err(LexicalError {
@@ -684,7 +631,7 @@ where
684631
let nxt = self.chars.next();
685632
self.chr0 = self.chr1;
686633
self.chr1 = nxt;
687-
self.location.column += 1;
634+
self.location.go_right();
688635
c
689636
}
690637

@@ -693,8 +640,7 @@ where
693640
}
694641

695642
fn new_line(&mut self) {
696-
self.location.row += 1;
697-
self.location.column = 1;
643+
self.location.newline();
698644
}
699645

700646
/// Given we are at the start of a line, count the number of spaces and/or tabs until the first character.
@@ -1254,7 +1200,7 @@ where
12541200
}
12551201
}
12561202

1257-
fn lex_byte(s: String) -> Result<Vec<u8>, LexicalError> {
1203+
fn lex_byte(s: String) -> Result<Vec<u8>, LexicalErrorType> {
12581204
let mut res = vec![];
12591205
let mut escape = false; //flag if previous was \
12601206
let mut hex_on = false; // hex mode on or off
@@ -1273,10 +1219,7 @@ fn lex_byte(s: String) -> Result<Vec<u8>, LexicalError> {
12731219
hex_value.clear();
12741220
}
12751221
} else {
1276-
return Err(LexicalError {
1277-
error: LexicalErrorType::StringError,
1278-
location: Default::default(),
1279-
});
1222+
return Err(LexicalErrorType::StringError);
12801223
}
12811224
} else {
12821225
match (c, escape) {

parser/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ pub mod ast;
66
pub mod error;
77
mod fstring;
88
pub mod lexer;
9+
pub mod location;
910
pub mod parser;
1011
lalrpop_mod!(
1112
#[allow(clippy::all)]

0 commit comments

Comments
 (0)