Skip to content

Commit 91a897d

Browse files
authored
Merge pull request RustPython#4442 from bluetech/non-logical-newline-token
lexer: Add `NonLogicalNewline` token
2 parents 9bbe745 + e64faa7 commit 91a897d

File tree

3 files changed

+84
-3
lines changed

3 files changed

+84
-3
lines changed

compiler/parser/src/lexer.rs

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,10 +1075,13 @@ where
10751075
self.next_char();
10761076
let tok_end = self.get_pos();
10771077

1078-
// Depending on the nesting level, we emit newline or not:
1078+
// Depending on the nesting level, we emit a logical or
1079+
// non-logical newline:
10791080
if self.nesting == 0 {
10801081
self.at_begin_of_line = true;
10811082
self.emit((tok_start, Tok::Newline, tok_end));
1083+
} else {
1084+
self.emit((tok_start, Tok::NonLogicalNewline, tok_end));
10821085
}
10831086
}
10841087
' ' | '\t' | '\x0C' => {
@@ -1464,7 +1467,16 @@ mod tests {
14641467
$(
14651468
#[test]
14661469
fn $name() {
1467-
let source = format!("x = [{} 1,2{}]{}", $eol, $eol, $eol);
1470+
let source = r"x = [
1471+
1472+
1,2
1473+
,(3,
1474+
4,
1475+
), {
1476+
5,
1477+
6,\
1478+
7}]
1479+
".replace("\n", $eol);
14681480
let tokens = lex_source(&source);
14691481
assert_eq!(
14701482
tokens,
@@ -1474,9 +1486,32 @@ mod tests {
14741486
},
14751487
Tok::Equal,
14761488
Tok::Lsqb,
1489+
Tok::NonLogicalNewline,
1490+
Tok::NonLogicalNewline,
14771491
Tok::Int { value: BigInt::from(1) },
14781492
Tok::Comma,
14791493
Tok::Int { value: BigInt::from(2) },
1494+
Tok::NonLogicalNewline,
1495+
Tok::Comma,
1496+
Tok::Lpar,
1497+
Tok::Int { value: BigInt::from(3) },
1498+
Tok::Comma,
1499+
Tok::NonLogicalNewline,
1500+
Tok::Int { value: BigInt::from(4) },
1501+
Tok::Comma,
1502+
Tok::NonLogicalNewline,
1503+
Tok::Rpar,
1504+
Tok::Comma,
1505+
Tok::Lbrace,
1506+
Tok::NonLogicalNewline,
1507+
Tok::Int { value: BigInt::from(5) },
1508+
Tok::Comma,
1509+
Tok::NonLogicalNewline,
1510+
Tok::Int { value: BigInt::from(6) },
1511+
Tok::Comma,
1512+
// Backslash line continuation after `6,` - so no NonLogicalNewline is emitted here.
1513+
Tok::Int { value: BigInt::from(7) },
1514+
Tok::Rbrace,
14801515
Tok::Rsqb,
14811516
Tok::Newline,
14821517
]
@@ -1492,6 +1527,50 @@ mod tests {
14921527
test_newline_in_brackets_unix_eol: UNIX_EOL,
14931528
}
14941529

1530+
#[test]
1531+
fn test_non_logical_newline_in_string_continuation() {
1532+
let source = r"(
1533+
'a'
1534+
'b'
1535+
1536+
'c' \
1537+
'd'
1538+
)";
1539+
let tokens = lex_source(source);
1540+
assert_eq!(
1541+
tokens,
1542+
vec![
1543+
Tok::Lpar,
1544+
Tok::NonLogicalNewline,
1545+
stok("a"),
1546+
Tok::NonLogicalNewline,
1547+
stok("b"),
1548+
Tok::NonLogicalNewline,
1549+
Tok::NonLogicalNewline,
1550+
stok("c"),
1551+
stok("d"),
1552+
Tok::NonLogicalNewline,
1553+
Tok::Rpar,
1554+
Tok::Newline,
1555+
]
1556+
);
1557+
}
1558+
1559+
#[test]
1560+
fn test_logical_newline_line_comment() {
1561+
let source = "#Hello\n#World";
1562+
let tokens = lex_source(source);
1563+
assert_eq!(
1564+
tokens,
1565+
vec![
1566+
Tok::Comment("#Hello".to_owned()),
1567+
// CPython's tokenize.py does emit an NL (non-logical newline) token here...
1568+
Tok::Comment("#World".to_owned()),
1569+
// ... and here, but that doesn't seem very useful, so this lexer emits nothing.
1570+
]
1571+
);
1572+
}
1573+
14951574
#[test]
14961575
fn test_operators() {
14971576
let source = "//////=/ /";

compiler/parser/src/parser.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ pub fn parse_located(
9696
let marker_token = (Default::default(), mode.to_marker(), Default::default());
9797
let tokenizer = iter::once(Ok(marker_token))
9898
.chain(lxr)
99-
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. }));
99+
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
100100

101101
python::TopParser::new()
102102
.parse(tokenizer)

compiler/parser/src/token.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ pub enum Tok {
2525
triple_quoted: bool,
2626
},
2727
Newline,
28+
NonLogicalNewline,
2829
Indent,
2930
Dedent,
3031
StartModule,
@@ -136,6 +137,7 @@ impl fmt::Display for Tok {
136137
write!(f, "{kind}{quotes}{value}{quotes}")
137138
}
138139
Newline => f.write_str("Newline"),
140+
NonLogicalNewline => f.write_str("NonLogicalNewline"),
139141
Indent => f.write_str("Indent"),
140142
Dedent => f.write_str("Dedent"),
141143
StartModule => f.write_str("StartProgram"),

0 commit comments

Comments
 (0)