Skip to content

Commit

Permalink
syntax: fix scanString bug affecting REPL (google#266)
Browse files Browse the repository at this point in the history
Prior to this change, the REPL incorrectly scanned
regular and raw string literals containing newlines
because scanString falsely assumed that only
triple-quoted strings can span lines.
```
>>> 'a\
... b'
<stdin>:1:2: string literal has invalid quotes
>>> r'a\
... b'
<stdin>:1:2: string literal has invalid quotes
```
Now it does the right thing:
```
>>> 'a\
... b'
"ab"
>>> r'a\
... b'
"a\\\nb"
```
  • Loading branch information
adonovan authored Mar 26, 2020
1 parent 16e44b1 commit e8819e8
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 12 deletions.
31 changes: 19 additions & 12 deletions syntax/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -805,13 +805,26 @@ func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
start := sc.pos
triple := len(sc.rest) >= 3 && sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote)
sc.readRune()

// String literals may contain escaped or unescaped newlines,
// causing them to span multiple lines (gulps) of REPL input;
// they are the only such token. Thus we cannot call endToken,
// as it assumes sc.rest is unchanged since startToken.
// Instead, buffer the token here.
// TODO(adonovan): opt: buffer only if we encounter a newline.
raw := new(strings.Builder)

// Copy the prefix, e.g. r' or " (see startToken).
raw.Write(sc.token[:len(sc.token)-len(sc.rest)])

if !triple {
// Precondition: startToken was already called.
// single-quoted string literal
for {
if sc.eof() {
sc.error(val.pos, "unexpected EOF in string")
}
c := sc.readRune()
raw.WriteRune(c)
if c == quote {
break
}
Expand All @@ -822,22 +835,16 @@ func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
if sc.eof() {
sc.error(val.pos, "unexpected EOF in string")
}
sc.readRune()
c = sc.readRune()
raw.WriteRune(c)
}
}
sc.endToken(val)
} else {
// triple-quoted string literal
sc.readRune()
raw.WriteRune(quote)
sc.readRune()

// A triple-quoted string literal may span multiple
// gulps of REPL input; it is the only such token.
// Thus we must avoid {start,end}Token.
raw := new(strings.Builder)

// Copy the prefix, e.g. r''' or """ (see startToken).
raw.Write(sc.token[:len(sc.token)-len(sc.rest)])
raw.WriteRune(quote)

quoteCount := 0
for {
Expand All @@ -862,8 +869,8 @@ func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
raw.WriteRune(c)
}
}
val.raw = raw.String()
}
val.raw = raw.String()

s, _, err := unquote(val.raw)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions syntax/scan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
2`, `x = 1 + 2 EOF`},
{`x = 'a\nb'`, `x = "a\nb" EOF`},
{`x = r'a\nb'`, `x = "a\\nb" EOF`},
{"x = 'a\\\nb'", `x = "ab" EOF`},
{`x = '\''`, `x = "'" EOF`},
{`x = "\""`, `x = "\"" EOF`},
{`x = r'\''`, `x = "\\'" EOF`},
Expand Down

0 comments on commit e8819e8

Please sign in to comment.