Skip to content

Commit

Permalink
fix printing of strings with \x00 followed by ascii digits
Browse files Browse the repository at this point in the history
Summary:
Consider the string `"\x00\x31"`. Separately, `\x00` can be printed as `\0` and `\x31` as `1`, but together they would produce `\01`, which is a legacy octal escape for the codepoint 1 rather than a null character followed by the digit `1`.

So, look ahead and print `\x00` when the next codepoint is an ASCII digit, so that we get `\x001` instead.

Reviewed By: pieterv

Differential Revision: D15416584

fbshipit-source-id: f98562ff2de5ed7d935d8410fe14eb8de595af45
  • Loading branch information
mroch authored and facebook-github-bot committed May 20, 2019
1 parent 0aa954c commit e952176
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 3 deletions.
6 changes: 6 additions & 0 deletions src/parser_utils/output/__tests__/js_layout_generator_test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2374,6 +2374,12 @@ let tests = "js_layout_generator" >::: [
let ast = expression_of_string "\"\xF0\x9F\x92\xA9\"" in
assert_expression ~ctxt {|"\ud83d\udca9"|} ast;

(* zero followed by ASCII number *)
let ast = expression_of_string "\"\x00\x31\"" in
assert_expression ~ctxt {|"\x001"|} ast; (* not `\01`! *)
let ast = expression_of_string "\"\x00\x39\"" in
assert_expression ~ctxt {|"\x009"|} ast; (* not `\09`! *)

(* unprintable ascii, escaped *)
let ast = expression_of_string {|"\x07"|} in
assert_expression ~ctxt {|"\x07"|} ast;
Expand Down
38 changes: 35 additions & 3 deletions src/parser_utils/output/js_layout_generator.ml
Original file line number Diff line number Diff line change
Expand Up @@ -253,12 +253,44 @@ let better_quote =
if double > single then "'" else "\""

let utf8_escape =
let f ~quote buf _i = function
(* a null character can be printed as \x00 or \0. but if the next character is an ASCII digit,
then using \0 would create \01 (for example), which is a legacy octal 1. so, rather than simply
fold over the codepoints, we have to look ahead at the next character as well. *)
(* [lookahead_fold_wtf_8 ?pos ?len f init str] folds [f] over the codepoints
   that [Wtf8.fold_wtf_8] yields for [str], one step behind the underlying
   fold: each codepoint is held back until its successor is known, so [f]
   also receives [~next], the following [(index, codepoint)] pair, or [None]
   for the final codepoint. [?pos] and [?len] are forwarded unchanged to
   [Wtf8.fold_wtf_8]. *)
let lookahead_fold_wtf_8 :
  ?pos:int ->
  ?len:int ->
  (next: (int * Wtf8.codepoint) option -> 'a -> int -> Wtf8.codepoint -> 'a) ->
  'a -> string -> 'a
=
  fun ?pos ?len f init str ->
    (* hand the held-back codepoint (if there is one) to [f], together with
       whatever comes after it *)
    let emit ~next acc = function
      | None -> acc
      | Some (i, cp) -> f ~next acc i cp
    in
    (* the codepoint just read becomes the new held-back one; the previously
       held-back codepoint is emitted with the new one as its lookahead *)
    let step (pending, acc) i cp =
      let current = Some (i, cp) in
      (current, emit ~next:current acc pending)
    in
    let (pending, acc) = Wtf8.fold_wtf_8 ?pos ?len step (None, init) str in
    (* the last codepoint has no successor *)
    emit ~next:None acc pending
in
let f ~quote ~next buf _i = function
| Wtf8.Malformed -> buf
| Wtf8.Point cp ->
begin match cp with
(* SingleEscapeCharacter: http://www.ecma-international.org/ecma-262/6.0/#table-34 *)
| 0x0 -> Buffer.add_string buf "\\0"; buf
| 0x0 ->
let zero = match next with
| Some (_i, Wtf8.Point n) when 0x30 <= n && n <= 0x39 -> "\\x00"
| _ -> "\\0"
in
Buffer.add_string buf zero;
buf
| 0x8 -> Buffer.add_string buf "\\b"; buf
| 0x9 -> Buffer.add_string buf "\\t"; buf
| 0xA -> Buffer.add_string buf "\\n"; buf
Expand Down Expand Up @@ -296,7 +328,7 @@ let utf8_escape =
in
fun ~quote str ->
str
|> Wtf8.fold_wtf_8 (f ~quote) (Buffer.create (String.length str))
|> lookahead_fold_wtf_8 (f ~quote) (Buffer.create (String.length str))
|> Buffer.contents

let layout_from_comment anchor loc_node (loc_cm, comment) =
Expand Down

0 comments on commit e952176

Please sign in to comment.