Fix printing of strings with \x00 followed by ASCII digits

Summary: Consider the string `"\x00\x31"`. Separately, `\x00` can be printed as `\0` and `\x31` can be printed as `1`. But printed together, you would get `\01`, which is a legacy octal escape for the single character U+0001 rather than a null character followed by `1`. So, look ahead and print the null character as `\x00` when the next codepoint is an ASCII digit, so that we get `\x001` instead. Reviewed By: pieterv Differential Revision: D15416584 fbshipit-source-id: f98562ff2de5ed7d935d8410fe14eb8de595af45
vicapow · May 20, 2019 · e952176 · e952176
1 parent 0aa954c
commit e952176
Show file tree

Hide file tree

Showing 2 changed files with 41 additions and 3 deletions.
diff --git a/src/parser_utils/output/__tests__/js_layout_generator_test.ml b/src/parser_utils/output/__tests__/js_layout_generator_test.ml
@@ -2374,6 +2374,12 @@ let tests = "js_layout_generator" >::: [
       let ast = expression_of_string "\"\xF0\x9F\x92\xA9\"" in
       assert_expression ~ctxt {|"\ud83d\udca9"|} ast;
 
+      (* zero followed by ASCII number *)
+      let ast = expression_of_string "\"\x00\x31\"" in
+      assert_expression ~ctxt {|"\x001"|} ast; (* not `\01`! *)
+      let ast = expression_of_string "\"\x00\x39\"" in
+      assert_expression ~ctxt {|"\x009"|} ast; (* not `\09`! *)
+
       (* unprintable ascii, escaped *)
       let ast = expression_of_string {|"\x07"|} in
       assert_expression ~ctxt {|"\x07"|} ast;

diff --git a/src/parser_utils/output/js_layout_generator.ml b/src/parser_utils/output/js_layout_generator.ml
@@ -253,12 +253,44 @@ let better_quote =
     if double > single then "'" else "\""
 
 let utf8_escape =
-  let f ~quote buf _i = function
+  (* a null character can be printed as \x00 or \0. but if the next character is an ASCII digit,
+     then using \0 would create \01 (for example), which is a legacy octal 1. so, rather than simply
+     fold over the codepoints, we have to look ahead at the next character as well. *)
+  let lookahead_fold_wtf_8 :
+      ?pos:int ->
+      ?len:int ->
+      (next: (int * Wtf8.codepoint) option -> 'a -> int -> Wtf8.codepoint -> 'a) ->
+      'a -> string -> 'a
+  =
+    let lookahead ~f (prev, buf) i cp =
+      let next = Some (i, cp) in
+      let buf = match prev with
+      | Some (prev_i, prev_cp) -> f ~next buf prev_i prev_cp
+      | None -> buf
+      in
+      (next, buf)
+    in
+    fun ?pos ?len f acc str ->
+      str
+      |> Wtf8.fold_wtf_8 ?pos ?len (lookahead ~f) (None, acc)
+      |> fun (last, acc) ->
+        begin match last with
+        | Some (i, cp) -> f ~next:None acc i cp
+        | None -> acc
+        end
+  in
+  let f ~quote ~next buf _i = function
   | Wtf8.Malformed -> buf
   | Wtf8.Point cp ->
     begin match cp with
     (* SingleEscapeCharacter: http://www.ecma-international.org/ecma-262/6.0/#table-34 *)
-    | 0x0 -> Buffer.add_string buf "\\0"; buf
+    | 0x0 ->
+        let zero = match next with
+        | Some (_i, Wtf8.Point n) when 0x30 <= n && n <= 0x39 -> "\\x00"
+        | _ -> "\\0"
+        in
+        Buffer.add_string buf zero;
+        buf
     | 0x8 -> Buffer.add_string buf "\\b"; buf
     | 0x9 -> Buffer.add_string buf "\\t"; buf
     | 0xA -> Buffer.add_string buf "\\n"; buf
@@ -296,7 +328,7 @@ let utf8_escape =
   in
   fun ~quote str ->
     str
-    |> Wtf8.fold_wtf_8 (f ~quote) (Buffer.create (String.length str))
+    |> lookahead_fold_wtf_8 (f ~quote) (Buffer.create (String.length str))
     |> Buffer.contents
 
 let layout_from_comment anchor loc_node (loc_cm, comment) =