Skip to content

Commit

Permalink
dev-ml/markup: fix build with uutf 1.0
Browse files Browse the repository at this point in the history
Package-Manager: portage-2.3.2
  • Loading branch information
aballier committed Nov 28, 2016
1 parent 6eabcc4 commit 56c0c3a
Show file tree
Hide file tree
Showing 3 changed files with 1,366 additions and 3 deletions.
273 changes: 273 additions & 0 deletions dev-ml/markup/files/test.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
Index: markup.ml-0.7.2/test/test_encoding.ml
===================================================================
--- markup.ml-0.7.2.orig/test/test_encoding.ml
+++ markup.ml-0.7.2/test/test_encoding.ml
@@ -15,9 +15,9 @@ let test_ucs_4 (f : Encoding.t) name s1
expect_error (1, 2) (`Decoding_error (bad_bytes, name))
begin fun report ->
let chars = s1 |> string |> f ~report in
- next_option chars ok (assert_equal (Some (Char.code 'f')));
+ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'f')))));
next_option chars ok (assert_equal (Some Uutf.u_rep));
- next_option chars ok (assert_equal (Some (Char.code 'o')));
+ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'o')))));
next_option chars ok (assert_equal None);
next_option chars ok (assert_equal None)
end;
@@ -25,9 +25,9 @@ let test_ucs_4 (f : Encoding.t) name s1
expect_error (2, 2) (`Decoding_error ("\x00\x00\x00", name))
begin fun report ->
let chars = s2 |> string |> f ~report in
- next_option chars ok (assert_equal (Some (Char.code 'f')));
- next_option chars ok (assert_equal (Some 0x000A));
- next_option chars ok (assert_equal (Some (Char.code 'o')));
+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'f'))));
+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x000A)));
+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'o'))));
next_option chars ok (assert_equal (Some Uutf.u_rep));
next_option chars ok (assert_equal None);
next_option chars ok (assert_equal None)
@@ -38,12 +38,12 @@ let tests = [
let s = "\xef\xbb\xbffoo\xf0\x9f\x90\x99bar\xa0more" in
expect_error (1, 8) (`Decoding_error ("\xa0", "utf-8")) begin fun report ->
let chars = s |> string |> utf_8 ~report in
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
- next_option chars ok (assert_equal (Some 0x1F419));
- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
next_option chars ok (assert_equal (Some Uutf.u_rep));
next_n 4 chars ok
- (assert_equal (List.map Char.code ['m'; 'o'; 'r'; 'e']));
+ (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['m'; 'o'; 'r'; 'e']));
next_option chars ok (assert_equal None);
next_option chars ok (assert_equal None)
end);
@@ -53,11 +53,11 @@ let tests = [
expect_error (1, 6) (`Decoding_error ("\xdc\x19", "utf-16be"))
begin fun report ->
let chars = s |> string |> utf_16be ~report in
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
- next_option chars ok (assert_equal (Some 0x1F419));
- next_option chars ok (assert_equal (Some (Char.code 'b')));
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));
+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b'))));
next_option chars ok (assert_equal (Some Uutf.u_rep));
- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r']));
+ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r']));
next_option chars ok (assert_equal None);
next_option chars ok (assert_equal None)
end);
@@ -67,11 +67,11 @@ let tests = [
expect_error (1, 6) (`Decoding_error ("\x19\xdc", "utf-16le"))
begin fun report ->
let chars = s |> string |> utf_16le ~report in
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
- next_option chars ok (assert_equal (Some 0x1F419));
- next_option chars ok (assert_equal (Some (Char.code 'b')));
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));
+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b'))));
next_option chars ok (assert_equal (Some Uutf.u_rep));
- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r']));
+ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r']));
next_option chars ok (assert_equal None);
next_option chars ok (assert_equal None)
end);
@@ -79,7 +79,7 @@ let tests = [
("encoding.iso_8859_1" >:: fun _ ->
let chars = string "foo\xa0" |> iso_8859_1 in
next_n 4 chars
- ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'; '\xa0']));
+ ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'; '\xa0']));
next_option chars ok (assert_equal None);
next_option chars ok (assert_equal None));

@@ -88,26 +88,26 @@ let tests = [
expect_error (1, 4) (`Decoding_error ("\xa0", "us-ascii"))
begin fun report ->
let chars = s |> string |> us_ascii ~report in
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
next_option chars ok (assert_equal (Some Uutf.u_rep));
- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
next_option chars ok (assert_equal None);
next_option chars ok (assert_equal None)
end);

("encoding.windows_1251" >:: fun _ ->
let chars = string "foo\xe0\xe1\xe2bar" |> windows_1251 in
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
- next_n 3 chars ok (assert_equal [0x0430; 0x0431; 0x0432]);
- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
+ next_n 3 chars ok (assert_equal [Uchar.of_int 0x0430; Uchar.of_int 0x0431; Uchar.of_int 0x0432]);
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
next_option chars ok (assert_equal None);
next_option chars ok (assert_equal None));

("encoding.windows_1252" >:: fun _ ->
let chars = string "foo\x80\x83bar" |> windows_1252 in
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
- next_n 2 chars ok (assert_equal [0x20AC; 0x0192]);
- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
+ next_n 2 chars ok (assert_equal [Uchar.of_int 0x20AC; Uchar.of_int 0x0192]);
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
next_option chars ok (assert_equal None);
next_option chars ok (assert_equal None));

@@ -137,7 +137,7 @@ let tests = [

("encoding.ebcdic" >:: fun _ ->
let chars = string "\x86\x96\x96" |> ebcdic in
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
next_option chars ok (assert_equal None);
next_option chars ok (assert_equal None));
]
Index: markup.ml-0.7.2/test/test_html_tokenizer.ml
===================================================================
--- markup.ml-0.7.2.orig/test/test_html_tokenizer.ml
+++ markup.ml-0.7.2/test/test_html_tokenizer.ml
@@ -134,7 +134,7 @@ let tests = [
expect "&#1000000000000000000000000000000;"
[ 1, 1, E (`Bad_token ("&#1000000000000000000000000000000;",
reference, "out of range"));
- 1, 1, S (`Char Uutf.u_rep);
+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
1, 35, S `EOF];

expect "&#1000000000000000000000000000000"
@@ -142,22 +142,22 @@ let tests = [
reference, "missing ';' at end"));
1, 1, E (`Bad_token ("&#1000000000000000000000000000000",
reference, "out of range"));
- 1, 1, S (`Char Uutf.u_rep);
+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
1, 34, S `EOF];

expect "&#xD800;"
[ 1, 1, E (`Bad_token ("&#xD800;", reference, "out of range"));
- 1, 1, S (`Char Uutf.u_rep);
+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
1, 9, S `EOF];

expect "&#x110000;"
[ 1, 1, E (`Bad_token ("&#x110000;", reference, "out of range"));
- 1, 1, S (`Char Uutf.u_rep);
+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
1, 11, S `EOF];

expect "&#0;"
[ 1, 1, E (`Bad_token ("&#0;", reference, "out of range"));
- 1, 1, S (`Char Uutf.u_rep);
+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
1, 5, S `EOF];

expect ""
@@ -264,7 +264,7 @@ let tests = [
expect ~state:`RCDATA "f\x00</foo>"
([ 1, 1, S (`Char 0x66);
1, 2, E (`Bad_token ("U+0000", "content", "null"));
- 1, 2, S (`Char Uutf.u_rep)] @
+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
(char_sequence ~start:3 "</foo>"));

expect ~state:`RCDATA "<title>f</title >"
@@ -302,7 +302,7 @@ let tests = [
expect ~state:`RAWTEXT "f\x00</foo>"
([ 1, 1, S (`Char 0x66);
1, 2, E (`Bad_token ("U+0000", "content", "null"));
- 1, 2, S (`Char Uutf.u_rep)] @
+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
(char_sequence ~start:3 "</foo>")));

("html.tokenizer.script-data" >:: fun _ ->
@@ -330,7 +330,7 @@ let tests = [
expect ~state:`Script_data "f<!--o\x00o"
((char_sequence ~no_eof:true "f<!--o") @
[1, 7, E (`Bad_token ("U+0000", "script", "null"));
- 1, 7, S (`Char Uutf.u_rep);
+ 1, 7, S (`Char (Uchar.to_int Uutf.u_rep));
1, 8, S (`Char 0x6F);
1, 9, E (`Unexpected_eoi "script");
1, 9, S `EOF]);
@@ -363,7 +363,7 @@ let tests = [
expect ~state:`Script_data "f<!--a-\x00-"
((char_sequence ~no_eof:true "f<!--a-") @
[ 1, 8, E (`Bad_token ("U+0000", "script", "null"));
- 1, 8, S (`Char Uutf.u_rep);
+ 1, 8, S (`Char (Uchar.to_int Uutf.u_rep));
1, 9, S (`Char 0x02D);
1, 10, E (`Unexpected_eoi "script");
1, 10, S `EOF]);
@@ -371,7 +371,7 @@ let tests = [
expect ~state:`Script_data "f<!--a--\x00--"
((char_sequence ~no_eof:true "f<!--a--") @
[ 1, 9, E (`Bad_token ("U+0000", "script", "null"));
- 1, 9, S (`Char Uutf.u_rep);
+ 1, 9, S (`Char (Uchar.to_int Uutf.u_rep));
1, 10, S (`Char 0x02D);
1, 11, S (`Char 0x02D);
1, 12, E (`Unexpected_eoi "script");
@@ -380,14 +380,14 @@ let tests = [
expect ~state:`Script_data "f<!--<script>\x00"
((char_sequence ~no_eof:true "f<!--<script>") @
[ 1, 14, E (`Bad_token ("U+0000", "script", "null"));
- 1, 14, S (`Char Uutf.u_rep);
+ 1, 14, S (`Char (Uchar.to_int Uutf.u_rep));
1, 15, E (`Unexpected_eoi "script");
1, 15, S `EOF]);

expect ~state:`Script_data "f<!--<script>-\x00-"
((char_sequence ~no_eof:true "f<!--<script>-") @
[ 1, 15, E (`Bad_token ("U+0000", "script", "null"));
- 1, 15, S (`Char Uutf.u_rep);
+ 1, 15, S (`Char (Uchar.to_int Uutf.u_rep));
1, 16, S (`Char 0x2D);
1, 17, E (`Unexpected_eoi "script");
1, 17, S `EOF]);
@@ -395,7 +395,7 @@ let tests = [
expect ~state:`Script_data "f<!--<script>--\x00--"
((char_sequence ~no_eof:true "f<!--<script>--") @
[ 1, 16, E (`Bad_token ("U+0000", "script", "null"));
- 1, 16, S (`Char Uutf.u_rep);
+ 1, 16, S (`Char (Uchar.to_int Uutf.u_rep));
1, 17, S (`Char 0x2D);
1, 18, S (`Char 0x2D);
1, 19, E (`Unexpected_eoi "script");
@@ -413,7 +413,7 @@ let tests = [
expect ~state:`Script_data "f\x00</foo>"
([ 1, 1, S (`Char 0x66);
1, 2, E (`Bad_token ("U+0000", "content", "null"));
- 1, 2, S (`Char Uutf.u_rep)] @
+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
(char_sequence ~start:3 "</foo>")));

("html.tokenizer.plaintext" >:: fun _ ->
@@ -424,7 +424,7 @@ let tests = [
expect ~state:`PLAINTEXT "f\x00</foo>"
([ 1, 1, S (`Char 0x66);
1, 2, E (`Bad_token ("U+0000", "content", "null"));
- 1, 2, S (`Char Uutf.u_rep)] @
+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
(char_sequence ~start:3 "</foo>")));

("html.tokenizer.comment" >:: fun _ ->
Index: markup.ml-0.7.2/test/test_input.ml
===================================================================
--- markup.ml-0.7.2.orig/test/test_input.ml
+++ markup.ml-0.7.2/test/test_input.ml
@@ -71,7 +71,7 @@ let tests = [
end);

("input.bom" >:: fun _ ->
- [0xFEFF; 0x66]
+ [Uchar.of_int 0xFEFF; Uchar.of_int 0x66]
|> of_list
|> preprocess is_valid_xml_char Error.ignore_errors
|> fst
Loading

0 comments on commit 56c0c3a

Please sign in to comment.