Skip to content

Commit

Permalink
Bug 1176668 - Fix overflow avoidance in numeric character reference handling. r=wchen.
Browse files Browse the repository at this point in the history
  • Loading branch information
hsivonen committed Aug 25, 2015
1 parent bbfc28c commit 15ebbe2
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 47 deletions.
49 changes: 22 additions & 27 deletions parser/html/javasrc/Tokenizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,6 @@ public class Tokenizer implements Locator {

private int charRefBufMark;

private int prevValue;

protected int value;

private boolean seenDigits;
Expand Down Expand Up @@ -3217,7 +3215,6 @@ public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
break stateloop;
}
c = checkChar(buf, pos);
prevValue = -1;
value = 0;
seenDigits = false;
/*
Expand Down Expand Up @@ -3269,21 +3266,18 @@ public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
}
c = checkChar(buf, pos);
}
// Deal with overflow gracefully
if (value < prevValue) {
value = 0x110000; // Value above Unicode range but
// within int
// range
}
prevValue = value;
/*
* Consume as many characters as match the range of
* characters given above.
*/
assert value >= 0: "value must not become negative.";
if (c >= '0' && c <= '9') {
seenDigits = true;
value *= 10;
value += c - '0';
// Avoid overflow
if (value <= 0x10FFFF) {
value *= 10;
value += c - '0';
}
continue;
} else if (c == ';') {
if (seenDigits) {
Expand Down Expand Up @@ -3350,31 +3344,34 @@ public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
break stateloop;
}
c = checkChar(buf, pos);
// Deal with overflow gracefully
if (value < prevValue) {
value = 0x110000; // Value above Unicode range but
// within int
// range
}
prevValue = value;
/*
* Consume as many characters as match the range of
* characters given above.
*/
assert value >= 0: "value must not become negative.";
if (c >= '0' && c <= '9') {
seenDigits = true;
value *= 16;
value += c - '0';
// Avoid overflow
if (value <= 0x10FFFF) {
value *= 16;
value += c - '0';
}
continue;
} else if (c >= 'A' && c <= 'F') {
seenDigits = true;
value *= 16;
value += c - 'A' + 10;
// Avoid overflow
if (value <= 0x10FFFF) {
value *= 16;
value += c - 'A' + 10;
}
continue;
} else if (c >= 'a' && c <= 'f') {
seenDigits = true;
value *= 16;
value += c - 'a' + 10;
// Avoid overflow
if (value <= 0x10FFFF) {
value *= 16;
value += c - 'a' + 10;
}
continue;
} else if (c == ';') {
if (seenDigits) {
Expand Down Expand Up @@ -6613,7 +6610,6 @@ public void resetToDataState() {
hi = 0; // will always be overwritten before use anyway
candidate = -1;
charRefBufMark = 0;
prevValue = -1;
value = 0;
seenDigits = false;
endTag = false;
Expand Down Expand Up @@ -6663,7 +6659,6 @@ public void loadState(Tokenizer other) throws SAXException {
hi = other.hi;
candidate = other.candidate;
charRefBufMark = other.charRefBufMark;
prevValue = other.prevValue;
value = other.value;
seenDigits = other.seenDigits;
endTag = other.endTag;
Expand Down
37 changes: 18 additions & 19 deletions parser/html/nsHtml5Tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1658,7 +1658,6 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
prevValue = -1;
value = 0;
seenDigits = false;
switch(c) {
Expand All @@ -1684,14 +1683,13 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu
}
c = checkChar(buf, pos);
}
if (value < prevValue) {
value = 0x110000;
}
prevValue = value;
MOZ_ASSERT(value >= 0, "value must not become negative.");
if (c >= '0' && c <= '9') {
seenDigits = true;
value *= 10;
value += c - '0';
if (value <= 0x10FFFF) {
value *= 10;
value += c - '0';
}
continue;
} else if (c == ';') {
if (seenDigits) {
Expand Down Expand Up @@ -1750,24 +1748,27 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
if (value < prevValue) {
value = 0x110000;
}
prevValue = value;
MOZ_ASSERT(value >= 0, "value must not become negative.");
if (c >= '0' && c <= '9') {
seenDigits = true;
value *= 16;
value += c - '0';
if (value <= 0x10FFFF) {
value *= 16;
value += c - '0';
}
continue;
} else if (c >= 'A' && c <= 'F') {
seenDigits = true;
value *= 16;
value += c - 'A' + 10;
if (value <= 0x10FFFF) {
value *= 16;
value += c - 'A' + 10;
}
continue;
} else if (c >= 'a' && c <= 'f') {
seenDigits = true;
value *= 16;
value += c - 'a' + 10;
if (value <= 0x10FFFF) {
value *= 16;
value += c - 'a' + 10;
}
continue;
} else if (c == ';') {
if (seenDigits) {
Expand Down Expand Up @@ -3950,7 +3951,6 @@ nsHtml5Tokenizer::resetToDataState()
hi = 0;
candidate = -1;
charRefBufMark = 0;
prevValue = -1;
value = 0;
seenDigits = false;
endTag = false;
Expand Down Expand Up @@ -3999,7 +3999,6 @@ nsHtml5Tokenizer::loadState(nsHtml5Tokenizer* other)
hi = other->hi;
candidate = other->candidate;
charRefBufMark = other->charRefBufMark;
prevValue = other->prevValue;
value = other->value;
seenDigits = other->seenDigits;
endTag = other->endTag;
Expand Down
1 change: 0 additions & 1 deletion parser/html/nsHtml5Tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ class nsHtml5Tokenizer
int32_t hi;
int32_t candidate;
int32_t charRefBufMark;
int32_t prevValue;
protected:
int32_t value;
private:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -721,3 +721,72 @@ FOO&#xFFFFFF;ZOO
| <head>
| <body>
| "FOO�ZOO"

#data
FOO&#11111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO�"

#data
FOO&#1111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO�"

#data
FOO&#111111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO�"

#data
FOO&#11111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO�ZOO"

#data
FOO&#1111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO�ZOO"

#data
FOO&#111111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO�ZOO"

0 comments on commit 15ebbe2

Please sign in to comment.