Skip to content

Commit

Permalink
Make PUBLIC/SYSTEM matching case-insensitive
Browse files Browse the repository at this point in the history
Some websites may use "public" or "system" in doctypes, or completely
messed up casing such as PuBlIc (unlikely, but possible). With this change,
the lexer matches these keywords regardless of the exact casing used.

This fixes https://gitlab.com/yorickpeterse/oga/issues/199
  • Loading branch information
yorickpeterse committed Jan 8, 2020
1 parent 10e9101 commit f00fa40
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 2 deletions.
2 changes: 1 addition & 1 deletion ext/ragel/base_lexer.rl
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@
# Machine for processing doctypes. Doctype values such as the public
# and system IDs are treated as T_STRING tokens.
doctype := |*
'PUBLIC' | 'SYSTEM' => {
'PUBLIC'i | 'SYSTEM'i => {
callback(id_on_doctype_type, data, encoding, ts, te);
};

Expand Down
28 changes: 27 additions & 1 deletion spec/oga/xml/lexer/doctype_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@

# Technically not valid, put in place to make sure that the Ragel rules are
# not too greedy.
it 'lexes an inline doftype followed by a system ID' do
it 'lexes an inline doctype followed by a system ID' do
expect(lex('<!DOCTYPE html [<!ELEMENT foo>] "foo">')).to eq([
[:T_DOCTYPE_START, nil, 1],
[:T_DOCTYPE_NAME, 'html', 1],
Expand All @@ -119,5 +119,31 @@
[:T_DOCTYPE_END, nil, 1]
])
end

# Mixed-case input: the expected tokens carry the doctype name and the
# PUBLIC keyword exactly as they were written in the input.
it 'does not care about the casing when using a public doctype' do
  tokens = lex('<!DoCtYpE HtMl PuBlIc [<!ELEMENT foo>] "foo">')

  expect(tokens).to eq(
    [
      [:T_DOCTYPE_START, nil, 1],
      [:T_DOCTYPE_NAME, 'HtMl', 1],
      [:T_DOCTYPE_TYPE, 'PuBlIc', 1],
      [:T_DOCTYPE_INLINE, '<!ELEMENT foo>', 1],
      [:T_STRING_DQUOTE, nil, 1],
      [:T_STRING_BODY, 'foo', 1],
      [:T_STRING_DQUOTE, nil, 1],
      [:T_DOCTYPE_END, nil, 1]
    ]
  )
end

# Mixed-case input: the expected tokens carry the doctype name and the
# SYSTEM keyword exactly as they were written in the input.
it 'does not care about the casing when using a system doctype' do
  tokens = lex('<!DoCtYpE HtMl SyStEm [<!ELEMENT foo>] "foo">')

  expect(tokens).to eq(
    [
      [:T_DOCTYPE_START, nil, 1],
      [:T_DOCTYPE_NAME, 'HtMl', 1],
      [:T_DOCTYPE_TYPE, 'SyStEm', 1],
      [:T_DOCTYPE_INLINE, '<!ELEMENT foo>', 1],
      [:T_STRING_DQUOTE, nil, 1],
      [:T_STRING_BODY, 'foo', 1],
      [:T_STRING_DQUOTE, nil, 1],
      [:T_DOCTYPE_END, nil, 1]
    ]
  )
end
end
end

0 comments on commit f00fa40

Please sign in to comment.