Skip to content

Commit

Permalink
Filter out links with double-encoded nulls that crash Chrome
Browse files Browse the repository at this point in the history
  • Loading branch information
JordanMilne committed Sep 24, 2015
1 parent 9372deb commit 42f96dc
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 6 deletions.
10 changes: 7 additions & 3 deletions r2/r2/lib/souptest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import os
import re
import sys
import urllib
import urlparse

import lxml.etree
Expand Down Expand Up @@ -100,7 +101,10 @@ def souptest_sniff_node(node):
# work around CRBUG-464270
parsed_url = urlparse.urlparse(lv)
if parsed_url.hostname and len(parsed_url.hostname) > 255:
raise SoupHostnameLengthError(parsed_url.hostname)
raise SoupDetectedCrasherError(parsed_url.hostname)
# work around for Chrome crash with "%%30%30" - Sep 2015
if "%00" in urllib.unquote(parsed_url.path):
raise SoupDetectedCrasherError(lv)
else:
# Processing instructions and friends fall down here.
raise SoupUnsupportedNodeError(node)
Expand Down Expand Up @@ -222,8 +226,8 @@ class SoupUnsupportedTagError(SoupReprError):
HUMAN_MESSAGE = "Unsupported tag"


class SoupHostnameLengthError(SoupReprError):
HUMAN_MESSAGE = "Hostname too long"
class SoupDetectedCrasherError(SoupReprError):
HUMAN_MESSAGE = "Known crasher posted"


class SoupUnsupportedEntityError(SoupReprError):
Expand Down
5 changes: 4 additions & 1 deletion r2/r2/lib/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from copy import deepcopy
from datetime import date, datetime, timedelta
from decimal import Decimal
from urllib import unquote_plus
from urllib import unquote_plus, unquote
from urllib2 import urlopen, Request
from urlparse import urlparse, urlunparse

Expand Down Expand Up @@ -364,6 +364,9 @@ def sanitize_url(url, require_scheme=False, valid_schemes=VALID_SCHEMES):
# work around CRBUG-464270
if len(u.hostname) > 255:
return None
# work around for Chrome crash with "%%30%30" - Sep 2015
if "%00" in unquote(u.path):
return None
if u.username is not None or u.password is not None:
return None

Expand Down
4 changes: 2 additions & 2 deletions r2/r2/lib/validator/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from r2.lib.db.operators import asc, desc
from r2.lib.souptest import (
SoupError,
SoupHostnameLengthError,
SoupDetectedCrasherError,
SoupUnsupportedEntityError,
)
from r2.lib.template_helpers import add_sr
Expand Down Expand Up @@ -648,7 +648,7 @@ def run(self, text, text2=''):
user = c.user.name

# work around CRBUG-464270
if isinstance(e, SoupHostnameLengthError):
if isinstance(e, SoupDetectedCrasherError):
# We want a general idea of how often this is triggered, and
# by what
g.log.warning("CHROME HAX by %s: %s" % (user, text))
Expand Down
13 changes: 13 additions & 0 deletions r2/r2/tests/unit/lib/souptest_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from r2.lib.souptest import (
souptest_fragment,
SoupDetectedCrasherError,
SoupError,
SoupSyntaxError,
SoupUnexpectedCDataSectionError,
Expand Down Expand Up @@ -118,3 +119,15 @@ def test_schemes(self):
self.assertFragmentValid('<a href="/google.com">a</a>')
self.assertFragmentRaises('<a href="javascript://google.com">a</a>',
SoupUnsupportedSchemeError)

def test_crashers(self):
# Chrome crashes on weirdly encoded nulls.
self.assertFragmentRaises('<a href="http://example.com/%%30%30">foo</a>',
SoupDetectedCrasherError)
self.assertFragmentRaises('<a href="http://example.com/%0%30">foo</a>',
SoupDetectedCrasherError)
self.assertFragmentRaises('<a href="http://example.com/%%300">foo</a>',
SoupDetectedCrasherError)
# Chrome crashes on extremely long hostnames
self.assertFragmentRaises('<a href="http://%s.com">foo</a>' % ("x" * 300),
SoupDetectedCrasherError)

0 comments on commit 42f96dc

Please sign in to comment.