Skip to content

Commit

Permalink
Fix some issues with unicode in URLs
Browse files Browse the repository at this point in the history
First, `UrlParser.update_query` didn't like 7-bit unclean values.
`unicode()` should work everywhere `str()` did.

Second, the check for emedded NBSPs in `UrlParser.is_web_safe_url`
could be bypassed since `b'\xa0'` couldn't automatically be promoted
to unicode (thus `u'\xa0'` != b'\xa0'.) The check was fixed to handle
the NBSP char in either unicode or byte strings.
  • Loading branch information
JordanMilne committed May 18, 2015
1 parent d0108cf commit d7acfce
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
5 changes: 3 additions & 2 deletions r2/r2/lib/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ def update_query(self, **updates):
# Since in HTTP everything's a string, coercing values to strings now
# makes equality testing easier. Python will throw an error if you try
# to pass in a non-string key, so that's already taken care of for us.
updates = {k: str(v) for k, v in updates.iteritems()}
updates = {k: _force_unicode(v) for k, v in updates.iteritems()}
self.query_dict.update(updates)

@property
Expand Down Expand Up @@ -715,7 +715,8 @@ def is_web_safe_url(self):
# should be safe enough to allow after three slashes. Opera 12's the
# only browser that trips over them, and it doesn't fall for
# `http:///foo.com/`.
if match.group(0) == '\xa0':
# Check both in case unicode promotion fails
if match.group(0) in {u'\xa0', '\xa0'}:
if match.string[0:match.start(0)].count('/') < 3:
return False
else:
Expand Down
13 changes: 13 additions & 0 deletions r2/r2/tests/unit/lib/urlparser_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python
# coding=utf-8
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -138,6 +139,12 @@ def test_nbsp_allowances(self):
self.assertIsNotSafeRedditUrl("\xa0http://%s/" % g.domain)
self.assertIsSafeRedditUrl("http://%s/\xa0" % g.domain)
self.assertIsSafeRedditUrl("/foo/bar/\xa0baz")
# Make sure this works if the URL is unicode
self.assertIsNotSafeRedditUrl(u"http://\xa0.%s/" % g.domain)
self.assertIsNotSafeRedditUrl(u"\xa0http://%s/" % g.domain)
self.assertIsSafeRedditUrl(u"http://%s/\xa0" % g.domain)
self.assertIsSafeRedditUrl(u"/foo/bar/\xa0baz")



class TestSwitchSubdomainByExtension(unittest.TestCase):
Expand Down Expand Up @@ -281,3 +288,9 @@ def test_integer_query_params(self):
u2 = UrlParser('http://example.com/')
u2.update_query(page=1234)
self.assertEquals(u, u2)

def test_unicode_query_params(self):
u = UrlParser(u'http://example.com/?page=unicode:(')
u2 = UrlParser('http://example.com/')
u2.update_query(page=u'unicode:(')
self.assertEquals(u, u2)

0 comments on commit d7acfce

Please sign in to comment.