Skip to content

Commit

Permalink
Merge pull request tornadoweb#1852 from bdarnell/parse-cookie
Browse files Browse the repository at this point in the history
httputil: Rewrite cookie parsing
  • Loading branch information
ajdavis authored Sep 30, 2016
2 parents 1d3d801 + 1664ce3 commit c0f99ba
Show file tree
Hide file tree
Showing 8 changed files with 173 additions and 12 deletions.
1 change: 1 addition & 0 deletions docs/releases.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Release notes
.. toctree::
:maxdepth: 2

releases/v4.4.2
releases/v4.4.1
releases/v4.4.0
releases/v4.3.0
Expand Down
22 changes: 22 additions & 0 deletions docs/releases/v4.4.2.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
What's new in Tornado 4.4.2
===========================

Oct 1, 2016
------------

Security fixes
~~~~~~~~~~~~~~

* A difference in cookie parsing between Tornado and web browsers
(especially when combined with Google Analytics) could allow an
attacker to set arbitrary cookies and bypass XSRF protection. The
cookie parser has been rewritten to fix this attack.

Backwards-compatibility notes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* Cookies containing certain special characters (in particular semicolon
and square brackets) are now parsed differently.
* If the cookie header contains a combination of valid and invalid cookies,
the valid ones will be returned (older versions of Tornado would reject the
entire header for a single invalid cookie).
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def build_extension(self, ext):

kwargs = {}

version = "4.4.1"
version = "4.4.2"

with open('README.rst') as f:
kwargs['long_description'] = f.read()
Expand Down
4 changes: 2 additions & 2 deletions tornado/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@
# is zero for an official release, positive for a development branch,
# or negative for a release candidate or beta (after the base version
# number has been incremented)
version = "4.4.1"
version_info = (4, 4, 1, 0)
version = "4.4.2"
version_info = (4, 4, 2, 0)
93 changes: 90 additions & 3 deletions tornado/httputil.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,10 +379,18 @@ def cookies(self):
self._cookies = Cookie.SimpleCookie()
if "Cookie" in self.headers:
try:
self._cookies.load(
native_str(self.headers["Cookie"]))
parsed = parse_cookie(self.headers["Cookie"])
except Exception:
self._cookies = {}
pass
else:
for k, v in parsed.items():
try:
self._cookies[k] = v
except Exception:
# SimpleCookie imposes some restrictions on keys;
# parse_cookie does not. Discard any cookies
# with disallowed keys.
pass
return self._cookies

def write(self, chunk, callback=None):
Expand Down Expand Up @@ -909,3 +917,82 @@ def split_host_and_port(netloc):
host = netloc
port = None
return (host, port)

_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")
_nulljoin = ''.join

def _unquote_cookie(str):
"""Handle double quotes and escaping in cookie values.
This method is copied verbatim from the Python 3.5 standard
library (http.cookies._unquote) so we don't have to depend on
non-public interfaces.
"""
# If there aren't any doublequotes,
# then there can't be any special characters. See RFC 2109.
if str is None or len(str) < 2:
return str
if str[0] != '"' or str[-1] != '"':
return str

# We have to assume that we must decode this string.
# Down to work.

# Remove the "s
str = str[1:-1]

# Check for special sequences. Examples:
# \012 --> \n
# \" --> "
#
i = 0
n = len(str)
res = []
while 0 <= i < n:
o_match = _OctalPatt.search(str, i)
q_match = _QuotePatt.search(str, i)
if not o_match and not q_match: # Neither matched
res.append(str[i:])
break
# else:
j = k = -1
if o_match:
j = o_match.start(0)
if q_match:
k = q_match.start(0)
if q_match and (not o_match or k < j): # QuotePatt matched
res.append(str[i:k])
res.append(str[k+1])
i = k + 2
else: # OctalPatt matched
res.append(str[i:j])
res.append(chr(int(str[j+1:j+4], 8)))
i = j + 4
return _nulljoin(res)


def parse_cookie(cookie):
"""Parse a ``Cookie`` HTTP header into a dict of name/value pairs.
This function attempts to mimic browser cookie parsing behavior;
it specifically does not follow any of the cookie-related RFCs
(because browsers don't either).
The algorithm used is identical to that used by Django version 1.9.10.
.. versionadded:: 4.4.2
"""
cookiedict = {}
for chunk in cookie.split(str(';')):
if str('=') in chunk:
key, val = chunk.split(str('='), 1)
else:
# Assume an empty name per
# https://bugzilla.mozilla.org/show_bug.cgi?id=169091
key, val = str(''), chunk
key, val = key.strip(), val.strip()
if key or val:
# unquote using Python's algorithm.
cookiedict[key] = _unquote_cookie(val)
return cookiedict
6 changes: 3 additions & 3 deletions tornado/platform/asyncio.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
.. note::
Tornado requires the `~asyncio.BaseEventLoop.add_reader` family of methods,
so it is not compatible with the `~asyncio.ProactorEventLoop` on Windows.
Use the `~asyncio.SelectorEventLoop` instead.
Tornado requires the `~asyncio.AbstractEventLoop.add_reader` family of
methods, so it is not compatible with the `~asyncio.ProactorEventLoop` on
Windows. Use the `~asyncio.SelectorEventLoop` instead.
"""

from __future__ import absolute_import, division, print_function, with_statement
Expand Down
53 changes: 52 additions & 1 deletion tornado/test/httputil_test.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-


from __future__ import absolute_import, division, print_function, with_statement
from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line
from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line, parse_cookie
from tornado.escape import utf8, native_str
from tornado.log import gen_log
from tornado.testing import ExpectLog
Expand Down Expand Up @@ -378,3 +379,53 @@ def test_parse_request_start_line(self):
self.assertEqual(parsed_start_line.method, self.METHOD)
self.assertEqual(parsed_start_line.path, self.PATH)
self.assertEqual(parsed_start_line.version, self.VERSION)


class ParseCookieTest(unittest.TestCase):
# These tests copied from Django:
# https://github.com/django/django/pull/6277/commits/da810901ada1cae9fc1f018f879f11a7fb467b28
def test_python_cookies(self):
"""
Test cases copied from Python's Lib/test/test_http_cookies.py
"""
self.assertEqual(parse_cookie('chips=ahoy; vienna=finger'), {'chips': 'ahoy', 'vienna': 'finger'})
# Here parse_cookie() differs from Python's cookie parsing in that it
# treats all semicolons as delimiters, even within quotes.
self.assertEqual(
parse_cookie('keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"'),
{'keebler': '"E=mc2', 'L': '\\"Loves\\"', 'fudge': '\\012', '': '"'}
)
# Illegal cookies that have an '=' char in an unquoted value.
self.assertEqual(parse_cookie('keebler=E=mc2'), {'keebler': 'E=mc2'})
# Cookies with ':' character in their name.
self.assertEqual(parse_cookie('key:term=value:term'), {'key:term': 'value:term'})
# Cookies with '[' and ']'.
self.assertEqual(parse_cookie('a=b; c=[; d=r; f=h'), {'a': 'b', 'c': '[', 'd': 'r', 'f': 'h'})

def test_cookie_edgecases(self):
# Cookies that RFC6265 allows.
self.assertEqual(parse_cookie('a=b; Domain=example.com'), {'a': 'b', 'Domain': 'example.com'})
# parse_cookie() has historically kept only the last cookie with the
# same name.
self.assertEqual(parse_cookie('a=b; h=i; a=c'), {'a': 'c', 'h': 'i'})

def test_invalid_cookies(self):
"""
Cookie strings that go against RFC6265 but browsers will send if set
via document.cookie.
"""
# Chunks without an equals sign appear as unnamed values per
# https://bugzilla.mozilla.org/show_bug.cgi?id=169091
self.assertIn('django_language', parse_cookie('abc=def; unnamed; django_language=en').keys())
# Even a double quote may be an unamed value.
self.assertEqual(parse_cookie('a=b; "; c=d'), {'a': 'b', '': '"', 'c': 'd'})
# Spaces in names and values, and an equals sign in values.
self.assertEqual(parse_cookie('a b c=d e = f; gh=i'), {'a b c': 'd e = f', 'gh': 'i'})
# More characters the spec forbids.
self.assertEqual(parse_cookie('a b,c<>@:/[]?{}=d " =e,f g'), {'a b,c<>@:/[]?{}': 'd " =e,f g'})
# Unicode characters. The spec only allows ASCII.
self.assertEqual(parse_cookie('saint=André Bessette'), {'saint': native_str('André Bessette')})
# Browsers don't send extra whitespace or semicolons in Cookie headers,
# but parse_cookie() should parse whitespace the same way
# document.cookie parses whitespace.
self.assertEqual(parse_cookie(' = b ; ; = ; c = ; '), {'': 'b', 'c': ''})
4 changes: 2 additions & 2 deletions tornado/test/web_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,8 @@ def test_cookie_special_char(self):

data = [('foo=a=b', 'a=b'),
('foo="a=b"', 'a=b'),
('foo="a;b"', 'a;b'),
# ('foo=a\\073b', 'a;b'), # even encoded, ";" is a delimiter
('foo="a;b"', '"a'), # even quoted, ";" is a delimiter
('foo=a\\073b', 'a\\073b'), # escapes only decoded in quotes
('foo="a\\073b"', 'a;b'),
('foo="a\\"b"', 'a"b'),
]
Expand Down

0 comments on commit c0f99ba

Please sign in to comment.