Merge pull request #1852 from bdarnell/parse-cookie

httputil: Rewrite cookie parsing
This commit is contained in:
A. Jesse Jiryu Davis 2016-09-30 18:38:48 -04:00 committed by GitHub
commit c0f99bac0a
8 changed files with 173 additions and 12 deletions

View File

@ -4,6 +4,7 @@ Release notes
.. toctree::
:maxdepth: 2
releases/v4.4.2
releases/v4.4.1
releases/v4.4.0
releases/v4.3.0

22
docs/releases/v4.4.2.rst Normal file
View File

@ -0,0 +1,22 @@
What's new in Tornado 4.4.2
===========================
Oct 1, 2016
------------
Security fixes
~~~~~~~~~~~~~~
* A difference in cookie parsing between Tornado and web browsers
(especially when combined with Google Analytics) could allow an
attacker to set arbitrary cookies and bypass XSRF protection. The
cookie parser has been rewritten to fix this attack.
Backwards-compatibility notes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Cookies containing certain special characters (in particular semicolon
and square brackets) are now parsed differently.
* If the cookie header contains a combination of valid and invalid cookies,
the valid ones will be returned (older versions of Tornado would reject the
entire header for a single invalid cookie).

View File

@ -103,7 +103,7 @@ http://api.mongodb.org/python/current/installation.html#osx
kwargs = {}
version = "4.4.1"
version = "4.4.2"
with open('README.rst') as f:
kwargs['long_description'] = f.read()

View File

@ -25,5 +25,5 @@ from __future__ import absolute_import, division, print_function, with_statement
# is zero for an official release, positive for a development branch,
# or negative for a release candidate or beta (after the base version
# number has been incremented)
version = "4.4.1"
version_info = (4, 4, 1, 0)
version = "4.4.2"
version_info = (4, 4, 2, 0)

View File

@ -379,10 +379,18 @@ class HTTPServerRequest(object):
self._cookies = Cookie.SimpleCookie()
if "Cookie" in self.headers:
try:
self._cookies.load(
native_str(self.headers["Cookie"]))
parsed = parse_cookie(self.headers["Cookie"])
except Exception:
self._cookies = {}
pass
else:
for k, v in parsed.items():
try:
self._cookies[k] = v
except Exception:
# SimpleCookie imposes some restrictions on keys;
# parse_cookie does not. Discard any cookies
# with disallowed keys.
pass
return self._cookies
def write(self, chunk, callback=None):
@ -909,3 +917,82 @@ def split_host_and_port(netloc):
host = netloc
port = None
return (host, port)
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")
_nulljoin = ''.join
def _unquote_cookie(str):
"""Handle double quotes and escaping in cookie values.
This method is copied verbatim from the Python 3.5 standard
library (http.cookies._unquote) so we don't have to depend on
non-public interfaces.
"""
# If there aren't any doublequotes,
# then there can't be any special characters. See RFC 2109.
if str is None or len(str) < 2:
return str
if str[0] != '"' or str[-1] != '"':
return str
# We have to assume that we must decode this string.
# Down to work.
# Remove the "s
str = str[1:-1]
# Check for special sequences. Examples:
# \012 --> \n
# \" --> "
#
i = 0
n = len(str)
res = []
while 0 <= i < n:
o_match = _OctalPatt.search(str, i)
q_match = _QuotePatt.search(str, i)
if not o_match and not q_match: # Neither matched
res.append(str[i:])
break
# else:
j = k = -1
if o_match:
j = o_match.start(0)
if q_match:
k = q_match.start(0)
if q_match and (not o_match or k < j): # QuotePatt matched
res.append(str[i:k])
res.append(str[k+1])
i = k + 2
else: # OctalPatt matched
res.append(str[i:j])
res.append(chr(int(str[j+1:j+4], 8)))
i = j + 4
return _nulljoin(res)
def parse_cookie(cookie):
"""Parse a ``Cookie`` HTTP header into a dict of name/value pairs.
This function attempts to mimic browser cookie parsing behavior;
it specifically does not follow any of the cookie-related RFCs
(because browsers don't either).
The algorithm used is identical to that used by Django version 1.9.10.
.. versionadded:: 4.4.2
"""
cookiedict = {}
for chunk in cookie.split(str(';')):
if str('=') in chunk:
key, val = chunk.split(str('='), 1)
else:
# Assume an empty name per
# https://bugzilla.mozilla.org/show_bug.cgi?id=169091
key, val = str(''), chunk
key, val = key.strip(), val.strip()
if key or val:
# unquote using Python's algorithm.
cookiedict[key] = _unquote_cookie(val)
return cookiedict

View File

@ -14,9 +14,9 @@ loops.
.. note::
Tornado requires the `~asyncio.BaseEventLoop.add_reader` family of methods,
so it is not compatible with the `~asyncio.ProactorEventLoop` on Windows.
Use the `~asyncio.SelectorEventLoop` instead.
Tornado requires the `~asyncio.AbstractEventLoop.add_reader` family of
methods, so it is not compatible with the `~asyncio.ProactorEventLoop` on
Windows. Use the `~asyncio.SelectorEventLoop` instead.
"""
from __future__ import absolute_import, division, print_function, with_statement

View File

@ -1,8 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, with_statement
from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line
from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line, parse_cookie
from tornado.escape import utf8, native_str
from tornado.log import gen_log
from tornado.testing import ExpectLog
@ -378,3 +379,53 @@ class ParseRequestStartLineTest(unittest.TestCase):
self.assertEqual(parsed_start_line.method, self.METHOD)
self.assertEqual(parsed_start_line.path, self.PATH)
self.assertEqual(parsed_start_line.version, self.VERSION)
class ParseCookieTest(unittest.TestCase):
# These tests copied from Django:
# https://github.com/django/django/pull/6277/commits/da810901ada1cae9fc1f018f879f11a7fb467b28
def test_python_cookies(self):
"""
Test cases copied from Python's Lib/test/test_http_cookies.py
"""
self.assertEqual(parse_cookie('chips=ahoy; vienna=finger'), {'chips': 'ahoy', 'vienna': 'finger'})
# Here parse_cookie() differs from Python's cookie parsing in that it
# treats all semicolons as delimiters, even within quotes.
self.assertEqual(
parse_cookie('keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"'),
{'keebler': '"E=mc2', 'L': '\\"Loves\\"', 'fudge': '\\012', '': '"'}
)
# Illegal cookies that have an '=' char in an unquoted value.
self.assertEqual(parse_cookie('keebler=E=mc2'), {'keebler': 'E=mc2'})
# Cookies with ':' character in their name.
self.assertEqual(parse_cookie('key:term=value:term'), {'key:term': 'value:term'})
# Cookies with '[' and ']'.
self.assertEqual(parse_cookie('a=b; c=[; d=r; f=h'), {'a': 'b', 'c': '[', 'd': 'r', 'f': 'h'})
def test_cookie_edgecases(self):
# Cookies that RFC6265 allows.
self.assertEqual(parse_cookie('a=b; Domain=example.com'), {'a': 'b', 'Domain': 'example.com'})
# parse_cookie() has historically kept only the last cookie with the
# same name.
self.assertEqual(parse_cookie('a=b; h=i; a=c'), {'a': 'c', 'h': 'i'})
def test_invalid_cookies(self):
"""
Cookie strings that go against RFC6265 but browsers will send if set
via document.cookie.
"""
# Chunks without an equals sign appear as unnamed values per
# https://bugzilla.mozilla.org/show_bug.cgi?id=169091
self.assertIn('django_language', parse_cookie('abc=def; unnamed; django_language=en').keys())
# Even a double quote may be an unamed value.
self.assertEqual(parse_cookie('a=b; "; c=d'), {'a': 'b', '': '"', 'c': 'd'})
# Spaces in names and values, and an equals sign in values.
self.assertEqual(parse_cookie('a b c=d e = f; gh=i'), {'a b c': 'd e = f', 'gh': 'i'})
# More characters the spec forbids.
self.assertEqual(parse_cookie('a b,c<>@:/[]?{}=d " =e,f g'), {'a b,c<>@:/[]?{}': 'd " =e,f g'})
# Unicode characters. The spec only allows ASCII.
self.assertEqual(parse_cookie('saint=André Bessette'), {'saint': native_str('André Bessette')})
# Browsers don't send extra whitespace or semicolons in Cookie headers,
# but parse_cookie() should parse whitespace the same way
# document.cookie parses whitespace.
self.assertEqual(parse_cookie(' = b ; ; = ; c = ; '), {'': 'b', 'c': ''})

View File

@ -279,8 +279,8 @@ class CookieTest(WebTestCase):
data = [('foo=a=b', 'a=b'),
('foo="a=b"', 'a=b'),
('foo="a;b"', 'a;b'),
# ('foo=a\\073b', 'a;b'), # even encoded, ";" is a delimiter
('foo="a;b"', '"a'), # even quoted, ";" is a delimiter
('foo=a\\073b', 'a\\073b'), # escapes only decoded in quotes
('foo="a\\073b"', 'a;b'),
('foo="a\\"b"', 'a"b'),
]