Merge pull request #1852 from bdarnell/parse-cookie
httputil: Rewrite cookie parsing
This commit is contained in:
commit
c0f99bac0a
|
@ -4,6 +4,7 @@ Release notes
|
|||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
releases/v4.4.2
|
||||
releases/v4.4.1
|
||||
releases/v4.4.0
|
||||
releases/v4.3.0
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
What's new in Tornado 4.4.2
|
||||
===========================
|
||||
|
||||
Oct 1, 2016
|
||||
------------
|
||||
|
||||
Security fixes
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
* A difference in cookie parsing between Tornado and web browsers
|
||||
(especially when combined with Google Analytics) could allow an
|
||||
attacker to set arbitrary cookies and bypass XSRF protection. The
|
||||
cookie parser has been rewritten to fix this attack.
|
||||
|
||||
Backwards-compatibility notes
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Cookies containing certain special characters (in particular semicolon
|
||||
and square brackets) are now parsed differently.
|
||||
* If the cookie header contains a combination of valid and invalid cookies,
|
||||
the valid ones will be returned (older versions of Tornado would reject the
|
||||
entire header for a single invalid cookie).
|
2
setup.py
2
setup.py
|
@ -103,7 +103,7 @@ http://api.mongodb.org/python/current/installation.html#osx
|
|||
|
||||
kwargs = {}
|
||||
|
||||
version = "4.4.1"
|
||||
version = "4.4.2"
|
||||
|
||||
with open('README.rst') as f:
|
||||
kwargs['long_description'] = f.read()
|
||||
|
|
|
@ -25,5 +25,5 @@ from __future__ import absolute_import, division, print_function, with_statement
|
|||
# is zero for an official release, positive for a development branch,
|
||||
# or negative for a release candidate or beta (after the base version
|
||||
# number has been incremented)
|
||||
version = "4.4.1"
|
||||
version_info = (4, 4, 1, 0)
|
||||
version = "4.4.2"
|
||||
version_info = (4, 4, 2, 0)
|
||||
|
|
|
@ -379,10 +379,18 @@ class HTTPServerRequest(object):
|
|||
self._cookies = Cookie.SimpleCookie()
|
||||
if "Cookie" in self.headers:
|
||||
try:
|
||||
self._cookies.load(
|
||||
native_str(self.headers["Cookie"]))
|
||||
parsed = parse_cookie(self.headers["Cookie"])
|
||||
except Exception:
|
||||
self._cookies = {}
|
||||
pass
|
||||
else:
|
||||
for k, v in parsed.items():
|
||||
try:
|
||||
self._cookies[k] = v
|
||||
except Exception:
|
||||
# SimpleCookie imposes some restrictions on keys;
|
||||
# parse_cookie does not. Discard any cookies
|
||||
# with disallowed keys.
|
||||
pass
|
||||
return self._cookies
|
||||
|
||||
def write(self, chunk, callback=None):
|
||||
|
@ -909,3 +917,82 @@ def split_host_and_port(netloc):
|
|||
host = netloc
|
||||
port = None
|
||||
return (host, port)
|
||||
|
||||
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
|
||||
_QuotePatt = re.compile(r"[\\].")
|
||||
_nulljoin = ''.join
|
||||
|
||||
def _unquote_cookie(str):
|
||||
"""Handle double quotes and escaping in cookie values.
|
||||
|
||||
This method is copied verbatim from the Python 3.5 standard
|
||||
library (http.cookies._unquote) so we don't have to depend on
|
||||
non-public interfaces.
|
||||
"""
|
||||
# If there aren't any doublequotes,
|
||||
# then there can't be any special characters. See RFC 2109.
|
||||
if str is None or len(str) < 2:
|
||||
return str
|
||||
if str[0] != '"' or str[-1] != '"':
|
||||
return str
|
||||
|
||||
# We have to assume that we must decode this string.
|
||||
# Down to work.
|
||||
|
||||
# Remove the "s
|
||||
str = str[1:-1]
|
||||
|
||||
# Check for special sequences. Examples:
|
||||
# \012 --> \n
|
||||
# \" --> "
|
||||
#
|
||||
i = 0
|
||||
n = len(str)
|
||||
res = []
|
||||
while 0 <= i < n:
|
||||
o_match = _OctalPatt.search(str, i)
|
||||
q_match = _QuotePatt.search(str, i)
|
||||
if not o_match and not q_match: # Neither matched
|
||||
res.append(str[i:])
|
||||
break
|
||||
# else:
|
||||
j = k = -1
|
||||
if o_match:
|
||||
j = o_match.start(0)
|
||||
if q_match:
|
||||
k = q_match.start(0)
|
||||
if q_match and (not o_match or k < j): # QuotePatt matched
|
||||
res.append(str[i:k])
|
||||
res.append(str[k+1])
|
||||
i = k + 2
|
||||
else: # OctalPatt matched
|
||||
res.append(str[i:j])
|
||||
res.append(chr(int(str[j+1:j+4], 8)))
|
||||
i = j + 4
|
||||
return _nulljoin(res)
|
||||
|
||||
|
||||
def parse_cookie(cookie):
|
||||
"""Parse a ``Cookie`` HTTP header into a dict of name/value pairs.
|
||||
|
||||
This function attempts to mimic browser cookie parsing behavior;
|
||||
it specifically does not follow any of the cookie-related RFCs
|
||||
(because browsers don't either).
|
||||
|
||||
The algorithm used is identical to that used by Django version 1.9.10.
|
||||
|
||||
.. versionadded:: 4.4.2
|
||||
"""
|
||||
cookiedict = {}
|
||||
for chunk in cookie.split(str(';')):
|
||||
if str('=') in chunk:
|
||||
key, val = chunk.split(str('='), 1)
|
||||
else:
|
||||
# Assume an empty name per
|
||||
# https://bugzilla.mozilla.org/show_bug.cgi?id=169091
|
||||
key, val = str(''), chunk
|
||||
key, val = key.strip(), val.strip()
|
||||
if key or val:
|
||||
# unquote using Python's algorithm.
|
||||
cookiedict[key] = _unquote_cookie(val)
|
||||
return cookiedict
|
||||
|
|
|
@ -14,9 +14,9 @@ loops.
|
|||
|
||||
.. note::
|
||||
|
||||
Tornado requires the `~asyncio.BaseEventLoop.add_reader` family of methods,
|
||||
so it is not compatible with the `~asyncio.ProactorEventLoop` on Windows.
|
||||
Use the `~asyncio.SelectorEventLoop` instead.
|
||||
Tornado requires the `~asyncio.AbstractEventLoop.add_reader` family of
|
||||
methods, so it is not compatible with the `~asyncio.ProactorEventLoop` on
|
||||
Windows. Use the `~asyncio.SelectorEventLoop` instead.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, print_function, with_statement
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
from __future__ import absolute_import, division, print_function, with_statement
|
||||
from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line
|
||||
from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line, parse_cookie
|
||||
from tornado.escape import utf8, native_str
|
||||
from tornado.log import gen_log
|
||||
from tornado.testing import ExpectLog
|
||||
|
@ -378,3 +379,53 @@ class ParseRequestStartLineTest(unittest.TestCase):
|
|||
self.assertEqual(parsed_start_line.method, self.METHOD)
|
||||
self.assertEqual(parsed_start_line.path, self.PATH)
|
||||
self.assertEqual(parsed_start_line.version, self.VERSION)
|
||||
|
||||
|
||||
class ParseCookieTest(unittest.TestCase):
|
||||
# These tests copied from Django:
|
||||
# https://github.com/django/django/pull/6277/commits/da810901ada1cae9fc1f018f879f11a7fb467b28
|
||||
def test_python_cookies(self):
|
||||
"""
|
||||
Test cases copied from Python's Lib/test/test_http_cookies.py
|
||||
"""
|
||||
self.assertEqual(parse_cookie('chips=ahoy; vienna=finger'), {'chips': 'ahoy', 'vienna': 'finger'})
|
||||
# Here parse_cookie() differs from Python's cookie parsing in that it
|
||||
# treats all semicolons as delimiters, even within quotes.
|
||||
self.assertEqual(
|
||||
parse_cookie('keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"'),
|
||||
{'keebler': '"E=mc2', 'L': '\\"Loves\\"', 'fudge': '\\012', '': '"'}
|
||||
)
|
||||
# Illegal cookies that have an '=' char in an unquoted value.
|
||||
self.assertEqual(parse_cookie('keebler=E=mc2'), {'keebler': 'E=mc2'})
|
||||
# Cookies with ':' character in their name.
|
||||
self.assertEqual(parse_cookie('key:term=value:term'), {'key:term': 'value:term'})
|
||||
# Cookies with '[' and ']'.
|
||||
self.assertEqual(parse_cookie('a=b; c=[; d=r; f=h'), {'a': 'b', 'c': '[', 'd': 'r', 'f': 'h'})
|
||||
|
||||
def test_cookie_edgecases(self):
|
||||
# Cookies that RFC6265 allows.
|
||||
self.assertEqual(parse_cookie('a=b; Domain=example.com'), {'a': 'b', 'Domain': 'example.com'})
|
||||
# parse_cookie() has historically kept only the last cookie with the
|
||||
# same name.
|
||||
self.assertEqual(parse_cookie('a=b; h=i; a=c'), {'a': 'c', 'h': 'i'})
|
||||
|
||||
def test_invalid_cookies(self):
|
||||
"""
|
||||
Cookie strings that go against RFC6265 but browsers will send if set
|
||||
via document.cookie.
|
||||
"""
|
||||
# Chunks without an equals sign appear as unnamed values per
|
||||
# https://bugzilla.mozilla.org/show_bug.cgi?id=169091
|
||||
self.assertIn('django_language', parse_cookie('abc=def; unnamed; django_language=en').keys())
|
||||
# Even a double quote may be an unamed value.
|
||||
self.assertEqual(parse_cookie('a=b; "; c=d'), {'a': 'b', '': '"', 'c': 'd'})
|
||||
# Spaces in names and values, and an equals sign in values.
|
||||
self.assertEqual(parse_cookie('a b c=d e = f; gh=i'), {'a b c': 'd e = f', 'gh': 'i'})
|
||||
# More characters the spec forbids.
|
||||
self.assertEqual(parse_cookie('a b,c<>@:/[]?{}=d " =e,f g'), {'a b,c<>@:/[]?{}': 'd " =e,f g'})
|
||||
# Unicode characters. The spec only allows ASCII.
|
||||
self.assertEqual(parse_cookie('saint=André Bessette'), {'saint': native_str('André Bessette')})
|
||||
# Browsers don't send extra whitespace or semicolons in Cookie headers,
|
||||
# but parse_cookie() should parse whitespace the same way
|
||||
# document.cookie parses whitespace.
|
||||
self.assertEqual(parse_cookie(' = b ; ; = ; c = ; '), {'': 'b', 'c': ''})
|
||||
|
|
|
@ -279,8 +279,8 @@ class CookieTest(WebTestCase):
|
|||
|
||||
data = [('foo=a=b', 'a=b'),
|
||||
('foo="a=b"', 'a=b'),
|
||||
('foo="a;b"', 'a;b'),
|
||||
# ('foo=a\\073b', 'a;b'), # even encoded, ";" is a delimiter
|
||||
('foo="a;b"', '"a'), # even quoted, ";" is a delimiter
|
||||
('foo=a\\073b', 'a\\073b'), # escapes only decoded in quotes
|
||||
('foo="a\\073b"', 'a;b'),
|
||||
('foo="a\\"b"', 'a"b'),
|
||||
]
|
||||
|
|
Loading…
Reference in New Issue