From 50dd1f7dd68ed2f526adfebd5caa342e26da4517 Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Sun, 17 Apr 2016 02:17:03 +0000 Subject: [PATCH] Issue #26717: Stop encoding Latin-1-ized WSGI paths with UTF-8 Patch by Anthony Sottile. --- Lib/test/test_wsgiref.py | 24 ++++++++++++++++++++++++ Lib/wsgiref/simple_server.py | 2 +- Misc/ACKS | 1 + Misc/NEWS | 3 +++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_wsgiref.py b/Lib/test/test_wsgiref.py index 3f800eff2d6..b7d02e868c9 100644 --- a/Lib/test/test_wsgiref.py +++ b/Lib/test/test_wsgiref.py @@ -1,3 +1,4 @@ +from unittest import mock from unittest import TestCase from wsgiref.util import setup_testing_defaults from wsgiref.headers import Headers @@ -221,6 +222,29 @@ def app(e, s): b"data", out) + def test_cp1252_url(self): + def app(e, s): + s("200 OK", [ + ("Content-Type", "text/plain"), + ("Date", "Wed, 24 Dec 2008 13:29:32 GMT"), + ]) + # PEP3333 says environ variables are decoded as latin1. + # Encode as latin1 to get original bytes + return [e["PATH_INFO"].encode("latin1")] + + out, err = run_amock( + validator(app), data=b"GET /\x80%80 HTTP/1.0") + self.assertEqual( + [ + b"HTTP/1.0 200 OK", + mock.ANY, + b"Content-Type: text/plain", + b"Date: Wed, 24 Dec 2008 13:29:32 GMT", + b"", + b"/\x80\x80", + ], + out.splitlines()) + class UtilityTests(TestCase): diff --git a/Lib/wsgiref/simple_server.py b/Lib/wsgiref/simple_server.py index 378b316bbd4..e396788cde1 100644 --- a/Lib/wsgiref/simple_server.py +++ b/Lib/wsgiref/simple_server.py @@ -82,7 +82,7 @@ def get_environ(self): else: path,query = self.path,'' - env['PATH_INFO'] = urllib.parse.unquote_to_bytes(path).decode('iso-8859-1') + env['PATH_INFO'] = urllib.parse.unquote(path, 'iso-8859-1') env['QUERY_STRING'] = query host = self.address_string() diff --git a/Misc/ACKS b/Misc/ACKS index 01b42f4f3e8..e293ddc9c6d 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1376,6 +1376,7 @@ Nir Soffer Paul Sokolovsky Evgeny Sologubov Cody Somerville +Anthony Sottile Edoardo Spadolini Geoffrey Spear Clay Spence diff --git a/Misc/NEWS b/Misc/NEWS index ec6626ffd01..94d8255eabd 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -107,6 +107,9 @@ Core and Builtins Library ------- +- Issue #26717: Stop encoding Latin-1-ized WSGI paths with UTF-8. Patch by + Anthony Sottile. + - Issue #26735: Fix :func:`os.urandom` on Solaris 11.3 and newer when reading more than 1,024 bytes: call ``getrandom()`` multiple times with a limit of 1024 bytes per call.