From 536c7acd13426d42dc863ae8b50e6c3a4cb2e858 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 1 Jul 2016 14:10:48 -0700 Subject: [PATCH] py3++ --- mitmproxy/filt.py | 47 ++++++++++++++++++------------- mitmproxy/protocol/http_replay.py | 2 +- netlib/http/headers.py | 6 ++-- netlib/http/message.py | 8 ++++-- netlib/http/request.py | 10 +++++-- netlib/strutils.py | 10 ------- test/mitmproxy/test_server.py | 10 +++---- test/netlib/test_strutils.py | 4 --- tox.ini | 7 +++-- 9 files changed, 56 insertions(+), 48 deletions(-) diff --git a/mitmproxy/filt.py b/mitmproxy/filt.py index d98e37493..b1b72aa7c 100644 --- a/mitmproxy/filt.py +++ b/mitmproxy/filt.py @@ -35,6 +35,7 @@ from __future__ import absolute_import, print_function, division import re import sys +from netlib import strutils import pyparsing as pp @@ -78,38 +79,43 @@ class FResp(_Action): help = "Match response" def __call__(self, f): - return True if f.response else False + return bool(f.response) class _Rex(_Action): flags = 0 + is_binary = True def __init__(self, expr): self.expr = expr + if self.is_binary: + expr = strutils.escaped_str_to_bytes(expr) try: - self.re = re.compile(self.expr, self.flags) + self.re = re.compile(expr, self.flags) except: raise ValueError("Cannot compile expression.") -def _check_content_type(expr, o): - val = o.headers.get("content-type") - if val and re.search(expr, val): - return True - return False +def _check_content_type(rex, message): + return any( + name.lower() == b"content-type" and + rex.search(value) + for name, value in message.headers.fields + ) class FAsset(_Action): code = "a" help = "Match asset in response: CSS, Javascript, Flash, images." ASSET_TYPES = [ - "text/javascript", - "application/x-javascript", - "application/javascript", - "text/css", - "image/.*", - "application/x-shockwave-flash" + b"text/javascript", + b"application/x-javascript", + b"application/javascript", + b"text/css", + b"image/.*", + b"application/x-shockwave-flash" ] + ASSET_TYPES = [re.compile(x) for x in ASSET_TYPES] def __call__(self, f): if f.response: @@ -124,9 +130,9 @@ class FContentType(_Rex): help = "Content-type header" def __call__(self, f): - if _check_content_type(self.expr, f.request): + if _check_content_type(self.re, f.request): return True - elif f.response and _check_content_type(self.expr, f.response): + elif f.response and _check_content_type(self.re, f.response): return True return False @@ -136,7 +142,7 @@ class FRequestContentType(_Rex): help = "Request Content-Type header" def __call__(self, f): - return _check_content_type(self.expr, f.request) + return _check_content_type(self.re, f.request) class FResponseContentType(_Rex): @@ -145,7 +151,7 @@ class FResponseContentType(_Rex): def __call__(self, f): if f.response: - return _check_content_type(self.expr, f.response) + return _check_content_type(self.re, f.response) return False @@ -222,7 +228,7 @@ class FMethod(_Rex): flags = re.IGNORECASE def __call__(self, f): - return bool(self.re.search(f.request.method)) + return bool(self.re.search(f.request.data.method)) class FDomain(_Rex): @@ -231,12 +237,13 @@ class FDomain(_Rex): flags = re.IGNORECASE def __call__(self, f): - return bool(self.re.search(f.request.host)) + return bool(self.re.search(f.request.data.host)) class FUrl(_Rex): code = "u" help = "URL" + is_binary = False # FUrl is special, because it can be "naked". @classmethod @@ -252,6 +259,7 @@ class FUrl(_Rex): class FSrc(_Rex): code = "src" help = "Match source address" + is_binary = False def __call__(self, f): return f.client_conn.address and self.re.search(repr(f.client_conn.address)) @@ -260,6 +268,7 @@ class FSrc(_Rex): class FDst(_Rex): code = "dst" help = "Match destination address" + is_binary = False def __call__(self, f): return f.server_conn.address and self.re.search(repr(f.server_conn.address)) diff --git a/mitmproxy/protocol/http_replay.py b/mitmproxy/protocol/http_replay.py index e804eba9b..986de845d 100644 --- a/mitmproxy/protocol/http_replay.py +++ b/mitmproxy/protocol/http_replay.py @@ -49,7 +49,7 @@ class RequestReplayThread(basethread.BaseThread): server = models.ServerConnection(server_address, (self.config.host, 0)) server.connect() if r.scheme == "https": - connect_request = models.make_connect_request((r.host, r.port)) + connect_request = models.make_connect_request((r.data.host, r.port)) server.wfile.write(http1.assemble_request(connect_request)) server.wfile.flush() resp = http1.read_response( diff --git a/netlib/http/headers.py b/netlib/http/headers.py index 14888ea96..f052a53b9 100644 --- a/netlib/http/headers.py +++ b/netlib/http/headers.py @@ -156,8 +156,10 @@ class Headers(multidict.MultiDict): Returns: The number of replacements made. """ - pattern = _always_bytes(pattern) - repl = _always_bytes(repl) + if isinstance(pattern, six.text_type): + pattern = strutils.escaped_str_to_bytes(pattern) + if isinstance(repl, six.text_type): + repl = strutils.escaped_str_to_bytes(repl) pattern = re.compile(pattern, flags) replacements = 0 diff --git a/netlib/http/message.py b/netlib/http/message.py index b633b6710..0583c246f 100644 --- a/netlib/http/message.py +++ b/netlib/http/message.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, print_function, division +import re import warnings import six @@ -196,11 +197,14 @@ class Message(basetypes.Serializable): Returns: The number of replacements made. """ - # TODO: Proper distinction between text and bytes. + if isinstance(pattern, six.text_type): + pattern = strutils.escaped_str_to_bytes(pattern) + if isinstance(repl, six.text_type): + repl = strutils.escaped_str_to_bytes(repl) replacements = 0 if self.content: with decoded(self): - self.content, replacements = strutils.safe_subn( + self.content, replacements = re.subn( pattern, repl, self.content, flags=flags ) replacements += self.headers.replace(pattern, repl, flags) diff --git a/netlib/http/request.py b/netlib/http/request.py index b64ccc51a..ff057b79c 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -65,10 +65,14 @@ class Request(message.Message): Returns: The number of replacements made. """ - # TODO: Proper distinction between text and bytes. + if isinstance(pattern, six.text_type): + pattern = strutils.escaped_str_to_bytes(pattern) + if isinstance(repl, six.text_type): + repl = strutils.escaped_str_to_bytes(repl) + c = super(Request, self).replace(pattern, repl, flags) - self.path, pc = strutils.safe_subn( - pattern, repl, self.path, flags=flags + self.path, pc = re.subn( + pattern, repl, self.data.path, flags=flags ) c += pc return c diff --git a/netlib/strutils.py b/netlib/strutils.py index 5ad41c7e9..ca6eaa42e 100644 --- a/netlib/strutils.py +++ b/netlib/strutils.py @@ -1,4 +1,3 @@ -import re import unicodedata import codecs @@ -56,15 +55,6 @@ def clean_bin(s, keep_spacing=True): ) -def safe_subn(pattern, repl, target, *args, **kwargs): - """ - There are Unicode conversion problems with re.subn. We try to smooth - that over by casting the pattern and replacement to strings. We really - need a better solution that is aware of the actual content ecoding. - """ - return re.subn(str(pattern), str(repl), target, *args, **kwargs) - - def bytes_to_escaped_str(data): """ Take bytes and return a safe string that can be displayed to the user. diff --git a/test/mitmproxy/test_server.py b/test/mitmproxy/test_server.py index 5415b660b..c5caa21fa 100644 --- a/test/mitmproxy/test_server.py +++ b/test/mitmproxy/test_server.py @@ -518,7 +518,7 @@ class TestTransparent(tservers.TransparentProxyTest, CommonMixin, TcpMixin): d = self.pathod('200:b"foo"') self._tcpproxy_off() - assert d.content == "bar" + assert d.content == b"bar" self.master.unload_scripts() @@ -641,7 +641,7 @@ class MasterRedirectRequest(tservers.TestMaster): @controller.handler def response(self, f): - f.response.content = str(f.client_conn.address.port) + f.response.content = bytes(f.client_conn.address.port) f.response.headers["server-conn-id"] = str(f.server_conn.source_address.port) super(MasterRedirectRequest, self).response(f) @@ -723,7 +723,7 @@ class TestStreamRequest(tservers.HTTPProxyTest): def test_stream_chunked(self): connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM) connection.connect(("127.0.0.1", self.proxy.port)) - fconn = connection.makefile() + fconn = connection.makefile("rb") spec = '200:h"Transfer-Encoding"="chunked":r:b"4\\r\\nthis\\r\\n11\\r\\nisatest__reachhex\\r\\n0\\r\\n\\r\\n"' connection.send( b"GET %s/p/%s HTTP/1.1\r\n" % @@ -736,7 +736,7 @@ class TestStreamRequest(tservers.HTTPProxyTest): assert resp.status_code == 200 chunks = list(http1.read_body(fconn, None)) - assert chunks == ["this", "isatest__reachhex"] + assert chunks == [b"this", b"isatest__reachhex"] connection.close() @@ -848,7 +848,7 @@ class TestUpstreamProxy(tservers.HTTPUpstreamProxyTest, CommonMixin, AppMixin): p = self.pathoc() req = p.request("get:'%s/p/418:b\"foo\"'" % self.server.urlbase) - assert req.content == "ORLY" + assert req.content == b"ORLY" assert req.status_code == 418 diff --git a/test/netlib/test_strutils.py b/test/netlib/test_strutils.py index 84a0dded6..a50fc40a3 100644 --- a/test/netlib/test_strutils.py +++ b/test/netlib/test_strutils.py @@ -29,10 +29,6 @@ def test_clean_bin(): assert strutils.clean_bin(u"\u2605") == u"\u2605" -def test_safe_subn(): - assert strutils.safe_subn("foo", u"bar", "\xc2foo") - - def test_bytes_to_escaped_str(): assert strutils.bytes_to_escaped_str(b"foo") == "foo" assert strutils.bytes_to_escaped_str(b"\b") == r"\x08" diff --git a/tox.ini b/tox.ini index 8469ccaeb..899fffc66 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ envlist = py27, py35, docs, lint [testenv] -usedevelop=True +usedevelop = True deps = {env:CI_DEPS:} -rrequirements.txt @@ -16,7 +16,7 @@ commands = [testenv:py35] setenv = - TESTS = test/netlib test/pathod/ test/mitmproxy/script test/mitmproxy/test_contentview.py test/mitmproxy/test_custom_contentview.py test/mitmproxy/test_app.py test/mitmproxy/test_controller.py test/mitmproxy/test_fuzzing.py test/mitmproxy/test_script.py test/mitmproxy/test_web_app.py test/mitmproxy/test_utils.py test/mitmproxy/test_stateobject.py test/mitmproxy/test_cmdline.py test/mitmproxy/test_contrib_tnetstring.py test/mitmproxy/test_proxy.py test/mitmproxy/test_protocol_http1.py test/mitmproxy/test_platform_pf.py test/mitmproxy/test_server.py + TESTS = test/netlib test/pathod/ test/mitmproxy/script test/mitmproxy/test_contentview.py test/mitmproxy/test_custom_contentview.py test/mitmproxy/test_app.py test/mitmproxy/test_controller.py test/mitmproxy/test_fuzzing.py test/mitmproxy/test_script.py test/mitmproxy/test_web_app.py test/mitmproxy/test_utils.py test/mitmproxy/test_stateobject.py test/mitmproxy/test_cmdline.py test/mitmproxy/test_contrib_tnetstring.py test/mitmproxy/test_proxy.py test/mitmproxy/test_protocol_http1.py test/mitmproxy/test_platform_pf.py test/mitmproxy/test_server.py test/mitmproxy/test_filt.py HOME = {envtmpdir} [testenv:docs] @@ -25,4 +25,7 @@ commands = sphinx-build -W -b html -d {envtmpdir}/doctrees . {envtmpdir}/html [testenv:lint] deps = flake8>=2.6.2, <3 +usedevelop = False +skip_install = True +skipsdist = True commands = flake8 --jobs 8 --count mitmproxy netlib pathod examples test