From a6b3551934e2b8768177d6831ca08f97f5bdae44 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Mon, 4 Jul 2016 13:58:09 -0700 Subject: [PATCH] raise ValueError if content-encoding is invalid --- mitmproxy/console/common.py | 40 +++++++++++++++++++++--------- mitmproxy/console/flowview.py | 38 ++++++++++++++++++++--------- mitmproxy/dump.py | 13 +++++++--- mitmproxy/filt.py | 36 ++++++++++++++++++--------- mitmproxy/flow/export.py | 18 +++++++++----- netlib/http/message.py | 42 ++++++++++++++++++++++---------- netlib/http/request.py | 12 ++++++--- netlib/wsgi.py | 6 ++++- test/netlib/http/test_message.py | 18 ++++++++------ 9 files changed, 154 insertions(+), 69 deletions(-) diff --git a/mitmproxy/console/common.py b/mitmproxy/console/common.py index b4369c0c8..ef220b4cc 100644 --- a/mitmproxy/console/common.py +++ b/mitmproxy/console/common.py @@ -256,24 +256,34 @@ def copy_flow_format_data(part, scope, flow): else: data = "" if scope in ("q", "a"): - if flow.request.content is None: + request = flow.request.copy() + try: + request.decode() + except ValueError: + pass + if request.raw_content is None: return None, "Request content is missing" if part == "h": - data += netlib.http.http1.assemble_request(flow.request) + data += netlib.http.http1.assemble_request(request) elif part == "c": - data += flow.request.content + data += request.raw_content else: raise ValueError("Unknown part: {}".format(part)) - if scope == "a" and flow.request.content and flow.response: + if scope == "a" and flow.request.raw_content and flow.response: # Add padding between request and response data += "\r\n" * 2 if scope in ("s", "a") and flow.response: - if flow.response.content is None: + response = flow.response.copy() + try: + response.decode() + except ValueError: + pass + if response.raw_content is None: return None, "Response content is missing" if part == "h": - data += netlib.http.http1.assemble_response(flow.response) + data += netlib.http.http1.assemble_response(response) 
elif part == "c": - data += flow.response.content + data += response.raw_content else: raise ValueError("Unknown part: {}".format(part)) return data, False @@ -361,8 +371,8 @@ def ask_save_body(part, master, state, flow): "q" (request), "s" (response) or None (ask user if necessary). """ - request_has_content = flow.request and flow.request.content - response_has_content = flow.response and flow.response.content + request_has_content = flow.request and flow.request.raw_content + response_has_content = flow.response and flow.response.raw_content if part is None: # We first need to determine whether we want to save the request or the @@ -383,14 +393,22 @@ def ask_save_body(part, master, state, flow): ask_save_body("q", master, state, flow) elif part == "q" and request_has_content: + try: + content = flow.request.content + except ValueError: + content = flow.request.raw_content ask_save_path( "Save request content", - flow.request.content + content ) elif part == "s" and response_has_content: + try: + content = flow.response.content + except ValueError: + content = flow.response.raw_content ask_save_path( "Save response content", - flow.response.content + content ) else: signals.status_message.send(message="No content to save.") diff --git a/mitmproxy/console/flowview.py b/mitmproxy/console/flowview.py index 208b0d447..c4bb6c408 100644 --- a/mitmproxy/console/flowview.py +++ b/mitmproxy/console/flowview.py @@ -199,26 +199,34 @@ class FlowView(tabs.Tabs): def _get_content_view(self, viewmode, message, max_lines, _): + try: + content = message.content + if content != message.raw_content: + enc = "[decoded {}]".format( + message.headers.get("content-encoding") + ) + else: + enc = None + except ValueError: + content = message.raw_content + enc = "[cannot decode]" try: query = None if isinstance(message, models.HTTPRequest): query = message.query description, lines = contentviews.get_content_view( - viewmode, message.content, headers=message.headers, query=query + 
viewmode, content, headers=message.headers, query=query ) except exceptions.ContentViewException: s = "Content viewer failed: \n" + traceback.format_exc() signals.add_event(s, "error") description, lines = contentviews.get_content_view( - contentviews.get("Raw"), message.content, headers=message.headers + contentviews.get("Raw"), content, headers=message.headers ) description = description.replace("Raw", "Couldn't parse: falling back to Raw") - if message.content != message.raw_content: - description = "[decoded {enc}] {desc}".format( - enc=message.headers.get("content-encoding"), - desc=description - ) + if enc: + description = " ".join([enc, description]) # Give hint that you have to tab for the response. if description == "No content" and isinstance(message, models.HTTPRequest): @@ -419,10 +427,14 @@ class FlowView(tabs.Tabs): # editing message bodies, this can cause problems. For now, I # just strip the newlines off the end of the body when we return # from an editor. - c = self.master.spawn_editor(message.content or b"") + try: + content = message.content + except ValueError: + content = message.raw_content + c = self.master.spawn_editor(content or b"") message.content = c.rstrip(b"\n") elif part == "f": - if not message.urlencoded_form and message.content: + if not message.urlencoded_form and message.raw_content: signals.status_prompt_onekey.send( prompt = "Existing body is not a URL-encoded form. Clear and edit?", keys = [ @@ -682,10 +694,14 @@ class FlowView(tabs.Tabs): ) key = None elif key == "v": - if conn.content: + if conn.raw_content: t = conn.headers.get("content-type") if "EDITOR" in os.environ or "PAGER" in os.environ: - self.master.spawn_external_viewer(conn.content, t) + try: + content = conn.content + except ValueError: + content = conn.raw_content + self.master.spawn_external_viewer(content, t) else: signals.status_message.send( message = "Error! Set $EDITOR or $PAGER." 
diff --git a/mitmproxy/dump.py b/mitmproxy/dump.py index ea242bba9..0a9b76a73 100644 --- a/mitmproxy/dump.py +++ b/mitmproxy/dump.py @@ -187,15 +187,20 @@ class DumpMaster(flow.FlowMaster): ) self.echo(headers, indent=4) if self.o.flow_detail >= 3: - if message.content is None: + try: + content = message.content + except ValueError: + content = message.raw_content + + if content is None: self.echo("(content missing)", indent=4) - elif message.content: + elif content: self.echo("") try: type, lines = contentviews.get_content_view( contentviews.get("Auto"), - message.content, + content, headers=getattr(message, "headers", None) ) except exceptions.ContentViewException: @@ -203,7 +208,7 @@ class DumpMaster(flow.FlowMaster): self.add_event(s, "debug") type, lines = contentviews.get_content_view( contentviews.get("Raw"), - message.content, + content, headers=getattr(message, "headers", None) ) diff --git a/mitmproxy/filt.py b/mitmproxy/filt.py index 95bae1ae4..e8687b9f3 100644 --- a/mitmproxy/filt.py +++ b/mitmproxy/filt.py @@ -193,12 +193,18 @@ class FBod(_Rex): help = "Body" def __call__(self, f): - if f.request and f.request.content: - if self.re.search(f.request.content): - return True - if f.response and f.response.content: - if self.re.search(f.response.content): - return True + if f.request and f.request.raw_content: + try: + if self.re.search(f.request.content): + return True + except ValueError: + pass + if f.response and f.response.raw_content: + try: + if self.re.search(f.response.content): + return True + except ValueError: + pass return False @@ -207,9 +213,12 @@ class FBodRequest(_Rex): help = "Request body" def __call__(self, f): - if f.request and f.request.content: - if self.re.search(f.request.content): - return True + if f.request and f.request.raw_content: + try: + if self.re.search(f.request.content): + return True + except ValueError: + pass class FBodResponse(_Rex): @@ -217,9 +226,12 @@ class FBodResponse(_Rex): help = "Response body" def 
__call__(self, f): - if f.response and f.response.content: - if self.re.search(f.response.content): - return True + if f.response and f.response.raw_content: + try: + if self.re.search(f.response.content): + return True + except ValueError: + pass class FMethod(_Rex): diff --git a/mitmproxy/flow/export.py b/mitmproxy/flow/export.py index f0ac02abc..9da18f22d 100644 --- a/mitmproxy/flow/export.py +++ b/mitmproxy/flow/export.py @@ -19,17 +19,23 @@ def dictstr(items, indent): def curl_command(flow): data = "curl " - for k, v in flow.request.headers.fields: + request = flow.request.copy() + try: + request.decode() + except ValueError: + pass + + for k, v in request.headers.fields: data += "-H '%s:%s' " % (k, v) - if flow.request.method != "GET": - data += "-X %s " % flow.request.method + if request.method != "GET": + data += "-X %s " % request.method - full_url = flow.request.scheme + "://" + flow.request.host + flow.request.path + full_url = request.scheme + "://" + request.host + request.path data += "'%s'" % full_url - if flow.request.content: - data += " --data-binary '%s'" % flow.request.content + if request.raw_content: + data += " --data-binary '%s'" % request.raw_content return data diff --git a/netlib/http/message.py b/netlib/http/message.py index ca3a4145a..86ff64d1e 100644 --- a/netlib/http/message.py +++ b/netlib/http/message.py @@ -124,6 +124,9 @@ class Message(basetypes.Serializable): """ The HTTP message body decoded with the content-encoding header (e.g. gzip) + Raises: + ValueError, when getting the content and the content-encoding is invalid. 
+ See also: :py:class:`raw_content`, :py:attr:`text` """ ce = self.headers.get("content-encoding") @@ -132,17 +135,21 @@ class Message(basetypes.Serializable): self._content_cache.encoding == ce ) if not cached: - try: - if not ce: - raise ValueError() + if ce: decoded = encoding.decode(self.raw_content, ce) - except ValueError: + else: decoded = self.raw_content self._content_cache = CachedDecode(self.raw_content, ce, decoded) return self._content_cache.decoded @content.setter def content(self, value): + if value is not None and not isinstance(value, bytes): + raise TypeError( + "Message content must be bytes, not {}. " + "Please use .text if you want to assign a str." + .format(type(value).__name__) + ) ce = self.headers.get("content-encoding") cached = ( self._content_cache.decoded == value and @@ -150,15 +157,15 @@ class Message(basetypes.Serializable): ) if not cached: try: - if not ce: - raise ValueError() - encoded = encoding.encode(value, ce) + if ce and value is not None: + encoded = encoding.encode(value, ce) + else: + encoded = value except ValueError: - # Do we have an unknown content-encoding? - # If so, we want to remove it. - if value and ce: - self.headers.pop("content-encoding", None) - ce = None + # So we have an invalid content-encoding? + # Let's remove it! + del self.headers["content-encoding"] + ce = None encoded = value self._content_cache = CachedDecode(encoded, ce, value) self.raw_content = self._content_cache.encoded @@ -262,6 +269,9 @@ class Message(basetypes.Serializable): Decodes body based on the current Content-Encoding header, then removes the header. If there is no Content-Encoding header, no action is taken. + + Raises: + ValueError, when the content-encoding is invalid. """ self.raw_content = self.content self.headers.pop("content-encoding", None) @@ -269,10 +279,16 @@ class Message(basetypes.Serializable): def encode(self, e): """ Encodes body with the encoding e, where e is "gzip", "deflate" or "identity". 
+ Any existing content-encodings are overwritten, + the content is not decoded beforehand. + + Raises: + ValueError, when the specified content-encoding is invalid. """ - self.decode() # remove the current encoding self.headers["content-encoding"] = e self.content = self.raw_content + if "content-encoding" not in self.headers: + raise ValueError("Invalid content encoding {}".format(repr(e))) def replace(self, pattern, repl, flags=0): """ diff --git a/netlib/http/request.py b/netlib/http/request.py index 4ce94549a..a8ec62388 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -347,7 +347,10 @@ class Request(message.Message): def _get_urlencoded_form(self): is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower() if is_valid_content_type: - return tuple(netlib.http.url.decode(self.content)) + try: + return tuple(netlib.http.url.decode(self.content)) + except ValueError: + pass return () def _set_urlencoded_form(self, value): @@ -356,7 +359,7 @@ class Request(message.Message): This will overwrite the existing content if there is one. 
""" self.headers["content-type"] = "application/x-www-form-urlencoded" - self.content = netlib.http.url.encode(value) + self.content = netlib.http.url.encode(value).encode() @urlencoded_form.setter def urlencoded_form(self, value): @@ -376,7 +379,10 @@ class Request(message.Message): def _get_multipart_form(self): is_valid_content_type = "multipart/form-data" in self.headers.get("content-type", "").lower() if is_valid_content_type: - return multipart.decode(self.headers, self.content) + try: + return multipart.decode(self.headers, self.content) + except ValueError: + pass return () def _set_multipart_form(self, value): diff --git a/netlib/wsgi.py b/netlib/wsgi.py index c66fddc25..2444f449f 100644 --- a/netlib/wsgi.py +++ b/netlib/wsgi.py @@ -60,10 +60,14 @@ class WSGIAdaptor(object): else: path_info = path query = '' + try: + content = flow.request.content + except ValueError: + content = flow.request.raw_content environ = { 'wsgi.version': (1, 0), 'wsgi.url_scheme': strutils.native(flow.request.scheme, "latin-1"), - 'wsgi.input': BytesIO(flow.request.content or b""), + 'wsgi.input': BytesIO(content or b""), 'wsgi.errors': errsoc, 'wsgi.multithread': True, 'wsgi.multiprocess': False, diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py index e1707a913..ed7d3da5e 100644 --- a/test/netlib/http/test_message.py +++ b/test/netlib/http/test_message.py @@ -5,7 +5,7 @@ import mock import six from netlib.tutils import tresp -from netlib import http +from netlib import http, tutils def _test_passthrough_attr(message, attr): @@ -92,9 +92,6 @@ class TestMessage(object): assert resp.data.content == b"bar" assert resp.headers["content-length"] == "0" - def test_content_basic(self): - _test_passthrough_attr(tresp(), "content") - def test_headers(self): _test_passthrough_attr(tresp(), "headers") @@ -149,18 +146,22 @@ class TestMessageContentEncoding(object): r = tresp() r.headers["content-encoding"] = "zopfli" r.raw_content = b"foo" - assert r.content 
== b"foo" + with tutils.raises(ValueError): + assert r.content assert r.headers["content-encoding"] def test_cannot_decode(self): r = tresp() r.encode("gzip") r.raw_content = b"foo" - assert r.content == b"foo" + with tutils.raises(ValueError): + assert r.content assert r.headers["content-encoding"] - r.decode() + + with tutils.raises(ValueError): + r.decode() assert r.raw_content == b"foo" - assert "content-encoding" not in r.headers + assert "content-encoding" in r.headers def test_cannot_encode(self): r = tresp() @@ -213,6 +214,7 @@ class TestMessageText(object): r.encode("identity") with mock.patch("netlib.encoding.encode") as e: + e.return_value = b"" r.text = u"ü" assert e.call_count == 0 r.text = u"ä"