From 9ea68ebd284ce13d765519a20dd7cfe998c0ae1c Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 14:34:09 +1200 Subject: [PATCH 01/14] Improve handling of pseudo-headers - The canonical source for :method, :scheme and :path are the .method, .scheme and .path attributes on the request object. - These pseudo-headers are stripped after reading the request, and re-inserted just before sending. - The :authority header remains, and should be handled analogously to the Host header in HTTP1 with respect to display and user interaction. --- mitmproxy/protocol/http2.py | 10 +++++++++- netlib/http/http2/connections.py | 14 ++++++++------ netlib/multidict.py | 8 ++++++++ test/netlib/http/http2/test_connections.py | 5 ++++- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/mitmproxy/protocol/http2.py b/mitmproxy/protocol/http2.py index 30763c664..bdc851283 100644 --- a/mitmproxy/protocol/http2.py +++ b/mitmproxy/protocol/http2.py @@ -306,6 +306,9 @@ class Http2SingleStreamLayer(_HttpTransmissionLayer, threading.Thread): method = self.request_headers.get(':method', 'GET') scheme = self.request_headers.get(':scheme', 'https') path = self.request_headers.get(':path', '/') + self.request_headers.clear(":method") + self.request_headers.clear(":scheme") + self.request_headers.clear(":path") host = None port = None @@ -362,10 +365,15 @@ class Http2SingleStreamLayer(_HttpTransmissionLayer, threading.Thread): self.server_stream_id = self.server_conn.h2.get_next_available_stream_id() self.server_to_client_stream_ids[self.server_stream_id] = self.client_stream_id + headers = message.headers.copy() + headers.insert(0, ":path", message.path) + headers.insert(0, ":method", message.method) + headers.insert(0, ":scheme", message.scheme) + self.server_conn.h2.safe_send_headers( self.is_zombie, self.server_stream_id, - message.headers + headers ) self.server_conn.h2.safe_send_body( self.is_zombie, diff --git a/netlib/http/http2/connections.py 
b/netlib/http/http2/connections.py index b988d6eff..6b91f2ff6 100644 --- a/netlib/http/http2/connections.py +++ b/netlib/http/http2/connections.py @@ -98,6 +98,11 @@ class HTTP2Protocol(object): method = headers.get(':method', 'GET') scheme = headers.get(':scheme', 'https') path = headers.get(':path', '/') + + headers.clear(":method") + headers.clear(":scheme") + headers.clear(":path") + host = None port = None @@ -202,12 +207,9 @@ class HTTP2Protocol(object): if ':authority' not in headers: headers.insert(0, b':authority', authority.encode('ascii')) - if ':scheme' not in headers: - headers.insert(0, b':scheme', request.scheme.encode('ascii')) - if ':path' not in headers: - headers.insert(0, b':path', request.path.encode('ascii')) - if ':method' not in headers: - headers.insert(0, b':method', request.method.encode('ascii')) + headers.insert(0, b':scheme', request.scheme.encode('ascii')) + headers.insert(0, b':path', request.path.encode('ascii')) + headers.insert(0, b':method', request.method.encode('ascii')) if hasattr(request, 'stream_id'): stream_id = request.stream_id diff --git a/netlib/multidict.py b/netlib/multidict.py index 98fde7e33..f8876cbd5 100644 --- a/netlib/multidict.py +++ b/netlib/multidict.py @@ -171,6 +171,14 @@ class _MultiDict(MutableMapping, Serializable): else: return super(_MultiDict, self).items() + def clear(self, key): + """ + Removes all items with the specified key, and does not raise an + exception if the key does not exist. + """ + if key in self: + del self[key] + def to_dict(self): """ Get the MultiDict as a plain Python dict. 
diff --git a/test/netlib/http/http2/test_connections.py b/test/netlib/http/http2/test_connections.py index ff462ba64..69667d1cb 100644 --- a/test/netlib/http/http2/test_connections.py +++ b/test/netlib/http/http2/test_connections.py @@ -312,7 +312,10 @@ class TestReadRequest(tservers.ServerTestBase): req = protocol.read_request(NotImplemented) assert req.stream_id - assert req.headers.fields == ((b':method', b'GET'), (b':path', b'/'), (b':scheme', b'https')) + assert req.headers.fields == () + assert req.method == "GET" + assert req.path == "/" + assert req.scheme == "https" assert req.content == b'foobar' From 4de4223b2ddb4417be0d6a2fa0556d531a494091 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 15:12:20 +1200 Subject: [PATCH 02/14] Extend pseudo-header treatment to :status on responses --- mitmproxy/protocol/http2.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mitmproxy/protocol/http2.py b/mitmproxy/protocol/http2.py index bdc851283..b41016768 100644 --- a/mitmproxy/protocol/http2.py +++ b/mitmproxy/protocol/http2.py @@ -387,12 +387,14 @@ class Http2SingleStreamLayer(_HttpTransmissionLayer, threading.Thread): self.response_arrived.wait() status_code = int(self.response_headers.get(':status', 502)) + headers = self.response_headers.copy() + headers.clear(":status") return HTTPResponse( http_version=b"HTTP/2.0", status_code=status_code, reason='', - headers=self.response_headers, + headers=headers, content=None, timestamp_start=self.timestamp_start, timestamp_end=self.timestamp_end, @@ -412,10 +414,12 @@ class Http2SingleStreamLayer(_HttpTransmissionLayer, threading.Thread): raise Http2ProtocolException("Zombie Stream") def send_response_headers(self, response): + headers = response.headers.copy() + headers.insert(0, ":status", str(response.status_code)) self.client_conn.h2.safe_send_headers( self.is_zombie, self.client_stream_id, - response.headers + headers ) if self.zombie: # pragma: no cover raise 
Http2ProtocolException("Zombie Stream") From 08fbe6f1118455bc44d05db30b83bdf81feda2a0 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 17:16:31 +1200 Subject: [PATCH 03/14] Start cleaning up netlib.utils - Remove http2 functions, move to http2.frame - Remove Serializable, move to netlib.basetypes --- mitmproxy/models/tcp.py | 4 +- mitmproxy/protocol/http2.py | 5 +- mitmproxy/stateobject.py | 4 +- netlib/basetypes.py | 33 ++++++++++++ netlib/certutils.py | 4 +- netlib/http/http2/connections.py | 59 +++++++++++----------- netlib/http/http2/frame.py | 21 ++++++++ netlib/http/message.py | 17 +++---- netlib/http/request.py | 2 +- netlib/multidict.py | 5 +- netlib/odict.py | 8 +-- netlib/tcp.py | 4 +- netlib/utils.py | 52 ------------------- test/mitmproxy/test_protocol_http2.py | 14 ++--- test/netlib/http/http2/test_connections.py | 26 +++++----- test/netlib/test_basetypes.py | 27 ++++++++++ test/netlib/test_utils.py | 27 ---------- 17 files changed, 157 insertions(+), 155 deletions(-) create mode 100644 netlib/basetypes.py create mode 100644 netlib/http/http2/frame.py create mode 100644 test/netlib/test_basetypes.py diff --git a/mitmproxy/models/tcp.py b/mitmproxy/models/tcp.py index b87a74acf..c7cfb9f8a 100644 --- a/mitmproxy/models/tcp.py +++ b/mitmproxy/models/tcp.py @@ -1,11 +1,11 @@ import time from typing import List -from netlib.utils import Serializable +import netlib.basetypes from .flow import Flow -class TCPMessage(Serializable): +class TCPMessage(netlib.basetypes.Serializable): def __init__(self, from_client, content, timestamp=None): self.content = content diff --git a/mitmproxy/protocol/http2.py b/mitmproxy/protocol/http2.py index b41016768..24460ec91 100644 --- a/mitmproxy/protocol/http2.py +++ b/mitmproxy/protocol/http2.py @@ -14,7 +14,8 @@ from hyperframe.frame import PriorityFrame from netlib.tcp import ssl_read_select from netlib.exceptions import HttpException from netlib.http import Headers -from netlib.utils import 
http2_read_raw_frame, parse_url +from netlib.utils import parse_url +from netlib.http.http2 import frame from .base import Layer from .http import _HttpTransmissionLayer, HttpLayer @@ -233,7 +234,7 @@ class Http2Layer(Layer): with source_conn.h2.lock: try: - raw_frame = b''.join(http2_read_raw_frame(source_conn.rfile)) + raw_frame = b''.join(frame.http2_read_raw_frame(source_conn.rfile)) except: # read frame failed: connection closed self._kill_all_streams() diff --git a/mitmproxy/stateobject.py b/mitmproxy/stateobject.py index 765c35d6c..eb57fa00a 100644 --- a/mitmproxy/stateobject.py +++ b/mitmproxy/stateobject.py @@ -3,7 +3,7 @@ from __future__ import absolute_import import six from typing import List, Any -from netlib.utils import Serializable +import netlib.basetypes def _is_list(cls): @@ -13,7 +13,7 @@ def _is_list(cls): return issubclass(cls, List) or is_list_bugfix -class StateObject(Serializable): +class StateObject(netlib.basetypes.Serializable): """ An object with serializable state. diff --git a/netlib/basetypes.py b/netlib/basetypes.py new file mode 100644 index 000000000..d03246ff1 --- /dev/null +++ b/netlib/basetypes.py @@ -0,0 +1,33 @@ +import six +import abc + +@six.add_metaclass(abc.ABCMeta) +class Serializable(object): + """ + Abstract Base Class that defines an API to save an object's state and restore it later on. + """ + + @classmethod + @abc.abstractmethod + def from_state(cls, state): + """ + Create a new object from the given state. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_state(self): + """ + Retrieve object state. + """ + raise NotImplementedError() + + @abc.abstractmethod + def set_state(self, state): + """ + Set object state to the given state. 
+ """ + raise NotImplementedError() + + def copy(self): + return self.from_state(self.get_state()) diff --git a/netlib/certutils.py b/netlib/certutils.py index 34e01ed37..4a19d170a 100644 --- a/netlib/certutils.py +++ b/netlib/certutils.py @@ -12,7 +12,7 @@ from pyasn1.codec.der.decoder import decode from pyasn1.error import PyAsn1Error import OpenSSL -from .utils import Serializable +from . import basetypes # Default expiry must not be too long: https://github.com/mitmproxy/mitmproxy/issues/815 @@ -364,7 +364,7 @@ class _GeneralNames(univ.SequenceOf): constraint.ValueSizeConstraint(1, 1024) -class SSLCert(Serializable): +class SSLCert(basetypes.Serializable): def __init__(self, cert): """ diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py index 6b91f2ff6..03f1804b2 100644 --- a/netlib/http/http2/connections.py +++ b/netlib/http/http2/connections.py @@ -2,11 +2,12 @@ from __future__ import (absolute_import, print_function, division) import itertools import time +import hyperframe.frame + from hpack.hpack import Encoder, Decoder from ... import utils from .. import Headers, Response, Request - -from hyperframe import frame +from . 
import frame class TCPHandler(object): @@ -38,12 +39,12 @@ class HTTP2Protocol(object): CLIENT_CONNECTION_PREFACE = b'PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n' HTTP2_DEFAULT_SETTINGS = { - frame.SettingsFrame.HEADER_TABLE_SIZE: 4096, - frame.SettingsFrame.ENABLE_PUSH: 1, - frame.SettingsFrame.MAX_CONCURRENT_STREAMS: None, - frame.SettingsFrame.INITIAL_WINDOW_SIZE: 2 ** 16 - 1, - frame.SettingsFrame.MAX_FRAME_SIZE: 2 ** 14, - frame.SettingsFrame.MAX_HEADER_LIST_SIZE: None, + hyperframe.frame.SettingsFrame.HEADER_TABLE_SIZE: 4096, + hyperframe.frame.SettingsFrame.ENABLE_PUSH: 1, + hyperframe.frame.SettingsFrame.MAX_CONCURRENT_STREAMS: None, + hyperframe.frame.SettingsFrame.INITIAL_WINDOW_SIZE: 2 ** 16 - 1, + hyperframe.frame.SettingsFrame.MAX_FRAME_SIZE: 2 ** 14, + hyperframe.frame.SettingsFrame.MAX_HEADER_LIST_SIZE: None, } def __init__( @@ -253,9 +254,9 @@ class HTTP2Protocol(object): magic = self.tcp_handler.rfile.safe_read(magic_length) assert magic == self.CLIENT_CONNECTION_PREFACE - frm = frame.SettingsFrame(settings={ - frame.SettingsFrame.ENABLE_PUSH: 0, - frame.SettingsFrame.MAX_CONCURRENT_STREAMS: 1, + frm = hyperframe.frame.SettingsFrame(settings={ + hyperframe.frame.SettingsFrame.ENABLE_PUSH: 0, + hyperframe.frame.SettingsFrame.MAX_CONCURRENT_STREAMS: 1, }) self.send_frame(frm, hide=True) self._receive_settings(hide=True) @@ -266,7 +267,7 @@ class HTTP2Protocol(object): self.tcp_handler.wfile.write(self.CLIENT_CONNECTION_PREFACE) - self.send_frame(frame.SettingsFrame(), hide=True) + self.send_frame(hyperframe.frame.SettingsFrame(), hide=True) self._receive_settings(hide=True) # server announces own settings self._receive_settings(hide=True) # server acks my settings @@ -279,18 +280,18 @@ class HTTP2Protocol(object): def read_frame(self, hide=False): while True: - frm = utils.http2_read_frame(self.tcp_handler.rfile) + frm = frame.http2_read_frame(self.tcp_handler.rfile) if not hide and self.dump_frames: # pragma no cover print(frm.human_readable("<<")) - if 
isinstance(frm, frame.PingFrame): - raw_bytes = frame.PingFrame(flags=['ACK'], payload=frm.payload).serialize() + if isinstance(frm, hyperframe.frame.PingFrame): + raw_bytes = hyperframe.frame.PingFrame(flags=['ACK'], payload=frm.payload).serialize() self.tcp_handler.wfile.write(raw_bytes) self.tcp_handler.wfile.flush() continue - if isinstance(frm, frame.SettingsFrame) and 'ACK' not in frm.flags: + if isinstance(frm, hyperframe.frame.SettingsFrame) and 'ACK' not in frm.flags: self._apply_settings(frm.settings, hide) - if isinstance(frm, frame.DataFrame) and frm.flow_controlled_length > 0: + if isinstance(frm, hyperframe.frame.DataFrame) and frm.flow_controlled_length > 0: self._update_flow_control_window(frm.stream_id, frm.flow_controlled_length) return frm @@ -302,7 +303,7 @@ class HTTP2Protocol(object): return True def _handle_unexpected_frame(self, frm): - if isinstance(frm, frame.SettingsFrame): + if isinstance(frm, hyperframe.frame.SettingsFrame): return if self.unhandled_frame_cb: self.unhandled_frame_cb(frm) @@ -310,7 +311,7 @@ class HTTP2Protocol(object): def _receive_settings(self, hide=False): while True: frm = self.read_frame(hide) - if isinstance(frm, frame.SettingsFrame): + if isinstance(frm, hyperframe.frame.SettingsFrame): break else: self._handle_unexpected_frame(frm) @@ -334,26 +335,26 @@ class HTTP2Protocol(object): old_value = '-' self.http2_settings[setting] = value - frm = frame.SettingsFrame(flags=['ACK']) + frm = hyperframe.frame.SettingsFrame(flags=['ACK']) self.send_frame(frm, hide) def _update_flow_control_window(self, stream_id, increment): - frm = frame.WindowUpdateFrame(stream_id=0, window_increment=increment) + frm = hyperframe.frame.WindowUpdateFrame(stream_id=0, window_increment=increment) self.send_frame(frm) - frm = frame.WindowUpdateFrame(stream_id=stream_id, window_increment=increment) + frm = hyperframe.frame.WindowUpdateFrame(stream_id=stream_id, window_increment=increment) self.send_frame(frm) def _create_headers(self, 
headers, stream_id, end_stream=True): def frame_cls(chunks): for i in chunks: if i == 0: - yield frame.HeadersFrame, i + yield hyperframe.frame.HeadersFrame, i else: - yield frame.ContinuationFrame, i + yield hyperframe.frame.ContinuationFrame, i header_block_fragment = self.encoder.encode(headers.fields) - chunk_size = self.http2_settings[frame.SettingsFrame.MAX_FRAME_SIZE] + chunk_size = self.http2_settings[hyperframe.frame.SettingsFrame.MAX_FRAME_SIZE] chunks = range(0, len(header_block_fragment), chunk_size) frms = [frm_cls( flags=[], @@ -374,9 +375,9 @@ class HTTP2Protocol(object): if body is None or len(body) == 0: return b'' - chunk_size = self.http2_settings[frame.SettingsFrame.MAX_FRAME_SIZE] + chunk_size = self.http2_settings[hyperframe.frame.SettingsFrame.MAX_FRAME_SIZE] chunks = range(0, len(body), chunk_size) - frms = [frame.DataFrame( + frms = [hyperframe.frame.DataFrame( flags=[], stream_id=stream_id, data=body[i:i + chunk_size]) for i in chunks] @@ -400,7 +401,7 @@ class HTTP2Protocol(object): while True: frm = self.read_frame() if ( - (isinstance(frm, frame.HeadersFrame) or isinstance(frm, frame.ContinuationFrame)) and + (isinstance(frm, hyperframe.frame.HeadersFrame) or isinstance(frm, hyperframe.frame.ContinuationFrame)) and (stream_id is None or frm.stream_id == stream_id) ): stream_id = frm.stream_id @@ -414,7 +415,7 @@ class HTTP2Protocol(object): while body_expected: frm = self.read_frame() - if isinstance(frm, frame.DataFrame) and frm.stream_id == stream_id: + if isinstance(frm, hyperframe.frame.DataFrame) and frm.stream_id == stream_id: body += frm.data if 'END_STREAM' in frm.flags: break diff --git a/netlib/http/http2/frame.py b/netlib/http/http2/frame.py new file mode 100644 index 000000000..d45be6461 --- /dev/null +++ b/netlib/http/http2/frame.py @@ -0,0 +1,21 @@ +import codecs + +import hyperframe + + +def http2_read_raw_frame(rfile): + header = rfile.safe_read(9) + length = int(codecs.encode(header[:3], 'hex_codec'), 16) + + if length 
== 4740180: + raise ValueError("Length field looks more like HTTP/1.1: %s" % rfile.peek(20)) + + body = rfile.safe_read(length) + return [header, body] + + +def http2_read_frame(rfile): + header, body = http2_read_raw_frame(rfile) + frame, length = hyperframe.frame.Frame.parse_frame_header(header) + frame.parse_body(memoryview(body)) + return frame diff --git a/netlib/http/message.py b/netlib/http/message.py index 13d401a74..d9654f26e 100644 --- a/netlib/http/message.py +++ b/netlib/http/message.py @@ -4,9 +4,8 @@ import warnings import six -from .headers import Headers -from .. import encoding, utils -from ..utils import always_bytes +from .. import encoding, utils, basetypes +from . import headers if six.PY2: # pragma: no cover def _native(x): @@ -20,10 +19,10 @@ else: return x.decode("utf-8", "surrogateescape") def _always_bytes(x): - return always_bytes(x, "utf-8", "surrogateescape") + return utils.always_bytes(x, "utf-8", "surrogateescape") -class MessageData(utils.Serializable): +class MessageData(basetypes.Serializable): def __eq__(self, other): if isinstance(other, MessageData): return self.__dict__ == other.__dict__ @@ -38,7 +37,7 @@ class MessageData(utils.Serializable): def set_state(self, state): for k, v in state.items(): if k == "headers": - v = Headers.from_state(v) + v = headers.Headers.from_state(v) setattr(self, k, v) def get_state(self): @@ -48,11 +47,11 @@ class MessageData(utils.Serializable): @classmethod def from_state(cls, state): - state["headers"] = Headers.from_state(state["headers"]) + state["headers"] = headers.Headers.from_state(state["headers"]) return cls(**state) -class Message(utils.Serializable): +class Message(basetypes.Serializable): def __eq__(self, other): if isinstance(other, Message): return self.data == other.data @@ -72,7 +71,7 @@ class Message(utils.Serializable): @classmethod def from_state(cls, state): - state["headers"] = Headers.from_state(state["headers"]) + state["headers"] = 
headers.Headers.from_state(state["headers"]) return cls(**state) @property diff --git a/netlib/http/request.py b/netlib/http/request.py index fa8d54aa5..80a9ae653 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -6,7 +6,7 @@ import six from six.moves import urllib from netlib import utils -from netlib.http import cookies +from . import cookies from .. import encoding from ..multidict import MultiDictView from .headers import Headers diff --git a/netlib/multidict.py b/netlib/multidict.py index f8876cbd5..6139d60ad 100644 --- a/netlib/multidict.py +++ b/netlib/multidict.py @@ -9,12 +9,11 @@ except ImportError: # pragma: no cover from collections import MutableMapping # Workaround for Python < 3.3 import six - -from .utils import Serializable +from . import basetypes @six.add_metaclass(ABCMeta) -class _MultiDict(MutableMapping, Serializable): +class _MultiDict(MutableMapping, basetypes.Serializable): def __repr__(self): fields = ( repr(field) diff --git a/netlib/odict.py b/netlib/odict.py index 8a638dabc..87887a294 100644 --- a/netlib/odict.py +++ b/netlib/odict.py @@ -3,10 +3,10 @@ import copy import six -from .utils import Serializable, safe_subn +from . import basetypes, utils -class ODict(Serializable): +class ODict(basetypes.Serializable): """ A dictionary-like object for managing ordered (key, value) data. Think @@ -139,9 +139,9 @@ class ODict(Serializable): """ new, count = [], 0 for k, v in self.lst: - k, c = safe_subn(pattern, repl, k, *args, **kwargs) + k, c = utils.safe_subn(pattern, repl, k, *args, **kwargs) count += c - v, c = safe_subn(pattern, repl, v, *args, **kwargs) + v, c = utils.safe_subn(pattern, repl, v, *args, **kwargs) count += c new.append([k, v]) self.lst = new diff --git a/netlib/tcp.py b/netlib/tcp.py index c7231dbb6..5662c9737 100644 --- a/netlib/tcp.py +++ b/netlib/tcp.py @@ -16,7 +16,7 @@ import six import OpenSSL from OpenSSL import SSL -from . import certutils, version_check, utils +from . 
import certutils, version_check, basetypes # This is a rather hackish way to make sure that # the latest version of pyOpenSSL is actually installed. @@ -302,7 +302,7 @@ class Reader(_FileLike): raise NotImplementedError("Can only peek into (pyOpenSSL) sockets") -class Address(utils.Serializable): +class Address(basetypes.Serializable): """ This class wraps an IPv4/IPv6 tuple to provide named attributes and diff --git a/netlib/utils.py b/netlib/utils.py index 174f616de..770ad6a6a 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -3,46 +3,12 @@ import os.path import re import codecs import unicodedata -from abc import ABCMeta, abstractmethod import importlib import inspect import six from six.moves import urllib -import hyperframe - - -@six.add_metaclass(ABCMeta) -class Serializable(object): - """ - Abstract Base Class that defines an API to save an object's state and restore it later on. - """ - - @classmethod - @abstractmethod - def from_state(cls, state): - """ - Create a new object from the given state. - """ - raise NotImplementedError() - - @abstractmethod - def get_state(self): - """ - Retrieve object state. - """ - raise NotImplementedError() - - @abstractmethod - def set_state(self, state): - """ - Set object state to the given state. 
- """ - raise NotImplementedError() - - def copy(self): - return self.from_state(self.get_state()) def always_bytes(unicode_or_bytes, *encode_args): @@ -395,24 +361,6 @@ def multipartdecode(headers, content): return [] -def http2_read_raw_frame(rfile): - header = rfile.safe_read(9) - length = int(codecs.encode(header[:3], 'hex_codec'), 16) - - if length == 4740180: - raise ValueError("Length field looks more like HTTP/1.1: %s" % rfile.peek(20)) - - body = rfile.safe_read(length) - return [header, body] - - -def http2_read_frame(rfile): - header, body = http2_read_raw_frame(rfile) - frame, length = hyperframe.frame.Frame.parse_frame_header(header) - frame.parse_body(memoryview(body)) - return frame - - def safe_subn(pattern, repl, target, *args, **kwargs): """ There are Unicode conversion problems with re.subn. We try to smooth diff --git a/test/mitmproxy/test_protocol_http2.py b/test/mitmproxy/test_protocol_http2.py index 4a7620147..5ab42caeb 100644 --- a/test/mitmproxy/test_protocol_http2.py +++ b/test/mitmproxy/test_protocol_http2.py @@ -13,7 +13,7 @@ from mitmproxy.cmdline import APP_HOST, APP_PORT import netlib from ..netlib import tservers as netlib_tservers -from netlib.utils import http2_read_raw_frame +from netlib.http.http2 import frame from . 
import tservers @@ -48,7 +48,7 @@ class _Http2ServerBase(netlib_tservers.ServerTestBase): done = False while not done: try: - raw = b''.join(http2_read_raw_frame(self.rfile)) + raw = b''.join(frame.http2_read_raw_frame(self.rfile)) events = h2_conn.receive_data(raw) except: break @@ -200,7 +200,7 @@ class TestSimple(_Http2TestBase, _Http2ServerBase): done = False while not done: try: - events = h2_conn.receive_data(b''.join(http2_read_raw_frame(client.rfile))) + events = h2_conn.receive_data(b''.join(frame.http2_read_raw_frame(client.rfile))) except: break client.wfile.write(h2_conn.data_to_send()) @@ -270,7 +270,7 @@ class TestWithBodies(_Http2TestBase, _Http2ServerBase): done = False while not done: try: - events = h2_conn.receive_data(b''.join(http2_read_raw_frame(client.rfile))) + events = h2_conn.receive_data(b''.join(frame.http2_read_raw_frame(client.rfile))) except: break client.wfile.write(h2_conn.data_to_send()) @@ -362,7 +362,7 @@ class TestPushPromise(_Http2TestBase, _Http2ServerBase): responses = 0 while not done: try: - raw = b''.join(http2_read_raw_frame(client.rfile)) + raw = b''.join(frame.http2_read_raw_frame(client.rfile)) events = h2_conn.receive_data(raw) except: break @@ -412,7 +412,7 @@ class TestPushPromise(_Http2TestBase, _Http2ServerBase): responses = 0 while not done: try: - events = h2_conn.receive_data(b''.join(http2_read_raw_frame(client.rfile))) + events = h2_conn.receive_data(b''.join(frame.http2_read_raw_frame(client.rfile))) except: break client.wfile.write(h2_conn.data_to_send()) @@ -479,7 +479,7 @@ class TestConnectionLost(_Http2TestBase, _Http2ServerBase): done = False while not done: try: - raw = b''.join(http2_read_raw_frame(client.rfile)) + raw = b''.join(frame.http2_read_raw_frame(client.rfile)) h2_conn.receive_data(raw) except: break diff --git a/test/netlib/http/http2/test_connections.py b/test/netlib/http/http2/test_connections.py index 69667d1cb..be68a28cb 100644 --- a/test/netlib/http/http2/test_connections.py +++ 
b/test/netlib/http/http2/test_connections.py @@ -1,12 +1,12 @@ import mock import codecs -from hyperframe import frame - -from netlib import tcp, http, utils +import hyperframe +from netlib import tcp, http from netlib.tutils import raises from netlib.exceptions import TcpDisconnect from netlib.http.http2.connections import HTTP2Protocol, TCPHandler +from netlib.http.http2 import frame from ... import tservers @@ -111,11 +111,11 @@ class TestPerformServerConnectionPreface(tservers.ServerTestBase): self.wfile.flush() # check empty settings frame - raw = utils.http2_read_raw_frame(self.rfile) + raw = frame.http2_read_raw_frame(self.rfile) assert raw == codecs.decode('00000c040000000000000200000000000300000001', 'hex_codec') # check settings acknowledgement - raw = utils.http2_read_raw_frame(self.rfile) + raw = frame.http2_read_raw_frame(self.rfile) assert raw == codecs.decode('000000040100000000', 'hex_codec') # send settings acknowledgement @@ -214,19 +214,19 @@ class TestApplySettings(tservers.ServerTestBase): protocol = HTTP2Protocol(c) protocol._apply_settings({ - frame.SettingsFrame.ENABLE_PUSH: 'foo', - frame.SettingsFrame.MAX_CONCURRENT_STREAMS: 'bar', - frame.SettingsFrame.INITIAL_WINDOW_SIZE: 'deadbeef', + hyperframe.frame.SettingsFrame.ENABLE_PUSH: 'foo', + hyperframe.frame.SettingsFrame.MAX_CONCURRENT_STREAMS: 'bar', + hyperframe.frame.SettingsFrame.INITIAL_WINDOW_SIZE: 'deadbeef', }) assert c.rfile.safe_read(2) == b"OK" assert protocol.http2_settings[ - frame.SettingsFrame.ENABLE_PUSH] == 'foo' + hyperframe.frame.SettingsFrame.ENABLE_PUSH] == 'foo' assert protocol.http2_settings[ - frame.SettingsFrame.MAX_CONCURRENT_STREAMS] == 'bar' + hyperframe.frame.SettingsFrame.MAX_CONCURRENT_STREAMS] == 'bar' assert protocol.http2_settings[ - frame.SettingsFrame.INITIAL_WINDOW_SIZE] == 'deadbeef' + hyperframe.frame.SettingsFrame.INITIAL_WINDOW_SIZE] == 'deadbeef' class TestCreateHeaders(object): @@ -258,7 +258,7 @@ class TestCreateHeaders(object): (b'server', 
b'version')]) protocol = HTTP2Protocol(self.c) - protocol.http2_settings[frame.SettingsFrame.MAX_FRAME_SIZE] = 8 + protocol.http2_settings[hyperframe.frame.SettingsFrame.MAX_FRAME_SIZE] = 8 bytes = protocol._create_headers(headers, 1, end_stream=True) assert len(bytes) == 3 assert bytes[0] == codecs.decode('000008010100000001828487408294e783', 'hex_codec') @@ -281,7 +281,7 @@ class TestCreateBody(object): def test_create_body_multiple_frames(self): protocol = HTTP2Protocol(self.c) - protocol.http2_settings[frame.SettingsFrame.MAX_FRAME_SIZE] = 5 + protocol.http2_settings[hyperframe.frame.SettingsFrame.MAX_FRAME_SIZE] = 5 bytes = protocol._create_body(b'foobarmehm42', 1) assert len(bytes) == 3 assert bytes[0] == codecs.decode('000005000000000001666f6f6261', 'hex_codec') diff --git a/test/netlib/test_basetypes.py b/test/netlib/test_basetypes.py new file mode 100644 index 000000000..2a7eea818 --- /dev/null +++ b/test/netlib/test_basetypes.py @@ -0,0 +1,27 @@ +from netlib import basetypes + +class SerializableDummy(basetypes.Serializable): + def __init__(self, i): + self.i = i + + def get_state(self): + return self.i + + def set_state(self, i): + self.i = i + + def from_state(self, state): + return type(self)(state) + + +class TestSerializable: + + def test_copy(self): + a = SerializableDummy(42) + assert a.i == 42 + b = a.copy() + assert b.i == 42 + + a.set_state(1) + assert a.i == 1 + assert b.i == 42 diff --git a/test/netlib/test_utils.py b/test/netlib/test_utils.py index e4c81a482..cd629d777 100644 --- a/test/netlib/test_utils.py +++ b/test/netlib/test_utils.py @@ -144,33 +144,6 @@ def test_parse_content_type(): assert v == ('text', 'html', {'charset': 'UTF-8'}) -class SerializableDummy(utils.Serializable): - def __init__(self, i): - self.i = i - - def get_state(self): - return self.i - - def set_state(self, i): - self.i = i - - def from_state(self, state): - return type(self)(state) - - -class TestSerializable: - - def test_copy(self): - a = SerializableDummy(42) 
- assert a.i == 42 - b = a.copy() - assert b.i == 42 - - a.set_state(1) - assert a.i == 1 - assert b.i == 42 - - def test_safe_subn(): assert utils.safe_subn("foo", u"bar", "\xc2foo") From 4e6c9c4e935458d23add259dc63c5e0a85fba9c8 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 18:42:56 +1200 Subject: [PATCH 04/14] Extract url functions from netlib.utils and move to netlib.http.url --- mitmproxy/cmdline.py | 4 +- mitmproxy/console/flowlist.py | 4 +- mitmproxy/contentviews.py | 5 +- mitmproxy/protocol/http2.py | 4 +- netlib/http/http1/read.py | 3 +- netlib/http/http2/connections.py | 4 +- netlib/http/request.py | 19 +++--- netlib/http/url.py | 95 ++++++++++++++++++++++++++++++ netlib/utils.py | 95 ------------------------------ test/mitmproxy/test_contentview.py | 6 +- test/netlib/http/test_url.py | 65 ++++++++++++++++++++ test/netlib/test_utils.py | 64 -------------------- 12 files changed, 186 insertions(+), 182 deletions(-) create mode 100644 netlib/http/url.py create mode 100644 test/netlib/http/test_url.py diff --git a/mitmproxy/cmdline.py b/mitmproxy/cmdline.py index 8476f6f33..d8bbe4480 100644 --- a/mitmproxy/cmdline.py +++ b/mitmproxy/cmdline.py @@ -6,7 +6,7 @@ import base64 import configargparse from netlib.tcp import Address, sslversion_choices -import netlib.utils +import netlib.http.url from . import filt, utils, version from .proxy import config @@ -105,7 +105,7 @@ def parse_setheader(s): def parse_server_spec(url): try: - p = netlib.utils.parse_url(url) + p = netlib.http.url.parse_url(url) if p[0] not in ("http", "https"): raise ValueError() except ValueError: diff --git a/mitmproxy/console/flowlist.py b/mitmproxy/console/flowlist.py index 78b30231c..66d92fe18 100644 --- a/mitmproxy/console/flowlist.py +++ b/mitmproxy/console/flowlist.py @@ -1,7 +1,7 @@ from __future__ import absolute_import import urwid -import netlib.utils +import netlib.http.url from . 
import common, signals @@ -343,7 +343,7 @@ class FlowListBox(urwid.ListBox): ) def new_request(self, url, method): - parts = netlib.utils.parse_url(str(url)) + parts = netlib.http.url.parse_url(str(url)) if not parts: signals.status_message.send(message="Invalid Url") return diff --git a/mitmproxy/contentviews.py b/mitmproxy/contentviews.py index 1b0f389fb..5c562f95c 100644 --- a/mitmproxy/contentviews.py +++ b/mitmproxy/contentviews.py @@ -27,7 +27,8 @@ import html2text import six from netlib.odict import ODict from netlib import encoding -from netlib.utils import clean_bin, hexdump, urldecode, multipartdecode, parse_content_type +from netlib.http import url +from netlib.utils import clean_bin, hexdump, multipartdecode, parse_content_type from . import utils from .exceptions import ContentViewException from .contrib import jsbeautifier @@ -257,7 +258,7 @@ class ViewURLEncoded(View): content_types = ["application/x-www-form-urlencoded"] def __call__(self, data, **metadata): - d = urldecode(data) + d = url.urldecode(data) return "URLEncoded form", format_dict(ODict(d)) diff --git a/mitmproxy/protocol/http2.py b/mitmproxy/protocol/http2.py index 24460ec91..f4a6cf9d2 100644 --- a/mitmproxy/protocol/http2.py +++ b/mitmproxy/protocol/http2.py @@ -14,8 +14,8 @@ from hyperframe.frame import PriorityFrame from netlib.tcp import ssl_read_select from netlib.exceptions import HttpException from netlib.http import Headers -from netlib.utils import parse_url from netlib.http.http2 import frame +import netlib.http.url from .base import Layer from .http import _HttpTransmissionLayer, HttpLayer @@ -320,7 +320,7 @@ class Http2SingleStreamLayer(_HttpTransmissionLayer, threading.Thread): else: # pragma: no cover first_line_format = "absolute" # FIXME: verify if path or :host contains what we need - scheme, host, port, _ = parse_url(path) + scheme, host, port, _ = netlib.http.url.parse_url(path) if authority: host, _, port = authority.partition(':') diff --git 
a/netlib/http/http1/read.py b/netlib/http/http1/read.py index d30976bdd..f776d0b5a 100644 --- a/netlib/http/http1/read.py +++ b/netlib/http/http1/read.py @@ -6,6 +6,7 @@ import re from ... import utils from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException, TcpDisconnect from .. import Request, Response, Headers +from .. import url def read_request(rfile, body_size_limit=None): @@ -240,7 +241,7 @@ def _read_request_line(rfile): scheme, path = None, None else: form = "absolute" - scheme, host, port, path = utils.parse_url(path) + scheme, host, port, path = url.parse_url(path) _check_http_version(http_version) except ValueError: diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py index 03f1804b2..4c15ee07a 100644 --- a/netlib/http/http2/connections.py +++ b/netlib/http/http2/connections.py @@ -6,7 +6,7 @@ import hyperframe.frame from hpack.hpack import Encoder, Decoder from ... import utils -from .. import Headers, Response, Request +from .. import Headers, Response, Request, url from . import frame @@ -118,7 +118,7 @@ class HTTP2Protocol(object): else: first_line_format = "absolute" # FIXME: verify if path or :host contains what we need - scheme, host, port, _ = utils.parse_url(path) + scheme, host, port, _ = url.parse_url(path) scheme = scheme.decode('ascii') host = host.decode('ascii') diff --git a/netlib/http/request.py b/netlib/http/request.py index 80a9ae653..170066f77 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -6,6 +6,7 @@ import six from six.moves import urllib from netlib import utils +import netlib.http.url from . import cookies from .. 
import encoding from ..multidict import MultiDictView @@ -179,11 +180,11 @@ class Request(Message): """ if self.first_line_format == "authority": return "%s:%d" % (self.host, self.port) - return utils.unparse_url(self.scheme, self.host, self.port, self.path) + return netlib.http.url.unparse_url(self.scheme, self.host, self.port, self.path) @url.setter def url(self, url): - self.scheme, self.host, self.port, self.path = utils.parse_url(url) + self.scheme, self.host, self.port, self.path = netlib.http.url.parse_url(url) def _parse_host_header(self): """Extract the host and port from Host header""" @@ -219,7 +220,7 @@ class Request(Message): """ if self.first_line_format == "authority": return "%s:%d" % (self.pretty_host, self.port) - return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path) + return netlib.http.url.unparse_url(self.scheme, self.pretty_host, self.port, self.path) @property def query(self): @@ -234,12 +235,12 @@ class Request(Message): def _get_query(self): _, _, _, _, query, _ = urllib.parse.urlparse(self.url) - return tuple(utils.urldecode(query)) + return tuple(netlib.http.url.urldecode(query)) def _set_query(self, value): - query = utils.urlencode(value) + query = netlib.http.url.urlencode(value) scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = utils.parse_url( + _, _, _, self.path = netlib.http.url.parse_url( urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) @query.setter @@ -287,7 +288,7 @@ class Request(Message): components = map(lambda x: urllib.parse.quote(x, safe=""), components) path = "/" + "/".join(components) scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = utils.parse_url( + _, _, _, self.path = netlib.http.url.parse_url( urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) def anticache(self): @@ -339,7 +340,7 @@ class Request(Message): def _get_urlencoded_form(self): 
is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower() if is_valid_content_type: - return tuple(utils.urldecode(self.content)) + return tuple(netlib.http.url.urldecode(self.content)) return () def _set_urlencoded_form(self, value): @@ -348,7 +349,7 @@ class Request(Message): This will overwrite the existing content if there is one. """ self.headers["content-type"] = "application/x-www-form-urlencoded" - self.content = utils.urlencode(value) + self.content = netlib.http.url.urlencode(value) @urlencoded_form.setter def urlencoded_form(self, value): diff --git a/netlib/http/url.py b/netlib/http/url.py new file mode 100644 index 000000000..3961998b2 --- /dev/null +++ b/netlib/http/url.py @@ -0,0 +1,95 @@ +import six +from six.moves import urllib + +from .. import utils + +# PY2 workaround +def decode_parse_result(result, enc): + if hasattr(result, "decode"): + return result.decode(enc) + else: + return urllib.parse.ParseResult(*[x.decode(enc) for x in result]) + + +# PY2 workaround +def encode_parse_result(result, enc): + if hasattr(result, "encode"): + return result.encode(enc) + else: + return urllib.parse.ParseResult(*[x.encode(enc) for x in result]) + + +def parse_url(url): + """ + URL-parsing function that checks that + - port is an integer 0-65535 + - host is a valid IDNA-encoded hostname with no null-bytes + - path is valid ASCII + + Args: + A URL (as bytes or as unicode) + + Returns: + A (scheme, host, port, path) tuple + + Raises: + ValueError, if the URL is not properly formatted. + """ + parsed = urllib.parse.urlparse(url) + + if not parsed.hostname: + raise ValueError("No hostname given") + + if isinstance(url, six.binary_type): + host = parsed.hostname + + # this should not raise a ValueError, + # but we try to be very forgiving here and accept just everything. 
+ # decode_parse_result(parsed, "ascii") + else: + host = parsed.hostname.encode("idna") + parsed = encode_parse_result(parsed, "ascii") + + port = parsed.port + if not port: + port = 443 if parsed.scheme == b"https" else 80 + + full_path = urllib.parse.urlunparse( + (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment) + ) + if not full_path.startswith(b"/"): + full_path = b"/" + full_path + + if not utils.is_valid_host(host): + raise ValueError("Invalid Host") + if not utils.is_valid_port(port): + raise ValueError("Invalid Port") + + return parsed.scheme, host, port, full_path + + +def unparse_url(scheme, host, port, path=""): + """ + Returns a URL string, constructed from the specified components. + + Args: + All args must be str. + """ + if path == "*": + path = "" + return "%s://%s%s" % (scheme, utils.hostport(scheme, host, port), path) + + +def urlencode(s): + """ + Takes a list of (key, value) tuples and returns a urlencoded string. + """ + s = [tuple(i) for i in s] + return urllib.parse.urlencode(s, False) + + +def urldecode(s): + """ + Takes a urlencoded string and returns a list of (key, value) tuples. 
+ """ + return urllib.parse.parse_qsl(s, keep_blank_values=True) diff --git a/netlib/utils.py b/netlib/utils.py index 770ad6a6a..cd8aa55a5 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -8,9 +8,6 @@ import inspect import six -from six.moves import urllib - - def always_bytes(unicode_or_bytes, *encode_args): if isinstance(unicode_or_bytes, six.text_type): return unicode_or_bytes.encode(*encode_args) @@ -188,71 +185,6 @@ def is_valid_port(port): return 0 <= port <= 65535 -# PY2 workaround -def decode_parse_result(result, enc): - if hasattr(result, "decode"): - return result.decode(enc) - else: - return urllib.parse.ParseResult(*[x.decode(enc) for x in result]) - - -# PY2 workaround -def encode_parse_result(result, enc): - if hasattr(result, "encode"): - return result.encode(enc) - else: - return urllib.parse.ParseResult(*[x.encode(enc) for x in result]) - - -def parse_url(url): - """ - URL-parsing function that checks that - - port is an integer 0-65535 - - host is a valid IDNA-encoded hostname with no null-bytes - - path is valid ASCII - - Args: - A URL (as bytes or as unicode) - - Returns: - A (scheme, host, port, path) tuple - - Raises: - ValueError, if the URL is not properly formatted. - """ - parsed = urllib.parse.urlparse(url) - - if not parsed.hostname: - raise ValueError("No hostname given") - - if isinstance(url, six.binary_type): - host = parsed.hostname - - # this should not raise a ValueError, - # but we try to be very forgiving here and accept just everything. 
- # decode_parse_result(parsed, "ascii") - else: - host = parsed.hostname.encode("idna") - parsed = encode_parse_result(parsed, "ascii") - - port = parsed.port - if not port: - port = 443 if parsed.scheme == b"https" else 80 - - full_path = urllib.parse.urlunparse( - (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment) - ) - if not full_path.startswith(b"/"): - full_path = b"/" + full_path - - if not is_valid_host(host): - raise ValueError("Invalid Host") - if not is_valid_port(port): - raise ValueError("Invalid Port") - - return parsed.scheme, host, port, full_path - - def get_header_tokens(headers, key): """ Retrieve all tokens for a header key. A number of different headers @@ -278,33 +210,6 @@ def hostport(scheme, host, port): return "%s:%d" % (host, port) -def unparse_url(scheme, host, port, path=""): - """ - Returns a URL string, constructed from the specified components. - - Args: - All args must be str. - """ - if path == "*": - path = "" - return "%s://%s%s" % (scheme, hostport(scheme, host, port), path) - - -def urlencode(s): - """ - Takes a list of (key, value) tuples and returns a urlencoded string. - """ - s = [tuple(i) for i in s] - return urllib.parse.urlencode(s, False) - - -def urldecode(s): - """ - Takes a urlencoded string and returns a list of (key, value) tuples. - """ - return urllib.parse.parse_qsl(s, keep_blank_values=True) - - def parse_content_type(c): """ A simple parser for content-type values. Returns a (type, subtype, diff --git a/test/mitmproxy/test_contentview.py b/test/mitmproxy/test_contentview.py index c00afa5f7..57e5ae994 100644 --- a/test/mitmproxy/test_contentview.py +++ b/test/mitmproxy/test_contentview.py @@ -1,8 +1,8 @@ from mitmproxy.exceptions import ContentViewException from netlib.http import Headers from netlib.odict import ODict -import netlib.utils from netlib import encoding +from netlib.http import url import mitmproxy.contentviews as cv from . 
import tutils @@ -60,10 +60,10 @@ class TestContentView: assert f[0] == "Query" def test_view_urlencoded(self): - d = netlib.utils.urlencode([("one", "two"), ("three", "four")]) + d = url.urlencode([("one", "two"), ("three", "four")]) v = cv.ViewURLEncoded() assert v(d) - d = netlib.utils.urlencode([("adsfa", "")]) + d = url.urlencode([("adsfa", "")]) v = cv.ViewURLEncoded() assert v(d) diff --git a/test/netlib/http/test_url.py b/test/netlib/http/test_url.py new file mode 100644 index 000000000..d777a949f --- /dev/null +++ b/test/netlib/http/test_url.py @@ -0,0 +1,65 @@ +from netlib import tutils +from netlib.http import url + +def test_parse_url(): + with tutils.raises(ValueError): + url.parse_url("") + + s, h, po, pa = url.parse_url(b"http://foo.com:8888/test") + assert s == b"http" + assert h == b"foo.com" + assert po == 8888 + assert pa == b"/test" + + s, h, po, pa = url.parse_url("http://foo/bar") + assert s == b"http" + assert h == b"foo" + assert po == 80 + assert pa == b"/bar" + + s, h, po, pa = url.parse_url(b"http://user:pass@foo/bar") + assert s == b"http" + assert h == b"foo" + assert po == 80 + assert pa == b"/bar" + + s, h, po, pa = url.parse_url(b"http://foo") + assert pa == b"/" + + s, h, po, pa = url.parse_url(b"https://foo") + assert po == 443 + + with tutils.raises(ValueError): + url.parse_url(b"https://foo:bar") + + # Invalid IDNA + with tutils.raises(ValueError): + url.parse_url("http://\xfafoo") + # Invalid PATH + with tutils.raises(ValueError): + url.parse_url("http:/\xc6/localhost:56121") + # Null byte in host + with tutils.raises(ValueError): + url.parse_url("http://foo\0") + # Port out of range + _, _, port, _ = url.parse_url("http://foo:999999") + assert port == 80 + # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt + with tutils.raises(ValueError): + url.parse_url('http://lo[calhost') + + +def test_unparse_url(): + assert url.unparse_url("http", "foo.com", 99, "") == "http://foo.com:99" + assert url.unparse_url("http", 
"foo.com", 80, "/bar") == "http://foo.com/bar" + assert url.unparse_url("https", "foo.com", 80, "") == "https://foo.com:80" + assert url.unparse_url("https", "foo.com", 443, "") == "https://foo.com" + + +def test_urlencode(): + assert url.urlencode([('foo', 'bar')]) + + +def test_urldecode(): + s = "one=two&three=four" + assert len(url.urldecode(s)) == 2 diff --git a/test/netlib/test_utils.py b/test/netlib/test_utils.py index cd629d777..f93156674 100644 --- a/test/netlib/test_utils.py +++ b/test/netlib/test_utils.py @@ -38,70 +38,6 @@ def test_pretty_size(): assert utils.pretty_size(1024 * 1024) == "1MB" -def test_parse_url(): - with tutils.raises(ValueError): - utils.parse_url("") - - s, h, po, pa = utils.parse_url(b"http://foo.com:8888/test") - assert s == b"http" - assert h == b"foo.com" - assert po == 8888 - assert pa == b"/test" - - s, h, po, pa = utils.parse_url("http://foo/bar") - assert s == b"http" - assert h == b"foo" - assert po == 80 - assert pa == b"/bar" - - s, h, po, pa = utils.parse_url(b"http://user:pass@foo/bar") - assert s == b"http" - assert h == b"foo" - assert po == 80 - assert pa == b"/bar" - - s, h, po, pa = utils.parse_url(b"http://foo") - assert pa == b"/" - - s, h, po, pa = utils.parse_url(b"https://foo") - assert po == 443 - - with tutils.raises(ValueError): - utils.parse_url(b"https://foo:bar") - - # Invalid IDNA - with tutils.raises(ValueError): - utils.parse_url("http://\xfafoo") - # Invalid PATH - with tutils.raises(ValueError): - utils.parse_url("http:/\xc6/localhost:56121") - # Null byte in host - with tutils.raises(ValueError): - utils.parse_url("http://foo\0") - # Port out of range - _, _, port, _ = utils.parse_url("http://foo:999999") - assert port == 80 - # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt - with tutils.raises(ValueError): - utils.parse_url('http://lo[calhost') - - -def test_unparse_url(): - assert utils.unparse_url("http", "foo.com", 99, "") == "http://foo.com:99" - assert utils.unparse_url("http", 
"foo.com", 80, "/bar") == "http://foo.com/bar" - assert utils.unparse_url("https", "foo.com", 80, "") == "https://foo.com:80" - assert utils.unparse_url("https", "foo.com", 443, "") == "https://foo.com" - - -def test_urlencode(): - assert utils.urlencode([('foo', 'bar')]) - - -def test_urldecode(): - s = "one=two&three=four" - assert len(utils.urldecode(s)) == 2 - - def test_get_header_tokens(): headers = Headers() assert utils.get_header_tokens(headers, "foo") == [] From 6dda2b2ee544c3890f04b7bf99272998e29992b6 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 18:46:19 +1200 Subject: [PATCH 05/14] Module is part of the name - url.decode, not url.urldecode A pattern we need to use far more often in the codebase --- mitmproxy/cmdline.py | 2 +- mitmproxy/console/flowlist.py | 2 +- mitmproxy/console/statusbar.py | 2 +- mitmproxy/contentviews.py | 2 +- mitmproxy/protocol/http2.py | 2 +- netlib/http/http1/read.py | 2 +- netlib/http/http2/connections.py | 2 +- netlib/http/request.py | 18 +++++++------- netlib/http/url.py | 8 +++--- test/mitmproxy/test_contentview.py | 4 +-- test/netlib/http/test_url.py | 40 +++++++++++++++--------------- 11 files changed, 42 insertions(+), 42 deletions(-) diff --git a/mitmproxy/cmdline.py b/mitmproxy/cmdline.py index d8bbe4480..ad0427d2e 100644 --- a/mitmproxy/cmdline.py +++ b/mitmproxy/cmdline.py @@ -105,7 +105,7 @@ def parse_setheader(s): def parse_server_spec(url): try: - p = netlib.http.url.parse_url(url) + p = netlib.http.url.parse(url) if p[0] not in ("http", "https"): raise ValueError() except ValueError: diff --git a/mitmproxy/console/flowlist.py b/mitmproxy/console/flowlist.py index 66d92fe18..1e65e3ebf 100644 --- a/mitmproxy/console/flowlist.py +++ b/mitmproxy/console/flowlist.py @@ -343,7 +343,7 @@ class FlowListBox(urwid.ListBox): ) def new_request(self, url, method): - parts = netlib.http.url.parse_url(str(url)) + parts = netlib.http.url.parse(str(url)) if not parts: 
signals.status_message.send(message="Invalid Url") return diff --git a/mitmproxy/console/statusbar.py b/mitmproxy/console/statusbar.py index 4cc63a54a..20656cb4a 100644 --- a/mitmproxy/console/statusbar.py +++ b/mitmproxy/console/statusbar.py @@ -203,7 +203,7 @@ class StatusBar(urwid.WidgetWrap): if self.master.server.config.mode in ["reverse", "upstream"]: dst = self.master.server.config.upstream_server - r.append("[dest:%s]" % netlib.utils.unparse_url( + r.append("[dest:%s]" % netlib.utils.unparse( dst.scheme, dst.address.host, dst.address.port diff --git a/mitmproxy/contentviews.py b/mitmproxy/contentviews.py index 5c562f95c..75e4273fd 100644 --- a/mitmproxy/contentviews.py +++ b/mitmproxy/contentviews.py @@ -258,7 +258,7 @@ class ViewURLEncoded(View): content_types = ["application/x-www-form-urlencoded"] def __call__(self, data, **metadata): - d = url.urldecode(data) + d = url.decode(data) return "URLEncoded form", format_dict(ODict(d)) diff --git a/mitmproxy/protocol/http2.py b/mitmproxy/protocol/http2.py index f4a6cf9d2..ffea7fae7 100644 --- a/mitmproxy/protocol/http2.py +++ b/mitmproxy/protocol/http2.py @@ -320,7 +320,7 @@ class Http2SingleStreamLayer(_HttpTransmissionLayer, threading.Thread): else: # pragma: no cover first_line_format = "absolute" # FIXME: verify if path or :host contains what we need - scheme, host, port, _ = netlib.http.url.parse_url(path) + scheme, host, port, _ = netlib.http.url.parse(path) if authority: host, _, port = authority.partition(':') diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py index f776d0b5a..93fca04ea 100644 --- a/netlib/http/http1/read.py +++ b/netlib/http/http1/read.py @@ -241,7 +241,7 @@ def _read_request_line(rfile): scheme, path = None, None else: form = "absolute" - scheme, host, port, path = url.parse_url(path) + scheme, host, port, path = url.parse(path) _check_http_version(http_version) except ValueError: diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py index 
4c15ee07a..7c7ad6939 100644 --- a/netlib/http/http2/connections.py +++ b/netlib/http/http2/connections.py @@ -118,7 +118,7 @@ class HTTP2Protocol(object): else: first_line_format = "absolute" # FIXME: verify if path or :host contains what we need - scheme, host, port, _ = url.parse_url(path) + scheme, host, port, _ = url.parse(path) scheme = scheme.decode('ascii') host = host.decode('ascii') diff --git a/netlib/http/request.py b/netlib/http/request.py index 170066f77..d552bc70b 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -180,11 +180,11 @@ class Request(Message): """ if self.first_line_format == "authority": return "%s:%d" % (self.host, self.port) - return netlib.http.url.unparse_url(self.scheme, self.host, self.port, self.path) + return netlib.http.url.unparse(self.scheme, self.host, self.port, self.path) @url.setter def url(self, url): - self.scheme, self.host, self.port, self.path = netlib.http.url.parse_url(url) + self.scheme, self.host, self.port, self.path = netlib.http.url.parse(url) def _parse_host_header(self): """Extract the host and port from Host header""" @@ -220,7 +220,7 @@ class Request(Message): """ if self.first_line_format == "authority": return "%s:%d" % (self.pretty_host, self.port) - return netlib.http.url.unparse_url(self.scheme, self.pretty_host, self.port, self.path) + return netlib.http.url.unparse(self.scheme, self.pretty_host, self.port, self.path) @property def query(self): @@ -235,12 +235,12 @@ class Request(Message): def _get_query(self): _, _, _, _, query, _ = urllib.parse.urlparse(self.url) - return tuple(netlib.http.url.urldecode(query)) + return tuple(netlib.http.url.decode(query)) def _set_query(self, value): - query = netlib.http.url.urlencode(value) + query = netlib.http.url.encode(value) scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = netlib.http.url.parse_url( + _, _, _, self.path = netlib.http.url.parse( urllib.parse.urlunparse([scheme, netloc, path, 
params, query, fragment])) @query.setter @@ -288,7 +288,7 @@ class Request(Message): components = map(lambda x: urllib.parse.quote(x, safe=""), components) path = "/" + "/".join(components) scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = netlib.http.url.parse_url( + _, _, _, self.path = netlib.http.url.parse( urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) def anticache(self): @@ -340,7 +340,7 @@ class Request(Message): def _get_urlencoded_form(self): is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower() if is_valid_content_type: - return tuple(netlib.http.url.urldecode(self.content)) + return tuple(netlib.http.url.decode(self.content)) return () def _set_urlencoded_form(self, value): @@ -349,7 +349,7 @@ class Request(Message): This will overwrite the existing content if there is one. """ self.headers["content-type"] = "application/x-www-form-urlencoded" - self.content = netlib.http.url.urlencode(value) + self.content = netlib.http.url.encode(value) @urlencoded_form.setter def urlencoded_form(self, value): diff --git a/netlib/http/url.py b/netlib/http/url.py index 3961998b2..147ed5725 100644 --- a/netlib/http/url.py +++ b/netlib/http/url.py @@ -19,7 +19,7 @@ def encode_parse_result(result, enc): return urllib.parse.ParseResult(*[x.encode(enc) for x in result]) -def parse_url(url): +def parse(url): """ URL-parsing function that checks that - port is an integer 0-65535 @@ -68,7 +68,7 @@ def parse_url(url): return parsed.scheme, host, port, full_path -def unparse_url(scheme, host, port, path=""): +def unparse(scheme, host, port, path=""): """ Returns a URL string, constructed from the specified components. 
@@ -80,7 +80,7 @@ def unparse_url(scheme, host, port, path=""): return "%s://%s%s" % (scheme, utils.hostport(scheme, host, port), path) -def urlencode(s): +def encode(s): """ Takes a list of (key, value) tuples and returns a urlencoded string. """ @@ -88,7 +88,7 @@ def urlencode(s): return urllib.parse.urlencode(s, False) -def urldecode(s): +def decode(s): """ Takes a urlencoded string and returns a list of (key, value) tuples. """ diff --git a/test/mitmproxy/test_contentview.py b/test/mitmproxy/test_contentview.py index 57e5ae994..9142bdad2 100644 --- a/test/mitmproxy/test_contentview.py +++ b/test/mitmproxy/test_contentview.py @@ -60,10 +60,10 @@ class TestContentView: assert f[0] == "Query" def test_view_urlencoded(self): - d = url.urlencode([("one", "two"), ("three", "four")]) + d = url.encode([("one", "two"), ("three", "four")]) v = cv.ViewURLEncoded() assert v(d) - d = url.urlencode([("adsfa", "")]) + d = url.encode([("adsfa", "")]) v = cv.ViewURLEncoded() assert v(d) diff --git a/test/netlib/http/test_url.py b/test/netlib/http/test_url.py index d777a949f..697c83a81 100644 --- a/test/netlib/http/test_url.py +++ b/test/netlib/http/test_url.py @@ -1,65 +1,65 @@ from netlib import tutils from netlib.http import url -def test_parse_url(): +def test_parse(): with tutils.raises(ValueError): - url.parse_url("") + url.parse("") - s, h, po, pa = url.parse_url(b"http://foo.com:8888/test") + s, h, po, pa = url.parse(b"http://foo.com:8888/test") assert s == b"http" assert h == b"foo.com" assert po == 8888 assert pa == b"/test" - s, h, po, pa = url.parse_url("http://foo/bar") + s, h, po, pa = url.parse("http://foo/bar") assert s == b"http" assert h == b"foo" assert po == 80 assert pa == b"/bar" - s, h, po, pa = url.parse_url(b"http://user:pass@foo/bar") + s, h, po, pa = url.parse(b"http://user:pass@foo/bar") assert s == b"http" assert h == b"foo" assert po == 80 assert pa == b"/bar" - s, h, po, pa = url.parse_url(b"http://foo") + s, h, po, pa = url.parse(b"http://foo") 
assert pa == b"/" - s, h, po, pa = url.parse_url(b"https://foo") + s, h, po, pa = url.parse(b"https://foo") assert po == 443 with tutils.raises(ValueError): - url.parse_url(b"https://foo:bar") + url.parse(b"https://foo:bar") # Invalid IDNA with tutils.raises(ValueError): - url.parse_url("http://\xfafoo") + url.parse("http://\xfafoo") # Invalid PATH with tutils.raises(ValueError): - url.parse_url("http:/\xc6/localhost:56121") + url.parse("http:/\xc6/localhost:56121") # Null byte in host with tutils.raises(ValueError): - url.parse_url("http://foo\0") + url.parse("http://foo\0") # Port out of range - _, _, port, _ = url.parse_url("http://foo:999999") + _, _, port, _ = url.parse("http://foo:999999") assert port == 80 # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt with tutils.raises(ValueError): - url.parse_url('http://lo[calhost') + url.parse('http://lo[calhost') -def test_unparse_url(): - assert url.unparse_url("http", "foo.com", 99, "") == "http://foo.com:99" - assert url.unparse_url("http", "foo.com", 80, "/bar") == "http://foo.com/bar" - assert url.unparse_url("https", "foo.com", 80, "") == "https://foo.com:80" - assert url.unparse_url("https", "foo.com", 443, "") == "https://foo.com" +def test_unparse(): + assert url.unparse("http", "foo.com", 99, "") == "http://foo.com:99" + assert url.unparse("http", "foo.com", 80, "/bar") == "http://foo.com/bar" + assert url.unparse("https", "foo.com", 80, "") == "https://foo.com:80" + assert url.unparse("https", "foo.com", 443, "") == "https://foo.com" def test_urlencode(): - assert url.urlencode([('foo', 'bar')]) + assert url.encode([('foo', 'bar')]) def test_urldecode(): s = "one=two&three=four" - assert len(url.urldecode(s)) == 2 + assert len(url.decode(s)) == 2 From da074bff01fbf359959eaa1e81b75db42e770b8b Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 18:51:40 +1200 Subject: [PATCH 06/14] isascii is unused --- netlib/utils.py | 8 -------- 1 file changed, 8 deletions(-) diff --git 
a/netlib/utils.py b/netlib/utils.py index cd8aa55a5..91da471b6 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -32,14 +32,6 @@ def native(s, *encoding_opts): return s -def isascii(bytes): - try: - bytes.decode("ascii") - except ValueError: - return False - return True - - def clean_bin(s, keep_spacing=True): """ Cleans binary data to make it safe to display. From 15b2374ef9d6a8cbafdff7c79694921387836ff3 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 18:54:42 +1200 Subject: [PATCH 07/14] netlib.utils.get_header_tokens -> netlib.http1.read.get_header_tokens Placing this next to its only use. --- netlib/http/http1/read.py | 14 +++++++++++++- netlib/utils.py | 12 ------------ test/netlib/http/http1/test_read.py | 13 ++++++++++++- test/netlib/test_utils.py | 11 ----------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py index 93fca04ea..5783ec67d 100644 --- a/netlib/http/http1/read.py +++ b/netlib/http/http1/read.py @@ -9,6 +9,18 @@ from .. import Request, Response, Headers from .. import url +def get_header_tokens(headers, key): + """ + Retrieve all tokens for a header key. A number of different headers + follow a pattern where each header line can containe comma-separated + tokens, and headers can be set multiple times. + """ + if key not in headers: + return [] + tokens = headers[key].split(",") + return [token.strip() for token in tokens] + + def read_request(rfile, body_size_limit=None): request = read_request_head(rfile) expected_body_size = expected_http_body_size(request) @@ -148,7 +160,7 @@ def connection_close(http_version, headers): """ # At first, check if we have an explicit Connection header. 
if "connection" in headers: - tokens = utils.get_header_tokens(headers, "connection") + tokens = get_header_tokens(headers, "connection") if "close" in tokens: return True elif "keep-alive" in tokens: diff --git a/netlib/utils.py b/netlib/utils.py index 91da471b6..a2d8c97d0 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -177,18 +177,6 @@ def is_valid_port(port): return 0 <= port <= 65535 -def get_header_tokens(headers, key): - """ - Retrieve all tokens for a header key. A number of different headers - follow a pattern where each header line can containe comma-separated - tokens, and headers can be set multiple times. - """ - if key not in headers: - return [] - tokens = headers[key].split(",") - return [token.strip() for token in tokens] - - def hostport(scheme, host, port): """ Returns the host component, with a port specifcation if needed. diff --git a/test/netlib/http/http1/test_read.py b/test/netlib/http/http1/test_read.py index 974aa8956..5285ac1d2 100644 --- a/test/netlib/http/http1/test_read.py +++ b/test/netlib/http/http1/test_read.py @@ -7,11 +7,22 @@ from netlib.http.http1.read import ( read_request, read_response, read_request_head, read_response_head, read_body, connection_close, expected_http_body_size, _get_first_line, _read_request_line, _parse_authority_form, _read_response_line, _check_http_version, - _read_headers, _read_chunked + _read_headers, _read_chunked, get_header_tokens ) from netlib.tutils import treq, tresp, raises +def test_get_header_tokens(): + headers = Headers() + assert get_header_tokens(headers, "foo") == [] + headers["foo"] = "bar" + assert get_header_tokens(headers, "foo") == ["bar"] + headers["foo"] = "bar, voing" + assert get_header_tokens(headers, "foo") == ["bar", "voing"] + headers.set_all("foo", ["bar, voing", "oink"]) + assert get_header_tokens(headers, "foo") == ["bar", "voing", "oink"] + + def test_read_request(): rfile = BytesIO(b"GET / HTTP/1.1\r\n\r\nskip") r = read_request(rfile) diff --git 
a/test/netlib/test_utils.py b/test/netlib/test_utils.py index f93156674..c4ee3c108 100644 --- a/test/netlib/test_utils.py +++ b/test/netlib/test_utils.py @@ -38,17 +38,6 @@ def test_pretty_size(): assert utils.pretty_size(1024 * 1024) == "1MB" -def test_get_header_tokens(): - headers = Headers() - assert utils.get_header_tokens(headers, "foo") == [] - headers["foo"] = "bar" - assert utils.get_header_tokens(headers, "foo") == ["bar"] - headers["foo"] = "bar, voing" - assert utils.get_header_tokens(headers, "foo") == ["bar", "voing"] - headers.set_all("foo", ["bar, voing", "oink"]) - assert utils.get_header_tokens(headers, "foo") == ["bar", "voing", "oink"] - - def test_multipartdecode(): boundary = 'somefancyboundary' headers = Headers( From ec34cae6181d6af0150ac730d70b96104a07e9d5 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 19:07:55 +1200 Subject: [PATCH 08/14] utils.multipartdecode -> http.multipart.decode also utils.parse_content_type -> http.headers.parse_content_type --- mitmproxy/contentviews.py | 9 ++--- mitmproxy/flow/export.py | 4 +-- netlib/http/headers.py | 27 ++++++++++++++ netlib/http/multipart.py | 32 +++++++++++++++++ netlib/http/request.py | 3 +- netlib/utils.py | 56 ------------------------------ test/netlib/http/test_headers.py | 10 ++++++ test/netlib/http/test_multipart.py | 23 ++++++++++++ test/netlib/test_utils.py | 32 ----------------- 9 files changed, 101 insertions(+), 95 deletions(-) create mode 100644 netlib/http/multipart.py create mode 100644 test/netlib/http/test_multipart.py diff --git a/mitmproxy/contentviews.py b/mitmproxy/contentviews.py index 75e4273fd..08a7e446f 100644 --- a/mitmproxy/contentviews.py +++ b/mitmproxy/contentviews.py @@ -27,8 +27,9 @@ import html2text import six from netlib.odict import ODict from netlib import encoding -from netlib.http import url -from netlib.utils import clean_bin, hexdump, multipartdecode, parse_content_type +import netlib.http.headers +from netlib.http import url, 
multipart +from netlib.utils import clean_bin, hexdump from . import utils from .exceptions import ContentViewException from .contrib import jsbeautifier @@ -121,7 +122,7 @@ class ViewAuto(View): headers = metadata.get("headers", {}) ctype = headers.get("content-type") if data and ctype: - ct = parse_content_type(ctype) if ctype else None + ct = netlib.http.headers.parse_content_type(ctype) if ctype else None ct = "%s/%s" % (ct[0], ct[1]) if ct in content_types_map: return content_types_map[ct][0](data, **metadata) @@ -275,7 +276,7 @@ class ViewMultipart(View): def __call__(self, data, **metadata): headers = metadata.get("headers", {}) - v = multipartdecode(headers, data) + v = multipart.decode(headers, data) if v: return "Multipart form", self._format(v) diff --git a/mitmproxy/flow/export.py b/mitmproxy/flow/export.py index d2c7bceb5..c2f54554d 100644 --- a/mitmproxy/flow/export.py +++ b/mitmproxy/flow/export.py @@ -5,7 +5,7 @@ from textwrap import dedent from six.moves.urllib.parse import quote, quote_plus import netlib.http -from netlib.utils import parse_content_type +import netlib.http.headers def curl_command(flow): @@ -88,7 +88,7 @@ def raw_request(flow): def is_json(headers, content): if headers: - ct = parse_content_type(headers.get("content-type", "")) + ct = netlib.http.headers.parse_content_type(headers.get("content-type", "")) if ct and "%s/%s" % (ct[0], ct[1]) == "application/json": try: return json.loads(content) diff --git a/netlib/http/headers.py b/netlib/http/headers.py index 6165fd616..8f669ec17 100644 --- a/netlib/http/headers.py +++ b/netlib/http/headers.py @@ -175,3 +175,30 @@ class Headers(MultiDict): fields.append([name, value]) self.fields = fields return replacements + + +def parse_content_type(c): + """ + A simple parser for content-type values. Returns a (type, subtype, + parameters) tuple, where type and subtype are strings, and parameters + is a dict. If the string could not be parsed, return None. + + E.g. 
the following string: + + text/html; charset=UTF-8 + + Returns: + + ("text", "html", {"charset": "UTF-8"}) + """ + parts = c.split(";", 1) + ts = parts[0].split("/", 1) + if len(ts) != 2: + return None + d = {} + if len(parts) == 2: + for i in parts[1].split(";"): + clause = i.split("=", 1) + if len(clause) == 2: + d[clause[0].strip()] = clause[1].strip() + return ts[0].lower(), ts[1].lower(), d diff --git a/netlib/http/multipart.py b/netlib/http/multipart.py new file mode 100644 index 000000000..a135eb863 --- /dev/null +++ b/netlib/http/multipart.py @@ -0,0 +1,32 @@ +import re + +from . import headers + + +def decode(hdrs, content): + """ + Takes a multipart boundary encoded string and returns list of (key, value) tuples. + """ + v = hdrs.get("content-type") + if v: + v = headers.parse_content_type(v) + if not v: + return [] + try: + boundary = v[2]["boundary"].encode("ascii") + except (KeyError, UnicodeError): + return [] + + rx = re.compile(br'\bname="([^"]+)"') + r = [] + + for i in content.split(b"--" + boundary): + parts = i.splitlines() + if len(parts) > 1 and parts[0][0:2] != b"--": + match = rx.search(parts[1]) + if match: + key = match.group(1) + value = b"".join(parts[3 + parts[2:].index(b""):]) + r.append((key, value)) + return r + return [] diff --git a/netlib/http/request.py b/netlib/http/request.py index d552bc70b..2fcea67dc 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -7,6 +7,7 @@ from six.moves import urllib from netlib import utils import netlib.http.url +from netlib.http import multipart from . import cookies from .. 
import encoding from ..multidict import MultiDictView @@ -369,7 +370,7 @@ class Request(Message): def _get_multipart_form(self): is_valid_content_type = "multipart/form-data" in self.headers.get("content-type", "").lower() if is_valid_content_type: - return utils.multipartdecode(self.headers, self.content) + return multipart.decode(self.headers, self.content) return () def _set_multipart_form(self, value): diff --git a/netlib/utils.py b/netlib/utils.py index a2d8c97d0..a0150e779 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -190,62 +190,6 @@ def hostport(scheme, host, port): return "%s:%d" % (host, port) -def parse_content_type(c): - """ - A simple parser for content-type values. Returns a (type, subtype, - parameters) tuple, where type and subtype are strings, and parameters - is a dict. If the string could not be parsed, return None. - - E.g. the following string: - - text/html; charset=UTF-8 - - Returns: - - ("text", "html", {"charset": "UTF-8"}) - """ - parts = c.split(";", 1) - ts = parts[0].split("/", 1) - if len(ts) != 2: - return None - d = {} - if len(parts) == 2: - for i in parts[1].split(";"): - clause = i.split("=", 1) - if len(clause) == 2: - d[clause[0].strip()] = clause[1].strip() - return ts[0].lower(), ts[1].lower(), d - - -def multipartdecode(headers, content): - """ - Takes a multipart boundary encoded string and returns list of (key, value) tuples. 
- """ - v = headers.get("content-type") - if v: - v = parse_content_type(v) - if not v: - return [] - try: - boundary = v[2]["boundary"].encode("ascii") - except (KeyError, UnicodeError): - return [] - - rx = re.compile(br'\bname="([^"]+)"') - r = [] - - for i in content.split(b"--" + boundary): - parts = i.splitlines() - if len(parts) > 1 and parts[0][0:2] != b"--": - match = rx.search(parts[1]) - if match: - key = match.group(1) - value = b"".join(parts[3 + parts[2:].index(b""):]) - r.append((key, value)) - return r - return [] - - def safe_subn(pattern, repl, target, *args, **kwargs): """ There are Unicode conversion problems with re.subn. We try to smooth diff --git a/test/netlib/http/test_headers.py b/test/netlib/http/test_headers.py index cd2ca9d11..e12bceaf2 100644 --- a/test/netlib/http/test_headers.py +++ b/test/netlib/http/test_headers.py @@ -1,4 +1,5 @@ from netlib.http import Headers +from netlib.http.headers import parse_content_type from netlib.tutils import raises @@ -72,3 +73,12 @@ class TestHeaders(object): replacements = headers.replace(r"Host: ", "X-Host ") assert replacements == 0 assert headers["Host"] == "example.com" + + +def test_parse_content_type(): + p = parse_content_type + assert p("text/html") == ("text", "html", {}) + assert p("text") is None + + v = p("text/html; charset=UTF-8") + assert v == ('text', 'html', {'charset': 'UTF-8'}) diff --git a/test/netlib/http/test_multipart.py b/test/netlib/http/test_multipart.py new file mode 100644 index 000000000..45ae996b6 --- /dev/null +++ b/test/netlib/http/test_multipart.py @@ -0,0 +1,23 @@ +from netlib.http import Headers +from netlib.http import multipart + +def test_decode(): + boundary = 'somefancyboundary' + headers = Headers( + content_type='multipart/form-data; boundary=' + boundary + ) + content = ( + "--{0}\n" + "Content-Disposition: form-data; name=\"field1\"\n\n" + "value1\n" + "--{0}\n" + "Content-Disposition: form-data; name=\"field2\"\n\n" + "value2\n" + 
"--{0}--".format(boundary).encode() + ) + + form = multipart.decode(headers, content) + + assert len(form) == 2 + assert form[0] == (b"field1", b"value1") + assert form[1] == (b"field2", b"value2") diff --git a/test/netlib/test_utils.py b/test/netlib/test_utils.py index c4ee3c108..b3cc9a0b5 100644 --- a/test/netlib/test_utils.py +++ b/test/netlib/test_utils.py @@ -1,7 +1,6 @@ # coding=utf-8 from netlib import utils, tutils -from netlib.http import Headers def test_bidi(): @@ -38,37 +37,6 @@ def test_pretty_size(): assert utils.pretty_size(1024 * 1024) == "1MB" -def test_multipartdecode(): - boundary = 'somefancyboundary' - headers = Headers( - content_type='multipart/form-data; boundary=' + boundary - ) - content = ( - "--{0}\n" - "Content-Disposition: form-data; name=\"field1\"\n\n" - "value1\n" - "--{0}\n" - "Content-Disposition: form-data; name=\"field2\"\n\n" - "value2\n" - "--{0}--".format(boundary).encode() - ) - - form = utils.multipartdecode(headers, content) - - assert len(form) == 2 - assert form[0] == (b"field1", b"value1") - assert form[1] == (b"field2", b"value2") - - -def test_parse_content_type(): - p = utils.parse_content_type - assert p("text/html") == ("text", "html", {}) - assert p("text") is None - - v = p("text/html; charset=UTF-8") - assert v == ('text', 'html', {'charset': 'UTF-8'}) - - def test_safe_subn(): assert utils.safe_subn("foo", u"bar", "\xc2foo") From b2f63458fcda7878d5cf674c2f1e9ca7db5bf3ce Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 19:32:08 +1200 Subject: [PATCH 09/14] Move human-friendly format functions to netlib.human, remove redundant implementations --- mitmproxy/cmdline.py | 7 +++-- mitmproxy/console/common.py | 7 +++-- mitmproxy/console/statusbar.py | 3 +- mitmproxy/dump.py | 6 ++-- mitmproxy/proxy/config.py | 8 ++++-- mitmproxy/utils.py | 43 ---------------------------- netlib/http/response.py | 4 +-- netlib/human.py | 51 ++++++++++++++++++++++++++++++++++ netlib/utils.py | 16 ----------- 
netlib/websockets/frame.py | 3 +- pathod/language/base.py | 6 ++-- pathod/pathod_cmdline.py | 4 +-- pathod/utils.py | 23 --------------- test/mitmproxy/test_utils.py | 25 ----------------- test/netlib/test_human.py | 31 +++++++++++++++++++++ test/netlib/test_utils.py | 7 ----- test/pathod/test_utils.py | 7 ----- 17 files changed, 110 insertions(+), 141 deletions(-) create mode 100644 netlib/human.py create mode 100644 test/netlib/test_human.py diff --git a/mitmproxy/cmdline.py b/mitmproxy/cmdline.py index ad0427d2e..5ea76e44c 100644 --- a/mitmproxy/cmdline.py +++ b/mitmproxy/cmdline.py @@ -7,7 +7,8 @@ import configargparse from netlib.tcp import Address, sslversion_choices import netlib.http.url -from . import filt, utils, version +from netlib import human +from . import filt, version from .proxy import config APP_HOST = "mitm.it" @@ -135,7 +136,9 @@ def get_common_options(options): if options.stickyauth_filt: stickyauth = options.stickyauth_filt - stream_large_bodies = utils.parse_size(options.stream_large_bodies) + stream_large_bodies = options.stream_large_bodies + if stream_large_bodies: + stream_large_bodies = human.parse_size(stream_large_bodies) reps = [] for i in options.replace: diff --git a/mitmproxy/console/common.py b/mitmproxy/console/common.py index ba5cfd620..a3ed5dc81 100644 --- a/mitmproxy/console/common.py +++ b/mitmproxy/console/common.py @@ -4,7 +4,8 @@ import urwid import urwid.util import os -import netlib.utils +import netlib +from netlib import human from .. import utils from .. 
import flow @@ -419,7 +420,7 @@ def format_flow(f, focus, extended=False, hostheader=False, marked=False): ) if f.response: if f.response.content: - contentdesc = netlib.utils.pretty_size(len(f.response.content)) + contentdesc = human.pretty_size(len(f.response.content)) elif f.response.content is None: contentdesc = "[content missing]" else: @@ -427,7 +428,7 @@ def format_flow(f, focus, extended=False, hostheader=False, marked=False): duration = 0 if f.response.timestamp_end and f.request.timestamp_start: duration = f.response.timestamp_end - f.request.timestamp_start - roundtrip = utils.pretty_duration(duration) + roundtrip = human.pretty_duration(duration) d.update(dict( resp_code = f.response.status_code, diff --git a/mitmproxy/console/statusbar.py b/mitmproxy/console/statusbar.py index 20656cb4a..d57ac69cc 100644 --- a/mitmproxy/console/statusbar.py +++ b/mitmproxy/console/statusbar.py @@ -3,6 +3,7 @@ import os.path import urwid import netlib.utils +from netlib import human from . import pathedit, signals, common @@ -193,7 +194,7 @@ class StatusBar(urwid.WidgetWrap): opts.append("following") if self.master.stream_large_bodies: opts.append( - "stream:%s" % netlib.utils.pretty_size( + "stream:%s" % human.pretty_size( self.master.stream_large_bodies.max_size ) ) diff --git a/mitmproxy/dump.py b/mitmproxy/dump.py index 4443995aa..0f54b4d40 100644 --- a/mitmproxy/dump.py +++ b/mitmproxy/dump.py @@ -4,8 +4,8 @@ import sys import click import itertools -from netlib import tcp -from netlib.utils import bytes_to_escaped_str, pretty_size +from netlib import tcp, human +from netlib.utils import bytes_to_escaped_str from . 
import flow, filt, contentviews, controller from .exceptions import ContentViewException, FlowReadException, ScriptException @@ -287,7 +287,7 @@ class DumpMaster(flow.FlowMaster): if flow.response.content is None: size = "(content missing)" else: - size = pretty_size(len(flow.response.content)) + size = human.pretty_size(len(flow.response.content)) size = click.style(size, bold=True) arrows = click.style("<<", bold=True) diff --git a/mitmproxy/proxy/config.py b/mitmproxy/proxy/config.py index b08470bd7..163e92dd2 100644 --- a/mitmproxy/proxy/config.py +++ b/mitmproxy/proxy/config.py @@ -6,11 +6,11 @@ import re import six from OpenSSL import SSL -from netlib import certutils, tcp +from netlib import certutils, tcp, human from netlib.http import authentication from netlib.tcp import Address, sslversion_choices -from .. import utils, platform +from .. import platform CONF_BASENAME = "mitmproxy" CA_DIR = "~/.mitmproxy" @@ -125,7 +125,9 @@ class ProxyConfig: def process_proxy_options(parser, options): - body_size_limit = utils.parse_size(options.body_size_limit) + body_size_limit = options.body_size_limit + if body_size_limit: + body_size_limit = human.parse_size(body_size_limit) c = 0 mode, upstream_server, upstream_auth = "regular", None, None diff --git a/mitmproxy/utils.py b/mitmproxy/utils.py index e56ac473c..053425004 100644 --- a/mitmproxy/utils.py +++ b/mitmproxy/utils.py @@ -58,20 +58,6 @@ def pretty_json(s): return json.dumps(p, sort_keys=True, indent=4) -def pretty_duration(secs): - formatters = [ - (100, "{:.0f}s"), - (10, "{:2.1f}s"), - (1, "{:1.2f}s"), - ] - - for limit, formatter in formatters: - if secs >= limit: - return formatter.format(secs) - # less than 1 sec - return "{:.0f}ms".format(secs * 1000) - - pkg_data = netlib.utils.Data(__name__) @@ -117,32 +103,3 @@ def clean_hanging_newline(t): if t and t[-1] == "\n": return t[:-1] return t - - -def parse_size(s): - """ - Parses a size specification. 
Valid specifications are: - - 123: bytes - 123k: kilobytes - 123m: megabytes - 123g: gigabytes - """ - if not s: - return None - mult = None - if s[-1].lower() == "k": - mult = 1024**1 - elif s[-1].lower() == "m": - mult = 1024**2 - elif s[-1].lower() == "g": - mult = 1024**3 - - if mult: - s = s[:-1] - else: - mult = 1 - try: - return int(s) * mult - except ValueError: - raise ValueError("Invalid size specification: %s" % s) diff --git a/netlib/http/response.py b/netlib/http/response.py index a6a5bf474..858b3aea6 100644 --- a/netlib/http/response.py +++ b/netlib/http/response.py @@ -7,7 +7,7 @@ from . import cookies from .headers import Headers from .message import Message, _native, _always_bytes, MessageData from ..multidict import MultiDictView -from .. import utils +from .. import human class ResponseData(MessageData): @@ -36,7 +36,7 @@ class Response(Message): if self.content: details = "{}, {}".format( self.headers.get("content-type", "unknown content type"), - utils.pretty_size(len(self.content)) + human.pretty_size(len(self.content)) ) else: details = "no content" diff --git a/netlib/human.py b/netlib/human.py new file mode 100644 index 000000000..f4640c00c --- /dev/null +++ b/netlib/human.py @@ -0,0 +1,51 @@ +SIZE_UNITS = dict( + b=1024 ** 0, + k=1024 ** 1, + m=1024 ** 2, + g=1024 ** 3, + t=1024 ** 4, +) + + +def pretty_size(size): + suffixes = [ + ("B", 2 ** 10), + ("kB", 2 ** 20), + ("MB", 2 ** 30), + ] + for suf, lim in suffixes: + if size >= lim: + continue + else: + x = round(size / float(lim / 2 ** 10), 2) + if x == int(x): + x = int(x) + return str(x) + suf + + +def parse_size(s): + try: + return int(s) + except ValueError: + pass + for i in SIZE_UNITS.keys(): + if s.endswith(i): + try: + return int(s[:-1]) * SIZE_UNITS[i] + except ValueError: + break + raise ValueError("Invalid size specification.") + + +def pretty_duration(secs): + formatters = [ + (100, "{:.0f}s"), + (10, "{:2.1f}s"), + (1, "{:1.2f}s"), + ] + + for limit, formatter in formatters: 
+ if secs >= limit: + return formatter.format(secs) + # less than 1 sec + return "{:.0f}ms".format(secs * 1000) diff --git a/netlib/utils.py b/netlib/utils.py index a0150e779..6be1c17f3 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -116,22 +116,6 @@ class BiDi(object): return self.values.get(n, default) -def pretty_size(size): - suffixes = [ - ("B", 2 ** 10), - ("kB", 2 ** 20), - ("MB", 2 ** 30), - ] - for suf, lim in suffixes: - if size >= lim: - continue - else: - x = round(size / float(lim / 2 ** 10), 2) - if x == int(x): - x = int(x) - return str(x) + suf - - class Data(object): def __init__(self, name): diff --git a/netlib/websockets/frame.py b/netlib/websockets/frame.py index da5a97f3f..cf8917c17 100644 --- a/netlib/websockets/frame.py +++ b/netlib/websockets/frame.py @@ -9,6 +9,7 @@ import six from .protocol import Masker from netlib import tcp from netlib import utils +from netlib import human MAX_16_BIT_INT = (1 << 16) @@ -98,7 +99,7 @@ class FrameHeader(object): if self.masking_key: vals.append(":key=%s" % repr(self.masking_key)) if self.payload_length: - vals.append(" %s" % utils.pretty_size(self.payload_length)) + vals.append(" %s" % human.pretty_size(self.payload_length)) return "".join(vals) def human_readable(self): diff --git a/pathod/language/base.py b/pathod/language/base.py index 54ca64926..97111ed63 100644 --- a/pathod/language/base.py +++ b/pathod/language/base.py @@ -5,8 +5,8 @@ import pyparsing as pp from six.moves import reduce from netlib.utils import escaped_str_to_bytes, bytes_to_escaped_str +from netlib import human -from .. import utils from . 
import generators, exceptions @@ -158,7 +158,7 @@ class TokValueGenerate(Token): self.usize, self.unit, self.datatype = usize, unit, datatype def bytes(self): - return self.usize * utils.SIZE_UNITS[self.unit] + return self.usize * human.SIZE_UNITS[self.unit] def get_generator(self, settings_): return generators.RandomGenerator(self.datatype, self.bytes()) @@ -173,7 +173,7 @@ class TokValueGenerate(Token): u = reduce( operator.or_, - [pp.Literal(i) for i in utils.SIZE_UNITS.keys()] + [pp.Literal(i) for i in human.SIZE_UNITS.keys()] ).leaveWhitespace() e = e + pp.Optional(u, default=None) diff --git a/pathod/pathod_cmdline.py b/pathod/pathod_cmdline.py index a7cd24957..a4f05fafe 100644 --- a/pathod/pathod_cmdline.py +++ b/pathod/pathod_cmdline.py @@ -4,7 +4,7 @@ import os import os.path import re -from netlib import tcp +from netlib import tcp, human from . import pathod, version, utils @@ -205,7 +205,7 @@ def args_pathod(argv, stdout_=sys.stdout, stderr_=sys.stderr): sizelimit = None if args.sizelimit: try: - sizelimit = utils.parse_size(args.sizelimit) + sizelimit = human.parse_size(args.sizelimit) except ValueError as v: return parser.error(v) args.sizelimit = sizelimit diff --git a/pathod/utils.py b/pathod/utils.py index 8c6d62902..fe12f541c 100644 --- a/pathod/utils.py +++ b/pathod/utils.py @@ -5,15 +5,6 @@ import netlib.utils from netlib.utils import bytes_to_escaped_str -SIZE_UNITS = dict( - b=1024 ** 0, - k=1024 ** 1, - m=1024 ** 2, - g=1024 ** 3, - t=1024 ** 4, -) - - class MemBool(object): """ @@ -28,20 +19,6 @@ class MemBool(object): return bool(v) -def parse_size(s): - try: - return int(s) - except ValueError: - pass - for i in SIZE_UNITS.keys(): - if s.endswith(i): - try: - return int(s[:-1]) * SIZE_UNITS[i] - except ValueError: - break - raise ValueError("Invalid size specification.") - - def parse_anchor_spec(s): """ Return a tuple, or None on error. 
diff --git a/test/mitmproxy/test_utils.py b/test/mitmproxy/test_utils.py index db7dec4ab..2af7a3325 100644 --- a/test/mitmproxy/test_utils.py +++ b/test/mitmproxy/test_utils.py @@ -43,21 +43,6 @@ def test_pretty_json(): assert not utils.pretty_json("moo") -def test_pretty_duration(): - assert utils.pretty_duration(0.00001) == "0ms" - assert utils.pretty_duration(0.0001) == "0ms" - assert utils.pretty_duration(0.001) == "1ms" - assert utils.pretty_duration(0.01) == "10ms" - assert utils.pretty_duration(0.1) == "100ms" - assert utils.pretty_duration(1) == "1.00s" - assert utils.pretty_duration(10) == "10.0s" - assert utils.pretty_duration(100) == "100s" - assert utils.pretty_duration(1000) == "1000s" - assert utils.pretty_duration(10000) == "10000s" - assert utils.pretty_duration(1.123) == "1.12s" - assert utils.pretty_duration(0.123) == "123ms" - - def test_LRUCache(): cache = utils.LRUCache(2) @@ -89,13 +74,3 @@ def test_LRUCache(): assert len(cache.cacheList) == 2 assert len(cache.cache) == 2 - - -def test_parse_size(): - assert not utils.parse_size("") - assert utils.parse_size("1") == 1 - assert utils.parse_size("1k") == 1024 - assert utils.parse_size("1m") == 1024**2 - assert utils.parse_size("1g") == 1024**3 - tutils.raises(ValueError, utils.parse_size, "1f") - tutils.raises(ValueError, utils.parse_size, "ak") diff --git a/test/netlib/test_human.py b/test/netlib/test_human.py new file mode 100644 index 000000000..3a445c0b6 --- /dev/null +++ b/test/netlib/test_human.py @@ -0,0 +1,31 @@ +from netlib import human, tutils + +def test_parse_size(): + assert human.parse_size("1") == 1 + assert human.parse_size("1k") == 1024 + assert human.parse_size("1m") == 1024**2 + assert human.parse_size("1g") == 1024**3 + tutils.raises(ValueError, human.parse_size, "1f") + tutils.raises(ValueError, human.parse_size, "ak") + + +def test_pretty_size(): + assert human.pretty_size(100) == "100B" + assert human.pretty_size(1024) == "1kB" + assert human.pretty_size(1024 + (1024 / 
2.0)) == "1.5kB" + assert human.pretty_size(1024 * 1024) == "1MB" + + +def test_pretty_duration(): + assert human.pretty_duration(0.00001) == "0ms" + assert human.pretty_duration(0.0001) == "0ms" + assert human.pretty_duration(0.001) == "1ms" + assert human.pretty_duration(0.01) == "10ms" + assert human.pretty_duration(0.1) == "100ms" + assert human.pretty_duration(1) == "1.00s" + assert human.pretty_duration(10) == "10.0s" + assert human.pretty_duration(100) == "100s" + assert human.pretty_duration(1000) == "1000s" + assert human.pretty_duration(10000) == "10000s" + assert human.pretty_duration(1.123) == "1.12s" + assert human.pretty_duration(0.123) == "123ms" diff --git a/test/netlib/test_utils.py b/test/netlib/test_utils.py index b3cc9a0b5..e13029cbd 100644 --- a/test/netlib/test_utils.py +++ b/test/netlib/test_utils.py @@ -30,13 +30,6 @@ def test_clean_bin(): assert utils.clean_bin(u"\u2605") == u"\u2605" -def test_pretty_size(): - assert utils.pretty_size(100) == "100B" - assert utils.pretty_size(1024) == "1kB" - assert utils.pretty_size(1024 + (1024 / 2.0)) == "1.5kB" - assert utils.pretty_size(1024 * 1024) == "1MB" - - def test_safe_subn(): assert utils.safe_subn("foo", u"bar", "\xc2foo") diff --git a/test/pathod/test_utils.py b/test/pathod/test_utils.py index 8026a5762..ab4abbaeb 100644 --- a/test/pathod/test_utils.py +++ b/test/pathod/test_utils.py @@ -13,13 +13,6 @@ def test_membool(): assert m.v == 2 -def test_parse_size(): - assert utils.parse_size("100") == 100 - assert utils.parse_size("100k") == 100 * 1024 - tutils.raises("invalid size spec", utils.parse_size, "foo") - tutils.raises("invalid size spec", utils.parse_size, "100kk") - - def test_parse_anchor_spec(): assert utils.parse_anchor_spec("foo=200") == ("foo", "200") assert utils.parse_anchor_spec("foo") is None From f62efed304d7ecd8f6149ff98577b381b4a3a3c9 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 19:45:48 +1200 Subject: [PATCH 10/14] Unify and make symmetric 
pretty_size and parse_size --- netlib/human.py | 33 ++++++++++++++++----------------- test/netlib/test_human.py | 12 ++++++++---- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/netlib/human.py b/netlib/human.py index f4640c00c..9eccd35b5 100644 --- a/netlib/human.py +++ b/netlib/human.py @@ -1,26 +1,25 @@ -SIZE_UNITS = dict( - b=1024 ** 0, - k=1024 ** 1, - m=1024 ** 2, - g=1024 ** 3, - t=1024 ** 4, -) + +SIZE_TABLE = [ + ("b", 1024 ** 0), + ("k", 1024 ** 1), + ("m", 1024 ** 2), + ("g", 1024 ** 3), + ("t", 1024 ** 4), +] + +SIZE_UNITS = dict(SIZE_TABLE) def pretty_size(size): - suffixes = [ - ("B", 2 ** 10), - ("kB", 2 ** 20), - ("MB", 2 ** 30), - ] - for suf, lim in suffixes: - if size >= lim: - continue - else: - x = round(size / float(lim / 2 ** 10), 2) + for bottom, top in zip(SIZE_TABLE, SIZE_TABLE[1:]): + if bottom[1] <= size < top[1]: + suf = bottom[0] + lim = bottom[1] + x = round(size / lim, 2) if x == int(x): x = int(x) return str(x) + suf + return "%s%s"%(size, SIZE_TABLE[0][0]) def parse_size(s): diff --git a/test/netlib/test_human.py b/test/netlib/test_human.py index 3a445c0b6..464d46468 100644 --- a/test/netlib/test_human.py +++ b/test/netlib/test_human.py @@ -1,6 +1,8 @@ from netlib import human, tutils def test_parse_size(): + assert human.parse_size("0") == 0 + assert human.parse_size("0b") == 0 assert human.parse_size("1") == 1 assert human.parse_size("1k") == 1024 assert human.parse_size("1m") == 1024**2 @@ -10,10 +12,12 @@ def test_parse_size(): def test_pretty_size(): - assert human.pretty_size(100) == "100B" - assert human.pretty_size(1024) == "1kB" - assert human.pretty_size(1024 + (1024 / 2.0)) == "1.5kB" - assert human.pretty_size(1024 * 1024) == "1MB" + assert human.pretty_size(0) == "0b" + assert human.pretty_size(100) == "100b" + assert human.pretty_size(1024) == "1k" + assert human.pretty_size(1024 + (1024 / 2.0)) == "1.5k" + assert human.pretty_size(1024 * 1024) == "1m" + assert human.pretty_size(10 * 1024 * 1024) == 
"10m" def test_pretty_duration(): From 40a030f215e1943aefdb2eb6fe2a264b9b1ee33c Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 19:58:28 +1200 Subject: [PATCH 11/14] Satisfy flake8 --- netlib/basetypes.py | 1 + netlib/http/headers.py | 2 +- netlib/http/url.py | 1 + netlib/human.py | 2 +- netlib/utils.py | 1 + test/netlib/http/test_multipart.py | 1 + test/netlib/http/test_url.py | 1 + test/netlib/test_basetypes.py | 1 + test/netlib/test_human.py | 1 + 9 files changed, 9 insertions(+), 2 deletions(-) diff --git a/netlib/basetypes.py b/netlib/basetypes.py index d03246ff1..9d6c60ba1 100644 --- a/netlib/basetypes.py +++ b/netlib/basetypes.py @@ -1,6 +1,7 @@ import six import abc + @six.add_metaclass(abc.ABCMeta) class Serializable(object): """ diff --git a/netlib/http/headers.py b/netlib/http/headers.py index 8f669ec17..fa7b71808 100644 --- a/netlib/http/headers.py +++ b/netlib/http/headers.py @@ -176,7 +176,7 @@ class Headers(MultiDict): self.fields = fields return replacements - + def parse_content_type(c): """ A simple parser for content-type values. Returns a (type, subtype, diff --git a/netlib/http/url.py b/netlib/http/url.py index 147ed5725..8ce28578b 100644 --- a/netlib/http/url.py +++ b/netlib/http/url.py @@ -3,6 +3,7 @@ from six.moves import urllib from .. 
import utils + # PY2 workaround def decode_parse_result(result, enc): if hasattr(result, "decode"): diff --git a/netlib/human.py b/netlib/human.py index 9eccd35b5..a007adc78 100644 --- a/netlib/human.py +++ b/netlib/human.py @@ -19,7 +19,7 @@ def pretty_size(size): if x == int(x): x = int(x) return str(x) + suf - return "%s%s"%(size, SIZE_TABLE[0][0]) + return "%s%s" % (size, SIZE_TABLE[0][0]) def parse_size(s): diff --git a/netlib/utils.py b/netlib/utils.py index 6be1c17f3..b8408d1d0 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -8,6 +8,7 @@ import inspect import six + def always_bytes(unicode_or_bytes, *encode_args): if isinstance(unicode_or_bytes, six.text_type): return unicode_or_bytes.encode(*encode_args) diff --git a/test/netlib/http/test_multipart.py b/test/netlib/http/test_multipart.py index 45ae996b6..1d7e00620 100644 --- a/test/netlib/http/test_multipart.py +++ b/test/netlib/http/test_multipart.py @@ -1,6 +1,7 @@ from netlib.http import Headers from netlib.http import multipart + def test_decode(): boundary = 'somefancyboundary' headers = Headers( diff --git a/test/netlib/http/test_url.py b/test/netlib/http/test_url.py index 697c83a81..26b37230e 100644 --- a/test/netlib/http/test_url.py +++ b/test/netlib/http/test_url.py @@ -1,6 +1,7 @@ from netlib import tutils from netlib.http import url + def test_parse(): with tutils.raises(ValueError): url.parse("") diff --git a/test/netlib/test_basetypes.py b/test/netlib/test_basetypes.py index 2a7eea818..aa4157849 100644 --- a/test/netlib/test_basetypes.py +++ b/test/netlib/test_basetypes.py @@ -1,5 +1,6 @@ from netlib import basetypes + class SerializableDummy(basetypes.Serializable): def __init__(self, i): self.i = i diff --git a/test/netlib/test_human.py b/test/netlib/test_human.py index 464d46468..2a5c2a854 100644 --- a/test/netlib/test_human.py +++ b/test/netlib/test_human.py @@ -1,5 +1,6 @@ from netlib import human, tutils + def test_parse_size(): assert human.parse_size("0") == 0 assert 
human.parse_size("0b") == 0 From 5a75ea3fc65d08c802b4d5fea73e1494ce90aa7d Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 20:19:54 +1200 Subject: [PATCH 12/14] Fix test failures --- test/netlib/http/test_response.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/netlib/http/test_response.py b/test/netlib/http/test_response.py index 1faef7ece..b3c2f7364 100644 --- a/test/netlib/http/test_response.py +++ b/test/netlib/http/test_response.py @@ -24,7 +24,7 @@ class TestResponseCore(object): """ def test_repr(self): response = tresp() - assert repr(response) == "Response(200 OK, unknown content type, 7B)" + assert repr(response) == "Response(200 OK, unknown content type, 7b)" response.content = None assert repr(response) == "Response(200 OK, no content)" From 42e91fcfe1d27ca989c75a7939f652fdfcc47604 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 20:32:28 +1200 Subject: [PATCH 13/14] http2.frame -> http2.framereader --- mitmproxy/protocol/http2.py | 4 ++-- netlib/http/http2/{frame.py => framereader.py} | 0 test/mitmproxy/test_protocol_http2.py | 14 +++++++------- test/netlib/http/http2/test_connections.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) rename netlib/http/http2/{frame.py => framereader.py} (100%) diff --git a/mitmproxy/protocol/http2.py b/mitmproxy/protocol/http2.py index ffea7fae7..6ca783478 100644 --- a/mitmproxy/protocol/http2.py +++ b/mitmproxy/protocol/http2.py @@ -14,7 +14,7 @@ from hyperframe.frame import PriorityFrame from netlib.tcp import ssl_read_select from netlib.exceptions import HttpException from netlib.http import Headers -from netlib.http.http2 import frame +from netlib.http.http2 import framereader import netlib.http.url from .base import Layer @@ -234,7 +234,7 @@ class Http2Layer(Layer): with source_conn.h2.lock: try: - raw_frame = b''.join(frame.http2_read_raw_frame(source_conn.rfile)) + raw_frame = 
b''.join(framereader.http2_read_raw_frame(source_conn.rfile)) except: # read frame failed: connection closed self._kill_all_streams() diff --git a/netlib/http/http2/frame.py b/netlib/http/http2/framereader.py similarity index 100% rename from netlib/http/http2/frame.py rename to netlib/http/http2/framereader.py diff --git a/test/mitmproxy/test_protocol_http2.py b/test/mitmproxy/test_protocol_http2.py index 5ab42caeb..23072260c 100644 --- a/test/mitmproxy/test_protocol_http2.py +++ b/test/mitmproxy/test_protocol_http2.py @@ -13,7 +13,7 @@ from mitmproxy.cmdline import APP_HOST, APP_PORT import netlib from ..netlib import tservers as netlib_tservers -from netlib.http.http2 import frame +from netlib.http.http2 import framereader from . import tservers @@ -48,7 +48,7 @@ class _Http2ServerBase(netlib_tservers.ServerTestBase): done = False while not done: try: - raw = b''.join(frame.http2_read_raw_frame(self.rfile)) + raw = b''.join(framereader.http2_read_raw_frame(self.rfile)) events = h2_conn.receive_data(raw) except: break @@ -200,7 +200,7 @@ class TestSimple(_Http2TestBase, _Http2ServerBase): done = False while not done: try: - events = h2_conn.receive_data(b''.join(frame.http2_read_raw_frame(client.rfile))) + events = h2_conn.receive_data(b''.join(framereader.http2_read_raw_frame(client.rfile))) except: break client.wfile.write(h2_conn.data_to_send()) @@ -270,7 +270,7 @@ class TestWithBodies(_Http2TestBase, _Http2ServerBase): done = False while not done: try: - events = h2_conn.receive_data(b''.join(frame.http2_read_raw_frame(client.rfile))) + events = h2_conn.receive_data(b''.join(framereader.http2_read_raw_frame(client.rfile))) except: break client.wfile.write(h2_conn.data_to_send()) @@ -362,7 +362,7 @@ class TestPushPromise(_Http2TestBase, _Http2ServerBase): responses = 0 while not done: try: - raw = b''.join(frame.http2_read_raw_frame(client.rfile)) + raw = b''.join(framereader.http2_read_raw_frame(client.rfile)) events = h2_conn.receive_data(raw) except: break 
@@ -412,7 +412,7 @@ class TestPushPromise(_Http2TestBase, _Http2ServerBase): responses = 0 while not done: try: - events = h2_conn.receive_data(b''.join(frame.http2_read_raw_frame(client.rfile))) + events = h2_conn.receive_data(b''.join(framereader.http2_read_raw_frame(client.rfile))) except: break client.wfile.write(h2_conn.data_to_send()) @@ -479,7 +479,7 @@ class TestConnectionLost(_Http2TestBase, _Http2ServerBase): done = False while not done: try: - raw = b''.join(frame.http2_read_raw_frame(client.rfile)) + raw = b''.join(framereader.http2_read_raw_frame(client.rfile)) h2_conn.receive_data(raw) except: break diff --git a/test/netlib/http/http2/test_connections.py b/test/netlib/http/http2/test_connections.py index be68a28cb..27cc30bac 100644 --- a/test/netlib/http/http2/test_connections.py +++ b/test/netlib/http/http2/test_connections.py @@ -6,7 +6,7 @@ from netlib import tcp, http from netlib.tutils import raises from netlib.exceptions import TcpDisconnect from netlib.http.http2.connections import HTTP2Protocol, TCPHandler -from netlib.http.http2 import frame +from netlib.http.http2 import framereader from ... 
import tservers @@ -111,11 +111,11 @@ class TestPerformServerConnectionPreface(tservers.ServerTestBase): self.wfile.flush() # check empty settings frame - raw = frame.http2_read_raw_frame(self.rfile) + raw = framereader.http2_read_raw_frame(self.rfile) assert raw == codecs.decode('00000c040000000000000200000000000300000001', 'hex_codec') # check settings acknowledgement - raw = frame.http2_read_raw_frame(self.rfile) + raw = framereader.http2_read_raw_frame(self.rfile) assert raw == codecs.decode('000000040100000000', 'hex_codec') # send settings acknowledgement From 4da125b6a098cc0fd8b1fd2878584beb5df75c6c Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 31 May 2016 20:51:06 +1200 Subject: [PATCH 14/14] Fix tests harder --- netlib/http/http2/connections.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py index 7c7ad6939..16bdf618a 100644 --- a/netlib/http/http2/connections.py +++ b/netlib/http/http2/connections.py @@ -7,7 +7,7 @@ import hyperframe.frame from hpack.hpack import Encoder, Decoder from ... import utils from .. import Headers, Response, Request, url -from . import frame +from . import framereader class TCPHandler(object): @@ -280,7 +280,7 @@ class HTTP2Protocol(object): def read_frame(self, hide=False): while True: - frm = frame.http2_read_frame(self.tcp_handler.rfile) + frm = framereader.http2_read_frame(self.tcp_handler.rfile) if not hide and self.dump_frames: # pragma no cover print(frm.human_readable("<<"))