From 280d9b862575d79b391e28c80156697d2d674c48 Mon Sep 17 00:00:00 2001
From: Brad Peabody
Date: Thu, 17 Jul 2014 22:34:29 -0700
Subject: [PATCH 1/2] added some additional functions for dealing with chunks -
 needed for mitmproxy streaming capability

---
 netlib/http.py    | 63 +++++++++++++++++++++++++++++++++++++++++-
 test/test_http.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 132 insertions(+), 1 deletion(-)

diff --git a/netlib/http.py b/netlib/http.py
index 21cde5386..736c2c886 100644
--- a/netlib/http.py
+++ b/netlib/http.py
@@ -136,6 +136,49 @@ def read_chunked(fp, headers, limit, is_request):
             break
     return content
 
+def read_next_chunk(fp, headers, is_request):
+    """
+        Read the next piece of a chunked HTTP body. Returns the next
+        piece of content as a string, or None if we hit the end.
+    """
+    # TODO: see and understand the FIXME in read_chunked and
+    #       see whether it needs to apply here as well
+    content = ""
+    code = 400 if is_request else 502
+    line = fp.readline(128)
+    if line == "":
+        raise HttpErrorConnClosed(code, "Connection closed prematurely")
+    try:
+        length = int(line, 16)
+    except ValueError:
+        # TODO: see note in this part of read_chunked()
+        raise HttpError(code, "Invalid chunked encoding length: %s"%line)
+    if length > 0:
+        content += fp.read(length)
+        print "read content: '%s'" % content
+        line = fp.readline(5)
+        if line == '':
+            raise HttpErrorConnClosed(code, "Connection closed prematurely")
+        if line != '\r\n':
+            raise HttpError(code, "Malformed chunked body: '%s' (len=%d)" % (line, length))
+    if content == "":
+        content = None # normalize zero length to None, meaning end of chunked stream
+    return content # return this chunk
+
+def write_chunk(fp, content):
+    """
+        Write a chunk in chunked encoding format. Returns True
+        if there may be more chunks, or False if you passed
+        None or "", meaning this was the last chunk.
+    """
+    if content is None or content == "":
+        fp.write("0\r\n\r\n")
+        return False
+    fp.write("%x\r\n" % len(content))
+    fp.write(content)
+    fp.write("\r\n")
+    return True
+
 
 def get_header_tokens(headers, key):
     """
@@ -350,4 +393,22 @@ def read_http_body(rfile, headers, limit, is_request):
         not_done = rfile.read(1)
         if not_done:
             raise HttpError(400 if is_request else 509, "HTTP Body too large. Limit is %s," % limit)
-    return content
\ No newline at end of file
+    return content
+
+def expected_http_body_size(headers, is_request):
+    """
+        Returns the length of the expected body, or -1 if it is
+        not known and we should just read until the end of the
+        stream.
+    """
+    if "content-length" in headers:
+        try:
+            l = int(headers["content-length"][0])
+            if l < 0:
+                raise ValueError()
+            return l
+        except ValueError:
+            raise HttpError(400 if is_request else 502, "Invalid content-length header: %s"%headers["content-length"])
+    elif is_request:
+        return 0
+    return -1
diff --git a/test/test_http.py b/test/test_http.py
index df351dc79..e1dffab85 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -38,6 +38,57 @@ def test_read_chunked():
 
     tutils.raises("too large", http.read_chunked, s, None, 2, True)
 
+def test_read_next_chunk():
+    s = cStringIO.StringIO(
+        "4\r\n" +
+        "mitm\r\n" +
+        "5\r\n" +
+        "proxy\r\n" +
+        "e\r\n" +
+        " in\r\n\r\nchunks.\r\n" +
+        "0\r\n" +
+        "\r\n")
+    assert http.read_next_chunk(s, None, False) == "mitm"
+    assert http.read_next_chunk(s, None, False) == "proxy"
+    assert http.read_next_chunk(s, None, False) == " in\r\n\r\nchunks."
+    assert http.read_next_chunk(s, None, False) is None
+
+    s = cStringIO.StringIO("")
+    tutils.raises("closed prematurely", http.read_next_chunk, s, None, False)
+
+    s = cStringIO.StringIO("1\r\na\r\n0\r\n")
+    http.read_next_chunk(s, None, False)
+    tutils.raises("closed prematurely", http.read_next_chunk, s, None, False)
+
+    s = cStringIO.StringIO("1\r\nfoo")
+    tutils.raises("malformed chunked body", http.read_next_chunk, s, None, False)
+
+    s = cStringIO.StringIO("foo\r\nfoo")
+    tutils.raises(http.HttpError, http.read_next_chunk, s, None, False)
+
+def test_write_chunk():
+
+    expected = ("" +
+        "4\r\n" +
+        "mitm\r\n" +
+        "5\r\n" +
+        "proxy\r\n" +
+        "e\r\n" +
+        " in\r\n\r\nchunks.\r\n" +
+        "0\r\n" +
+        "\r\n")
+
+    s = cStringIO.StringIO()
+    http.write_chunk(s, "mitm")
+    http.write_chunk(s, "proxy")
+    http.write_chunk(s, " in\r\n\r\nchunks.")
+    http.write_chunk(s, None)
+
+    print len(s.getvalue())
+    print len(expected)
+
+    assert s.getvalue() == expected
+
 def test_connection_close():
     h = odict.ODictCaseless()
     assert http.connection_close((1, 0), h)
@@ -111,6 +162,25 @@ def test_read_http_body():
     s = cStringIO.StringIO("5\r\naaaaa\r\n0\r\n\r\n")
     assert http.read_http_body(s, h, 100, False) == "aaaaa"
 
+def test_expected_http_body_size():
+    # gibber in the content-length field
+    h = odict.ODictCaseless()
+    h["content-length"] = ["foo"]
+    tutils.raises(http.HttpError, http.expected_http_body_size, h, False)
+    # negative number in the content-length field
+    h = odict.ODictCaseless()
+    h["content-length"] = ["-7"]
+    tutils.raises(http.HttpError, http.expected_http_body_size, h, False)
+    # explicit length
+    h = odict.ODictCaseless()
+    h["content-length"] = ["5"]
+    assert http.expected_http_body_size(h, False) == 5
+    # no length
+    h = odict.ODictCaseless()
+    assert http.expected_http_body_size(h, False) == -1
+    # no length request
+    h = odict.ODictCaseless()
+    assert http.expected_http_body_size(h, True) == 0
+
 def test_parse_http_protocol():
     assert http.parse_http_protocol("HTTP/1.1") == (1, 1)

From 6bd5df79f82a33b7e725afb5f279bda4cba41935 Mon Sep 17 00:00:00 2001
From: Maximilian Hils
Date: Mon, 21 Jul 2014 14:01:24 +0200
Subject: [PATCH 2/2] refactor response length handling

---
 netlib/http.py    | 183 ++++++++++++++++++++--------------------
 test/test_http.py |  99 +++++++------------------
 2 files changed, 107 insertions(+), 175 deletions(-)

diff --git a/netlib/http.py b/netlib/http.py
index 736c2c886..f88e66523 100644
--- a/netlib/http.py
+++ b/netlib/http.py
@@ -1,4 +1,5 @@
 import string, urlparse, binascii
+import sys
 import odict, utils
 
@@ -88,14 +89,14 @@ def read_headers(fp):
             # We're being liberal in what we accept, here.
             if i > 0:
                 name = line[:i]
-                value = line[i+1:].strip()
+                value = line[i + 1:].strip()
                 ret.append([name, value])
             else:
                 return None
     return odict.ODictCaseless(ret)
 
 
-def read_chunked(fp, headers, limit, is_request):
+def read_chunked(fp, limit, is_request):
     """
         Read a chunked HTTP body.
 
@@ -103,10 +104,9 @@ def read_chunked(fp, headers, limit, is_request):
         May raise HttpError.
     """
     # FIXME: Should check if chunked is the final encoding in the headers
    # http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-16#section-3.3 3.3 2.
-    content = ""
     total = 0
     code = 400 if is_request else 502
-    while 1:
+    while True:
         line = fp.readline(128)
         if line == "":
             raise HttpErrorConnClosed(code, "Connection closed prematurely")
@@ -114,70 +114,19 @@ def read_chunked(fp, headers, limit, is_request):
         if line != '\r\n' and line != '\n':
             try:
                 length = int(line, 16)
             except ValueError:
-                # FIXME: Not strictly correct - this could be from the server, in which
-                # case we should send a 502.
-                raise HttpError(code, "Invalid chunked encoding length: %s"%line)
-            if not length:
-                break
+                raise HttpError(code, "Invalid chunked encoding length: %s" % line)
             total += length
             if limit is not None and total > limit:
-                msg = "HTTP Body too large."\
-                    " Limit is %s, chunked content length was at least %s"%(limit, total)
+                msg = "HTTP Body too large." \
+                      " Limit is %s, chunked content length was at least %s" % (limit, total)
                 raise HttpError(code, msg)
-            content += fp.read(length)
-            line = fp.readline(5)
-            if line != '\r\n':
+            chunk = fp.read(length)
+            suffix = fp.readline(5)
+            if suffix != '\r\n':
                 raise HttpError(code, "Malformed chunked body")
-    while 1:
-        line = fp.readline()
-        if line == "":
-            raise HttpErrorConnClosed(code, "Connection closed prematurely")
-        if line == '\r\n' or line == '\n':
-            break
-    return content
-
-def read_next_chunk(fp, headers, is_request):
-    """
-        Read the next piece of a chunked HTTP body. Returns the next
-        piece of content as a string, or None if we hit the end.
-    """
-    # TODO: see and understand the FIXME in read_chunked and
-    #       see whether it needs to apply here as well
-    content = ""
-    code = 400 if is_request else 502
-    line = fp.readline(128)
-    if line == "":
-        raise HttpErrorConnClosed(code, "Connection closed prematurely")
-    try:
-        length = int(line, 16)
-    except ValueError:
-        # TODO: see note in this part of read_chunked()
-        raise HttpError(code, "Invalid chunked encoding length: %s"%line)
-    if length > 0:
-        content += fp.read(length)
-        print "read content: '%s'" % content
-        line = fp.readline(5)
-        if line == '':
-            raise HttpErrorConnClosed(code, "Connection closed prematurely")
-        if line != '\r\n':
-            raise HttpError(code, "Malformed chunked body: '%s' (len=%d)" % (line, length))
-    if content == "":
-        content = None # normalize zero length to None, meaning end of chunked stream
-    return content # return this chunk
-
-def write_chunk(fp, content):
-    """
-        Write a chunk in chunked encoding format. Returns True
-        if there may be more chunks, or False if you passed
-        None or "", meaning this was the last chunk.
-    """
-    if content is None or content == "":
-        fp.write("0\r\n\r\n")
-        return False
-    fp.write("%x\r\n" % len(content))
-    fp.write(content)
-    fp.write("\r\n")
-    return True
+            yield line, chunk, '\r\n'
+            if length == 0:
+                return
 
 
 def get_header_tokens(headers, key):
@@ -307,6 +256,7 @@ def parse_init_http(line):
 def connection_close(httpversion, headers):
     """
         Checks the message to see if the client connection should be closed according to RFC 2616 Section 8.1
+        Note that the connection should also be closed if the response has been read until the end of the stream.
     """
    # At first, check if we have an explicit Connection header.
    if "connection" in headers:
@@ -323,7 +273,7 @@ def connection_close(httpversion, headers):
 
 def parse_response_line(line):
     parts = line.strip().split(" ", 2)
-    if len(parts) == 2: # handle missing message gracefully
+    if len(parts) == 2:  # handle missing message gracefully
         parts.append("")
     if len(parts) != 3:
         return None
@@ -335,37 +285,38 @@ def parse_response_line(line):
     return (proto, code, msg)
 
-def read_response(rfile, method, body_size_limit, include_body=True):
+def read_response(rfile, request_method, body_size_limit, include_body=True):
     """
         Return an (httpversion, code, msg, headers, content) tuple.
     """
     line = rfile.readline()
-    if line == "\r\n" or line == "\n": # Possible leftover from previous message
+    if line == "\r\n" or line == "\n":  # Possible leftover from previous message
         line = rfile.readline()
     if not line:
         raise HttpErrorConnClosed(502, "Server disconnect.")
     parts = parse_response_line(line)
     if not parts:
-        raise HttpError(502, "Invalid server response: %s"%repr(line))
+        raise HttpError(502, "Invalid server response: %s" % repr(line))
     proto, code, msg = parts
     httpversion = parse_http_protocol(proto)
     if httpversion is None:
-        raise HttpError(502, "Invalid HTTP version in line: %s"%repr(proto))
+        raise HttpError(502, "Invalid HTTP version in line: %s" % repr(proto))
     headers = read_headers(rfile)
     if headers is None:
         raise HttpError(502, "Invalid headers.")
 
-    # Parse response body according to http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-16#section-3.3
-    if method in ["HEAD", "CONNECT"] or (code in [204, 304]) or 100 <= code <= 199:
-        content = ""
-    elif include_body:
-        content = read_http_body(rfile, headers, body_size_limit, False)
+    if include_body:
+        content = read_http_body(rfile, headers, body_size_limit, request_method, code, False)
     else:
-        content = None # if include_body==False then a None content means the body should be read separately
+        content = None  # if include_body==False then a None content means the body should be read separately
     return httpversion, code, msg, headers, content
 
-def read_http_body(rfile, headers, limit, is_request):
+def read_http_body(*args, **kwargs):
+    return "".join(content for _, content, _ in read_http_body_chunked(*args, **kwargs))
+
+
+def read_http_body_chunked(rfile, headers, limit, request_method, response_code, is_request, max_chunk_size=None):
     """
         Read an HTTP message body:
 
@@ -374,41 +325,69 @@ def read_http_body(rfile, headers, limit, is_request):
         limit: Size limit.
         is_request: True if the body to read belongs to a request, False otherwise
     """
-    if has_chunked_encoding(headers):
-        content = read_chunked(rfile, headers, limit, is_request)
-    elif "content-length" in headers:
-        try:
-            l = int(headers["content-length"][0])
-            if l < 0:
-                raise ValueError()
-        except ValueError:
-            raise HttpError(400 if is_request else 502, "Invalid content-length header: %s"%headers["content-length"])
-        if limit is not None and l > limit:
-            raise HttpError(400 if is_request else 509, "HTTP Body too large. Limit is %s, content-length was %s"%(limit, l))
-        content = rfile.read(l)
-    elif is_request:
-        content = ""
+    if max_chunk_size is None:
+        max_chunk_size = limit or sys.maxint
+
+    expected_size = expected_http_body_size(headers, is_request, request_method, response_code)
+
+    if expected_size is None:
+        if has_chunked_encoding(headers):
+            # Python 3: yield from
+            for x in read_chunked(rfile, limit, is_request):
+                yield x
+        else:  # pragma: nocover
+            raise HttpError(400 if is_request else 502, "Content-Length unknown but no chunked encoding")
+    elif expected_size >= 0:
+        if limit is not None and expected_size > limit:
+            raise HttpError(400 if is_request else 509,
+                            "HTTP Body too large. Limit is %s, content-length was %s" % (limit, expected_size))
+        bytes_left = expected_size
+        while bytes_left:
+            chunk_size = min(bytes_left, max_chunk_size)
+            yield "", rfile.read(chunk_size), ""
+            bytes_left -= chunk_size
     else:
-        content = rfile.read(limit if limit else -1)
+        bytes_left = limit or -1
+        while bytes_left:
+            chunk_size = min(bytes_left, max_chunk_size)
+            content = rfile.read(chunk_size)
+            if not content:
+                return
+            yield "", content, ""
+            bytes_left -= chunk_size
         not_done = rfile.read(1)
         if not_done:
             raise HttpError(400 if is_request else 509, "HTTP Body too large. Limit is %s," % limit)
-    return content
 
-def expected_http_body_size(headers, is_request):
+
+def expected_http_body_size(headers, is_request, request_method, response_code):
     """
-        Returns the length of the expected body, or -1 if it is
-        not known and we should just read until the end of the
-        stream.
+        Returns the expected body length:
+        - a positive integer, if the size is known in advance
+        - None, if the size is unknown in advance (chunked encoding)
+        - -1, if all data should be read until end of stream.
     """
+
+    # Determine response size according to http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-16#section-3.3
+    if request_method:
+        request_method = request_method.upper()
+
+    if (not is_request and (
+            request_method == "HEAD" or
+            (request_method == "CONNECT" and response_code == 200) or
+            response_code in [204, 304] or
+            100 <= response_code <= 199)):
+        return 0
+    if has_chunked_encoding(headers):
+        return None
     if "content-length" in headers:
         try:
-            l = int(headers["content-length"][0])
-            if l < 0:
+            size = int(headers["content-length"][0])
+            if size < 0:
                 raise ValueError()
-            return l
+            return size
         except ValueError:
-            raise HttpError(400 if is_request else 502, "Invalid content-length header: %s"%headers["content-length"])
-        elif is_request:
+            raise HttpError(400 if is_request else 502, "Invalid content-length header: %s" % headers["content-length"])
+    if is_request:
         return 0
-    return -1
+    return -1
\ No newline at end of file
diff --git a/test/test_http.py b/test/test_http.py
index e1dffab85..497e80e20 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -16,79 +16,32 @@ def test_has_chunked_encoding():
 
 def test_read_chunked():
+
+    h = odict.ODictCaseless()
+    h["transfer-encoding"] = ["chunked"]
     s = cStringIO.StringIO("1\r\na\r\n0\r\n")
-    tutils.raises("closed prematurely", http.read_chunked, s, None, None, True)
+
+    tutils.raises("malformed chunked body", http.read_http_body, s, h, None, "GET", None, True)
 
     s = cStringIO.StringIO("1\r\na\r\n0\r\n\r\n")
-    assert http.read_chunked(s, None, None, True) == "a"
+    assert http.read_http_body(s, h, None, "GET", None, True) == "a"
 
     s = cStringIO.StringIO("\r\n\r\n1\r\na\r\n0\r\n\r\n")
-    assert http.read_chunked(s, None, None, True) == "a"
+    assert http.read_http_body(s, h, None, "GET", None, True) == "a"
 
     s = cStringIO.StringIO("\r\n")
-    tutils.raises("closed prematurely", http.read_chunked, s, None, None, True)
+    tutils.raises("closed prematurely", http.read_http_body, s, h, None, "GET", None, True)
 
     s = cStringIO.StringIO("1\r\nfoo")
-    tutils.raises("malformed chunked body", http.read_chunked, s, None, None, True)
+    tutils.raises("malformed chunked body", http.read_http_body, s, h, None, "GET", None, True)
 
     s = cStringIO.StringIO("foo\r\nfoo")
-    tutils.raises(http.HttpError, http.read_chunked, s, None, None, True)
+    tutils.raises(http.HttpError, http.read_http_body, s, h, None, "GET", None, True)
 
     s = cStringIO.StringIO("5\r\naaaaa\r\n0\r\n\r\n")
-    tutils.raises("too large", http.read_chunked, s, None, 2, True)
+    tutils.raises("too large", http.read_http_body, s, h, 2, "GET", None, True)
 
-def test_read_next_chunk():
-    s = cStringIO.StringIO(
-        "4\r\n" +
-        "mitm\r\n" +
-        "5\r\n" +
-        "proxy\r\n" +
-        "e\r\n" +
-        " in\r\n\r\nchunks.\r\n" +
-        "0\r\n" +
-        "\r\n")
-    assert http.read_next_chunk(s, None, False) == "mitm"
-    assert http.read_next_chunk(s, None, False) == "proxy"
-    assert http.read_next_chunk(s, None, False) == " in\r\n\r\nchunks."
-    assert http.read_next_chunk(s, None, False) is None
-
-    s = cStringIO.StringIO("")
-    tutils.raises("closed prematurely", http.read_next_chunk, s, None, False)
-
-    s = cStringIO.StringIO("1\r\na\r\n0\r\n")
-    http.read_next_chunk(s, None, False)
-    tutils.raises("closed prematurely", http.read_next_chunk, s, None, False)
-
-    s = cStringIO.StringIO("1\r\nfoo")
-    tutils.raises("malformed chunked body", http.read_next_chunk, s, None, False)
-
-    s = cStringIO.StringIO("foo\r\nfoo")
-    tutils.raises(http.HttpError, http.read_next_chunk, s, None, False)
-
-def test_write_chunk():
-
-    expected = ("" +
-        "4\r\n" +
-        "mitm\r\n" +
-        "5\r\n" +
-        "proxy\r\n" +
-        "e\r\n" +
-        " in\r\n\r\nchunks.\r\n" +
-        "0\r\n" +
-        "\r\n")
-
-    s = cStringIO.StringIO()
-    http.write_chunk(s, "mitm")
-    http.write_chunk(s, "proxy")
-    http.write_chunk(s, " in\r\n\r\nchunks.")
-    http.write_chunk(s, None)
-
-    print len(s.getvalue())
-    print len(expected)
-
-    assert s.getvalue() == expected
-
 def test_connection_close():
     h = odict.ODictCaseless()
     assert http.connection_close((1, 0), h)
@@ -114,73 +67,73 @@ def test_get_header_tokens():
 
 def test_read_http_body_request():
     h = odict.ODictCaseless()
     r = cStringIO.StringIO("testing")
-    assert http.read_http_body(r, h, None, True) == ""
+    assert http.read_http_body(r, h, None, "GET", None, True) == ""
 
 def test_read_http_body_response():
     h = odict.ODictCaseless()
     s = cStringIO.StringIO("testing")
-    assert http.read_http_body(s, h, None, False) == "testing"
+    assert http.read_http_body(s, h, None, "GET", 200, False) == "testing"
 
 def test_read_http_body():
     # test default case
     h = odict.ODictCaseless()
     h["content-length"] = [7]
     s = cStringIO.StringIO("testing")
-    assert http.read_http_body(s, h, None, False) == "testing"
+    assert http.read_http_body(s, h, None, "GET", 200, False) == "testing"
 
     # test content length: invalid header
     h["content-length"] = ["foo"]
     s = cStringIO.StringIO("testing")
-    tutils.raises(http.HttpError, http.read_http_body, s, h, None, False)
+    tutils.raises(http.HttpError, http.read_http_body, s, h, None, "GET", 200, False)
 
     # test content length: invalid header #2
     h["content-length"] = [-1]
     s = cStringIO.StringIO("testing")
-    tutils.raises(http.HttpError, http.read_http_body, s, h, None, False)
+    tutils.raises(http.HttpError, http.read_http_body, s, h, None, "GET", 200, False)
 
    # test content length: content length > actual content
     h["content-length"] = [5]
     s = cStringIO.StringIO("testing")
-    tutils.raises(http.HttpError, http.read_http_body, s, h, 4, False)
+    tutils.raises(http.HttpError, http.read_http_body, s, h, 4, "GET", 200, False)
 
     # test content length: content length < actual content
     s = cStringIO.StringIO("testing")
-    assert len(http.read_http_body(s, h, None, False)) == 5
+    assert len(http.read_http_body(s, h, None, "GET", 200, False)) == 5
 
     # test no content length: limit > actual content
     h = odict.ODictCaseless()
     s = cStringIO.StringIO("testing")
-    assert len(http.read_http_body(s, h, 100, False)) == 7
+    assert len(http.read_http_body(s, h, 100, "GET", 200, False)) == 7
 
     # test no content length: limit < actual content
     s = cStringIO.StringIO("testing")
-    tutils.raises(http.HttpError, http.read_http_body, s, h, 4, False)
+    tutils.raises(http.HttpError, http.read_http_body, s, h, 4, "GET", 200, False)
 
     # test chunked
     h = odict.ODictCaseless()
     h["transfer-encoding"] = ["chunked"]
     s = cStringIO.StringIO("5\r\naaaaa\r\n0\r\n\r\n")
-    assert http.read_http_body(s, h, 100, False) == "aaaaa"
+    assert http.read_http_body(s, h, 100, "GET", 200, False) == "aaaaa"
 
 def test_expected_http_body_size():
     # gibber in the content-length field
     h = odict.ODictCaseless()
     h["content-length"] = ["foo"]
-    tutils.raises(http.HttpError, http.expected_http_body_size, h, False)
+    tutils.raises(http.HttpError, http.expected_http_body_size, h, False, "GET", 200)
     # negative number in the content-length field
     h = odict.ODictCaseless()
     h["content-length"] = ["-7"]
-    tutils.raises(http.HttpError, http.expected_http_body_size, h, False)
+    tutils.raises(http.HttpError, http.expected_http_body_size, h, False, "GET", 200)
     # explicit length
     h = odict.ODictCaseless()
     h["content-length"] = ["5"]
-    assert http.expected_http_body_size(h, False) == 5
+    assert http.expected_http_body_size(h, False, "GET", 200) == 5
     # no length
     h = odict.ODictCaseless()
-    assert http.expected_http_body_size(h, False) == -1
+    assert http.expected_http_body_size(h, False, "GET", 200) == -1
     # no length request
     h = odict.ODictCaseless()
-    assert http.expected_http_body_size(h, True) == 0
+    assert http.expected_http_body_size(h, True, "GET", None) == 0
 
 def test_parse_http_protocol():
     assert http.parse_http_protocol("HTTP/1.1") == (1, 1)
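
The refactor above turns body reading into a generator API: read_http_body_chunked() yields (prefix, content, suffix) triples, where for chunked bodies the prefix carries the hex length line and the suffix the trailing CRLF, so a proxy can re-serialize each chunk as it arrives instead of buffering the whole body. The following is a minimal usage sketch, not part of either patch: it assumes the patched netlib.http and netlib.odict modules are importable (Python 2), and the in-memory wfile below is a stand-in for a real client connection.

    import cStringIO

    from netlib import http, odict

    headers = odict.ODictCaseless()
    headers["transfer-encoding"] = ["chunked"]

    raw = "4\r\nmitm\r\n5\r\nproxy\r\n0\r\n\r\n"
    rfile = cStringIO.StringIO(raw)
    wfile = cStringIO.StringIO()

    # Each item is a (prefix, content, suffix) triple; writing all three
    # re-serializes the chunk exactly as it appeared on the wire.
    for prefix, content, suffix in http.read_http_body_chunked(
            rfile, headers, None, "GET", 200, False):
        wfile.write(prefix + content + suffix)

    assert wfile.getvalue() == raw

Because the zero-length chunk is yielded like any other triple, the loop also reproduces the terminating "0\r\n\r\n"; for plain content-length bodies the prefix and suffix are empty strings, so only the raw data is written.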