diff --git a/CHANGELOG.md b/CHANGELOG.md index 55880cd7b..d733981d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,10 +24,9 @@ Mitmproxy has a completely new proxy core, fixing many longstanding issues: This greatly improves testing capabilities, prevents a wide array of race conditions, and increases proper isolation between layers. -We wanted to bring these improvements out, so we have a few temporary regressions: +We wanted to bring these improvements out, so we have a few regressions: * Support for HTTP/2 Push Promises has been dropped. -* body_size_limit is currently unsupported. * upstream_auth is currently unsupported. If you depend on these features, please raise your voice in @@ -68,6 +67,7 @@ If you depend on these features, please raise your voice in "red ball" marker, a single character, or an emoji like `:grapes:`. Use the `~marker` filter to filter on marker characters. (@rbdixon) * New `flow.comment` command to add a comment to the flow. Add `~comment ` filter syntax to search flow comments. (@rbdixon) * Fix multipart forms losing `boundary` values on edit (@roytu) +* `Transfer-Encoding: chunked` HTTP message bodies are now retained if they are below the stream_large_bodies limit. * --- TODO: add new PRs above this line --- * ... and various other fixes, documentation improvements, dependency version bumps, etc. diff --git a/mitmproxy/addons/__init__.py b/mitmproxy/addons/__init__.py index 5af458171..07ebd5387 100644 --- a/mitmproxy/addons/__init__.py +++ b/mitmproxy/addons/__init__.py @@ -22,7 +22,6 @@ from mitmproxy.addons import modifybody from mitmproxy.addons import modifyheaders from mitmproxy.addons import stickyauth from mitmproxy.addons import stickycookie -from mitmproxy.addons import streambodies from mitmproxy.addons import save from mitmproxy.addons import tlsconfig from mitmproxy.addons import upstream_auth @@ -54,7 +53,6 @@ def default_addons(): modifyheaders.ModifyHeaders(), stickyauth.StickyAuth(), stickycookie.StickyCookie(), - streambodies.StreamBodies(), save.Save(), tlsconfig.TlsConfig(), upstream_auth.UpstreamAuth(), diff --git a/mitmproxy/addons/core.py b/mitmproxy/addons/core.py index 73e64d168..5a9c22b3d 100644 --- a/mitmproxy/addons/core.py +++ b/mitmproxy/addons/core.py @@ -2,7 +2,7 @@ import typing import os -from mitmproxy.utils import emoji, human +from mitmproxy.utils import emoji from mitmproxy import ctx, hooks from mitmproxy import exceptions from mitmproxy import command @@ -19,40 +19,12 @@ LISTEN_PORT = 8080 class Core: - def load(self, loader): - loader.add_option( - "body_size_limit", typing.Optional[str], None, - """ - Byte size limit of HTTP request and response bodies. Understands - k/m/g suffixes, i.e. 3m for 3 megabytes. - """ - ) - loader.add_option( - "keep_host_header", bool, False, - """ - Reverse Proxy: Keep the original host header instead of rewriting it - to the reverse proxy target. - """ - ) - def configure(self, updated): opts = ctx.options if opts.add_upstream_certs_to_client_chain and not opts.upstream_cert: raise exceptions.OptionsError( "add_upstream_certs_to_client_chain requires the upstream_cert option to be enabled." ) - if "body_size_limit" in updated: - if opts.body_size_limit: # pragma: no cover - ctx.log.warn( - "body_size_limit is currently nonfunctioning, " - "see https://github.com/mitmproxy/mitmproxy/issues/4348") - try: - human.parse_size(opts.body_size_limit) - except ValueError: - raise exceptions.OptionsError( - "Invalid body size limit specification: %s" % - opts.body_size_limit - ) if "mode" in updated: mode = opts.mode if mode.startswith("reverse:") or mode.startswith("upstream:"): diff --git a/mitmproxy/addons/proxyserver.py b/mitmproxy/addons/proxyserver.py index 571e3b12b..f7a60918a 100644 --- a/mitmproxy/addons/proxyserver.py +++ b/mitmproxy/addons/proxyserver.py @@ -2,7 +2,7 @@ import asyncio import warnings from typing import Dict, Optional, Tuple -from mitmproxy import command, controller, ctx, flow, http, log, master, options, platform, tcp, websocket +from mitmproxy import command, controller, ctx, exceptions, flow, http, log, master, options, platform, tcp, websocket from mitmproxy.flow import Error, Flow from mitmproxy.proxy import commands, events, server_hooks from mitmproxy.proxy import server @@ -90,6 +90,28 @@ class Proxyserver: "server-side greetings, as well as accurately mirror TLS ALPN negotiation.", choices=("eager", "lazy") ) + loader.add_option( + "stream_large_bodies", Optional[str], None, + """ + Stream data to the client if response body exceeds the given + threshold. If streamed, the body will not be stored in any way. + Understands k/m/g suffixes, i.e. 3m for 3 megabytes. + """ + ) + loader.add_option( + "body_size_limit", Optional[str], None, + """ + Byte size limit of HTTP request and response bodies. Understands + k/m/g suffixes, i.e. 3m for 3 megabytes. + """ + ) + loader.add_option( + "keep_host_header", bool, False, + """ + Reverse Proxy: Keep the original host header instead of rewriting it + to the reverse proxy target. + """ + ) loader.add_option( "proxy_debug", bool, False, "Enable debug logs in the proxy core.", @@ -102,6 +124,18 @@ class Proxyserver: self.configure(["listen_port"]) def configure(self, updated): + if "stream_large_bodies" in updated: + try: + human.parse_size(ctx.options.stream_large_bodies) + except ValueError: + raise exceptions.OptionsError(f"Invalid stream_large_bodies specification: " + f"{ctx.options.stream_large_bodies}") + if "body_size_limit" in updated: + try: + human.parse_size(ctx.options.body_size_limit) + except ValueError: + raise exceptions.OptionsError(f"Invalid body_size_limit specification: " + f"{ctx.options.body_size_limit}") if not self.is_running: return if "mode" in updated and ctx.options.mode == "transparent": # pragma: no cover diff --git a/mitmproxy/addons/streambodies.py b/mitmproxy/addons/streambodies.py deleted file mode 100644 index e0cd8d7a3..000000000 --- a/mitmproxy/addons/streambodies.py +++ /dev/null @@ -1,49 +0,0 @@ -import typing - -from mitmproxy.net.http import http1 -from mitmproxy import exceptions -from mitmproxy import ctx -from mitmproxy.utils import human - - -class StreamBodies: - def __init__(self): - self.max_size = None - - def load(self, loader): - loader.add_option( - "stream_large_bodies", typing.Optional[str], None, - """ - Stream data to the client if response body exceeds the given - threshold. If streamed, the body will not be stored in any way. - Understands k/m/g suffixes, i.e. 3m for 3 megabytes. - """ - ) - - def configure(self, updated): - if "stream_large_bodies" in updated and ctx.options.stream_large_bodies: - try: - self.max_size = human.parse_size(ctx.options.stream_large_bodies) - except ValueError as e: - raise exceptions.OptionsError(e) - - def run(self, f, is_request): - if self.max_size: - r = f.request if is_request else f.response - try: - expected_size = http1.expected_http_body_size( - f.request, f.response if not is_request else None - ) - except ValueError: - f.kill() - return - if expected_size and not r.raw_content and not (0 <= expected_size <= self.max_size): - # r.stream may already be a callable, which we want to preserve. - r.stream = r.stream or True - ctx.log.info("Streaming {} {}".format("response from" if not is_request else "request to", f.request.host)) - - def requestheaders(self, f): - self.run(f, True) - - def responseheaders(self, f): - self.run(f, False) diff --git a/mitmproxy/net/http/http1/read.py b/mitmproxy/net/http/http1/read.py index b0436e42a..7700bc856 100644 --- a/mitmproxy/net/http/http1/read.py +++ b/mitmproxy/net/http/http1/read.py @@ -40,13 +40,9 @@ def connection_close(http_version, headers): def expected_http_body_size( request: Request, - response: Optional[Response] = None, - expect_continue_as_0: bool = True -): + response: Optional[Response] = None +) -> Optional[int]: """ - Args: - - expect_continue_as_0: If true, incorrectly predict a body size of 0 for requests which are waiting - for a 100 Continue response. Returns: The expected body length: - a positive integer, if the size is known in advance @@ -62,8 +58,6 @@ def expected_http_body_size( headers = request.headers if request.method.upper() == "CONNECT": return 0 - if expect_continue_as_0 and headers.get("expect", "").lower() == "100-continue": - return 0 else: headers = response.headers if request.method.upper() == "HEAD": @@ -75,8 +69,6 @@ def expected_http_body_size( if response.status_code in (204, 304): return 0 - if "chunked" in headers.get("transfer-encoding", "").lower(): - return None if "content-length" in headers: sizes = headers.get_all("content-length") different_content_length_headers = any(x != sizes[0] for x in sizes) @@ -86,6 +78,8 @@ def expected_http_body_size( if size < 0: raise ValueError("Negative Content Length") return size + if "chunked" in headers.get("transfer-encoding", "").lower(): + return None if not response: return 0 return -1 diff --git a/mitmproxy/net/http/status_codes.py b/mitmproxy/net/http/status_codes.py index d31a47dd4..aac38ba0b 100644 --- a/mitmproxy/net/http/status_codes.py +++ b/mitmproxy/net/http/status_codes.py @@ -30,7 +30,7 @@ CONFLICT = 409 GONE = 410 LENGTH_REQUIRED = 411 PRECONDITION_FAILED = 412 -REQUEST_ENTITY_TOO_LARGE = 413 +PAYLOAD_TOO_LARGE = 413 REQUEST_URI_TOO_LONG = 414 UNSUPPORTED_MEDIA_TYPE = 415 REQUESTED_RANGE_NOT_SATISFIABLE = 416 @@ -87,7 +87,7 @@ RESPONSES = { GONE: "Gone", LENGTH_REQUIRED: "Length Required", PRECONDITION_FAILED: "Precondition Failed", - REQUEST_ENTITY_TOO_LARGE: "Request Entity Too Large", + PAYLOAD_TOO_LARGE: "Payload Too Large", REQUEST_URI_TOO_LONG: "Request-URI Too Long", UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type", REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable", diff --git a/mitmproxy/proxy/layers/http/__init__.py b/mitmproxy/proxy/layers/http/__init__.py index 1283657d6..bef2451ae 100644 --- a/mitmproxy/proxy/layers/http/__init__.py +++ b/mitmproxy/proxy/layers/http/__init__.py @@ -9,6 +9,7 @@ from mitmproxy import flow, http from mitmproxy.connection import Connection, Server from mitmproxy.net import server_spec from mitmproxy.net.http import status_codes, url +from mitmproxy.net.http.http1 import expected_http_body_size from mitmproxy.proxy import commands, events, layer, tunnel from mitmproxy.proxy.layers import tcp, tls, websocket from mitmproxy.proxy.layers.http import _upstream_proxy @@ -167,12 +168,12 @@ class HttpStream(layer.Layer): try: host, port = url.parse_authority(self.flow.request.host_header or "", check=True) except ValueError: - self.flow.response = http.Response.make( - 400, - "HTTP request has no host header, destination unknown." + yield SendHttp( + ResponseProtocolError(self.stream_id, "HTTP request has no host header, destination unknown.", 400), + self.context.client ) self.client_state = self.state_errored - return (yield from self.send_response()) + return else: if port is None: port = 443 if self.context.client.tls else 80 @@ -194,6 +195,9 @@ class HttpStream(layer.Layer): self.context.server.address[1], ) + if not event.end_stream and (yield from self.check_body_size(True)): + return + yield HttpRequestHeadersHook(self.flow) if (yield from self.check_killed(True)): return @@ -220,6 +224,7 @@ class HttpStream(layer.Layer): RequestHeaders(self.stream_id, self.flow.request, end_stream=False), self.context.server ) + yield commands.Log(f"Streaming request to {self.flow.request.host}.") self.client_state = self.state_stream_request_body @expect(RequestData, RequestTrailers, RequestEndOfMessage) @@ -256,6 +261,7 @@ class HttpStream(layer.Layer): def state_consume_request_body(self, event: events.Event) -> layer.CommandGenerator[None]: if isinstance(event, RequestData): self.request_body_buf += event.data + yield from self.check_body_size(True) elif isinstance(event, RequestTrailers): assert self.flow.request self.flow.request.trailers = event.trailers @@ -292,15 +298,25 @@ class HttpStream(layer.Layer): @expect(ResponseHeaders) def state_wait_for_response_headers(self, event: ResponseHeaders) -> layer.CommandGenerator[None]: self.flow.response = event.response + + if not event.end_stream and (yield from self.check_body_size(False)): + return + yield HttpResponseHeadersHook(self.flow) if (yield from self.check_killed(True)): return + elif self.flow.response.stream: - yield SendHttp(event, self.context.client) - self.server_state = self.state_stream_response_body + yield from self.start_response_stream() else: self.server_state = self.state_consume_response_body + def start_response_stream(self) -> layer.CommandGenerator[None]: + assert self.flow.response + yield SendHttp(ResponseHeaders(self.stream_id, self.flow.response, end_stream=False), self.context.client) + yield commands.Log(f"Streaming response from {self.flow.request.host}.") + self.server_state = self.state_stream_response_body + @expect(ResponseData, ResponseTrailers, ResponseEndOfMessage) def state_stream_response_body(self, event: events.Event) -> layer.CommandGenerator[None]: assert self.flow.response @@ -321,6 +337,7 @@ class HttpStream(layer.Layer): def state_consume_response_body(self, event: events.Event) -> layer.CommandGenerator[None]: if isinstance(event, ResponseData): self.response_body_buf += event.data + yield from self.check_body_size(False) elif isinstance(event, ResponseTrailers): assert self.flow.response self.flow.response.trailers = event.trailers @@ -381,6 +398,76 @@ class HttpStream(layer.Layer): self._handle_event = self.passthrough return + def check_body_size(self, request: bool) -> layer.CommandGenerator[bool]: + """ + Check if the body size exceeds limits imposed by stream_large_bodies or body_size_limit. + + Returns `True` if the body size exceeds body_size_limit and further processing should be stopped. + """ + if not (self.context.options.stream_large_bodies or self.context.options.body_size_limit): + return False + + # Step 1: Determine the expected body size. This can either come from a known content-length header, + # or from the amount of currently buffered bytes (e.g. for chunked encoding). + response = not request + expected_size: Optional[int] + # the 'late' case: we already started consuming the body + if request and self.request_body_buf: + expected_size = len(self.request_body_buf) + elif response and self.response_body_buf: + expected_size = len(self.response_body_buf) + else: + # the 'early' case: we have not started consuming the body + try: + expected_size = expected_http_body_size(self.flow.request, self.flow.response if response else None) + except ValueError: # pragma: no cover + # we just don't stream/kill malformed content-length headers. + expected_size = None + + if expected_size is None or expected_size <= 0: + return False + + # Step 2: Do we need to abort this? + max_total_size = human.parse_size(self.context.options.body_size_limit) + if max_total_size is not None and expected_size > max_total_size: + if request and not self.request_body_buf: + yield HttpRequestHeadersHook(self.flow) + if response and not self.response_body_buf: + yield HttpResponseHeadersHook(self.flow) + + err_msg = f"{'Request' if request else 'Response'} body exceeds mitmproxy's body_size_limit." + err_code = 413 if request else 502 + + self.flow.error = flow.Error(err_msg) + yield HttpErrorHook(self.flow) + yield SendHttp(ResponseProtocolError(self.stream_id, err_msg, err_code), self.context.client) + self.client_state = self.state_errored + if response: + yield SendHttp(RequestProtocolError(self.stream_id, err_msg, err_code), self.context.server) + self.server_state = self.state_errored + return True + + # Step 3: Do we need to stream this? + max_stream_size = human.parse_size(self.context.options.stream_large_bodies) + if max_stream_size is not None and expected_size > max_stream_size: + if request: + self.flow.request.stream = True + if self.request_body_buf: + # clear buffer and then fake a DataReceived event with everything we had in the buffer so far. + body_buf = self.request_body_buf + self.request_body_buf = b"" + yield from self.start_request_stream() + yield from self.handle_event(RequestData(self.stream_id, body_buf)) + if response: + assert self.flow.response + self.flow.response.stream = True + if self.response_body_buf: + body_buf = self.response_body_buf + self.response_body_buf = b"" + yield from self.start_response_stream() + yield from self.handle_event(ResponseData(self.stream_id, body_buf)) + return False + def check_killed(self, emit_error_hook: bool) -> layer.CommandGenerator[bool]: killed_by_us = ( self.flow.error and self.flow.error.msg == flow.Error.KILLED_MESSAGE @@ -692,14 +779,16 @@ class HttpLayer(layer.Layer): return elif connection.error: stream = self.command_sources.pop(event) - yield from self.event_to_child(stream, GetHttpConnectionCompleted(event, (None, connection.error))) + yield from self.event_to_child(stream, + GetHttpConnectionCompleted(event, (None, connection.error))) return elif connection.connected: # see "tricky multiplexing edge case" in make_http_connection for an explanation h2_to_h1 = self.context.client.alpn == b"h2" and connection.alpn != b"h2" if not h2_to_h1: stream = self.command_sources.pop(event) - yield from self.event_to_child(stream, GetHttpConnectionCompleted(event, (connection, None))) + yield from self.event_to_child(stream, + GetHttpConnectionCompleted(event, (connection, None))) return else: pass # the connection is at least half-closed already, we want a new one. diff --git a/mitmproxy/proxy/layers/http/_http1.py b/mitmproxy/proxy/layers/http/_http1.py index bf8934a0c..f43d1c6cf 100644 --- a/mitmproxy/proxy/layers/http/_http1.py +++ b/mitmproxy/proxy/layers/http/_http1.py @@ -235,7 +235,7 @@ class Http1Server(Http1Connection): request_head = [bytes(x) for x in request_head] # TODO: Make url.parse compatible with bytearrays try: self.request = http1.read_request_head(request_head) - expected_body_size = http1.expected_http_body_size(self.request, expect_continue_as_0=False) + expected_body_size = http1.expected_http_body_size(self.request) except ValueError as e: yield commands.Log(f"{human.format_address(self.conn.peername)}: {e}") yield commands.CloseConnection(self.conn) @@ -272,6 +272,10 @@ class Http1Client(Http1Connection): super().__init__(context, context.server) def send(self, event: HttpEvent) -> layer.CommandGenerator[None]: + if isinstance(event, RequestProtocolError): + yield commands.CloseConnection(self.conn) + return + if not self.stream_id: assert isinstance(event, RequestHeaders) self.stream_id = event.stream_id @@ -304,9 +308,6 @@ class Http1Client(Http1Connection): elif http1.expected_http_body_size(self.request, self.response) == -1: yield commands.CloseConnection(self.conn, half_close=True) yield from self.mark_done(request=True) - elif isinstance(event, RequestProtocolError): - yield commands.CloseConnection(self.conn) - return else: raise AssertionError(f"Unexpected event: {event}") diff --git a/test/mitmproxy/addons/test_clientplayback.py b/test/mitmproxy/addons/test_clientplayback.py index 682a097e5..4e152c4b6 100644 --- a/test/mitmproxy/addons/test_clientplayback.py +++ b/test/mitmproxy/addons/test_clientplayback.py @@ -4,6 +4,7 @@ from contextlib import asynccontextmanager import pytest from mitmproxy.addons.clientplayback import ClientPlayback, ReplayHandler +from mitmproxy.addons.proxyserver import Proxyserver from mitmproxy.exceptions import CommandError, OptionsError from mitmproxy.connection import Address from mitmproxy.test import taddons, tflow @@ -47,7 +48,8 @@ async def test_playback(mode): handler_ok.set() cp = ClientPlayback() - with taddons.context(cp) as tctx: + ps = Proxyserver() + with taddons.context(cp, ps) as tctx: async with tcp_server(handler) as addr: cp.running() @@ -78,6 +80,7 @@ async def test_playback_crash(monkeypatch): cp.start_replay([tflow.tflow()]) await tctx.master.await_log("Client replay has crashed!", level="error") assert cp.count() == 0 + cp.done() def test_check(): diff --git a/test/mitmproxy/addons/test_core.py b/test/mitmproxy/addons/test_core.py index 80bd0db18..ebdfbab60 100644 --- a/test/mitmproxy/addons/test_core.py +++ b/test/mitmproxy/addons/test_core.py @@ -160,10 +160,6 @@ def test_options(tmpdir): def test_validation_simple(): sa = core.Core() with taddons.context() as tctx: - with pytest.raises(exceptions.OptionsError): - tctx.configure(sa, body_size_limit = "invalid") - tctx.configure(sa, body_size_limit = "1m") - with pytest.raises(exceptions.OptionsError, match="requires the upstream_cert option to be enabled"): tctx.configure( sa, diff --git a/test/mitmproxy/addons/test_proxyserver.py b/test/mitmproxy/addons/test_proxyserver.py index 6fcf9cac3..e2d8d3c4b 100644 --- a/test/mitmproxy/addons/test_proxyserver.py +++ b/test/mitmproxy/addons/test_proxyserver.py @@ -3,10 +3,11 @@ from contextlib import asynccontextmanager import pytest +from mitmproxy import exceptions from mitmproxy.addons.proxyserver import Proxyserver -from mitmproxy.proxy.layers.http import HTTPMode -from mitmproxy.proxy import layers, server_hooks from mitmproxy.connection import Address +from mitmproxy.proxy import layers, server_hooks +from mitmproxy.proxy.layers.http import HTTPMode from mitmproxy.test import taddons, tflow from mitmproxy.test.tflow import tclient_conn, tserver_conn @@ -175,3 +176,15 @@ def test_self_connect(): server_hooks.ServerConnectionHookData(server, client) ) assert server.error == "Stopped mitmproxy from recursively connecting to itself." + + +def test_options(): + ps = Proxyserver() + with taddons.context(ps) as tctx: + with pytest.raises(exceptions.OptionsError): + tctx.configure(ps, body_size_limit="invalid") + tctx.configure(ps, body_size_limit="1m") + + with pytest.raises(exceptions.OptionsError): + tctx.configure(ps, stream_large_bodies="invalid") + tctx.configure(ps, stream_large_bodies="1m") diff --git a/test/mitmproxy/addons/test_script.py b/test/mitmproxy/addons/test_script.py index a7654a39e..12ee070c6 100644 --- a/test/mitmproxy/addons/test_script.py +++ b/test/mitmproxy/addons/test_script.py @@ -204,7 +204,8 @@ class TestScriptLoader: sc.script_run([tflow.tflow(resp=True)], "/") await tctx.master.await_log("No such script") - def test_simple(self, tdata): + @pytest.mark.asyncio + async def test_simple(self, tdata): sc = script.ScriptLoader() with taddons.context(loadcore=False) as tctx: tctx.master.addons.add(sc) diff --git a/test/mitmproxy/addons/test_streambodies.py b/test/mitmproxy/addons/test_streambodies.py deleted file mode 100644 index d62f869f1..000000000 --- a/test/mitmproxy/addons/test_streambodies.py +++ /dev/null @@ -1,31 +0,0 @@ -from mitmproxy import exceptions -from mitmproxy.test import tflow -from mitmproxy.test import taddons -from mitmproxy.addons import streambodies -import pytest - - -def test_simple(): - sa = streambodies.StreamBodies() - with taddons.context(sa) as tctx: - with pytest.raises(exceptions.OptionsError): - tctx.configure(sa, stream_large_bodies = "invalid") - tctx.configure(sa, stream_large_bodies = "10") - - f = tflow.tflow() - f.request.content = b"" - f.request.headers["Content-Length"] = "1024" - assert not f.request.stream - sa.requestheaders(f) - assert f.request.stream - - f = tflow.tflow(resp=True) - f.response.content = b"" - f.response.headers["Content-Length"] = "1024" - assert not f.response.stream - sa.responseheaders(f) - assert f.response.stream - - f = tflow.tflow(resp=True) - f.response.headers["content-length"] = "invalid" - tctx.cycle(sa, f) diff --git a/test/mitmproxy/net/http/http1/test_read.py b/test/mitmproxy/net/http/http1/test_read.py index cdcc8ef33..957735a5a 100644 --- a/test/mitmproxy/net/http/http1/test_read.py +++ b/test/mitmproxy/net/http/http1/test_read.py @@ -63,12 +63,6 @@ def test_expected_http_body_size(): # Expect: 100-continue assert expected_http_body_size( treq(headers=Headers(expect="100-continue", content_length="42")), - expect_continue_as_0=True - ) == 0 - # Expect: 100-continue - assert expected_http_body_size( - treq(headers=Headers(expect="100-continue", content_length="42")), - expect_continue_as_0=False ) == 42 # http://tools.ietf.org/html/rfc7230#section-3.3 @@ -94,6 +88,9 @@ def test_expected_http_body_size(): assert expected_http_body_size( treq(headers=Headers(transfer_encoding="chunked")), ) is None + assert expected_http_body_size( + treq(headers=Headers(transfer_encoding="chunked", content_length="42")), + ) == 42 # explicit length for val in (b"foo", b"-7"): diff --git a/test/mitmproxy/proxy/conftest.py b/test/mitmproxy/proxy/conftest.py index 8477158f3..6037fb69f 100644 --- a/test/mitmproxy/proxy/conftest.py +++ b/test/mitmproxy/proxy/conftest.py @@ -4,7 +4,6 @@ import pytest from hypothesis import settings from mitmproxy import connection, options -from mitmproxy.addons.core import Core from mitmproxy.addons.proxyserver import Proxyserver from mitmproxy.addons.termlog import TermLog from mitmproxy.proxy import context @@ -15,7 +14,6 @@ def tctx() -> context.Context: opts = options.Options() Proxyserver().load(opts) TermLog().load(opts) - Core().load(opts) return context.Context( connection.Client( ("client", 1234), diff --git a/test/mitmproxy/proxy/layers/http/test_http.py b/test/mitmproxy/proxy/layers/http/test_http.py index ed9451cba..ed7ceed77 100644 --- a/test/mitmproxy/proxy/layers/http/test_http.py +++ b/test/mitmproxy/proxy/layers/http/test_http.py @@ -1,14 +1,14 @@ import pytest +from mitmproxy.connection import ConnectionState, Server from mitmproxy.flow import Error from mitmproxy.http import HTTPFlow, Response from mitmproxy.net.server_spec import ServerSpec -from mitmproxy.proxy.layers.http import HTTPMode from mitmproxy.proxy import layer -from mitmproxy.proxy.commands import CloseConnection, OpenConnection, SendData, Log -from mitmproxy.connection import ConnectionState, Server +from mitmproxy.proxy.commands import CloseConnection, Log, OpenConnection, SendData from mitmproxy.proxy.events import ConnectionClosed, DataReceived from mitmproxy.proxy.layers import TCPLayer, http, tls +from mitmproxy.proxy.layers.http import HTTPMode from mitmproxy.proxy.layers.tcp import TcpMessageInjected, TcpStartHook from mitmproxy.proxy.layers.websocket import WebsocketStartHook from mitmproxy.tcp import TCPFlow, TCPMessage @@ -265,37 +265,100 @@ def test_disconnect_while_intercept(tctx): assert flow().server_conn == server2() -def test_response_streaming(tctx): +@pytest.mark.parametrize("why", ["body_size=0", "body_size=3", "addon"]) +@pytest.mark.parametrize("transfer_encoding", ["identity", "chunked"]) +def test_response_streaming(tctx, why, transfer_encoding): """Test HTTP response streaming""" server = Placeholder(Server) flow = Placeholder(HTTPFlow) + playbook = Playbook(http.HttpLayer(tctx, HTTPMode.regular)) + + if why.startswith("body_size"): + tctx.options.stream_large_bodies = why.replace("body_size=", "") def enable_streaming(flow: HTTPFlow): - flow.response.stream = lambda x: x.upper() + if why == "addon": + flow.response.stream = True assert ( - Playbook(http.HttpLayer(tctx, HTTPMode.regular)) - >> DataReceived(tctx.client, b"GET http://example.com/largefile HTTP/1.1\r\nHost: example.com\r\n\r\n") - << http.HttpRequestHeadersHook(flow) - >> reply() - << http.HttpRequestHook(flow) - >> reply() - << OpenConnection(server) - >> reply(None) - << SendData(server, b"GET /largefile HTTP/1.1\r\nHost: example.com\r\n\r\n") - >> DataReceived(server, b"HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nabc") - << http.HttpResponseHeadersHook(flow) - >> reply(side_effect=enable_streaming) - << SendData(tctx.client, b"HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nABC") - >> DataReceived(server, b"def") - << SendData(tctx.client, b"DEF") - << http.HttpResponseHook(flow) - >> reply() + playbook + >> DataReceived(tctx.client, b"GET http://example.com/largefile HTTP/1.1\r\nHost: example.com\r\n\r\n") + << http.HttpRequestHeadersHook(flow) + >> reply() + << http.HttpRequestHook(flow) + >> reply() + << OpenConnection(server) + >> reply(None) + << SendData(server, b"GET /largefile HTTP/1.1\r\nHost: example.com\r\n\r\n") + >> DataReceived(server, b"HTTP/1.1 200 OK\r\n") + ) + if transfer_encoding == "identity": + playbook >> DataReceived(server, b"Content-Length: 6\r\n\r\n" + b"abc") + else: + playbook >> DataReceived(server, b"Transfer-Encoding: chunked\r\n\r\n" + b"3\r\nabc\r\n") + + playbook << http.HttpResponseHeadersHook(flow) + playbook >> reply(side_effect=enable_streaming) + + if transfer_encoding == "identity": + playbook << SendData(tctx.client, b"HTTP/1.1 200 OK\r\n" + b"Content-Length: 6\r\n\r\n" + b"abc") + playbook >> DataReceived(server, b"def") + playbook << SendData(tctx.client, b"def") + else: + if why == "body_size=3": + playbook >> DataReceived(server, b"3\r\ndef\r\n") + playbook << SendData(tctx.client, b"HTTP/1.1 200 OK\r\n" + b"Transfer-Encoding: chunked\r\n\r\n" + b"6\r\nabcdef\r\n") + else: + playbook << SendData(tctx.client, b"HTTP/1.1 200 OK\r\n" + b"Transfer-Encoding: chunked\r\n\r\n" + b"3\r\nabc\r\n") + playbook >> DataReceived(server, b"3\r\ndef\r\n") + playbook << SendData(tctx.client, b"3\r\ndef\r\n") + playbook >> DataReceived(server, b"0\r\n\r\n") + + playbook << http.HttpResponseHook(flow) + playbook >> reply() + + if transfer_encoding == "chunked": + playbook << SendData(tctx.client, b"0\r\n\r\n") + + assert playbook + + +def test_request_stream_modify(tctx): + """Test HTTP response streaming""" + server = Placeholder(Server) + + def enable_streaming(flow: HTTPFlow): + flow.request.stream = lambda x: x.upper() + + assert ( + Playbook(http.HttpLayer(tctx, HTTPMode.regular)) + >> DataReceived(tctx.client, b"POST http://example.com/ HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Length: 6\r\n\r\n" + b"abc") + << http.HttpRequestHeadersHook(Placeholder(HTTPFlow)) + >> reply(side_effect=enable_streaming) + << OpenConnection(server) + >> reply(None) + << SendData(server, b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"Content-Length: 6\r\n\r\n" + b"ABC") ) +@pytest.mark.parametrize("why", ["body_size=0", "body_size=3", "addon"]) +@pytest.mark.parametrize("transfer_encoding", ["identity", "chunked"]) @pytest.mark.parametrize("response", ["normal response", "early response", "early close", "early kill"]) -def test_request_streaming(tctx, response): +def test_request_streaming(tctx, why, transfer_encoding, response): """ Test HTTP request streaming @@ -305,55 +368,94 @@ def test_request_streaming(tctx, response): flow = Placeholder(HTTPFlow) playbook = Playbook(http.HttpLayer(tctx, HTTPMode.regular)) - def enable_streaming(flow: HTTPFlow): - flow.request.stream = lambda x: x.upper() + if why.startswith("body_size"): + tctx.options.stream_large_bodies = why.replace("body_size=", "") + + def enable_streaming(flow: HTTPFlow): + if why == "addon": + flow.request.stream = True + + playbook >> DataReceived(tctx.client, b"POST http://example.com/ HTTP/1.1\r\n" + b"Host: example.com\r\n") + if transfer_encoding == "identity": + playbook >> DataReceived(tctx.client, b"Content-Length: 9\r\n\r\n" + b"abc") + else: + playbook >> DataReceived(tctx.client, b"Transfer-Encoding: chunked\r\n\r\n" + b"3\r\nabc\r\n") + + playbook << http.HttpRequestHeadersHook(flow) + playbook >> reply(side_effect=enable_streaming) + + needs_more_data_before_open = (why == "body_size=3" and transfer_encoding == "chunked") + if needs_more_data_before_open: + playbook >> DataReceived(tctx.client, b"3\r\ndef\r\n") + + playbook << OpenConnection(server) + playbook >> reply(None) + playbook << SendData(server, b"POST / HTTP/1.1\r\n" + b"Host: example.com\r\n") + + if transfer_encoding == "identity": + playbook << SendData(server, b"Content-Length: 9\r\n\r\n" + b"abc") + playbook >> DataReceived(tctx.client, b"def") + playbook << SendData(server, b"def") + else: + if needs_more_data_before_open: + playbook << SendData(server, b"Transfer-Encoding: chunked\r\n\r\n" + b"6\r\nabcdef\r\n") + else: + playbook << SendData(server, b"Transfer-Encoding: chunked\r\n\r\n" + b"3\r\nabc\r\n") + playbook >> DataReceived(tctx.client, b"3\r\ndef\r\n") + playbook << SendData(server, b"3\r\ndef\r\n") - assert ( - playbook - >> DataReceived(tctx.client, b"POST http://example.com/ HTTP/1.1\r\n" - b"Host: example.com\r\n" - b"Content-Length: 6\r\n\r\n" - b"abc") - << http.HttpRequestHeadersHook(flow) - >> reply(side_effect=enable_streaming) - << OpenConnection(server) - >> reply(None) - << SendData(server, b"POST / HTTP/1.1\r\n" - b"Host: example.com\r\n" - b"Content-Length: 6\r\n\r\n" - b"ABC") - ) if response == "normal response": + if transfer_encoding == "identity": + playbook >> DataReceived(tctx.client, b"ghi") + playbook << SendData(server, b"ghi") + else: + playbook >> DataReceived(tctx.client, b"3\r\nghi\r\n0\r\n\r\n") + playbook << SendData(server, b"3\r\nghi\r\n") + + playbook << http.HttpRequestHook(flow) + playbook >> reply() + if transfer_encoding == "chunked": + playbook << SendData(server, b"0\r\n\r\n") assert ( - playbook - >> DataReceived(tctx.client, b"def") - << SendData(server, b"DEF") - << http.HttpRequestHook(flow) - >> reply() - >> DataReceived(server, b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n") - << http.HttpResponseHeadersHook(flow) - >> reply() - << http.HttpResponseHook(flow) - >> reply() - << SendData(tctx.client, b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n") + playbook + >> DataReceived(server, b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n") + << http.HttpResponseHeadersHook(flow) + >> reply() + << http.HttpResponseHook(flow) + >> reply() + << SendData(tctx.client, b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n") ) elif response == "early response": # We may receive a response before we have finished sending our request. # We continue sending unless the server closes the connection. # https://tools.ietf.org/html/rfc7231#section-6.5.11 assert ( - playbook - >> DataReceived(server, b"HTTP/1.1 413 Request Entity Too Large\r\nContent-Length: 0\r\n\r\n") - << http.HttpResponseHeadersHook(flow) - >> reply() - << http.HttpResponseHook(flow) - >> reply() - << SendData(tctx.client, b"HTTP/1.1 413 Request Entity Too Large\r\nContent-Length: 0\r\n\r\n") - >> DataReceived(tctx.client, b"def") - << SendData(server, b"DEF") - << http.HttpRequestHook(flow) - >> reply() + playbook + >> DataReceived(server, b"HTTP/1.1 413 Request Entity Too Large\r\nContent-Length: 0\r\n\r\n") + << http.HttpResponseHeadersHook(flow) + >> reply() + << http.HttpResponseHook(flow) + >> reply() + << SendData(tctx.client, b"HTTP/1.1 413 Request Entity Too Large\r\nContent-Length: 0\r\n\r\n") ) + if transfer_encoding == "identity": + playbook >> DataReceived(tctx.client, b"ghi") + playbook << SendData(server, b"ghi") + else: + playbook >> DataReceived(tctx.client, b"3\r\nghi\r\n0\r\n\r\n") + playbook << SendData(server, b"3\r\nghi\r\n") + playbook << http.HttpRequestHook(flow) + playbook >> reply() + if transfer_encoding == "chunked": + playbook << SendData(server, b"0\r\n\r\n") + assert playbook elif response == "early close": assert ( playbook @@ -383,6 +485,60 @@ def test_request_streaming(tctx, response): assert False +@pytest.mark.parametrize("where", ["request", "response"]) +@pytest.mark.parametrize("transfer_encoding", ["identity", "chunked"]) +def test_body_size_limit(tctx, where, transfer_encoding): + """Test HTTP request body_size_limit""" + tctx.options.body_size_limit = "3" + err = Placeholder(bytes) + flow = Placeholder(HTTPFlow) + + if transfer_encoding == "identity": + body = b"Content-Length: 6\r\n\r\nabcdef" + else: + body = b"Transfer-Encoding: chunked\r\n\r\n6\r\nabcdef" + + if where == "request": + assert ( + Playbook(http.HttpLayer(tctx, HTTPMode.regular)) + >> DataReceived(tctx.client, b"POST http://example.com/ HTTP/1.1\r\n" + b"Host: example.com\r\n" + body) + << http.HttpRequestHeadersHook(flow) + >> reply() + << http.HttpErrorHook(flow) + >> reply() + << SendData(tctx.client, err) + << CloseConnection(tctx.client) + ) + assert b"413 Payload Too Large" in err() + assert b"body_size_limit" in err() + else: + server = Placeholder(Server) + assert ( + Playbook(http.HttpLayer(tctx, HTTPMode.regular)) + >> DataReceived(tctx.client, b"GET http://example.com/ HTTP/1.1\r\n" + b"Host: example.com\r\n\r\n") + << http.HttpRequestHeadersHook(flow) + >> reply() + << http.HttpRequestHook(flow) + >> reply() + << OpenConnection(server) + >> reply(None) + << SendData(server, b"GET / HTTP/1.1\r\n" + b"Host: example.com\r\n\r\n") + >> DataReceived(server, b"HTTP/1.1 200 OK\r\n" + body) + << http.HttpResponseHeadersHook(flow) + >> reply() + << http.HttpErrorHook(flow) + >> reply() + << SendData(tctx.client, err) + << CloseConnection(tctx.client) + << CloseConnection(server) + ) + assert b"502 Bad Gateway" in err() + assert b"body_size_limit" in err() + + @pytest.mark.parametrize("connect", [True, False]) def test_server_unreachable(tctx, connect): """Test the scenario where the target server is unreachable.""" @@ -661,14 +817,15 @@ def test_http_proxy_relative_request(tctx): def test_http_proxy_relative_request_no_host_header(tctx): """Test handling of a relative-form "GET /" in regular proxy mode, but without a host header.""" + err = Placeholder(bytes) assert ( Playbook(http.HttpLayer(tctx, HTTPMode.regular), hooks=False) >> DataReceived(tctx.client, b"GET / HTTP/1.1\r\n\r\n") - << SendData(tctx.client, b"HTTP/1.1 400 Bad Request\r\n" - b"content-length: 53\r\n" - b"\r\n" - b"HTTP request has no host header, destination unknown.") + << SendData(tctx.client, err) + << CloseConnection(tctx.client) ) + assert b"400 Bad Request" in err() + assert b"HTTP request has no host header, destination unknown." in err() def test_http_expect(tctx):