diff --git a/CHANGELOG.md b/CHANGELOG.md index 7128432f9..b363cf751 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,17 +24,9 @@ Mitmproxy has a completely new proxy core, fixing many longstanding issues: This greatly improves testing capabilities, prevents a wide array of race conditions, and increases proper isolation between layers. -We wanted to bring these improvements out, so we have a few regressions: - -* Support for HTTP/2 Push Promises has been dropped. -* upstream_auth is currently unsupported. - -If you depend on these features, please raise your voice in -[#4348](https://github.com/mitmproxy/mitmproxy/issues/4348)! - ### Full Changelog -* New Proxy Core based on sans-io pattern (@mhils) +* New Proxy Core (see section above, @mhils) * mitmproxy's command line interface now supports Windows (@mhils) * The `clientconnect`, `clientdisconnect`, `serverconnect`, `serverdisconnect`, and `log` events have been replaced with new events, see addon documentation for details (@mhils) @@ -54,7 +46,8 @@ If you depend on these features, please raise your voice in * Pressing `?` now exits console help view (@abitrolly) * `--modify-headers` now works correctly when modifying a header that is also part of the filter expression (@Prinzhorn) * Fix SNI-related reproducibility issues when exporting to curl/httpie commands. (@dkasak) -* Add option `export_preserve_original_ip` to force exported command to connect to IP from original request. Only supports curl at the moment. (@dkasak) +* Add option `export_preserve_original_ip` to force exported command to connect to IP from original request. + Only supports curl at the moment. (@dkasak) * Major proxy protocol testing (@r00t-) * Switch Docker image release to be based on Debian (@PeterDaveHello) * Multiple Browsers: The `browser.start` command may be executed more than once to start additional @@ -64,11 +57,15 @@ If you depend on these features, please raise your voice in * Flow control: don't read connection data faster than it can be forwarded. (@hazcod) * Fix parsing of certificate issuer/subject with escaped special characters (@Prinzhorn) * Customize markers with emoji, and filters: The `flow.mark` command may be used to mark a flow with either the default - "red ball" marker, a single character, or an emoji like `:grapes:`. Use the `~marker` filter to filter on marker characters. (@rbdixon) -* New `flow.comment` command to add a comment to the flow. Add `~comment ` filter syntax to search flow comments. (@rbdixon) + "red ball" marker, a single character, or an emoji like `:grapes:`. Use the `~marker` filter to filter on marker + characters. (@rbdixon) +* New `flow.comment` command to add a comment to the flow. Add `~comment ` filter syntax to search flow comments. + (@rbdixon) * Fix multipart forms losing `boundary` values on edit (@roytu) * `Transfer-Encoding: chunked` HTTP message bodies are now retained if they are below the stream_large_bodies limit. + (@mhils) * `json()` method for HTTP Request and Response instances will return decoded JSON body. (@rbdixon) +* Support for HTTP/2 Push Promises has been dropped. (@mhils) * --- TODO: add new PRs above this line --- * ... and various other fixes, documentation improvements, dependency version bumps, etc. diff --git a/mitmproxy/addons/upstream_auth.py b/mitmproxy/addons/upstream_auth.py index dc1a64419..96c2767f7 100644 --- a/mitmproxy/addons/upstream_auth.py +++ b/mitmproxy/addons/upstream_auth.py @@ -4,10 +4,11 @@ import base64 from mitmproxy import exceptions from mitmproxy import ctx +from mitmproxy import http from mitmproxy.utils import strutils -def parse_upstream_auth(auth): +def parse_upstream_auth(auth: str) -> bytes: pattern = re.compile(".+:") if pattern.search(auth) is None: raise exceptions.OptionsError( @@ -16,7 +17,7 @@ def parse_upstream_auth(auth): return b"Basic" + b" " + base64.b64encode(strutils.always_bytes(auth)) -class UpstreamAuth(): +class UpstreamAuth: """ This addon handles authentication to systems upstream from us for the upstream proxy and reverse proxy mode. There are 3 cases: @@ -26,8 +27,7 @@ class UpstreamAuth(): - Upstream proxy regular requests - Reverse proxy regular requests (CONNECT is invalid in this mode) """ - def __init__(self): - self.auth = None + auth: typing.Optional[bytes] = None def load(self, loader): loader.add_option( @@ -39,26 +39,19 @@ class UpstreamAuth(): ) def configure(self, updated): - # FIXME: We're doing this because our proxy core is terminally confused - # at the moment. Ideally, we should be able to check if we're in - # reverse proxy mode at the HTTP layer, so that scripts can put the - # proxy in reverse proxy mode for specific requests. if "upstream_auth" in updated: if ctx.options.upstream_auth is None: self.auth = None else: - if ctx.options.upstream_auth: # pragma: no cover - ctx.log.warn("upstream_auth is currently nonfunctioning, " - "see https://github.com/mitmproxy/mitmproxy/issues/4348") self.auth = parse_upstream_auth(ctx.options.upstream_auth) - def http_connect(self, f): - if self.auth and f.mode == "upstream": + def http_connect_upstream(self, f: http.HTTPFlow): + if self.auth: f.request.headers["Proxy-Authorization"] = self.auth - def requestheaders(self, f): + def requestheaders(self, f: http.HTTPFlow): if self.auth: if f.mode == "upstream" and not f.server_conn.via: f.request.headers["Proxy-Authorization"] = self.auth elif ctx.options.mode.startswith("reverse"): - f.request.headers["Proxy-Authorization"] = self.auth + f.request.headers["Authorization"] = self.auth diff --git a/mitmproxy/proxy/layers/http/__init__.py b/mitmproxy/proxy/layers/http/__init__.py index bef2451ae..3ad301573 100644 --- a/mitmproxy/proxy/layers/http/__init__.py +++ b/mitmproxy/proxy/layers/http/__init__.py @@ -564,17 +564,7 @@ class HttpStream(layer.Layer): yield from self.handle_connect_finish() def handle_connect_upstream(self): - assert self.context.server.via.scheme in ("http", "https") - - http_proxy = Server(self.context.server.via.address) - - stack = tunnel.LayerStack() - if self.context.server.via.scheme == "https": - http_proxy.sni = self.context.server.via.address[0] - stack /= tls.ServerTLSLayer(self.context, http_proxy) - stack /= _upstream_proxy.HttpUpstreamProxy(self.context, http_proxy, True) - - self.child_layer = stack[0] + self.child_layer = _upstream_proxy.HttpUpstreamProxy.make(self.context, True)[0] yield from self.handle_connect_finish() def handle_connect_finish(self): @@ -813,21 +803,15 @@ class HttpLayer(layer.Layer): if not can_use_context_connection: context.server = Server(event.address) - if event.tls: - context.server.sni = event.address[0] if event.via: + context.server.via = event.via assert event.via.scheme in ("http", "https") - http_proxy = Server(event.via.address) - - if event.via.scheme == "https": - http_proxy.alpn_offers = tls.HTTP_ALPNS - http_proxy.sni = event.via.address[0] - stack /= tls.ServerTLSLayer(context, http_proxy) - - send_connect = not (self.mode == HTTPMode.upstream and not event.tls) - stack /= _upstream_proxy.HttpUpstreamProxy(context, http_proxy, send_connect) + # We always send a CONNECT request, *except* for plaintext absolute-form HTTP requests in upstream mode. + send_connect = event.tls or self.mode != HTTPMode.upstream + stack /= _upstream_proxy.HttpUpstreamProxy.make(context, send_connect) if event.tls: + context.server.sni = event.address[0] stack /= tls.ServerTLSLayer(context) stack /= HttpClient(context) @@ -850,14 +834,9 @@ class HttpLayer(layer.Layer): stream = self.command_sources.pop(cmd) yield from self.event_to_child(stream, GetHttpConnectionCompleted(cmd, reply)) - # Somewhat ugly edge case: If we do HTTP/2 -> HTTP/1 proxying we don't want - # to handle everything over a single connection. - # Tricky multiplexing edge case: Assume we are doing HTTP/2 -> HTTP/1 proxying, - # - # that receives two responses - # that neither have a content-length specified nor a chunked transfer encoding. - # We can't process these two flows to the same h1 connection as they would both have - # "read until eof" semantics. The only workaround left is to open a separate connection for each flow. + # Tricky multiplexing edge case: Assume we are doing HTTP/2 -> HTTP/1 proxying and the destination server + # only serves responses with HTTP read-until-EOF semantics. In this case we can't process two flows on the + # same connection. The only workaround left is to open a separate connection for each flow. if not command.err and self.context.client.alpn == b"h2" and command.connection.alpn != b"h2": for cmd in waiting[1:]: yield from self.get_connection(cmd, reuse=False) diff --git a/mitmproxy/proxy/layers/http/_hooks.py b/mitmproxy/proxy/layers/http/_hooks.py index 6dd2b43a5..0c3912c89 100644 --- a/mitmproxy/proxy/layers/http/_hooks.py +++ b/mitmproxy/proxy/layers/http/_hooks.py @@ -75,3 +75,17 @@ class HttpConnectHook(commands.StartHook): but all requests going over the newly opened connection will. """ flow: http.HTTPFlow + + +@dataclass +class HttpConnectUpstreamHook(commands.StartHook): + """ + An HTTP CONNECT request is about to be sent to an upstream proxy. + This event can be ignored for most practical purposes. + + This event can be used to set custom authentication headers for upstream proxies. + + CONNECT requests do not generate the usual HTTP handler events, + but all requests going over the newly opened connection will. + """ + flow: http.HTTPFlow diff --git a/mitmproxy/proxy/layers/http/_upstream_proxy.py b/mitmproxy/proxy/layers/http/_upstream_proxy.py index d8d1af348..df22f4abf 100644 --- a/mitmproxy/proxy/layers/http/_upstream_proxy.py +++ b/mitmproxy/proxy/layers/http/_upstream_proxy.py @@ -4,9 +4,10 @@ from typing import Optional, Tuple from h11._receivebuffer import ReceiveBuffer from mitmproxy import http, connection -from mitmproxy.net import server_spec from mitmproxy.net.http import http1 from mitmproxy.proxy import commands, context, layer, tunnel +from mitmproxy.proxy.layers.http._hooks import HttpConnectUpstreamHook +from mitmproxy.proxy.layers import tls from mitmproxy.utils import human @@ -27,25 +28,31 @@ class HttpUpstreamProxy(tunnel.TunnelLayer): tunnel_connection=tunnel_conn, conn=ctx.server ) - - assert self.tunnel_connection.address - self.conn.via = server_spec.ServerSpec( - "https" if self.tunnel_connection.tls else "http", - self.tunnel_connection.address - ) self.buf = ReceiveBuffer() self.send_connect = send_connect + @classmethod + def make(cls, ctx: context.Context, send_connect: bool) -> tunnel.LayerStack: + spec = ctx.server.via + assert spec.scheme in ("http", "https") + + http_proxy = connection.Server(spec.address) + + stack = tunnel.LayerStack() + if spec.scheme == "https": + http_proxy.alpn_offers = tls.HTTP1_ALPNS + http_proxy.sni = spec.address[0] + stack /= tls.ServerTLSLayer(ctx, http_proxy) + stack /= cls(ctx, http_proxy, send_connect) + + return stack + def start_handshake(self) -> layer.CommandGenerator[None]: - if self.tunnel_connection.tls: - # "Secure Web Proxy": We may have negotiated an ALPN when connecting to the upstream proxy. - # The semantics are not really clear here, but we make sure that if we negotiated h2, - # we act as an h2 client. - self.conn.alpn = self.tunnel_connection.alpn if not self.send_connect: return (yield from super().start_handshake()) assert self.conn.address - req = http.Request( + flow = http.HTTPFlow(self.context.client, self.tunnel_connection) + flow.request = http.Request( host=self.conn.address[0], port=self.conn.address[1], method=b"CONNECT", @@ -59,7 +66,8 @@ class HttpUpstreamProxy(tunnel.TunnelLayer): timestamp_start=time.time(), timestamp_end=time.time(), ) - raw = http1.assemble_request(req) + yield HttpConnectUpstreamHook(flow) + raw = http1.assemble_request(flow.request) yield commands.SendData(self.tunnel_connection, raw) def receive_handshake_data(self, data: bytes) -> layer.CommandGenerator[Tuple[bool, Optional[str]]]: @@ -72,17 +80,19 @@ class HttpUpstreamProxy(tunnel.TunnelLayer): try: response = http1.read_response_head(response_head) except ValueError as e: - yield commands.Log(f"{human.format_address(self.tunnel_connection.address)}: {e}") - return False, str(e) + proxyaddr = human.format_address(self.tunnel_connection.address) + yield commands.Log(f"{proxyaddr}: {e}") + return False, f"Error connecting to {proxyaddr}: {e}" if 200 <= response.status_code < 300: if self.buf: yield from self.receive_data(bytes(self.buf)) del self.buf return True, None else: + proxyaddr = human.format_address(self.tunnel_connection.address) raw_resp = b"\n".join(response_head) - yield commands.Log(f"{human.format_address(self.tunnel_connection.address)}: {raw_resp!r}", + yield commands.Log(f"{proxyaddr}: {raw_resp!r}", level="debug") - return False, f"{response.status_code} {response.reason}" + return False, f"Upstream proxy {proxyaddr} refused HTTP CONNECT request: {response.status_code} {response.reason}" else: return False, None diff --git a/mitmproxy/proxy/tunnel.py b/mitmproxy/proxy/tunnel.py index bd622aeb9..b033366a0 100644 --- a/mitmproxy/proxy/tunnel.py +++ b/mitmproxy/proxy/tunnel.py @@ -1,5 +1,5 @@ from enum import Enum, auto -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Union from mitmproxy import connection from mitmproxy.proxy import commands, context, events, layer @@ -163,8 +163,13 @@ class LayerStack: def __getitem__(self, item: int) -> Layer: return self._stack.__getitem__(item) - def __truediv__(self, other: Layer) -> "LayerStack": - if self._stack: - self._stack[-1].child_layer = other # type: ignore - self._stack.append(other) + def __truediv__(self, other: Union[Layer, "LayerStack"]) -> "LayerStack": + if isinstance(other, Layer): + if self._stack: + self._stack[-1].child_layer = other # type: ignore + self._stack.append(other) + else: + if self._stack: + self._stack[-1].child_layer = other[0] # type: ignore + self._stack.extend(other._stack) return self diff --git a/test/mitmproxy/proxy/layers/test_modes.py b/test/mitmproxy/proxy/layers/test_modes.py index 12b8aec29..0c32c1069 100644 --- a/test/mitmproxy/proxy/layers/test_modes.py +++ b/test/mitmproxy/proxy/layers/test_modes.py @@ -55,9 +55,7 @@ def test_upstream_https(tctx): serverhello = Placeholder(bytes) request = Placeholder(bytes) tls_finished = Placeholder(bytes) - h2_client_settings_ack = Placeholder(bytes) response = Placeholder(bytes) - h2_server_settings_ack = Placeholder(bytes) assert ( proxy1 @@ -67,7 +65,7 @@ def test_upstream_https(tctx): << OpenConnection(upstream) >> reply(None) << TlsStartHook(Placeholder()) - >> reply_tls_start(alpn=b"h2") + >> reply_tls_start(alpn=b"http/1.1") << SendData(upstream, clienthello) ) assert upstream().address == ("example.mitmproxy.org", 8081) @@ -77,7 +75,7 @@ def test_upstream_https(tctx): << NextLayerHook(Placeholder(NextLayer)) >> reply_next_layer(ClientTLSLayer) << TlsStartHook(Placeholder()) - >> reply_tls_start(alpn=b"h2") + >> reply_tls_start(alpn=b"http/1.1") << SendData(tctx2.client, serverhello) ) assert ( @@ -91,21 +89,18 @@ def test_upstream_https(tctx): << SendData(tctx2.client, tls_finished) << NextLayerHook(Placeholder(NextLayer)) >> reply_next_layer(lambda ctx: http.HttpLayer(ctx, HTTPMode.regular)) - << SendData(tctx2.client, h2_client_settings_ack) << OpenConnection(server) >> reply(None) - << SendData(server, b'GET / HTTP/1.1\r\nhost: example.com\r\n\r\n') + << SendData(server, b'GET / HTTP/1.1\r\nHost: example.com\r\n\r\n') >> DataReceived(server, b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n") - << CloseConnection(server) << SendData(tctx2.client, response) ) assert server().address == ("example.com", 80) assert ( proxy1 - >> DataReceived(upstream, tls_finished() + h2_client_settings_ack() + response()) - << SendData(upstream, h2_server_settings_ack) - << SendData(tctx1.client, b"HTTP/1.1 200 OK\r\ncontent-length: 0\r\n\r\n") + >> DataReceived(upstream, tls_finished() + response()) + << SendData(tctx1.client, b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n") )