diff --git a/CHANGELOG.md b/CHANGELOG.md index f133bde91..69600efb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ ## Unreleased: mitmproxy next +* Fix a regression from mitmproxy 10.1.6 where `ignore_hosts` would terminate requests + instead of forwarding them. + ([#6559](https://github.com/mitmproxy/mitmproxy/pull/6559), @mhils) +* `ignore_hosts` now waits for the entire HTTP headers if it suspects the connection to be HTTP. + ([#6559](https://github.com/mitmproxy/mitmproxy/pull/6559), @mhils) ## 14 December 2023: mitmproxy 10.1.6 @@ -18,17 +23,17 @@ ([#6225](https://github.com/mitmproxy/mitmproxy/issues/6225), @Llama1412) * Fix bug where response flows from HAR files had incorrect `content-length` headers ([#6548](https://github.com/mitmproxy/mitmproxy/pull/6548), @zanieb) -* Improved handling for `--allow-hosts`/`--ignore-hosts` options in WireGuard mode (#5930). +* Improved handling for `allow_hosts`/`ignore_hosts` options in WireGuard mode (#5930). ([#6513](https://github.com/mitmproxy/mitmproxy/pull/6513), @dsphper) * Fix a bug where TCP connections were not closed properly. ([#6543](https://github.com/mitmproxy/mitmproxy/pull/6543), @mhils) -* DNS resolution is now exempted from `--ignore-hosts` in WireGuard Mode. +* DNS resolution is now exempted from `ignore_hosts` in WireGuard Mode. ([#6513](https://github.com/mitmproxy/mitmproxy/pull/6513), @dsphper) * Fix case sensitivity of URL added to blocklist ([#6493](https://github.com/mitmproxy/mitmproxy/pull/6493), @emanuele-em) * Fix a bug where logging was stopped prematurely during shutdown. ([#6541](https://github.com/mitmproxy/mitmproxy/pull/6541), @mhils) -* For plaintext traffic, `--ignore-hosts` now also takes HTTP/1 host headers into account. +* For plaintext traffic, `ignore_hosts` now also takes HTTP/1 host headers into account. ([#6513](https://github.com/mitmproxy/mitmproxy/pull/6513), @dsphper) * Fix empty cookie attributes being set to `Key=` instead of `Key` ([#5084](https://github.com/mitmproxy/mitmproxy/pull/5084), @Speedlulu) diff --git a/mitmproxy/addons/next_layer.py b/mitmproxy/addons/next_layer.py index ce63f8139..7c065f063 100644 --- a/mitmproxy/addons/next_layer.py +++ b/mitmproxy/addons/next_layer.py @@ -49,6 +49,7 @@ from mitmproxy.proxy.layers import UDPLayer from mitmproxy.proxy.layers.http import HTTPMode from mitmproxy.proxy.layers.quic import quic_parse_client_hello from mitmproxy.proxy.layers.tls import dtls_parse_client_hello +from mitmproxy.proxy.layers.tls import HTTP1_ALPNS from mitmproxy.proxy.layers.tls import HTTP_ALPNS from mitmproxy.proxy.layers.tls import parse_client_hello from mitmproxy.tls import ClientHello @@ -129,7 +130,7 @@ class NextLayer: udp_based = context.client.transport_protocol == "udp" # 1) check for --ignore/--allow - if self._ignore_connection(context, data_client): + if self._ignore_connection(context, data_client, data_server): return ( layers.TCPLayer(context, ignore=True) if tcp_based @@ -182,7 +183,7 @@ class NextLayer: if udp_based: return layers.UDPLayer(context) # 5b) Check for raw tcp mode. - very_likely_http = context.client.alpn and context.client.alpn in HTTP_ALPNS + very_likely_http = context.client.alpn in HTTP_ALPNS probably_no_http = not very_likely_http and ( # the first three bytes should be the HTTP verb, so A-Za-z is expected. len(data_client) < 3 @@ -199,6 +200,7 @@ class NextLayer: self, context: Context, data_client: bytes, + data_server: bytes, ) -> bool | None: """ Returns: @@ -220,13 +222,15 @@ class NextLayer: hostnames.append(peername) if context.server.address and (server_address := context.server.address[0]): hostnames.append(server_address) + # If we already have a destination address, we can also check for HTTP Host headers. + # But we do need the destination, otherwise we don't know where this connection is going to. + if host_header := self._get_host_header(context, data_client, data_server): + hostnames.append(host_header) if ( client_hello := self._get_client_hello(context, data_client) ) and client_hello.sni: hostnames.append(client_hello.sni) - # If the client data is not a TLS record, try to extract the domain from the HTTP request - elif host := self._extract_http1_host_header(data_client): - hostnames.append(host) + if not hostnames: return False @@ -246,14 +250,39 @@ class NextLayer: raise AssertionError() @staticmethod - def _extract_http1_host_header(data_client: bytes) -> str: - pattern = rb"Host:\s+(.+?)\r\n" - match = re.search(pattern, data_client) - return match.group(1).decode() if match else "" + def _get_host_header( + context: Context, + data_client: bytes, + data_server: bytes, + ) -> str | None: + """ + Try to read a host header from data_client. - def _get_client_hello( - self, context: Context, data_client: bytes - ) -> ClientHello | None: + Returns: + The host header value, or None, if no host header was found. + + Raises: + NeedsMoreData, if the HTTP request is incomplete. + """ + if context.client.transport_protocol != "tcp" or data_server: + return None + + host_header_expected = context.client.alpn in HTTP1_ALPNS or re.match( + rb"[A-Z]{3,}.+HTTP/", data_client, re.IGNORECASE + ) + if host_header_expected: + if m := re.search(rb"\r\n(?:Host: (.+))?\r\n", data_client, re.IGNORECASE): + if host := m.group(1): + return host.decode("utf-8", "surrogateescape") + else: + return None # \r\n\r\n - header end came first. + else: + raise NeedsMoreData + else: + return None + + @staticmethod + def _get_client_hello(context: Context, data_client: bytes) -> ClientHello | None: """ Try to read a TLS/DTLS/QUIC ClientHello from data_client. @@ -293,7 +322,8 @@ class NextLayer: case _: # pragma: no cover assert_never(context.client.transport_protocol) - def _setup_reverse_proxy(self, context: Context, data_client: bytes) -> Layer: + @staticmethod + def _setup_reverse_proxy(context: Context, data_client: bytes) -> Layer: spec = cast(mode_specs.ReverseMode, context.client.proxy_mode) stack = tunnel.LayerStack() @@ -353,7 +383,8 @@ class NextLayer: return stack[0] - def _setup_explicit_http_proxy(self, context: Context, data_client: bytes) -> Layer: + @staticmethod + def _setup_explicit_http_proxy(context: Context, data_client: bytes) -> Layer: stack = tunnel.LayerStack() if context.client.transport_protocol == "udp": @@ -368,9 +399,8 @@ class NextLayer: return stack[0] - def _is_destination_in_hosts( - self, context: Context, hosts: Iterable[re.Pattern] - ) -> bool: + @staticmethod + def _is_destination_in_hosts(context: Context, hosts: Iterable[re.Pattern]) -> bool: return any( (context.server.address and rex.search(context.server.address[0])) or (context.client.sni and rex.search(context.client.sni)) diff --git a/test/mitmproxy/addons/test_next_layer.py b/test/mitmproxy/addons/test_next_layer.py index b91a94223..bd67a83e6 100644 --- a/test/mitmproxy/addons/test_next_layer.py +++ b/test/mitmproxy/addons/test_next_layer.py @@ -12,6 +12,7 @@ import pytest from mitmproxy.addons.next_layer import NeedsMoreData from mitmproxy.addons.next_layer import NextLayer from mitmproxy.addons.next_layer import stack_match +from mitmproxy.connection import Address from mitmproxy.connection import Client from mitmproxy.connection import TransportProtocol from mitmproxy.proxy.context import Context @@ -90,7 +91,10 @@ quic_client_hello = bytes.fromhex( dns_query = bytes.fromhex("002a01000001000000000000076578616d706c6503636f6d0000010001") -http_query = b"GET / HTTP/1.1\r\nHost: example.com\r\n\r\n" +http_get = b"GET / HTTP/1.1\r\nHost: example.com\r\n\r\n" +http_get_absolute = b"GET http://example.com/ HTTP/1.1\r\n\r\n" + +http_connect = b"CONNECT example.com:443 HTTP/1.1\r\nHost: example.com:443\r\n\r\n" class TestNextLayer: @@ -103,40 +107,46 @@ class TestNextLayer: ) @pytest.mark.parametrize( - "mode, ignore, allow, transport_protocol, server_address, data_client, result", + "ignore, allow, transport_protocol, server_address, data_client, result", [ - pytest.param( - [], - [], - ["example.com"], - "tcp", - "example.org", - http_query, - False, - id="extract host from http request", - ), - pytest.param( - ["wireguard"], - ["example.com"], - [], - "udp", - "10.0.0.53", - dns_query, - False, - id="special handling for wireguard mode", - ), # ignore pytest.param( - [], [], [], "example.com", "tcp", b"", False, id="nothing ignored" + [], [], "example.com", "tcp", b"", False, id="nothing ignored" ), pytest.param( - [], ["example.com"], [], "tcp", "example.com", b"", True, id="address" + ["example.com"], [], "tcp", "example.com", b"", True, id="address" ), pytest.param( - [], ["1.2.3.4"], [], "tcp", "example.com", b"", True, id="ip address" + ["1.2.3.4"], [], "tcp", "example.com", b"", True, id="ip address" ), pytest.param( + ["example.com"], [], + "tcp", + "192.0.2.1", + http_get, + True, + id="http host header", + ), + pytest.param( + ["example.com"], + [], + "tcp", + "192.0.2.1", + http_get.replace(b"Host", b"X-Host"), + False, + id="http host header missing", + ), + pytest.param( + ["example.com"], + [], + "tcp", + "192.0.2.1", + http_get.split(b"\r\n", 1)[0], + NeedsMoreData, + id="incomplete http host header", + ), + pytest.param( ["example.com"], [], "tcp", @@ -146,7 +156,6 @@ class TestNextLayer: id="partial address match", ), pytest.param( - [], ["example.com"], [], "tcp", @@ -156,7 +165,6 @@ class TestNextLayer: id="no destination info", ), pytest.param( - [], ["example.com"], [], "tcp", @@ -166,7 +174,6 @@ class TestNextLayer: id="no sni", ), pytest.param( - [], ["example.com"], [], "tcp", @@ -176,7 +183,6 @@ class TestNextLayer: id="sni", ), pytest.param( - [], ["example.com"], [], "tcp", @@ -186,7 +192,6 @@ class TestNextLayer: id="incomplete client hello", ), pytest.param( - [], ["example.com"], [], "tcp", @@ -196,7 +201,6 @@ class TestNextLayer: id="invalid client hello", ), pytest.param( - [], ["example.com"], [], "tcp", @@ -206,7 +210,6 @@ class TestNextLayer: id="sni mismatch", ), pytest.param( - [], ["example.com"], [], "udp", @@ -216,7 +219,6 @@ class TestNextLayer: id="dtls sni", ), pytest.param( - [], ["example.com"], [], "udp", @@ -226,7 +228,6 @@ class TestNextLayer: id="incomplete dtls client hello", ), pytest.param( - [], ["example.com"], [], "udp", @@ -236,7 +237,6 @@ class TestNextLayer: id="invalid dtls client hello", ), pytest.param( - [], ["example.com"], [], "udp", @@ -247,7 +247,6 @@ class TestNextLayer: ), # allow pytest.param( - [], [], ["example.com"], "tcp", @@ -257,7 +256,6 @@ class TestNextLayer: id="allow: allow", ), pytest.param( - [], [], ["example.com"], "tcp", @@ -267,7 +265,6 @@ class TestNextLayer: id="allow: ignore", ), pytest.param( - [], [], ["example.com"], "tcp", @@ -280,7 +277,6 @@ class TestNextLayer: ) def test_ignore_connection( self, - mode: list[str], ignore: list[str], allow: list[str], transport_protocol: TransportProtocol, @@ -294,8 +290,6 @@ class TestNextLayer: tctx.configure(nl, ignore_hosts=ignore) if allow: tctx.configure(nl, allow_hosts=allow) - if mode: - tctx.options.mode = mode ctx = Context( Client(peername=("192.168.0.42", 51234), sockname=("0.0.0.0", 8080)), tctx.options, @@ -304,15 +298,11 @@ class TestNextLayer: if server_address: ctx.server.address = (server_address, 443) ctx.server.peername = ("1.2.3.4", 443) - if "wireguard" in tctx.options.mode: - ctx.server.peername = ("10.0.0.53", 53) - ctx.server.address = ("10.0.0.53", 53) - ctx.client.proxy_mode = ProxyMode.parse("wireguard") if result is NeedsMoreData: with pytest.raises(NeedsMoreData): - nl._ignore_connection(ctx, data_client) + nl._ignore_connection(ctx, data_client, b"") else: - assert nl._ignore_connection(ctx, data_client) is result + assert nl._ignore_connection(ctx, data_client, b"") is result def test_next_layer(self, monkeypatch, caplog): caplog.set_level(logging.INFO) @@ -333,7 +323,7 @@ class TestNextLayer: assert m.layer is preexisting m.layer = None - monkeypatch.setattr(m, "data_client", lambda: http_query) + monkeypatch.setattr(m, "data_client", lambda: http_get) nl.next_layer(m) assert m.layer @@ -358,6 +348,7 @@ class TConf: tcp_hosts: Sequence[str] = () udp_hosts: Sequence[str] = () ignore_conn: bool = False + server_address: Address | None = None explicit_proxy_configs = [ @@ -365,8 +356,27 @@ explicit_proxy_configs = [ TConf( before=[modes.HttpProxy], after=[modes.HttpProxy, HttpLayer], + data_client=http_connect, ), - id=f"explicit proxy: regular http", + id=f"explicit proxy: regular http connect", + ), + pytest.param( + TConf( + before=[modes.HttpProxy], + after=[modes.HttpProxy, HttpLayer], + ignore_hosts=[".+"], + data_client=http_connect, + ), + id=f"explicit proxy: regular http connect disregards ignore_hosts", + ), + pytest.param( + TConf( + before=[modes.HttpProxy], + after=[modes.HttpProxy, HttpLayer], + ignore_hosts=[".+"], + data_client=http_get_absolute, + ), + id=f"explicit proxy: HTTP over regular proxy disregards ignore_hosts", ), pytest.param( TConf( @@ -542,6 +552,18 @@ reverse_proxy_configs.extend( ), id="reverse proxy: quic", ), + pytest.param( + TConf( + before=[modes.ReverseProxy], + after=[modes.ReverseProxy, TCPLayer], + proxy_mode=f"reverse:http://example.com", + ignore_hosts=["example.com"], + server_address=("example.com", 80), + data_client=http_get, + ignore_conn=True, + ), + id="reverse proxy: ignore_hosts", + ), ] ) @@ -584,14 +606,15 @@ transparent_proxy_configs = [ http := TConf( before=[modes.TransparentProxy], after=[modes.TransparentProxy, HttpLayer], - data_client=b"GET / HTTP/1.1\r\n", + server_address=("192.0.2.1", 80), + data_client=http_get, ), id="transparent proxy: http", ), pytest.param( dataclasses.replace( http, - tcp_hosts=["example.com"], + tcp_hosts=["192.0.2.1"], after=[modes.TransparentProxy, TCPLayer], ), id="transparent proxy: tcp_hosts", @@ -599,25 +622,17 @@ transparent_proxy_configs = [ pytest.param( dataclasses.replace( http, - ignore_hosts=["example.com"], + ignore_hosts=["192.0.2.1"], after=[modes.TransparentProxy, TCPLayer], ignore_conn=True, ), id="transparent proxy: ignore_hosts", ), pytest.param( - dns := TConf( - before=[modes.TransparentProxy], - after=[modes.TransparentProxy, DNSLayer], - transport_protocol="udp", - data_client=dns_query, - ), - id="transparent proxy: dns", - ), - pytest.param( - TConf( + udp := TConf( before=[modes.TransparentProxy], after=[modes.TransparentProxy, UDPLayer], + server_address=("192.0.2.1", 53), transport_protocol="udp", data_client=b"\xFF", ), @@ -625,12 +640,32 @@ transparent_proxy_configs = [ ), pytest.param( dataclasses.replace( - dns, - udp_hosts=["example.com"], + udp, + after=[modes.TransparentProxy, DNSLayer], + data_client=dns_query, + ), + id="transparent proxy: dns", + ), + pytest.param( + dataclasses.replace( + udp, + udp_hosts=["192.0.2.1"], after=[modes.TransparentProxy, UDPLayer], ), id="transparent proxy: udp_hosts", ), + pytest.param( + TConf( + before=[modes.TransparentProxy], + after=[modes.TransparentProxy, DNSLayer], + proxy_mode="wireguard", + server_address=("10.0.0.53", 53), + ignore_hosts=[".+"], + transport_protocol="udp", + data_client=dns_query, + ), + id="wireguard proxy: dns should not be ignored", + ), ] @@ -658,8 +693,7 @@ def test_next_layer( Client(peername=("192.168.0.42", 51234), sockname=("0.0.0.0", 8080)), tctx.options, ) - ctx.server.address = ("example.com", 42) - # these aren't properly set up, but this does not matter here. + ctx.server.address = test_conf.server_address ctx.client.transport_protocol = test_conf.transport_protocol ctx.client.proxy_mode = ProxyMode.parse(test_conf.proxy_mode) ctx.layers = [x(ctx) for x in test_conf.before] @@ -668,7 +702,7 @@ def test_next_layer( data_client=test_conf.data_client, data_server=test_conf.data_server, ) - assert stack_match(ctx, test_conf.after) + assert stack_match(ctx, test_conf.after), f"Unexpected stack: {ctx.layers}" last_layer = ctx.layers[-1] if isinstance(last_layer, (UDPLayer, TCPLayer)):