fix ignore hosts to not drop connections, improve http header handling (#6559)

fixes #6554
This commit is contained in:
Maximilian Hils 2023-12-18 14:55:49 +01:00 committed by GitHub
parent c0f81a8542
commit 8290c428ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 156 additions and 87 deletions

View File

@ -7,6 +7,11 @@
## Unreleased: mitmproxy next
* Fix a regression from mitmproxy 10.1.6 where `ignore_hosts` would terminate requests
instead of forwarding them.
([#6559](https://github.com/mitmproxy/mitmproxy/pull/6559), @mhils)
* `ignore_hosts` now waits for the complete HTTP headers if it suspects the connection to be HTTP.
([#6559](https://github.com/mitmproxy/mitmproxy/pull/6559), @mhils)
## 14 December 2023: mitmproxy 10.1.6
@ -18,17 +23,17 @@
([#6225](https://github.com/mitmproxy/mitmproxy/issues/6225), @Llama1412)
* Fix bug where response flows from HAR files had incorrect `content-length` headers
([#6548](https://github.com/mitmproxy/mitmproxy/pull/6548), @zanieb)
* Improved handling for `--allow-hosts`/`--ignore-hosts` options in WireGuard mode (#5930).
* Improved handling for `allow_hosts`/`ignore_hosts` options in WireGuard mode (#5930).
([#6513](https://github.com/mitmproxy/mitmproxy/pull/6513), @dsphper)
* Fix a bug where TCP connections were not closed properly.
([#6543](https://github.com/mitmproxy/mitmproxy/pull/6543), @mhils)
* DNS resolution is now exempted from `--ignore-hosts` in WireGuard Mode.
* DNS resolution is now exempted from `ignore_hosts` in WireGuard Mode.
([#6513](https://github.com/mitmproxy/mitmproxy/pull/6513), @dsphper)
* Fix case sensitivity of URL added to blocklist
([#6493](https://github.com/mitmproxy/mitmproxy/pull/6493), @emanuele-em)
* Fix a bug where logging was stopped prematurely during shutdown.
([#6541](https://github.com/mitmproxy/mitmproxy/pull/6541), @mhils)
* For plaintext traffic, `--ignore-hosts` now also takes HTTP/1 host headers into account.
* For plaintext traffic, `ignore_hosts` now also takes HTTP/1 host headers into account.
([#6513](https://github.com/mitmproxy/mitmproxy/pull/6513), @dsphper)
* Fix empty cookie attributes being set to `Key=` instead of `Key`
([#5084](https://github.com/mitmproxy/mitmproxy/pull/5084), @Speedlulu)

View File

@ -49,6 +49,7 @@ from mitmproxy.proxy.layers import UDPLayer
from mitmproxy.proxy.layers.http import HTTPMode
from mitmproxy.proxy.layers.quic import quic_parse_client_hello
from mitmproxy.proxy.layers.tls import dtls_parse_client_hello
from mitmproxy.proxy.layers.tls import HTTP1_ALPNS
from mitmproxy.proxy.layers.tls import HTTP_ALPNS
from mitmproxy.proxy.layers.tls import parse_client_hello
from mitmproxy.tls import ClientHello
@ -129,7 +130,7 @@ class NextLayer:
udp_based = context.client.transport_protocol == "udp"
# 1) check for --ignore/--allow
if self._ignore_connection(context, data_client):
if self._ignore_connection(context, data_client, data_server):
return (
layers.TCPLayer(context, ignore=True)
if tcp_based
@ -182,7 +183,7 @@ class NextLayer:
if udp_based:
return layers.UDPLayer(context)
# 5b) Check for raw tcp mode.
very_likely_http = context.client.alpn and context.client.alpn in HTTP_ALPNS
very_likely_http = context.client.alpn in HTTP_ALPNS
probably_no_http = not very_likely_http and (
# the first three bytes should be the HTTP verb, so A-Za-z is expected.
len(data_client) < 3
@ -199,6 +200,7 @@ class NextLayer:
self,
context: Context,
data_client: bytes,
data_server: bytes,
) -> bool | None:
"""
Returns:
@ -220,13 +222,15 @@ class NextLayer:
hostnames.append(peername)
if context.server.address and (server_address := context.server.address[0]):
hostnames.append(server_address)
# If we already have a destination address, we can also check for HTTP Host headers.
# But we do need the destination; otherwise we don't know where this connection is going.
if host_header := self._get_host_header(context, data_client, data_server):
hostnames.append(host_header)
if (
client_hello := self._get_client_hello(context, data_client)
) and client_hello.sni:
hostnames.append(client_hello.sni)
# If the client data is not a TLS record, try to extract the domain from the HTTP request
elif host := self._extract_http1_host_header(data_client):
hostnames.append(host)
if not hostnames:
return False
@ -246,14 +250,39 @@ class NextLayer:
raise AssertionError()
@staticmethod
def _extract_http1_host_header(data_client: bytes) -> str:
pattern = rb"Host:\s+(.+?)\r\n"
match = re.search(pattern, data_client)
return match.group(1).decode() if match else ""
def _get_host_header(
    context: Context,
    data_client: bytes,
    data_server: bytes,
) -> str | None:
    """
    Try to read a host header from data_client.

    Returns:
        The host header value, or None, if no host header was found.

    Raises:
        NeedsMoreData, if the HTTP request is incomplete.
    """
    # Nothing to inspect for non-TCP transports, or once the server has
    # already sent data.
    if context.client.transport_protocol != "tcp" or data_server:
        return None

    # Expect a host header if ALPN negotiated HTTP/1, or if the data starts
    # with something shaped like an HTTP/1 request line.
    host_header_expected = context.client.alpn in HTTP1_ALPNS or re.match(
        rb"[A-Z]{3,}.+HTTP/", data_client, re.IGNORECASE
    )
    if not host_header_expected:
        return None

    # Find whichever comes first: a Host header line or the empty line that
    # ends the headers. Whitespace around the field value is optional, so it
    # is neither required nor included in the captured value.
    m = re.search(
        rb"\r\n(?:Host:[ \t]*(\S.*?)[ \t]*)?\r\n", data_client, re.IGNORECASE
    )
    if m is None:
        # Neither a Host header nor the end of the headers has arrived yet.
        raise NeedsMoreData
    if host := m.group(1):
        return host.decode("utf-8", "surrogateescape")
    return None  # \r\n\r\n - header end came first.
@staticmethod
def _get_client_hello(context: Context, data_client: bytes) -> ClientHello | None:
"""
Try to read a TLS/DTLS/QUIC ClientHello from data_client.
@ -293,7 +322,8 @@ class NextLayer:
case _: # pragma: no cover
assert_never(context.client.transport_protocol)
def _setup_reverse_proxy(self, context: Context, data_client: bytes) -> Layer:
@staticmethod
def _setup_reverse_proxy(context: Context, data_client: bytes) -> Layer:
spec = cast(mode_specs.ReverseMode, context.client.proxy_mode)
stack = tunnel.LayerStack()
@ -353,7 +383,8 @@ class NextLayer:
return stack[0]
def _setup_explicit_http_proxy(self, context: Context, data_client: bytes) -> Layer:
@staticmethod
def _setup_explicit_http_proxy(context: Context, data_client: bytes) -> Layer:
stack = tunnel.LayerStack()
if context.client.transport_protocol == "udp":
@ -368,9 +399,8 @@ class NextLayer:
return stack[0]
def _is_destination_in_hosts(
self, context: Context, hosts: Iterable[re.Pattern]
) -> bool:
@staticmethod
def _is_destination_in_hosts(context: Context, hosts: Iterable[re.Pattern]) -> bool:
return any(
(context.server.address and rex.search(context.server.address[0]))
or (context.client.sni and rex.search(context.client.sni))

View File

@ -12,6 +12,7 @@ import pytest
from mitmproxy.addons.next_layer import NeedsMoreData
from mitmproxy.addons.next_layer import NextLayer
from mitmproxy.addons.next_layer import stack_match
from mitmproxy.connection import Address
from mitmproxy.connection import Client
from mitmproxy.connection import TransportProtocol
from mitmproxy.proxy.context import Context
@ -90,7 +91,10 @@ quic_client_hello = bytes.fromhex(
dns_query = bytes.fromhex("002a01000001000000000000076578616d706c6503636f6d0000010001")
http_query = b"GET / HTTP/1.1\r\nHost: example.com\r\n\r\n"
http_get = b"GET / HTTP/1.1\r\nHost: example.com\r\n\r\n"
http_get_absolute = b"GET http://example.com/ HTTP/1.1\r\n\r\n"
http_connect = b"CONNECT example.com:443 HTTP/1.1\r\nHost: example.com:443\r\n\r\n"
class TestNextLayer:
@ -103,40 +107,46 @@ class TestNextLayer:
)
@pytest.mark.parametrize(
"mode, ignore, allow, transport_protocol, server_address, data_client, result",
"ignore, allow, transport_protocol, server_address, data_client, result",
[
pytest.param(
[],
[],
["example.com"],
"tcp",
"example.org",
http_query,
False,
id="extract host from http request",
),
pytest.param(
["wireguard"],
["example.com"],
[],
"udp",
"10.0.0.53",
dns_query,
False,
id="special handling for wireguard mode",
),
# ignore
pytest.param(
[], [], [], "example.com", "tcp", b"", False, id="nothing ignored"
[], [], "example.com", "tcp", b"", False, id="nothing ignored"
),
pytest.param(
[], ["example.com"], [], "tcp", "example.com", b"", True, id="address"
["example.com"], [], "tcp", "example.com", b"", True, id="address"
),
pytest.param(
[], ["1.2.3.4"], [], "tcp", "example.com", b"", True, id="ip address"
["1.2.3.4"], [], "tcp", "example.com", b"", True, id="ip address"
),
pytest.param(
["example.com"],
[],
"tcp",
"192.0.2.1",
http_get,
True,
id="http host header",
),
pytest.param(
["example.com"],
[],
"tcp",
"192.0.2.1",
http_get.replace(b"Host", b"X-Host"),
False,
id="http host header missing",
),
pytest.param(
["example.com"],
[],
"tcp",
"192.0.2.1",
http_get.split(b"\r\n", 1)[0],
NeedsMoreData,
id="incomplete http host header",
),
pytest.param(
["example.com"],
[],
"tcp",
@ -146,7 +156,6 @@ class TestNextLayer:
id="partial address match",
),
pytest.param(
[],
["example.com"],
[],
"tcp",
@ -156,7 +165,6 @@ class TestNextLayer:
id="no destination info",
),
pytest.param(
[],
["example.com"],
[],
"tcp",
@ -166,7 +174,6 @@ class TestNextLayer:
id="no sni",
),
pytest.param(
[],
["example.com"],
[],
"tcp",
@ -176,7 +183,6 @@ class TestNextLayer:
id="sni",
),
pytest.param(
[],
["example.com"],
[],
"tcp",
@ -186,7 +192,6 @@ class TestNextLayer:
id="incomplete client hello",
),
pytest.param(
[],
["example.com"],
[],
"tcp",
@ -196,7 +201,6 @@ class TestNextLayer:
id="invalid client hello",
),
pytest.param(
[],
["example.com"],
[],
"tcp",
@ -206,7 +210,6 @@ class TestNextLayer:
id="sni mismatch",
),
pytest.param(
[],
["example.com"],
[],
"udp",
@ -216,7 +219,6 @@ class TestNextLayer:
id="dtls sni",
),
pytest.param(
[],
["example.com"],
[],
"udp",
@ -226,7 +228,6 @@ class TestNextLayer:
id="incomplete dtls client hello",
),
pytest.param(
[],
["example.com"],
[],
"udp",
@ -236,7 +237,6 @@ class TestNextLayer:
id="invalid dtls client hello",
),
pytest.param(
[],
["example.com"],
[],
"udp",
@ -247,7 +247,6 @@ class TestNextLayer:
),
# allow
pytest.param(
[],
[],
["example.com"],
"tcp",
@ -257,7 +256,6 @@ class TestNextLayer:
id="allow: allow",
),
pytest.param(
[],
[],
["example.com"],
"tcp",
@ -267,7 +265,6 @@ class TestNextLayer:
id="allow: ignore",
),
pytest.param(
[],
[],
["example.com"],
"tcp",
@ -280,7 +277,6 @@ class TestNextLayer:
)
def test_ignore_connection(
self,
mode: list[str],
ignore: list[str],
allow: list[str],
transport_protocol: TransportProtocol,
@ -294,8 +290,6 @@ class TestNextLayer:
tctx.configure(nl, ignore_hosts=ignore)
if allow:
tctx.configure(nl, allow_hosts=allow)
if mode:
tctx.options.mode = mode
ctx = Context(
Client(peername=("192.168.0.42", 51234), sockname=("0.0.0.0", 8080)),
tctx.options,
@ -304,15 +298,11 @@ class TestNextLayer:
if server_address:
ctx.server.address = (server_address, 443)
ctx.server.peername = ("1.2.3.4", 443)
if "wireguard" in tctx.options.mode:
ctx.server.peername = ("10.0.0.53", 53)
ctx.server.address = ("10.0.0.53", 53)
ctx.client.proxy_mode = ProxyMode.parse("wireguard")
if result is NeedsMoreData:
with pytest.raises(NeedsMoreData):
nl._ignore_connection(ctx, data_client)
nl._ignore_connection(ctx, data_client, b"")
else:
assert nl._ignore_connection(ctx, data_client) is result
assert nl._ignore_connection(ctx, data_client, b"") is result
def test_next_layer(self, monkeypatch, caplog):
caplog.set_level(logging.INFO)
@ -333,7 +323,7 @@ class TestNextLayer:
assert m.layer is preexisting
m.layer = None
monkeypatch.setattr(m, "data_client", lambda: http_query)
monkeypatch.setattr(m, "data_client", lambda: http_get)
nl.next_layer(m)
assert m.layer
@ -358,6 +348,7 @@ class TConf:
tcp_hosts: Sequence[str] = ()
udp_hosts: Sequence[str] = ()
ignore_conn: bool = False
server_address: Address | None = None
explicit_proxy_configs = [
@ -365,8 +356,27 @@ explicit_proxy_configs = [
TConf(
before=[modes.HttpProxy],
after=[modes.HttpProxy, HttpLayer],
data_client=http_connect,
),
id=f"explicit proxy: regular http",
id=f"explicit proxy: regular http connect",
),
pytest.param(
TConf(
before=[modes.HttpProxy],
after=[modes.HttpProxy, HttpLayer],
ignore_hosts=[".+"],
data_client=http_connect,
),
id=f"explicit proxy: regular http connect disregards ignore_hosts",
),
pytest.param(
TConf(
before=[modes.HttpProxy],
after=[modes.HttpProxy, HttpLayer],
ignore_hosts=[".+"],
data_client=http_get_absolute,
),
id=f"explicit proxy: HTTP over regular proxy disregards ignore_hosts",
),
pytest.param(
TConf(
@ -542,6 +552,18 @@ reverse_proxy_configs.extend(
),
id="reverse proxy: quic",
),
pytest.param(
TConf(
before=[modes.ReverseProxy],
after=[modes.ReverseProxy, TCPLayer],
proxy_mode=f"reverse:http://example.com",
ignore_hosts=["example.com"],
server_address=("example.com", 80),
data_client=http_get,
ignore_conn=True,
),
id="reverse proxy: ignore_hosts",
),
]
)
@ -584,14 +606,15 @@ transparent_proxy_configs = [
http := TConf(
before=[modes.TransparentProxy],
after=[modes.TransparentProxy, HttpLayer],
data_client=b"GET / HTTP/1.1\r\n",
server_address=("192.0.2.1", 80),
data_client=http_get,
),
id="transparent proxy: http",
),
pytest.param(
dataclasses.replace(
http,
tcp_hosts=["example.com"],
tcp_hosts=["192.0.2.1"],
after=[modes.TransparentProxy, TCPLayer],
),
id="transparent proxy: tcp_hosts",
@ -599,25 +622,17 @@ transparent_proxy_configs = [
pytest.param(
dataclasses.replace(
http,
ignore_hosts=["example.com"],
ignore_hosts=["192.0.2.1"],
after=[modes.TransparentProxy, TCPLayer],
ignore_conn=True,
),
id="transparent proxy: ignore_hosts",
),
pytest.param(
dns := TConf(
before=[modes.TransparentProxy],
after=[modes.TransparentProxy, DNSLayer],
transport_protocol="udp",
data_client=dns_query,
),
id="transparent proxy: dns",
),
pytest.param(
TConf(
udp := TConf(
before=[modes.TransparentProxy],
after=[modes.TransparentProxy, UDPLayer],
server_address=("192.0.2.1", 53),
transport_protocol="udp",
data_client=b"\xFF",
),
@ -625,12 +640,32 @@ transparent_proxy_configs = [
),
pytest.param(
dataclasses.replace(
dns,
udp_hosts=["example.com"],
udp,
after=[modes.TransparentProxy, DNSLayer],
data_client=dns_query,
),
id="transparent proxy: dns",
),
pytest.param(
dataclasses.replace(
udp,
udp_hosts=["192.0.2.1"],
after=[modes.TransparentProxy, UDPLayer],
),
id="transparent proxy: udp_hosts",
),
pytest.param(
TConf(
before=[modes.TransparentProxy],
after=[modes.TransparentProxy, DNSLayer],
proxy_mode="wireguard",
server_address=("10.0.0.53", 53),
ignore_hosts=[".+"],
transport_protocol="udp",
data_client=dns_query,
),
id="wireguard proxy: dns should not be ignored",
),
]
@ -658,8 +693,7 @@ def test_next_layer(
Client(peername=("192.168.0.42", 51234), sockname=("0.0.0.0", 8080)),
tctx.options,
)
ctx.server.address = ("example.com", 42)
# these aren't properly set up, but this does not matter here.
ctx.server.address = test_conf.server_address
ctx.client.transport_protocol = test_conf.transport_protocol
ctx.client.proxy_mode = ProxyMode.parse(test_conf.proxy_mode)
ctx.layers = [x(ctx) for x in test_conf.before]
@ -668,7 +702,7 @@ def test_next_layer(
data_client=test_conf.data_client,
data_server=test_conf.data_server,
)
assert stack_match(ctx, test_conf.after)
assert stack_match(ctx, test_conf.after), f"Unexpected stack: {ctx.layers}"
last_layer = ctx.layers[-1]
if isinstance(last_layer, (UDPLayer, TCPLayer)):