From af88265f5cb4164338c71b84de28c83f2f8b2d27 Mon Sep 17 00:00:00 2001 From: Fata Nugraha Date: Fri, 4 Oct 2024 23:22:26 +0800 Subject: [PATCH] Improve HTTP detection heuristic (#7228) * Improve HTTP checking heuristic * fix changelog * Fix checking * [autofix.ci] apply automated fixes * simplify condition --------- Co-authored-by: Fata Nugraha Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Maximilian Hils --- CHANGELOG.md | 2 ++ mitmproxy/addons/next_layer.py | 7 ++++--- test/mitmproxy/addons/test_next_layer.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02b1a7f91..da1cfaf45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ ## Unreleased: mitmproxy next +- Tighten HTTP detection heuristic to better support custom TCP-based protocols. + ([#7228](https://github.com/mitmproxy/mitmproxy/pull/7228), @fatanugraha) ## 02 October 2024: mitmproxy 11.0.0 diff --git a/mitmproxy/addons/next_layer.py b/mitmproxy/addons/next_layer.py index 8a7ce8f4d..3e51f3211 100644 --- a/mitmproxy/addons/next_layer.py +++ b/mitmproxy/addons/next_layer.py @@ -182,9 +182,10 @@ class NextLayer: probably_no_http = ( # the first three bytes should be the HTTP verb, so A-Za-z is expected. len(data_client) < 3 - # HTTP would require whitespace before the first newline - # if we have neither whitespace nor a newline, it's also unlikely to be HTTP. - or (data_client.find(b" ") >= data_client.find(b"\n")) + # HTTP would require whitespace... + or b" " not in data_client + # ...and that whitespace needs to be in the first line. + or (data_client.find(b" ") > data_client.find(b"\n")) or not data_client[:3].isalpha() # a server greeting would be uncharacteristic. or data_server diff --git a/test/mitmproxy/addons/test_next_layer.py b/test/mitmproxy/addons/test_next_layer.py index 21b9ed00b..f2be4d3a0 100644 --- a/test/mitmproxy/addons/test_next_layer.py +++ b/test/mitmproxy/addons/test_next_layer.py @@ -103,7 +103,7 @@ dns_query = bytes.fromhex("002a01000001000000000000076578616d706c6503636f6d00000 # Custom protocol with just base64-encoded messages # https://github.com/mitmproxy/mitmproxy/pull/7087 -custom_base64_proto = b"AAAAAAAAAAAAAAAAAAAAAA==" +custom_base64_proto = b"AAAAAAAAAAAAAAAAAAAAAA==\n" http_get = b"GET / HTTP/1.1\r\nHost: example.com\r\n\r\n" http_get_absolute = b"GET http://example.com/ HTTP/1.1\r\n\r\n"