`--rewrite-host-header` flag for reverse proxy (#1492)

* Rewrite Host header during reverse proxy

* bring back `VERIFIED6`

* Lint fixes

* `--rewrite-host-header` flag

* Pass `--rewrite-host-header` for integration tests

* expect `httpbingo.org` as header now due to host rewrite

* Also pass flag during build & test suite
This commit is contained in:
Abhinav Singh 2024-10-15 10:04:31 +05:30 committed by GitHub
parent 9077c16b1b
commit 050ac1c39a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 133 additions and 39 deletions

View File

@ -610,6 +610,7 @@ jobs:
--enable-web-server
--enable-reverse-proxy
--plugin proxy.plugin.ReverseProxyPlugin
--rewrite-host-header
&&
./tests/integration/test_integration.sh 8899

View File

@ -1075,7 +1075,7 @@ following `Nginx` config:
```console
location /get {
proxy_pass http://httpbin.org/get
proxy_pass http://httpbin.org/get;
}
```
@ -1094,6 +1094,36 @@ Verify using `curl -v localhost:8899/get`:
}
```
#### Rewrite Host Header
With above example, you may sometimes see:
```console
>
* Empty reply from server
* Closing connection
curl: (52) Empty reply from server
```
This is happenening because our default reverse proxy plugin `ReverseProxyPlugin` is configured
with a `http` and a `https` upstream server. And, by default `ReverseProxyPlugin` preserves the
original host header. While this works with `https` upstreams, this doesn't work reliably with
`http` upstreams. To work around this problem use the `--rewrite-host-header` flags.
Example:
```console
proxy --enable-reverse-proxy \
--plugins proxy.plugin.ReverseProxyPlugin \
--rewrite-host-header
```
This will ensure that `Host` header field is set as `httpbin.org` and works with both `http` and
`https` upstreams.
> NOTE: Whether to use `--rewrite-host-header` or not depends upon your use-case.
## Plugin Ordering
When using multiple plugins, depending upon plugin functionality,
@ -2613,7 +2643,7 @@ usage: -m [-h] [--tunnel-hostname TUNNEL_HOSTNAME] [--tunnel-port TUNNEL_PORT]
[--proxy-pool PROXY_POOL] [--enable-web-server]
[--enable-static-server] [--static-server-dir STATIC_SERVER_DIR]
[--min-compression-length MIN_COMPRESSION_LENGTH]
[--enable-reverse-proxy] [--enable-metrics]
[--enable-reverse-proxy] [--rewrite-host-header] [--enable-metrics]
[--metrics-path METRICS_PATH] [--pac-file PAC_FILE]
[--pac-file-url-path PAC_FILE_URL_PATH]
[--cloudflare-dns-mode CLOUDFLARE_DNS_MODE]
@ -2622,7 +2652,7 @@ usage: -m [-h] [--tunnel-hostname TUNNEL_HOSTNAME] [--tunnel-port TUNNEL_PORT]
[--filtered-client-ips FILTERED_CLIENT_IPS]
[--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG]
proxy.py v2.4.6.dev25+g2754b928.d20240812
proxy.py v2.4.8.dev8+gc703edac.d20241013
options:
-h, --help show this help message and exit
@ -2791,6 +2821,9 @@ options:
response that will be compressed (gzipped).
--enable-reverse-proxy
Default: False. Whether to enable reverse proxy core.
--rewrite-host-header
Default: False. If used, reverse proxy server will
rewrite Host header field before sending to upstream.
--enable-metrics Default: False. Enables metrics.
--metrics-path METRICS_PATH
Default: /metrics. Web server path to serve proxy.py

View File

@ -165,6 +165,7 @@ DEFAULT_SSL_CONTEXT_OPTIONS = (
if sys.version_info >= (3, 10)
else (ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1)
)
DEFAULT_ENABLE_REWRITE_HOST = False
DEFAULT_DEVTOOLS_DOC_URL = 'http://proxy'
DEFAULT_DEVTOOLS_FRAME_ID = secrets.token_hex(8)

View File

@ -25,8 +25,6 @@ import contextlib
from types import TracebackType
from typing import Any, Dict, List, Type, Tuple, Callable, Optional
import _ssl # noqa: WPS436
from .types import HostPort
from .constants import (
CRLF, COLON, HTTP_1_1, IS_WINDOWS, WHITESPACE, DEFAULT_TIMEOUT,
@ -42,6 +40,9 @@ logger = logging.getLogger(__name__)
def cert_der_to_dict(der: Optional[bytes]) -> Dict[str, Any]:
"""Parse a DER formatted certificate to a python dict"""
# pylint: disable=import-outside-toplevel
import _ssl # noqa: WPS436
if not der:
return {}
with tempfile.NamedTemporaryFile(delete=False) as cert_file:
@ -322,6 +323,7 @@ def set_open_file_limit(soft_limit: int) -> None:
if IS_WINDOWS: # pragma: no cover
return
# pylint: disable=possibly-used-before-assignment
curr_soft_limit, curr_hard_limit = resource.getrlimit(
resource.RLIMIT_NOFILE,
)

View File

@ -49,7 +49,6 @@ class TcpConnection(ABC):
def send(self, data: Union[memoryview, bytes]) -> int:
"""Users must handle BrokenPipeError exceptions"""
# logger.info(data.tobytes())
return self.connection.send(data)
def recv(
@ -67,7 +66,7 @@ class TcpConnection(ABC):
return memoryview(data)
def close(self) -> bool:
if not self.closed:
if not self.closed and self.connection:
self.connection.close()
self.closed = True
return self.closed
@ -97,8 +96,9 @@ class TcpConnection(ABC):
self._num_buffer -= 1
else:
self.buffer[0] = mv[sent:]
del mv
logger.debug('flushed %d bytes to %s' % (sent, self.tag))
# logger.info(mv[:sent].tobytes())
del mv
return sent
def is_reusable(self) -> bool:

View File

@ -283,7 +283,12 @@ class HttpParser:
self.state = httpParserStates.COMPLETE
self.buffer = None if raw == b'' else raw
def build(self, disable_headers: Optional[List[bytes]] = None, for_proxy: bool = False) -> bytes:
def build(
self,
disable_headers: Optional[List[bytes]] = None,
for_proxy: bool = False,
host: Optional[bytes] = None,
) -> bytes:
"""Rebuild the request object."""
assert self.method and self.version and self.type == httpParserTypes.REQUEST_PARSER
if disable_headers is None:
@ -301,11 +306,22 @@ class HttpParser:
path
) if not self._is_https_tunnel else (self.host + COLON + str(self.port).encode())
return build_http_request(
self.method, path, self.version,
headers={} if not self.headers else {
self.headers[k][0]: self.headers[k][1] for k in self.headers if
k.lower() not in disable_headers
},
self.method,
path,
self.version,
headers=(
{}
if not self.headers
else {
self.headers[k][0]: (
self.headers[k][1]
if host is None or self.headers[k][0].lower() != b'host'
else host
)
for k in self.headers
if k.lower() not in disable_headers
}
),
body=body,
no_ua=True,
)

View File

@ -17,10 +17,10 @@ from proxy.http import Url
from proxy.core.base import TcpUpstreamConnectionHandler
from proxy.http.parser import HttpParser
from proxy.http.server import HttpWebServerBasePlugin
from proxy.common.utils import text_
from proxy.common.utils import text_, bytes_
from proxy.http.exception import HttpProtocolException
from proxy.common.constants import (
HTTPS_PROTO, DEFAULT_HTTP_PORT, DEFAULT_HTTPS_PORT,
COLON, HTTP_PROTO, HTTPS_PROTO, DEFAULT_HTTP_PORT, DEFAULT_HTTPS_PORT,
DEFAULT_REVERSE_PROXY_ACCESS_LOG_FORMAT,
)
from ...common.types import Readables, Writables, Descriptors
@ -111,8 +111,8 @@ class ReverseProxy(TcpUpstreamConnectionHandler, HttpWebServerBasePlugin):
assert self.choice and self.choice.hostname
port = (
self.choice.port or DEFAULT_HTTP_PORT
if self.choice.scheme == b'http'
else DEFAULT_HTTPS_PORT
if self.choice.scheme == HTTP_PROTO
else self.choice.port or DEFAULT_HTTPS_PORT
)
self.initialize_upstream(text_(self.choice.hostname), port)
assert self.upstream
@ -120,14 +120,27 @@ class ReverseProxy(TcpUpstreamConnectionHandler, HttpWebServerBasePlugin):
self.upstream.connect()
if self.choice.scheme == HTTPS_PROTO:
self.upstream.wrap(
text_(
self.choice.hostname,
),
text_(self.choice.hostname),
as_non_blocking=True,
ca_file=self.flags.ca_file,
)
request.path = self.choice.remainder
self.upstream.queue(memoryview(request.build()))
self.upstream.queue(
memoryview(
request.build(
host=(
self.choice.hostname
+ (
COLON + bytes_(self.choice.port)
if self.choice.port is not None
else b''
)
if self.flags.rewrite_host_header
else None
),
),
),
)
except ConnectionRefusedError:
raise HttpProtocolException( # pragma: no cover
'Connection refused by upstream server {0}:{1}'.format(

View File

@ -29,8 +29,9 @@ from ...common.types import Readables, Writables, Descriptors
from ...common.utils import text_, build_websocket_handshake_response
from ...common.constants import (
DEFAULT_ENABLE_WEB_SERVER, DEFAULT_STATIC_SERVER_DIR,
DEFAULT_ENABLE_REVERSE_PROXY, DEFAULT_ENABLE_STATIC_SERVER,
DEFAULT_WEB_ACCESS_LOG_FORMAT, DEFAULT_MIN_COMPRESSION_LENGTH,
DEFAULT_ENABLE_REWRITE_HOST, DEFAULT_ENABLE_REVERSE_PROXY,
DEFAULT_ENABLE_STATIC_SERVER, DEFAULT_WEB_ACCESS_LOG_FORMAT,
DEFAULT_MIN_COMPRESSION_LENGTH,
)
@ -78,6 +79,16 @@ flags.add_argument(
help='Default: False. Whether to enable reverse proxy core.',
)
flags.add_argument(
'--rewrite-host-header',
action='store_true',
default=DEFAULT_ENABLE_REWRITE_HOST,
help='Default: '
+ str(DEFAULT_ENABLE_REWRITE_HOST)
+ '. '
+ 'If used, reverse proxy server will rewrite Host header field before sending to upstream.',
)
class HttpWebServerPlugin(HttpProtocolHandlerPlugin):
"""HttpProtocolHandler plugin which handles incoming requests to local web server."""

View File

@ -181,18 +181,30 @@ def proxy_py_subprocess(request: Any) -> Generator[int, None, None]:
ca_cert_dir = TEMP_DIR / ('certificates-%s' % run_id)
os.makedirs(ca_cert_dir, exist_ok=True)
proxy_cmd = (
sys.executable, '-m', 'proxy',
'--hostname', '127.0.0.1',
'--port', '0',
'--port-file', str(port_file),
sys.executable,
'-m',
'proxy',
'--hostname',
'127.0.0.1',
'--port',
'0',
'--port-file',
str(port_file),
'--enable-web-server',
'--plugin', 'proxy.plugin.WebServerPlugin',
'--plugin',
'proxy.plugin.WebServerPlugin',
'--enable-reverse-proxy',
'--plugin', 'proxy.plugin.ReverseProxyPlugin',
'--num-acceptors', '3',
'--num-workers', '3',
'--ca-cert-dir', str(ca_cert_dir),
'--log-level', 'd',
'--plugin',
'proxy.plugin.ReverseProxyPlugin',
'--rewrite-host-header',
'--num-acceptors',
'3',
'--num-workers',
'3',
'--ca-cert-dir',
str(ca_cert_dir),
'--log-level',
'd',
) + tuple(request.param.split())
proxy_proc = Popen(proxy_cmd, stderr=subprocess.STDOUT)
# Needed because port file might not be available immediately

View File

@ -16,6 +16,8 @@
# For github action, we simply bank upon GitHub
# to clean up any background process including
# proxy.py
#
set -x
PROXY_PY_PORT=$1
if [[ -z "$PROXY_PY_PORT" ]]; then
@ -164,8 +166,14 @@ cat downloaded2.whl | $SHASUM -c downloaded2.hash
VERIFIED5=$?
rm downloaded2.whl downloaded2.hash
# Without --rewrite-host-header we will receive localhost:<port> as host header back in response
# read -r -d '' REVERSE_PROXY_RESPONSE << EOM
# "localhost:$PROXY_PY_PORT"
# EOM
# With --rewrite-host-header we will receive httpbingo.org as host header back in response
read -r -d '' REVERSE_PROXY_RESPONSE << EOM
"localhost:$PROXY_PY_PORT"
"httpbingo.org"
EOM
echo "[Test Reverse Proxy Plugin]"
@ -174,8 +182,5 @@ RESPONSE=$($CMD 2> /dev/null)
verify_contains "$RESPONSE" "$REVERSE_PROXY_RESPONSE"
VERIFIED6=$?
# FIXME: VERIFIED6 NOT ASSERTED BECAUSE WE STARTED GETTING EMPTY RESPONSE FROM UPSTREAM
# AFTER CHANGE FROM HTTPBIN TO HTTPBINGO. This test works and passes perfectly when
# run from a local system
EXIT_CODE=$(( $VERIFIED1 || $VERIFIED2 || $VERIFIED3 || $VERIFIED4 || $VERIFIED5 ))
EXIT_CODE=$(( $VERIFIED1 || $VERIFIED2 || $VERIFIED3 || $VERIFIED4 || $VERIFIED5 || $VERIFIED6 ))
exit $EXIT_CODE