2019-12-10 03:38:49 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
|
|
proxy.py
|
|
|
|
~~~~~~~~
|
|
|
|
⚡⚡⚡ Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on
|
|
|
|
Network monitoring, controls & Application development, testing, debugging.
|
|
|
|
|
|
|
|
:copyright: (c) 2013-present by Abhinav Singh and contributors.
|
|
|
|
:license: BSD, see LICENSE for more details.
|
|
|
|
"""
|
|
|
|
import random
|
2021-10-31 19:49:19 +00:00
|
|
|
import logging
|
2019-12-10 03:38:49 +00:00
|
|
|
|
2021-11-19 18:35:24 +00:00
|
|
|
from typing import Dict, List, Optional, Any
|
2021-10-31 19:49:19 +00:00
|
|
|
|
2021-11-11 22:05:23 +00:00
|
|
|
from ..common.flag import flags
|
2021-11-14 21:47:12 +00:00
|
|
|
|
|
|
|
from ..http import Url, httpMethods
|
|
|
|
from ..http.parser import HttpParser
|
2021-10-31 19:49:19 +00:00
|
|
|
from ..http.exception import HttpProtocolException
|
2019-12-10 03:38:49 +00:00
|
|
|
from ..http.proxy import HttpProxyBasePlugin
|
2021-11-14 21:47:12 +00:00
|
|
|
|
2021-11-19 18:35:24 +00:00
|
|
|
from ..core.base import TcpUpstreamConnectionHandler
|
2021-10-31 19:49:19 +00:00
|
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Access log template used for requests whose method is not CONNECT.
# Placeholders are filled from the access log context via ``str.format_map``.
DEFAULT_HTTP_ACCESS_LOG_FORMAT = (
    '{client_ip}:{client_port} - '
    '{request_method} {server_host}:{server_port}{request_path} -> '
    '{upstream_proxy_host}:{upstream_proxy_port} - '
    '{response_code} {response_reason} - {response_bytes} bytes - '
    '{connection_time_ms} ms'
)

# Access log template used for CONNECT requests (and when the request method
# is unknown); it carries no request path or response status fields.
DEFAULT_HTTPS_ACCESS_LOG_FORMAT = (
    '{client_ip}:{client_port} - '
    '{request_method} {server_host}:{server_port} -> '
    '{upstream_proxy_host}:{upstream_proxy_port} - '
    '{response_bytes} bytes - {connection_time_ms} ms'
)
|
2019-12-10 03:38:49 +00:00
|
|
|
|
2021-11-11 22:05:23 +00:00
|
|
|
# To exercise the pool locally, run two separate instances of proxy.py
# on ports 9000 and 9001, BUT WITHOUT the ProxyPool plugin enabled on them,
# to avoid infinite loops.
DEFAULT_PROXY_POOL: List[str] = [
    # The instance running ProxyPoolPlugin itself may be listed too;
    # for that endpoint the ProxyPool plugin acts as a no-op.
    # 'localhost:8899',
    #
    # Remote proxies
    # 'localhost:9000',
    # 'localhost:9001',
]

# Register the ``--proxy-pool`` flag. It may be repeated on the command line;
# with ``action='append'`` and ``nargs=1`` each occurrence is collected as a
# one-element list.
flags.add_argument(
    '--proxy-pool',
    help='List of upstream proxies to use in the pool',
    default=DEFAULT_PROXY_POOL,
    nargs=1,
    action='append',
)
|
|
|
|
|
2019-12-10 03:38:49 +00:00
|
|
|
|
2021-11-19 18:35:24 +00:00
|
|
|
class ProxyPoolPlugin(TcpUpstreamConnectionHandler, HttpProxyBasePlugin):
    """Proxy pool plugin simply acts as a proxy adapter for proxy.py itself.

    Imagine this plugin as setting up proxy settings for proxy.py instance itself.
    All incoming client requests are proxied to configured upstream proxies.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        # Cached ``[host, port, path, method]`` of the client request. Filled
        # in by ``handle_client_request`` and read back while overriding the
        # access log.
        self.request_host_port_path_method: List[Any] = [
            None, None, None, None,
        ]

    def handle_upstream_data(self, raw: memoryview) -> None:
        """Forward ``raw`` to the client by queueing it on ``self.client``.

        NOTE(review): presumably invoked by ``TcpUpstreamConnectionHandler``
        with data read from the upstream proxy -- confirm against that class.
        """
        self.client.queue(raw)

    def before_upstream_connection(
            self, request: HttpParser,
    ) -> Optional[HttpParser]:
        """Connect to a randomly chosen upstream proxy from the pool.

        Avoids establishing the default connection to upstream server
        by returning ``None`` once the connection to the chosen upstream
        proxy has been established.

        Returns ``request`` unchanged when the chosen endpoint is
        ``localhost:8899``; no upstream proxy connection is created then.

        Raises:
            HttpProtocolException: when the pool is empty, when the chosen
                entry is not of the form ``host:port``, or when connecting to
                the chosen upstream proxy times out or is refused.
        """
        # TODO(abhinavsingh): Ideally connection to upstream proxy endpoints
        # must be bootstrapped within it's own re-usable and gc'd pool, to avoid establishing
        # a fresh upstream proxy connection for each client request.
        #
        # See :class:`~proxy.core.connection.pool.ConnectionPool` which is a work
        # in progress for SSL cache handling.
        pool = self.flags.proxy_pool
        if not pool:
            # ``random.choice`` raises a bare IndexError on an empty sequence;
            # fail the same explicit way as other upstream proxy problems.
            logger.warning('No upstream proxies configured in the proxy pool')
            raise HttpProtocolException()
        # Implement your own logic here e.g. round-robin, least connection etc.
        entry = random.choice(pool)
        # ``--proxy-pool`` is registered with ``action='append'`` and
        # ``nargs=1``, so command line values are one-element lists, whereas
        # DEFAULT_PROXY_POOL is declared as a list of plain strings.
        # Accept both shapes instead of indexing into a string.
        if not isinstance(entry, str):
            entry = entry[0]
        endpoint = entry.split(':', 1)
        if len(endpoint) != 2:
            logger.warning(
                'Invalid upstream proxy endpoint %s, expected host:port',
                entry,
            )
            raise HttpProtocolException()
        host, port = endpoint
        if host == 'localhost' and port == '8899':
            return request
        logger.debug('Using endpoint: %s:%s', host, port)
        self.initialize_upstream(host, int(port))
        # ``initialize_upstream`` is expected to have set ``self.upstream``.
        assert self.upstream
        try:
            self.upstream.connect()
        except TimeoutError:
            logger.info(
                'Timed out connecting to upstream proxy %s:%s',
                host, port,
            )
            raise HttpProtocolException()
        except ConnectionRefusedError:
            # TODO(abhinavsingh): Try another choice, when all (or max configured) choices have
            # exhausted, retry for configured number of times before giving up.
            #
            # Failing upstream proxies, must be removed from the pool temporarily.
            # A periodic health check must put them back in the pool. This can be achieved
            # using a datastructure without having to spawn separate thread/process for health
            # check.
            logger.info(
                'Connection refused by upstream proxy %s:%s',
                host, port,
            )
            raise HttpProtocolException()
        logger.debug(
            'Established connection to upstream proxy %s:%s',
            host, port,
        )
        return None

    def handle_client_request(
            self, request: HttpParser,
    ) -> Optional[HttpParser]:
        """Only invoked once after client original proxy request has been received completely.

        When an upstream proxy connection exists, caches the request's
        host, port, path and method for the access log and queues the
        request (built with ``for_proxy=True``) to the upstream proxy.
        Always returns ``request``.
        """
        if not self.upstream:
            return request
        # For log sanity (i.e. to avoid None:None), expose upstream host:port from headers
        host, port = None, None
        # Browser or applications may sometime send
        #
        # "CONNECT / HTTP/1.0\r\n\r\n"
        #
        # for proxy keep alive checks.
        if request.has_header(b'host'):
            url = Url.from_bytes(request.header(b'host'))
            assert url.hostname
            host, port = url.hostname.decode('utf-8'), url.port
            # Fall back to the scheme's default port when none was given.
            port = port if port else (
                443 if request.is_https_tunnel else 80
            )
        path = None if not request.path else request.path.decode()
        self.request_host_port_path_method = [
            host, port, path, request.method,
        ]
        # Queue original request to upstream proxy
        self.upstream.queue(memoryview(request.build(for_proxy=True)))
        return request

    def handle_client_data(self, raw: memoryview) -> Optional[memoryview]:
        """Only invoked when before_upstream_connection returns None.

        Queues ``raw`` to the upstream proxy and returns it unchanged.
        """
        # Queue data to the proxy endpoint
        assert self.upstream
        self.upstream.queue(raw)
        return raw

    def handle_upstream_chunk(self, chunk: memoryview) -> memoryview:
        """Return ``chunk`` unchanged when no upstream proxy connection exists.

        When an upstream proxy connection exists this hook is not expected to
        be called, since the default upstream connection was never
        established; an ``Exception`` is raised in that case.
        """
        if not self.upstream:
            return chunk
        raise Exception("This should have never been called")

    def on_upstream_connection_close(self) -> None:
        """Called when client connection has been closed.

        Closes the upstream proxy connection if it is still open and then
        drops the reference to it.
        """
        if self.upstream and not self.upstream.closed:
            logger.debug('Closing upstream proxy connection')
            self.upstream.close()
            self.upstream = None

    def on_access_log(self, context: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Emit this plugin's own access log line when an upstream proxy is in use.

        Returns ``context`` untouched when there is no upstream proxy
        connection. Otherwise augments ``context`` with the upstream proxy
        address and the cached request details, logs it through
        ``access_log`` and returns ``None``.

        NOTE(review): returning ``None`` presumably tells the caller to skip
        its default access log -- confirm against ``HttpProxyBasePlugin``.
        """
        if not self.upstream:
            return context
        addr, port = self.upstream.addr[0], self.upstream.addr[1]
        context.update({
            'upstream_proxy_host': addr,
            'upstream_proxy_port': port,
            'server_host': self.request_host_port_path_method[0],
            'server_port': self.request_host_port_path_method[1],
            'request_path': self.request_host_port_path_method[2],
            # ``total_size`` is not defined in this class; presumably
            # maintained by ``TcpUpstreamConnectionHandler`` -- TODO confirm.
            'response_bytes': self.total_size,
        })
        self.access_log(context)
        return None

    def access_log(self, log_attrs: Dict[str, Any]) -> None:
        """Log ``log_attrs`` at INFO level using the matching access log format.

        The HTTP format is used when a request method was cached and it is
        not CONNECT; otherwise the HTTPS format is used.
        """
        access_log_format = DEFAULT_HTTPS_ACCESS_LOG_FORMAT
        request_method = self.request_host_port_path_method[3]
        if request_method and request_method != httpMethods.CONNECT:
            access_log_format = DEFAULT_HTTP_ACCESS_LOG_FORMAT
        logger.info(access_log_format.format_map(log_attrs))
|