diff --git a/Dashboard.png b/Dashboard.png new file mode 100644 index 00000000..b6732f64 Binary files /dev/null and b/Dashboard.png differ diff --git a/Dockerfile b/Dockerfile index d751a120..29292e22 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8-alpine as base +FROM python:3.10-alpine as base FROM base as builder COPY requirements.txt /app/ diff --git a/README.md b/README.md index e40c6761..45064925 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,6 @@ [![Python 3.x](https://img.shields.io/static/v1?label=Python&message=3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10&color=blue)](https://www.python.org/) [![Checked with mypy](https://img.shields.io/static/v1?label=MyPy&message=checked&color=blue)](http://mypy-lang.org/) -[![Become a Backer](https://opencollective.com/proxypy/tiers/backer.svg?avatarHeight=72)](https://opencollective.com/proxypy) - # Table of Contents - [Features](#features) @@ -94,6 +92,8 @@ - [Public Key Infrastructure](#pki) - [API Usage](#api-usage) - [CLI Usage](#cli-usage) +- [Run Dashboard](#run-dashboard) + - [Inspect Traffic](#inspect-traffic) - [Frequently Asked Questions](#frequently-asked-questions) - [Threads vs Threadless](#threads-vs-threadless) - [SyntaxError: invalid syntax](#syntaxerror-invalid-syntax) @@ -1537,6 +1537,45 @@ FILE /Users/abhinav/Dev/proxy.py/proxy/__init__.py ``` +# Run Dashboard + +Dashboard is currently under development and not yet bundled with `pip` packages. To run dashboard, you must checkout the source. + +Dashboard is written in Typescript and SCSS, so let's build it first using: + +```bash +$ make dashboard +``` + +Now start `proxy.py` with dashboard plugin and by overriding root directory for static server: + +```bash +$ proxy --enable-dashboard --static-server-dir dashboard/public +...[redacted]... - Loaded plugin proxy.http.server.HttpWebServerPlugin +...[redacted]... - Loaded plugin proxy.dashboard.dashboard.ProxyDashboard +...[redacted]... - Loaded plugin proxy.dashboard.inspect_traffic.InspectTrafficPlugin +...[redacted]... - Loaded plugin proxy.http.inspector.DevtoolsProtocolPlugin +...[redacted]... - Loaded plugin proxy.http.proxy.HttpProxyPlugin +...[redacted]... - Listening on ::1:8899 +...[redacted]... - Core Event enabled +``` + +Currently, enabling dashboard will also enable all the dashboard plugins. + +Visit dashboard: + +```bash +$ open http://localhost:8899/dashboard/ +``` + +## Inspect Traffic + +Wait for embedded `Chrome Dev Console` to load. Currently, detail about all traffic flowing through `proxy.py` is pushed to the `Inspect Traffic` tab. However, received payloads are not yet integrated with the embedded dev console. + +Current functionality can be verified by opening the `Dev Console` of dashboard and inspecting the websocket connection that dashboard established with the `proxy.py` server. + +[![Proxy.Py Dashboard Inspect Traffic](https://raw.githubusercontent.com/abhinavsingh/proxy.py/v3.4.0/Dashboard.png)](https://github.com/abhinavsingh/proxy.py) + # Frequently Asked Questions ## Threads vs Threadless @@ -1676,7 +1715,7 @@ usage: proxy [-h] [--threadless] [--backlog BACKLOG] [--enable-events] [--hostna [--ca-file CA_FILE] [--ca-signing-key-file CA_SIGNING_KEY_FILE] [--cert-file CERT_FILE] [--disable-headers DISABLE_HEADERS] [--server-recvbuf-size SERVER_RECVBUF_SIZE] [--basic-auth BASIC_AUTH] [--cache-dir CACHE_DIR] [--static-server-dir STATIC_SERVER_DIR] [--pac-file PAC_FILE] [--pac-file-url-path PAC_FILE_URL_PATH] [--filtered-client-ips FILTERED_CLIENT_IPS] -proxy.py v2.3.1 +proxy.py v2.4.0 options: -h, --help show this help message and exit diff --git a/proxy/common/version.py b/proxy/common/version.py index 585f1555..5635e7a7 100644 --- a/proxy/common/version.py +++ b/proxy/common/version.py @@ -8,5 +8,5 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. """ -VERSION = (2, 3, 1) +VERSION = (2, 4, 0) __version__ = '.'.join(map(str, VERSION[0:3])) diff --git a/proxy/core/acceptor/pool.py b/proxy/core/acceptor/pool.py index 65b98c72..35861976 100644 --- a/proxy/core/acceptor/pool.py +++ b/proxy/core/acceptor/pool.py @@ -63,11 +63,10 @@ flags.add_argument( class AcceptorPool: - """AcceptorPool. - - Pre-spawns worker processes to utilize all cores available on the system. + """AcceptorPool pre-spawns worker processes to utilize all cores available on the system. A server socket is initialized and dispatched over a pipe to these workers. - Each worker process then accepts new client connection. + Each worker process then concurrently accepts new client connection over + the initialized server socket. Example usage: @@ -83,7 +82,9 @@ class AcceptorPool: Optionally, AcceptorPool also initialize a global event queue. It is a multiprocess safe queue which can be used to build pubsub patterns - for message sharing or signaling within proxy.py. + for message sharing or signaling. + + TODO(abhinavsingh): Decouple event queue setup & teardown into its own class. """ def __init__(self, flags: argparse.Namespace, @@ -110,9 +111,10 @@ class AcceptorPool: self.socket.bind((str(self.flags.hostname), self.flags.port)) self.socket.listen(self.flags.backlog) self.socket.setblocking(False) - logger.info( - 'Listening on %s:%d' % - (self.flags.hostname, self.flags.port)) + # Override flags.port to match the actual port + # we are listening upon. This is necessary to preserve + # the server port when `--port=0` is used. + self.flags.port = self.socket.getsockname()[1] def start_workers(self) -> None: """Start worker processes.""" @@ -172,7 +174,6 @@ class AcceptorPool: logger.info('Core Event enabled') self.start_event_dispatcher() self.start_workers() - # Send server socket to all acceptor processes. assert self.socket is not None for index in range(self.flags.num_workers): diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 1070b72b..6cfc99e9 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -33,7 +33,13 @@ logger = logging.getLogger(__name__) class Threadless(multiprocessing.Process): - """Threadless provides an event loop. Use it by implementing Threadless class. + """Threadless process provides an event loop. + + Internally, for each client connection, an instance of `work_klass` + is created. Threadless will invoke necessary lifecycle of the `Work` class + allowing implementations to handle accepted client connections as they wish. + + Note that, all `Work` implementations share the same underlying event loop. When --threadless option is enabled, each Acceptor process also spawns one Threadless process. And instead of spawning new thread @@ -92,8 +98,13 @@ class Threadless(multiprocessing.Process): async def wait_for_tasks( self, tasks: Dict[int, Any]) -> None: for work_id in tasks: - # TODO: Resolving one handle_events here can block resolution of - # other tasks + # TODO: Resolving one handle_events here can block + # resolution of other tasks. This can happen when handle_events + # is slow. + # + # Instead of sequential await, a better option would be to await on + # list of async handle_events. This will allow all handlers to run + # concurrently without blocking each other. try: teardown = await asyncio.wait_for(tasks[work_id], DEFAULT_TIMEOUT) if teardown: @@ -152,6 +163,7 @@ class Threadless(multiprocessing.Process): # until all the logic below completes. # # Invoke Threadless.handle_events + # # TODO: Only send readable / writables that client originally # registered. tasks = {} diff --git a/proxy/http/handler.py b/proxy/http/handler.py index 083174d8..bff6f328 100644 --- a/proxy/http/handler.py +++ b/proxy/http/handler.py @@ -65,7 +65,7 @@ flags.add_argument( class HttpProtocolHandler(Work): """HTTP, HTTPS, HTTP2, WebSockets protocol handler. - Accepts `Client` connection object and manages HttpProtocolHandlerPlugin invocations. + Accepts `Client` connection and delegates to HttpProtocolHandlerPlugin. """ def __init__(self, client: TcpClientConnection, @@ -86,15 +86,28 @@ class HttpProtocolHandler(Work): return self.flags.keyfile is not None and \ self.flags.certfile is not None + def optionally_wrap_socket( + self, conn: socket.socket) -> Union[ssl.SSLSocket, socket.socket]: + """Attempts to wrap accepted client connection using provided certificates. + + Shutdown and closes client connection upon error. + """ + if self.encryption_enabled(): + assert self.flags.keyfile and self.flags.certfile + # TODO(abhinavsingh): Insecure TLS versions must not be accepted by default + conn = wrap_socket(conn, self.flags.keyfile, self.flags.certfile) + return conn + def initialize(self) -> None: """Optionally upgrades connection to HTTPS, set conn in non-blocking mode and initializes plugins.""" conn = self.optionally_wrap_socket(self.client.connection) conn.setblocking(False) + # Update client connection reference if connection was wrapped if self.encryption_enabled(): self.client = TcpClientConnection(conn=conn, addr=self.client.addr) if b'HttpProtocolHandlerPlugin' in self.flags.plugins: for klass in self.flags.plugins[b'HttpProtocolHandlerPlugin']: - instance = klass( + instance: HttpProtocolHandlerPlugin = klass( self.uid, self.flags, self.client, @@ -115,7 +128,6 @@ class HttpProtocolHandler(Work): } if self.client.has_buffer(): events[self.client.connection] |= selectors.EVENT_WRITE - # HttpProtocolHandlerPlugin.get_descriptors for plugin in self.plugins.values(): plugin_read_desc, plugin_write_desc = plugin.get_descriptors() @@ -129,7 +141,6 @@ class HttpProtocolHandler(Work): events[w] = selectors.EVENT_WRITE else: events[w] |= selectors.EVENT_WRITE - return events def handle_events( @@ -189,17 +200,6 @@ class HttpProtocolHandler(Work): logger.debug('Client connection closed') super().shutdown() - def optionally_wrap_socket( - self, conn: socket.socket) -> Union[ssl.SSLSocket, socket.socket]: - """Attempts to wrap accepted client connection using provided certificates. - - Shutdown and closes client connection upon error. - """ - if self.encryption_enabled(): - assert self.flags.keyfile and self.flags.certfile - conn = wrap_socket(conn, self.flags.keyfile, self.flags.certfile) - return conn - def connection_inactive_for(self) -> float: return time.time() - self.last_activity diff --git a/proxy/http/plugin.py b/proxy/http/plugin.py index 2e7fed45..16c046ab 100644 --- a/proxy/http/plugin.py +++ b/proxy/http/plugin.py @@ -68,14 +68,23 @@ class HttpProtocolHandlerPlugin(ABC): @abstractmethod def get_descriptors( self) -> Tuple[List[socket.socket], List[socket.socket]]: + """Implementations must return a list of descriptions that they wish to + read from and write into.""" return [], [] # pragma: no cover @abstractmethod def write_to_descriptors(self, w: Writables) -> bool: + """Implementations must now write/flush data over the socket. + + Note that buffer management is in-build into the connection classes. + Hence implementations MUST call `flush` here, to send any buffered data + over the socket. + """ return False # pragma: no cover @abstractmethod def read_from_descriptors(self, r: Readables) -> bool: + """Implementations must now read data over the socket.""" return False # pragma: no cover @abstractmethod @@ -96,4 +105,7 @@ class HttpProtocolHandlerPlugin(ABC): @abstractmethod def on_client_connection_close(self) -> None: + """Client connection shutdown has been received, flush has been called, + perform any cleanup work here. + """ pass # pragma: no cover diff --git a/proxy/http/proxy/plugin.py b/proxy/http/proxy/plugin.py index 7e5c1b0b..b439ac7f 100644 --- a/proxy/http/proxy/plugin.py +++ b/proxy/http/proxy/plugin.py @@ -8,13 +8,16 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. """ -from abc import ABC, abstractmethod +import socket import argparse -from typing import Optional + from uuid import UUID +from typing import List, Optional, Tuple +from abc import ABC, abstractmethod + from ..parser import HttpParser - +from ...common.types import Readables, Writables from ...core.event import EventQueue from ...core.connection import TcpClientConnection @@ -42,6 +45,36 @@ class HttpProxyBasePlugin(ABC): access a specific plugin by its name.""" return self.__class__.__name__ # pragma: no cover + # TODO(abhinavsingh): get_descriptors, write_to_descriptors, read_from_descriptors + # can be placed into their own abstract class which can then be shared by + # HttpProxyBasePlugin, HttpWebServerBasePlugin and HttpProtocolHandlerPlugin class. + # + # Currently code has been shamelessly copied. Also these methods are not + # marked as abstract to avoid breaking custom plugins written by users for + # previous versions of proxy.py + # + # Since 3.4.0 + # + # @abstractmethod + def get_descriptors( + self) -> Tuple[List[socket.socket], List[socket.socket]]: + return [], [] # pragma: no cover + + # @abstractmethod + def write_to_descriptors(self, w: Writables) -> bool: + """Implementations must now write/flush data over the socket. + + Note that buffer management is in-build into the connection classes. + Hence implementations MUST call `flush` here, to send any buffered data + over the socket. + """ + return False # pragma: no cover + + # @abstractmethod + def read_from_descriptors(self, r: Readables) -> bool: + """Implementations must now read data over the socket.""" + return False # pragma: no cover + @abstractmethod def before_upstream_connection( self, request: HttpParser) -> Optional[HttpParser]: diff --git a/proxy/http/proxy/server.py b/proxy/http/proxy/server.py index c5b6fc55..92a02533 100644 --- a/proxy/http/proxy/server.py +++ b/proxy/http/proxy/server.py @@ -122,7 +122,7 @@ class HttpProxyPlugin(HttpProtocolHandlerPlugin): self.plugins: Dict[str, HttpProxyBasePlugin] = {} if b'HttpProxyBasePlugin' in self.flags.plugins: for klass in self.flags.plugins[b'HttpProxyBasePlugin']: - instance = klass( + instance: HttpProxyBasePlugin = klass( self.uid, self.flags, self.client, @@ -147,10 +147,25 @@ class HttpProxyPlugin(HttpProtocolHandlerPlugin): if self.server and not self.server.closed and \ self.server.has_buffer() and self.server.connection: w.append(self.server.connection) + + # TODO(abhinavsingh): We need to keep a mapping of plugin and + # descriptors registered by them, so that within write/read blocks + # we can invoke the right plugin callbacks. + for plugin in self.plugins.values(): + plugin_read_desc, plugin_write_desc = plugin.get_descriptors() + r.extend(plugin_read_desc) + w.extend(plugin_write_desc) + return r, w def write_to_descriptors(self, w: Writables) -> bool: - if self.request.has_upstream_server() and \ + if self.server and self.server.connection not in w: + # Currently, we just call write/read block of each plugins. It is + # plugins responsibility to ignore this callback, if passed descriptors + # doesn't contain the descriptor they registered. + for plugin in self.plugins.values(): + plugin.write_to_descriptors(w) + elif self.request.has_upstream_server() and \ self.server and not self.server.closed and \ self.server.has_buffer() and \ self.server.connection in w: @@ -172,7 +187,13 @@ class HttpProxyPlugin(HttpProtocolHandlerPlugin): return False def read_from_descriptors(self, r: Readables) -> bool: - if self.request.has_upstream_server() \ + if self.server and self.server.connection not in r: + # Currently, we just call write/read block of each plugins. It is + # plugins responsibility to ignore this callback, if passed descriptors + # doesn't contain the descriptor they registered. + for plugin in self.plugins.values(): + plugin.write_to_descriptors(r) + elif self.request.has_upstream_server() \ and self.server \ and not self.server.closed \ and self.server.connection in r: diff --git a/proxy/http/server/plugin.py b/proxy/http/server/plugin.py index 3cb737ea..491219ca 100644 --- a/proxy/http/server/plugin.py +++ b/proxy/http/server/plugin.py @@ -8,13 +8,17 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. """ -from abc import ABC, abstractmethod +import socket import argparse + +from abc import ABC, abstractmethod from typing import List, Tuple from uuid import UUID + from ..websocket import WebsocketFrame from ..parser import HttpParser +from ...common.types import Readables, Writables from ...core.connection import TcpClientConnection from ...core.event import EventQueue @@ -33,6 +37,36 @@ class HttpWebServerBasePlugin(ABC): self.client = client self.event_queue = event_queue + # TODO(abhinavsingh): get_descriptors, write_to_descriptors, read_from_descriptors + # can be placed into their own abstract class which can then be shared by + # HttpProxyBasePlugin, HttpWebServerBasePlugin and HttpProtocolHandlerPlugin class. + # + # Currently code has been shamelessly copied. Also these methods are not + # marked as abstract to avoid breaking custom plugins written by users for + # previous versions of proxy.py + # + # Since 3.4.0 + # + # @abstractmethod + def get_descriptors( + self) -> Tuple[List[socket.socket], List[socket.socket]]: + return [], [] # pragma: no cover + + # @abstractmethod + def write_to_descriptors(self, w: Writables) -> bool: + """Implementations must now write/flush data over the socket. + + Note that buffer management is in-build into the connection classes. + Hence implementations MUST call `flush` here, to send any buffered data + over the socket. + """ + return False # pragma: no cover + + # @abstractmethod + def read_from_descriptors(self, r: Readables) -> bool: + """Implementations must now read data over the socket.""" + return False # pragma: no cover + @abstractmethod def routes(self) -> List[Tuple[int, str]]: """Return List(protocol, path) that this plugin handles.""" diff --git a/proxy/http/server/web.py b/proxy/http/server/web.py index 882ea7a6..313932fd 100644 --- a/proxy/http/server/web.py +++ b/proxy/http/server/web.py @@ -76,7 +76,7 @@ class HttpWebServerPlugin(HttpProtocolHandlerPlugin): if b'HttpWebServerBasePlugin' in self.flags.plugins: for klass in self.flags.plugins[b'HttpWebServerBasePlugin']: - instance = klass( + instance: HttpWebServerBasePlugin = klass( self.uid, self.flags, self.client, @@ -181,11 +181,16 @@ class HttpWebServerPlugin(HttpProtocolHandlerPlugin): self.client.queue(self.DEFAULT_404_RESPONSE) return True + # TODO(abhinavsingh): Call plugin get/read/write descriptor callbacks + def get_descriptors( + self) -> Tuple[List[socket.socket], List[socket.socket]]: + return [], [] + def write_to_descriptors(self, w: Writables) -> bool: - pass + return False def read_from_descriptors(self, r: Readables) -> bool: - pass + return False def on_client_data(self, raw: memoryview) -> Optional[memoryview]: if self.switched_protocol == httpProtocolTypes.WEBSOCKET: @@ -245,7 +250,3 @@ class HttpWebServerPlugin(HttpProtocolHandlerPlugin): text_(self.request.method), text_(self.request.path), (time.time() - self.start_time) * 1000)) - - def get_descriptors( - self) -> Tuple[List[socket.socket], List[socket.socket]]: - return [], [] diff --git a/proxy/plugin/adblock.json b/proxy/plugin/adblock.json new file mode 100644 index 00000000..3484197c --- /dev/null +++ b/proxy/plugin/adblock.json @@ -0,0 +1,32 @@ +[{ + "regex": "tpc.googlesyndication.com/simgad/.*", + "notes": "Google image ads" +}, +{ + "regex": "tpc.googlesyndication.com/sadbundle/.*", + "notes": "Google animated ad bundles" +}, +{ + "regex": "pagead\\d+.googlesyndication.com/.*", + "notes": "Google tracking" +}, +{ + "regex": "(www){0,1}.google-analytics.com/r/collect\\?.*", + "notes": "Google tracking" +}, +{ + "regex": "(www){0,1}.facebook.com/tr/.*", + "notes": "Facebook tracking" +}, +{ + "regex": "tpc.googlesyndication.com/daca_images/simgad/.*", + "notes": "Google image ads" +}, +{ + "regex": ".*.2mdn.net/videoplayback/.*", + "notes": "Twitch.tv video ads" +}, +{ + "regex": "(www.){0,1}google.com(.*)/pagead/.*", + "notes": "Google ads" +}] diff --git a/proxy/plugin/filter_by_upstream.py b/proxy/plugin/filter_by_upstream.py index a919bd15..7feabccc 100644 --- a/proxy/plugin/filter_by_upstream.py +++ b/proxy/plugin/filter_by_upstream.py @@ -10,20 +10,29 @@ """ from typing import Optional +from ..common.utils import text_ +from ..common.flag import flags from ..http.exception import HttpRequestRejected from ..http.parser import HttpParser from ..http.codes import httpStatusCodes from ..http.proxy import HttpProxyBasePlugin +flags.add_argument( + '--filtered-upstream-hosts', + type=str, + default='facebook.com,www.facebook.com', + help='Default: Blocks Facebook. Comma separated list of IPv4 and IPv6 addresses.' +) + + class FilterByUpstreamHostPlugin(HttpProxyBasePlugin): """Drop traffic by inspecting upstream host.""" - FILTERED_DOMAINS = [b'google.com', b'www.google.com'] - def before_upstream_connection( self, request: HttpParser) -> Optional[HttpParser]: - if request.host in self.FILTERED_DOMAINS: + print(self.flags.filtered_upstream_hosts) + if text_(request.host) in self.flags.filtered_upstream_hosts.split(','): raise HttpRequestRejected( status_code=httpStatusCodes.I_AM_A_TEAPOT, reason=b'I\'m a tea pot', headers={ diff --git a/proxy/plugin/filter_by_url_regex.py b/proxy/plugin/filter_by_url_regex.py index 1e9d5efb..43a40747 100644 --- a/proxy/plugin/filter_by_url_regex.py +++ b/proxy/plugin/filter_by_url_regex.py @@ -8,11 +8,12 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. """ - +import json import logging from typing import Optional, List, Dict, Any +from ..common.flag import flags from ..http.exception import HttpRequestRejected from ..http.parser import HttpParser from ..http.codes import httpStatusCodes @@ -23,6 +24,14 @@ import re logger = logging.getLogger(__name__) +# See adblock.json file in repository for sample example config +flags.add_argument( + '--filtered-url-regex-config', + type=str, + default='', + help='Default: No config. Comma separated list of IPv4 and IPv6 addresses.' +) + class FilterByURLRegexPlugin(HttpProxyBasePlugin): """Drops traffic by inspecting request URL and checking @@ -31,48 +40,12 @@ class FilterByURLRegexPlugin(HttpProxyBasePlugin): filtering ads. """ - FILTER_LIST: List[Dict[str, Any]] = [ - { - 'regex': b'tpc.googlesyndication.com/simgad/.*', - 'status_code': httpStatusCodes.NOT_FOUND, - 'notes': 'Google image ads', - }, - { - 'regex': b'tpc.googlesyndication.com/sadbundle/.*', - 'status_code': httpStatusCodes.NOT_FOUND, - 'notes': 'Google animated ad bundles', - }, - { - 'regex': b'pagead\\d+.googlesyndication.com/.*', - 'status_code': httpStatusCodes.NOT_FOUND, - 'notes': 'Google tracking', - }, - { - 'regex': b'(www){0,1}.google-analytics.com/r/collect\\?.*', - 'status_code': httpStatusCodes.NOT_FOUND, - 'notes': 'Google tracking', - }, - { - 'regex': b'(www){0,1}.facebook.com/tr/.*', - 'status_code': httpStatusCodes.NOT_FOUND, - 'notes': 'Facebook tracking', - }, - { - 'regex': b'tpc.googlesyndication.com/daca_images/simgad/.*', - 'status_code': httpStatusCodes.NOT_FOUND, - 'notes': 'Google image ads', - }, - { - 'regex': b'.*.2mdn.net/videoplayback/.*', - 'status_code': httpStatusCodes.NOT_FOUND, - 'notes': 'Twitch.tv video ads', - }, - { - 'regex': b'(www.){0,1}google.com(.*)/pagead/.*', - 'status_code': httpStatusCodes.NOT_FOUND, - 'notes': 'Google ads', - }, - ] + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.filters: List[Dict[str, Any]] = [] + if self.flags.filtered_url_regex_config != '': + with open(self.flags.filtered_url_regex_config, 'rb') as f: + self.filters = json.load(f) def before_upstream_connection( self, request: HttpParser) -> Optional[HttpParser]: @@ -80,7 +53,6 @@ class FilterByURLRegexPlugin(HttpProxyBasePlugin): def handle_client_request( self, request: HttpParser) -> Optional[HttpParser]: - # determine host request_host = None if request.host: @@ -96,37 +68,30 @@ class FilterByURLRegexPlugin(HttpProxyBasePlugin): # build URL url = b'%s%s' % ( request_host, - request.path, + request.path ) - # check URL against list rule_number = 1 - for blocked_entry in self.FILTER_LIST: - + for blocked_entry in self.filters: # if regex matches on URL if re.search(text_(blocked_entry['regex']), text_(url)): - # log that the request has been filtered logger.info("Blocked: %r with status_code '%r' by rule number '%r'" % ( text_(url), - blocked_entry['status_code'], + httpStatusCodes.NOT_FOUND, rule_number, )) - # close the connection with the status code from the filter # list raise HttpRequestRejected( - status_code=blocked_entry['status_code'], + status_code=httpStatusCodes.NOT_FOUND, headers={b'Connection': b'close'}, reason=b'Blocked', ) - # stop looping through filter list break - # increment rule number rule_number += 1 - return request def handle_upstream_chunk(self, chunk: memoryview) -> memoryview: diff --git a/proxy/plugin/proxy_pool.py b/proxy/plugin/proxy_pool.py index 3cb664c7..5b4c96cb 100644 --- a/proxy/plugin/proxy_pool.py +++ b/proxy/plugin/proxy_pool.py @@ -35,7 +35,8 @@ class ProxyPoolPlugin(HttpProxyBasePlugin): def before_upstream_connection( self, request: HttpParser) -> Optional[HttpParser]: - """Avoid upstream connection of server in the request. + """Avoids upstream connection to the server by returning None. + Initialize, connection to upstream proxy. """ # Implement your own logic here e.g. round-robin, least connection etc. @@ -47,18 +48,19 @@ class ProxyPoolPlugin(HttpProxyBasePlugin): self, request: HttpParser) -> Optional[HttpParser]: request.path = self.rebuild_original_path(request) self.tunnel(request) - # Returning None indicates core to gracefully + # Returning None indicates the core to gracefully # flush client buffer and teardown the connection return None def handle_upstream_chunk(self, chunk: memoryview) -> memoryview: """Will never be called since we didn't establish an upstream connection.""" - return chunk + raise Exception("This should have never been called") def on_upstream_connection_close(self) -> None: """Will never be called since we didn't establish an upstream connection.""" - pass + raise Exception("This should have never been called") + # TODO(abhinavsingh): Upgrade to use non-blocking get/read/write API. def tunnel(self, request: HttpParser) -> None: """Send to upstream proxy, receive from upstream proxy, queue back to client.""" assert self.conn diff --git a/proxy/plugin/reverse_proxy.py b/proxy/plugin/reverse_proxy.py index 3675b9ea..1a80d1a4 100644 --- a/proxy/plugin/reverse_proxy.py +++ b/proxy/plugin/reverse_proxy.py @@ -54,6 +54,7 @@ class ReverseProxyPlugin(HttpWebServerBasePlugin): (httpProtocolTypes.HTTPS, ReverseProxyPlugin.REVERSE_PROXY_LOCATION) ] + # TODO(abhinavsingh): Upgrade to use non-blocking get/read/write API. def handle_request(self, request: HttpParser) -> None: upstream = random.choice(ReverseProxyPlugin.REVERSE_PROXY_PASS) url = urlparse.urlsplit(upstream) diff --git a/proxy/proxy.py b/proxy/proxy.py index 023c43cb..2a5382ea 100644 --- a/proxy/proxy.py +++ b/proxy/proxy.py @@ -26,6 +26,8 @@ import inspect from types import TracebackType from typing import Dict, List, Optional, Generator, Any, Tuple, Type, Union, cast +from proxy.core.acceptor.work import Work + from .common.utils import bytes_, text_ from .common.types import IpAddress from .common.version import __version__ @@ -122,18 +124,24 @@ flags.add_argument( class Proxy: - """Context manager for controlling core AcceptorPool server lifecycle. + """Context manager to control core AcceptorPool server lifecycle. - By default this context manager starts AcceptorPool with HttpProtocolHandler - worker class. + By default, AcceptorPool is started with HttpProtocolHandler worker class + i.e. we are only expecting HTTP traffic to flow between clients and server. """ def __init__(self, input_args: Optional[List[str]], **opts: Any) -> None: self.flags = Proxy.initialize(input_args, **opts) - self.acceptors: Optional[AcceptorPool] = None + self.pool: Optional[AcceptorPool] = None + # TODO(abhinavsingh): Allow users to override the worker class itself + # e.g. A clear text protocol. Or imagine a TelnetProtocolHandler instead + # of default HttpProtocolHandler. + self.work_klass: Type[Work] = HttpProtocolHandler def write_pid_file(self) -> None: if self.flags.pid_file is not None: + # TODO(abhinavsingh): Multiple instances of proxy.py running on + # same host machine will currently result in overwriting the PID file with open(self.flags.pid_file, 'wb') as pid_file: pid_file.write(bytes_(os.getpid())) @@ -142,11 +150,11 @@ class Proxy: os.remove(self.flags.pid_file) def __enter__(self) -> 'Proxy': - self.acceptors = AcceptorPool( + self.pool = AcceptorPool( flags=self.flags, - work_klass=HttpProtocolHandler + work_klass=self.work_klass ) - self.acceptors.setup() + self.pool.setup() self.write_pid_file() return self @@ -155,8 +163,8 @@ class Proxy: exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]) -> None: - assert self.acceptors - self.acceptors.shutdown() + assert self.pool + self.pool.shutdown() self.delete_pid_file() @staticmethod @@ -199,7 +207,7 @@ class Proxy: 'plugins', args.plugins.split(text_(COMMA)))] ) - # proxy.py currently cannot serve over HTTPS and perform TLS interception + # proxy.py currently cannot serve over HTTPS and also perform TLS interception # at the same time. Check if user is trying to enable both feature # at the same time. if (args.cert_file and args.key_file) and \ @@ -213,6 +221,11 @@ class Proxy: if args.basic_auth: auth_code = base64.b64encode(bytes_(args.basic_auth)) + # https://github.com/python/mypy/issues/5865 + # + # def option(t: object, key: str, default: Any) -> Any: + # return cast(t, opts.get(key, default)) + args.plugins = plugins args.auth_code = cast( Optional[bytes], @@ -436,7 +449,10 @@ def main( input_args: Optional[List[str]] = None, **opts: Any) -> None: try: - with Proxy(input_args=input_args, **opts): + with Proxy(input_args=input_args, **opts) as proxy: + assert proxy.pool is not None + logger.info('Listening on %s:%d' % + (proxy.pool.flags.hostname, proxy.pool.flags.port)) # TODO: Introduce cron feature # https://github.com/abhinavsingh/proxy.py/issues/392 while True: diff --git a/setup.py b/setup.py index 19f6f7b1..849de5f8 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ """ from setuptools import setup, find_packages -VERSION = (2, 3, 1) +VERSION = (2, 4, 0) __version__ = '.'.join(map(str, VERSION[0:3])) __description__ = '''⚡⚡⚡Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on Network monitoring, controls & Application development, testing, debugging.''' @@ -78,6 +78,7 @@ if __name__ == '__main__': 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', 'Topic :: Internet', 'Topic :: Internet :: Proxy Servers', 'Topic :: Internet :: WWW/HTTP', diff --git a/tests/core/test_acceptor_pool.py b/tests/core/test_acceptor_pool.py index 3142e39b..6b007ea9 100644 --- a/tests/core/test_acceptor_pool.py +++ b/tests/core/test_acceptor_pool.py @@ -49,7 +49,7 @@ class TestAcceptorPool(unittest.TestCase): sock.setsockopt.assert_called_with( socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) sock.bind.assert_called_with( - (str(pool.flags.hostname), pool.flags.port)) + (str(pool.flags.hostname), 8899)) sock.listen.assert_called_with(pool.flags.backlog) sock.setblocking.assert_called_with(False) diff --git a/tests/plugin/test_http_proxy_plugins.py b/tests/plugin/test_http_proxy_plugins.py index 28e85896..cf6c351c 100644 --- a/tests/plugin/test_http_proxy_plugins.py +++ b/tests/plugin/test_http_proxy_plugins.py @@ -15,6 +15,7 @@ import json from urllib import parse as urlparse from unittest import mock from typing import cast +from pathlib import Path from proxy.proxy import Proxy from proxy.core.connection import TcpClientConnection @@ -38,7 +39,11 @@ class TestHttpProxyPluginExamples(unittest.TestCase): mock_selector: mock.Mock) -> None: self.fileno = 10 self._addr = ('127.0.0.1', 54382) - self.flags = Proxy.initialize() + adblock_json_path = Path( + __file__).parent.parent.parent / "proxy" / "plugin" / "adblock.json" + self.flags = Proxy.initialize( + input_args=["--filtered-url-regex-config", + str(adblock_json_path)]) self.plugin = mock.MagicMock() self.mock_fromfd = mock_fromfd @@ -158,9 +163,9 @@ class TestHttpProxyPluginExamples(unittest.TestCase): def test_filter_by_upstream_host_plugin( self, mock_server_conn: mock.Mock) -> None: request = build_http_request( - b'GET', b'http://google.com/', + b'GET', b'http://facebook.com/', headers={ - b'Host': b'google.com', + b'Host': b'facebook.com', } ) self._conn.recv.return_value = request