v3.4.0 (#638)
* Build docker from 3.10-alpine
* Bump version to 3.4.0
* Add instructions for how to run dashboard
* Order of menu
* Override dashboard png path until submitted
* Add some doc string for top-level Proxy class. Also some TODOs and warnings regarding PID file overwrite
* Allow HttpProxyBasePlugin implementations to register custom descriptors for read/write events
* Move hardcoded adblock regex into a json config. Update upstream filter to block facebook, not google
* ProxyPoolPlugin and ReverseProxyPlugin must now be updated to use get/read/write descriptor APIs
* Add get/read/write descriptor API for HttpWebServerBasePlugin too
* Surface actual listening port via flags.port
parent 3d8bf056fc
commit 7448c44cc6

Dashboard.png: binary file not shown (after: 1.0 MiB).

Dockerfile

@ -1,4 +1,4 @@
-FROM python:3.8-alpine as base
+FROM python:3.10-alpine as base
FROM base as builder

COPY requirements.txt /app/

README.md

@ -23,8 +23,6 @@
[![Python 3.x](https://img.shields.io/static/v1?label=Python&message=3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10&color=blue)](https://www.python.org/)
[![Checked with mypy](https://img.shields.io/static/v1?label=MyPy&message=checked&color=blue)](http://mypy-lang.org/)

[![Become a Backer](https://opencollective.com/proxypy/tiers/backer.svg?avatarHeight=72)](https://opencollective.com/proxypy)

# Table of Contents

- [Features](#features)
@ -94,6 +92,8 @@
- [Public Key Infrastructure](#pki)
- [API Usage](#api-usage)
- [CLI Usage](#cli-usage)
+- [Run Dashboard](#run-dashboard)
+- [Inspect Traffic](#inspect-traffic)
- [Frequently Asked Questions](#frequently-asked-questions)
- [Threads vs Threadless](#threads-vs-threadless)
- [SyntaxError: invalid syntax](#syntaxerror-invalid-syntax)
@ -1537,6 +1537,45 @@ FILE
/Users/abhinav/Dev/proxy.py/proxy/__init__.py
```

# Run Dashboard

Dashboard is currently under development and not yet bundled with `pip` packages. To run dashboard, you must check out the source.

Dashboard is written in TypeScript and SCSS, so let's build it first using:

```bash
$ make dashboard
```

Now start `proxy.py` with the dashboard plugin, overriding the root directory for the static server:

```bash
$ proxy --enable-dashboard --static-server-dir dashboard/public
...[redacted]... - Loaded plugin proxy.http.server.HttpWebServerPlugin
...[redacted]... - Loaded plugin proxy.dashboard.dashboard.ProxyDashboard
...[redacted]... - Loaded plugin proxy.dashboard.inspect_traffic.InspectTrafficPlugin
...[redacted]... - Loaded plugin proxy.http.inspector.DevtoolsProtocolPlugin
...[redacted]... - Loaded plugin proxy.http.proxy.HttpProxyPlugin
...[redacted]... - Listening on ::1:8899
...[redacted]... - Core Event enabled
```

Currently, enabling the dashboard will also enable all the dashboard plugins.

Visit the dashboard:

```bash
$ open http://localhost:8899/dashboard/
```

## Inspect Traffic

Wait for the embedded `Chrome Dev Console` to load. Currently, details about all traffic flowing through `proxy.py` are pushed to the `Inspect Traffic` tab. However, received payloads are not yet integrated with the embedded dev console.

Current functionality can be verified by opening the `Dev Console` of the dashboard and inspecting the websocket connection that the dashboard establishes with the `proxy.py` server.

[![Proxy.Py Dashboard Inspect Traffic](https://raw.githubusercontent.com/abhinavsingh/proxy.py/v3.4.0/Dashboard.png)](https://github.com/abhinavsingh/proxy.py)

# Frequently Asked Questions

## Threads vs Threadless
@ -1676,7 +1715,7 @@ usage: proxy [-h] [--threadless] [--backlog BACKLOG] [--enable-events] [--hostna
             [--ca-file CA_FILE] [--ca-signing-key-file CA_SIGNING_KEY_FILE] [--cert-file CERT_FILE] [--disable-headers DISABLE_HEADERS] [--server-recvbuf-size SERVER_RECVBUF_SIZE] [--basic-auth BASIC_AUTH]
             [--cache-dir CACHE_DIR] [--static-server-dir STATIC_SERVER_DIR] [--pac-file PAC_FILE] [--pac-file-url-path PAC_FILE_URL_PATH] [--filtered-client-ips FILTERED_CLIENT_IPS]

-proxy.py v2.3.1
+proxy.py v2.4.0

options:
  -h, --help show this help message and exit
@ -8,5 +8,5 @@
:copyright: (c) 2013-present by Abhinav Singh and contributors.
:license: BSD, see LICENSE for more details.
"""
-VERSION = (2, 3, 1)
+VERSION = (2, 4, 0)
__version__ = '.'.join(map(str, VERSION[0:3]))
@ -63,11 +63,10 @@ flags.add_argument(


class AcceptorPool:
-    """AcceptorPool.
-
-    Pre-spawns worker processes to utilize all cores available on the system.
+    """AcceptorPool pre-spawns worker processes to utilize all cores available on the system.
     A server socket is initialized and dispatched over a pipe to these workers.
-    Each worker process then accepts new client connection.
+    Each worker process then concurrently accepts new client connection over
+    the initialized server socket.

     Example usage:
@ -83,7 +82,9 @@ class AcceptorPool:

    Optionally, AcceptorPool also initializes a global event queue.
    It is a multiprocess safe queue which can be used to build pubsub patterns
-    for message sharing or signaling within proxy.py.
+    for message sharing or signaling.
+
+    TODO(abhinavsingh): Decouple event queue setup & teardown into its own class.
    """

    def __init__(self, flags: argparse.Namespace,
@ -110,9 +111,10 @@ class AcceptorPool:
        self.socket.bind((str(self.flags.hostname), self.flags.port))
        self.socket.listen(self.flags.backlog)
        self.socket.setblocking(False)
-        logger.info(
-            'Listening on %s:%d' %
-            (self.flags.hostname, self.flags.port))
+        # Override flags.port to match the actual port
+        # we are listening upon. This is necessary to preserve
+        # the server port when `--port=0` is used.
+        self.flags.port = self.socket.getsockname()[1]

    def start_workers(self) -> None:
        """Start worker processes."""
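
This is the "Surface actual listening port via flags.port" change: the log line moves into main() later in this diff, and AcceptorPool instead writes the kernel-assigned port back into `flags.port`. A minimal standalone sketch of the socket behavior this relies on (plain `socket`, no proxy.py required):

```python
import socket

# Binding to port 0 asks the kernel for an ephemeral port;
# getsockname() then reveals the port actually assigned. This is
# exactly what AcceptorPool now mirrors into flags.port.
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind(('127.0.0.1', 0))
sock.listen(1)
print('listening on port', sock.getsockname()[1])
sock.close()
```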
@ -172,7 +174,6 @@ class AcceptorPool:
        logger.info('Core Event enabled')
        self.start_event_dispatcher()
        self.start_workers()

        # Send server socket to all acceptor processes.
        assert self.socket is not None
        for index in range(self.flags.num_workers):
@ -33,7 +33,13 @@ logger = logging.getLogger(__name__)


class Threadless(multiprocessing.Process):
-    """Threadless provides an event loop. Use it by implementing Threadless class.
+    """Threadless process provides an event loop.
+
+    Internally, for each client connection, an instance of `work_klass`
+    is created. Threadless will invoke necessary lifecycle of the `Work` class
+    allowing implementations to handle accepted client connections as they wish.
+
+    Note that all `Work` implementations share the same underlying event loop.

    When --threadless option is enabled, each Acceptor process also
    spawns one Threadless process. And instead of spawning new thread
@ -92,8 +98,13 @@ class Threadless(multiprocessing.Process):
    async def wait_for_tasks(
            self, tasks: Dict[int, Any]) -> None:
        for work_id in tasks:
-            # TODO: Resolving one handle_events here can block resolution of
-            # other tasks
+            # TODO: Resolving one handle_events here can block
+            # resolution of other tasks. This can happen when handle_events
+            # is slow.
+            #
+            # Instead of sequential await, a better option would be to await on
+            # list of async handle_events. This will allow all handlers to run
+            # concurrently without blocking each other.
            try:
                teardown = await asyncio.wait_for(tasks[work_id], DEFAULT_TIMEOUT)
                if teardown:
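
The rewritten TODO above suggests awaiting the `handle_events` tasks concurrently instead of one by one. A hypothetical sketch of that idea using `asyncio.gather`, not part of this commit; the `DEFAULT_TIMEOUT` value here is an assumption standing in for the constant used above:

```python
import asyncio
from typing import Any, Dict

DEFAULT_TIMEOUT = 10  # assumption: mirrors the constant referenced above

async def wait_for_tasks_concurrently(tasks: Dict[int, Any]) -> None:
    # Await all pending handle_events tasks at once, so that one slow
    # handler cannot block resolution of the others.
    results = await asyncio.gather(
        *(asyncio.wait_for(task, DEFAULT_TIMEOUT) for task in tasks.values()),
        return_exceptions=True,  # one failure should not cancel the rest
    )
    for work_id, teardown in zip(tasks, results):
        if isinstance(teardown, Exception) or teardown:
            # Mirror the sequential version: tear down this work_id here.
            pass
```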
@ -152,6 +163,7 @@ class Threadless(multiprocessing.Process):
            # until all the logic below completes.
            #
            # Invoke Threadless.handle_events
            #
            # TODO: Only send readable / writables that client originally
            # registered.
            tasks = {}
@ -65,7 +65,7 @@ flags.add_argument(
class HttpProtocolHandler(Work):
    """HTTP, HTTPS, HTTP2, WebSockets protocol handler.

-    Accepts `Client` connection object and manages HttpProtocolHandlerPlugin invocations.
+    Accepts `Client` connection and delegates to HttpProtocolHandlerPlugin.
    """

    def __init__(self, client: TcpClientConnection,
@ -86,15 +86,28 @@ class HttpProtocolHandler(Work):
        return self.flags.keyfile is not None and \
            self.flags.certfile is not None

+    def optionally_wrap_socket(
+            self, conn: socket.socket) -> Union[ssl.SSLSocket, socket.socket]:
+        """Attempts to wrap accepted client connection using provided certificates.
+
+        Shutdown and closes client connection upon error.
+        """
+        if self.encryption_enabled():
+            assert self.flags.keyfile and self.flags.certfile
+            # TODO(abhinavsingh): Insecure TLS versions must not be accepted by default
+            conn = wrap_socket(conn, self.flags.keyfile, self.flags.certfile)
+        return conn
+
    def initialize(self) -> None:
        """Optionally upgrades connection to HTTPS, set conn in non-blocking mode and initializes plugins."""
        conn = self.optionally_wrap_socket(self.client.connection)
        conn.setblocking(False)
+        # Update client connection reference if connection was wrapped
        if self.encryption_enabled():
            self.client = TcpClientConnection(conn=conn, addr=self.client.addr)
        if b'HttpProtocolHandlerPlugin' in self.flags.plugins:
            for klass in self.flags.plugins[b'HttpProtocolHandlerPlugin']:
-                instance = klass(
+                instance: HttpProtocolHandlerPlugin = klass(
                    self.uid,
                    self.flags,
                    self.client,
@ -115,7 +128,6 @@ class HttpProtocolHandler(Work):
        }
        if self.client.has_buffer():
            events[self.client.connection] |= selectors.EVENT_WRITE

-        # HttpProtocolHandlerPlugin.get_descriptors
        for plugin in self.plugins.values():
            plugin_read_desc, plugin_write_desc = plugin.get_descriptors()
@ -129,7 +141,6 @@ class HttpProtocolHandler(Work):
                events[w] = selectors.EVENT_WRITE
            else:
                events[w] |= selectors.EVENT_WRITE

        return events

    def handle_events(
@ -189,17 +200,6 @@ class HttpProtocolHandler(Work):
        logger.debug('Client connection closed')
        super().shutdown()

-    def optionally_wrap_socket(
-            self, conn: socket.socket) -> Union[ssl.SSLSocket, socket.socket]:
-        """Attempts to wrap accepted client connection using provided certificates.
-
-        Shutdown and closes client connection upon error.
-        """
-        if self.encryption_enabled():
-            assert self.flags.keyfile and self.flags.certfile
-            conn = wrap_socket(conn, self.flags.keyfile, self.flags.certfile)
-        return conn
-
    def connection_inactive_for(self) -> float:
        return time.time() - self.last_activity
@ -68,14 +68,23 @@ class HttpProtocolHandlerPlugin(ABC):
    @abstractmethod
    def get_descriptors(
            self) -> Tuple[List[socket.socket], List[socket.socket]]:
        """Implementations must return a list of descriptors that they wish to
        read from and write into."""
        return [], []  # pragma: no cover

    @abstractmethod
    def write_to_descriptors(self, w: Writables) -> bool:
        """Implementations must now write/flush data over the socket.

        Note that buffer management is in-built into the connection classes.
        Hence implementations MUST call `flush` here, to send any buffered data
        over the socket.
        """
        return False  # pragma: no cover

    @abstractmethod
    def read_from_descriptors(self, r: Readables) -> bool:
        """Implementations must now read data over the socket."""
        return False  # pragma: no cover

    @abstractmethod
@ -96,4 +105,7 @@ class HttpProtocolHandlerPlugin(ABC):

    @abstractmethod
    def on_client_connection_close(self) -> None:
        """Client connection shutdown has been received, flush has been called,
        perform any cleanup work here.
        """
        pass  # pragma: no cover
@ -8,13 +8,16 @@
:copyright: (c) 2013-present by Abhinav Singh and contributors.
:license: BSD, see LICENSE for more details.
"""
-from abc import ABC, abstractmethod
import socket
import argparse
-from typing import Optional

from uuid import UUID
+from typing import List, Optional, Tuple
+from abc import ABC, abstractmethod

from ..parser import HttpParser

+from ...common.types import Readables, Writables
from ...core.event import EventQueue
from ...core.connection import TcpClientConnection
@ -42,6 +45,36 @@ class HttpProxyBasePlugin(ABC):
        access a specific plugin by its name."""
        return self.__class__.__name__  # pragma: no cover

+    # TODO(abhinavsingh): get_descriptors, write_to_descriptors, read_from_descriptors
+    # can be placed into their own abstract class which can then be shared by
+    # HttpProxyBasePlugin, HttpWebServerBasePlugin and HttpProtocolHandlerPlugin class.
+    #
+    # Currently code has been shamelessly copied. Also these methods are not
+    # marked as abstract to avoid breaking custom plugins written by users for
+    # previous versions of proxy.py
+    #
+    # Since 3.4.0
+    #
+    # @abstractmethod
+    def get_descriptors(
+            self) -> Tuple[List[socket.socket], List[socket.socket]]:
+        return [], []  # pragma: no cover
+
+    # @abstractmethod
+    def write_to_descriptors(self, w: Writables) -> bool:
+        """Implementations must now write/flush data over the socket.
+
+        Note that buffer management is in-built into the connection classes.
+        Hence implementations MUST call `flush` here, to send any buffered data
+        over the socket.
+        """
+        return False  # pragma: no cover
+
+    # @abstractmethod
+    def read_from_descriptors(self, r: Readables) -> bool:
+        """Implementations must now read data over the socket."""
+        return False  # pragma: no cover
+
    @abstractmethod
    def before_upstream_connection(
            self, request: HttpParser) -> Optional[HttpParser]:
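
These new non-abstract hooks are what lets HttpProxyBasePlugin implementations register custom descriptors for read/write events. A hypothetical plugin sketch; the class name, the socketpair side channel, and the absolute import paths (mirroring the relative imports above) are illustrative assumptions, not part of this commit:

```python
import socket
from typing import Any, List, Optional, Tuple

from proxy.common.types import Readables
from proxy.http.parser import HttpParser
from proxy.http.proxy import HttpProxyBasePlugin


class SideChannelPlugin(HttpProxyBasePlugin):
    """Watches one extra, plugin-owned socket via the new descriptor API."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        # A private descriptor we want the core event loop to poll for us.
        self.side_channel, self.side_channel_peer = socket.socketpair()

    def get_descriptors(
            self) -> Tuple[List[socket.socket], List[socket.socket]]:
        # Register our socket for read events; nothing extra to write.
        return [self.side_channel], []

    def read_from_descriptors(self, r: Readables) -> bool:
        # The core currently invokes every plugin; ignore events that
        # are not for the descriptor we registered.
        if self.side_channel in r:
            _ = self.side_channel.recv(1024)  # react to out-of-band data
        return False

    # Pass-through implementations of the existing abstract hooks.
    def before_upstream_connection(
            self, request: HttpParser) -> Optional[HttpParser]:
        return request

    def handle_client_request(
            self, request: HttpParser) -> Optional[HttpParser]:
        return request

    def handle_upstream_chunk(self, chunk: memoryview) -> memoryview:
        return chunk

    def on_upstream_connection_close(self) -> None:
        pass
```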
@ -122,7 +122,7 @@ class HttpProxyPlugin(HttpProtocolHandlerPlugin):
        self.plugins: Dict[str, HttpProxyBasePlugin] = {}
        if b'HttpProxyBasePlugin' in self.flags.plugins:
            for klass in self.flags.plugins[b'HttpProxyBasePlugin']:
-                instance = klass(
+                instance: HttpProxyBasePlugin = klass(
                    self.uid,
                    self.flags,
                    self.client,
@ -147,10 +147,25 @@ class HttpProxyPlugin(HttpProtocolHandlerPlugin):
        if self.server and not self.server.closed and \
                self.server.has_buffer() and self.server.connection:
            w.append(self.server.connection)

+        # TODO(abhinavsingh): We need to keep a mapping of plugin and
+        # descriptors registered by them, so that within write/read blocks
+        # we can invoke the right plugin callbacks.
+        for plugin in self.plugins.values():
+            plugin_read_desc, plugin_write_desc = plugin.get_descriptors()
+            r.extend(plugin_read_desc)
+            w.extend(plugin_write_desc)

        return r, w

    def write_to_descriptors(self, w: Writables) -> bool:
-        if self.request.has_upstream_server() and \
+        if self.server and self.server.connection not in w:
+            # Currently, we just call the write/read block of each plugin. It is
+            # the plugin's responsibility to ignore this callback if the passed
+            # descriptors don't contain the descriptor it registered.
+            for plugin in self.plugins.values():
+                plugin.write_to_descriptors(w)
+        elif self.request.has_upstream_server() and \
                self.server and not self.server.closed and \
                self.server.has_buffer() and \
                self.server.connection in w:
@ -172,7 +187,13 @@ class HttpProxyPlugin(HttpProtocolHandlerPlugin):
            return False

    def read_from_descriptors(self, r: Readables) -> bool:
-        if self.request.has_upstream_server() \
+        if self.server and self.server.connection not in r:
+            # Currently, we just call the write/read block of each plugin. It is
+            # the plugin's responsibility to ignore this callback if the passed
+            # descriptors don't contain the descriptor it registered.
+            for plugin in self.plugins.values():
+                plugin.read_from_descriptors(r)
+        elif self.request.has_upstream_server() \
                and self.server \
                and not self.server.closed \
                and self.server.connection in r:
@ -8,13 +8,17 @@
:copyright: (c) 2013-present by Abhinav Singh and contributors.
:license: BSD, see LICENSE for more details.
"""
-from abc import ABC, abstractmethod
import socket
import argparse

+from abc import ABC, abstractmethod
from typing import List, Tuple
from uuid import UUID

from ..websocket import WebsocketFrame
from ..parser import HttpParser

+from ...common.types import Readables, Writables
from ...core.connection import TcpClientConnection
from ...core.event import EventQueue
@ -33,6 +37,36 @@ class HttpWebServerBasePlugin(ABC):
        self.client = client
        self.event_queue = event_queue

+    # TODO(abhinavsingh): get_descriptors, write_to_descriptors, read_from_descriptors
+    # can be placed into their own abstract class which can then be shared by
+    # HttpProxyBasePlugin, HttpWebServerBasePlugin and HttpProtocolHandlerPlugin class.
+    #
+    # Currently code has been shamelessly copied. Also these methods are not
+    # marked as abstract to avoid breaking custom plugins written by users for
+    # previous versions of proxy.py
+    #
+    # Since 3.4.0
+    #
+    # @abstractmethod
+    def get_descriptors(
+            self) -> Tuple[List[socket.socket], List[socket.socket]]:
+        return [], []  # pragma: no cover
+
+    # @abstractmethod
+    def write_to_descriptors(self, w: Writables) -> bool:
+        """Implementations must now write/flush data over the socket.
+
+        Note that buffer management is in-built into the connection classes.
+        Hence implementations MUST call `flush` here, to send any buffered data
+        over the socket.
+        """
+        return False  # pragma: no cover
+
+    # @abstractmethod
+    def read_from_descriptors(self, r: Readables) -> bool:
+        """Implementations must now read data over the socket."""
+        return False  # pragma: no cover
+
    @abstractmethod
    def routes(self) -> List[Tuple[int, str]]:
        """Return List(protocol, path) that this plugin handles."""
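
HttpWebServerBasePlugin gains the same optional descriptor hooks. For context, a minimal route-serving plugin sketch; the import paths and the `build_http_response` helper are assumed from the wider proxy.py codebase and are not introduced by this diff:

```python
from typing import List, Tuple

from proxy.common.utils import build_http_response
from proxy.http.parser import HttpParser
from proxy.http.server import HttpWebServerBasePlugin, httpProtocolTypes
from proxy.http.websocket import WebsocketFrame


class HelloWorldPlugin(HttpWebServerBasePlugin):

    def routes(self) -> List[Tuple[int, str]]:
        # (protocol, path) tuples, per the routes() contract above.
        return [(httpProtocolTypes.HTTP, r'/hello')]

    def handle_request(self, request: HttpParser) -> None:
        # Queue a response on the client connection; buffer management
        # and flushing are handled by the connection classes.
        self.client.queue(memoryview(build_http_response(
            200, reason=b'OK', body=b'hello world')))

    # Websocket hooks are part of the abstract interface; stub them out.
    def on_websocket_open(self) -> None:
        pass

    def on_websocket_message(self, frame: WebsocketFrame) -> None:
        pass

    def on_websocket_close(self) -> None:
        pass
```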
@ -76,7 +76,7 @@ class HttpWebServerPlugin(HttpProtocolHandlerPlugin):

        if b'HttpWebServerBasePlugin' in self.flags.plugins:
            for klass in self.flags.plugins[b'HttpWebServerBasePlugin']:
-                instance = klass(
+                instance: HttpWebServerBasePlugin = klass(
                    self.uid,
                    self.flags,
                    self.client,
@ -181,11 +181,16 @@ class HttpWebServerPlugin(HttpProtocolHandlerPlugin):
            self.client.queue(self.DEFAULT_404_RESPONSE)
            return True

+    # TODO(abhinavsingh): Call plugin get/read/write descriptor callbacks
+    def get_descriptors(
+            self) -> Tuple[List[socket.socket], List[socket.socket]]:
+        return [], []
+
    def write_to_descriptors(self, w: Writables) -> bool:
-        pass
+        return False

    def read_from_descriptors(self, r: Readables) -> bool:
-        pass
+        return False

    def on_client_data(self, raw: memoryview) -> Optional[memoryview]:
        if self.switched_protocol == httpProtocolTypes.WEBSOCKET:
@ -245,7 +250,3 @@ class HttpWebServerPlugin(HttpProtocolHandlerPlugin):
            text_(self.request.method),
            text_(self.request.path),
            (time.time() - self.start_time) * 1000))
-
-    def get_descriptors(
-            self) -> Tuple[List[socket.socket], List[socket.socket]]:
-        return [], []

proxy/plugin/adblock.json (new file)

@ -0,0 +1,32 @@
[{
    "regex": "tpc.googlesyndication.com/simgad/.*",
    "notes": "Google image ads"
},
{
    "regex": "tpc.googlesyndication.com/sadbundle/.*",
    "notes": "Google animated ad bundles"
},
{
    "regex": "pagead\\d+.googlesyndication.com/.*",
    "notes": "Google tracking"
},
{
    "regex": "(www){0,1}.google-analytics.com/r/collect\\?.*",
    "notes": "Google tracking"
},
{
    "regex": "(www){0,1}.facebook.com/tr/.*",
    "notes": "Facebook tracking"
},
{
    "regex": "tpc.googlesyndication.com/daca_images/simgad/.*",
    "notes": "Google image ads"
},
{
    "regex": ".*.2mdn.net/videoplayback/.*",
    "notes": "Twitch.tv video ads"
},
{
    "regex": "(www.){0,1}google.com(.*)/pagead/.*",
    "notes": "Google ads"
}]
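
This sample config replaces the plugin's hardcoded FILTER_LIST (removed below) and is handed to the proxy via the new `--filtered-url-regex-config` flag. A sketch of starting the server with it from Python; `main` lives alongside the `Proxy` class shown later in this diff (the tests below import from `proxy.proxy`), and the dotted plugin path is an assumption:

```python
from proxy.proxy import main

if __name__ == '__main__':
    # Serve until interrupted, rejecting matching URLs with 404s.
    main(input_args=[
        '--plugins', 'proxy.plugin.FilterByURLRegexPlugin',
        '--filtered-url-regex-config', 'proxy/plugin/adblock.json',
    ])
```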
@ -10,20 +10,29 @@
"""
from typing import Optional

+from ..common.utils import text_
+from ..common.flag import flags
from ..http.exception import HttpRequestRejected
from ..http.parser import HttpParser
from ..http.codes import httpStatusCodes
from ..http.proxy import HttpProxyBasePlugin


+flags.add_argument(
+    '--filtered-upstream-hosts',
+    type=str,
+    default='facebook.com,www.facebook.com',
+    help='Default: Blocks Facebook. Comma separated list of hosts to filter.'
+)


class FilterByUpstreamHostPlugin(HttpProxyBasePlugin):
    """Drop traffic by inspecting upstream host."""

-    FILTERED_DOMAINS = [b'google.com', b'www.google.com']
-
    def before_upstream_connection(
            self, request: HttpParser) -> Optional[HttpParser]:
-        if request.host in self.FILTERED_DOMAINS:
+        print(self.flags.filtered_upstream_hosts)
+        if text_(request.host) in self.flags.filtered_upstream_hosts.split(','):
            raise HttpRequestRejected(
                status_code=httpStatusCodes.I_AM_A_TEAPOT, reason=b'I\'m a tea pot',
                headers={
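
With the hardcoded FILTERED_DOMAINS gone, the blocklist is runtime-configurable. A sketch, under the same assumptions as the previous example regarding `main` and the plugin path:

```python
from proxy.proxy import main

if __name__ == '__main__':
    # Block example.com instead of the default facebook.com entries.
    main(input_args=[
        '--plugins', 'proxy.plugin.FilterByUpstreamHostPlugin',
        '--filtered-upstream-hosts', 'example.com,www.example.com',
    ])
```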
@ -8,11 +8,12 @@
:copyright: (c) 2013-present by Abhinav Singh and contributors.
:license: BSD, see LICENSE for more details.
"""

+import json
import logging

from typing import Optional, List, Dict, Any

+from ..common.flag import flags
from ..http.exception import HttpRequestRejected
from ..http.parser import HttpParser
from ..http.codes import httpStatusCodes
@ -23,6 +24,14 @@ import re

logger = logging.getLogger(__name__)

+# See adblock.json file in repository for sample example config
+flags.add_argument(
+    '--filtered-url-regex-config',
+    type=str,
+    default='',
+    help='Default: No config. Path to a JSON config file of filter rules.'
+)


class FilterByURLRegexPlugin(HttpProxyBasePlugin):
    """Drops traffic by inspecting request URL and checking
|
|||
filtering ads.
|
||||
"""
|
||||
|
||||
FILTER_LIST: List[Dict[str, Any]] = [
|
||||
{
|
||||
'regex': b'tpc.googlesyndication.com/simgad/.*',
|
||||
'status_code': httpStatusCodes.NOT_FOUND,
|
||||
'notes': 'Google image ads',
|
||||
},
|
||||
{
|
||||
'regex': b'tpc.googlesyndication.com/sadbundle/.*',
|
||||
'status_code': httpStatusCodes.NOT_FOUND,
|
||||
'notes': 'Google animated ad bundles',
|
||||
},
|
||||
{
|
||||
'regex': b'pagead\\d+.googlesyndication.com/.*',
|
||||
'status_code': httpStatusCodes.NOT_FOUND,
|
||||
'notes': 'Google tracking',
|
||||
},
|
||||
{
|
||||
'regex': b'(www){0,1}.google-analytics.com/r/collect\\?.*',
|
||||
'status_code': httpStatusCodes.NOT_FOUND,
|
||||
'notes': 'Google tracking',
|
||||
},
|
||||
{
|
||||
'regex': b'(www){0,1}.facebook.com/tr/.*',
|
||||
'status_code': httpStatusCodes.NOT_FOUND,
|
||||
'notes': 'Facebook tracking',
|
||||
},
|
||||
{
|
||||
'regex': b'tpc.googlesyndication.com/daca_images/simgad/.*',
|
||||
'status_code': httpStatusCodes.NOT_FOUND,
|
||||
'notes': 'Google image ads',
|
||||
},
|
||||
{
|
||||
'regex': b'.*.2mdn.net/videoplayback/.*',
|
||||
'status_code': httpStatusCodes.NOT_FOUND,
|
||||
'notes': 'Twitch.tv video ads',
|
||||
},
|
||||
{
|
||||
'regex': b'(www.){0,1}google.com(.*)/pagead/.*',
|
||||
'status_code': httpStatusCodes.NOT_FOUND,
|
||||
'notes': 'Google ads',
|
||||
},
|
||||
]
|
||||
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
||||
super().__init__(*args, **kwargs)
|
||||
self.filters: List[Dict[str, Any]] = []
|
||||
if self.flags.filtered_url_regex_config != '':
|
||||
with open(self.flags.filtered_url_regex_config, 'rb') as f:
|
||||
self.filters = json.load(f)
|
||||
|
||||
def before_upstream_connection(
|
||||
self, request: HttpParser) -> Optional[HttpParser]:
|
||||
|
@ -80,7 +53,6 @@ class FilterByURLRegexPlugin(HttpProxyBasePlugin):

    def handle_client_request(
            self, request: HttpParser) -> Optional[HttpParser]:

        # determine host
        request_host = None
        if request.host:
@ -96,37 +68,30 @@ class FilterByURLRegexPlugin(HttpProxyBasePlugin):
        # build URL
        url = b'%s%s' % (
            request_host,
-            request.path,
+            request.path
        )

        # check URL against list
        rule_number = 1
-        for blocked_entry in self.FILTER_LIST:
-
+        for blocked_entry in self.filters:
            # if regex matches on URL
            if re.search(text_(blocked_entry['regex']), text_(url)):

                # log that the request has been filtered
                logger.info("Blocked: %r with status_code '%r' by rule number '%r'" % (
                    text_(url),
-                    blocked_entry['status_code'],
+                    httpStatusCodes.NOT_FOUND,
                    rule_number,
                ))

                # close the connection with the status code from the filter
                # list
                raise HttpRequestRejected(
-                    status_code=blocked_entry['status_code'],
+                    status_code=httpStatusCodes.NOT_FOUND,
                    headers={b'Connection': b'close'},
                    reason=b'Blocked',
                )

                # stop looping through filter list
                break

            # increment rule number
            rule_number += 1

        return request

    def handle_upstream_chunk(self, chunk: memoryview) -> memoryview:
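
The matching logic above reduces to `re.search` over host plus path. A standalone sanity check of adblock.json entries against a sample URL; the file path is an assumption matching the tests later in this diff:

```python
import json
import re

# Load the JSON rules exactly as FilterByURLRegexPlugin.__init__ does.
with open('proxy/plugin/adblock.json', 'rb') as f:
    filters = json.load(f)

url = 'tpc.googlesyndication.com/simgad/12345'
for rule_number, entry in enumerate(filters, start=1):
    if re.search(entry['regex'], url):
        # Mirrors the plugin: the first matching rule wins and would 404.
        print('blocked by rule %d: %s' % (rule_number, entry['notes']))
        break
```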
@ -35,7 +35,8 @@ class ProxyPoolPlugin(HttpProxyBasePlugin):

    def before_upstream_connection(
            self, request: HttpParser) -> Optional[HttpParser]:
-        """Avoid upstream connection of server in the request.
+        """Avoids upstream connection to the server by returning None.

        Initializes connection to upstream proxy.
        """
        # Implement your own logic here e.g. round-robin, least connection etc.
@ -47,18 +48,19 @@ class ProxyPoolPlugin(HttpProxyBasePlugin):
            self, request: HttpParser) -> Optional[HttpParser]:
        request.path = self.rebuild_original_path(request)
        self.tunnel(request)
-        # Returning None indicates core to gracefully
+        # Returning None indicates the core to gracefully
        # flush client buffer and teardown the connection
        return None

    def handle_upstream_chunk(self, chunk: memoryview) -> memoryview:
        """Will never be called since we didn't establish an upstream connection."""
-        return chunk
+        raise Exception("This should have never been called")

    def on_upstream_connection_close(self) -> None:
        """Will never be called since we didn't establish an upstream connection."""
-        pass
+        raise Exception("This should have never been called")

+    # TODO(abhinavsingh): Upgrade to use non-blocking get/read/write API.
    def tunnel(self, request: HttpParser) -> None:
        """Send to upstream proxy, receive from upstream proxy, queue back to client."""
        assert self.conn
@ -54,6 +54,7 @@ class ReverseProxyPlugin(HttpWebServerBasePlugin):
        (httpProtocolTypes.HTTPS, ReverseProxyPlugin.REVERSE_PROXY_LOCATION)
    ]

+    # TODO(abhinavsingh): Upgrade to use non-blocking get/read/write API.
    def handle_request(self, request: HttpParser) -> None:
        upstream = random.choice(ReverseProxyPlugin.REVERSE_PROXY_PASS)
        url = urlparse.urlsplit(upstream)
@ -26,6 +26,8 @@ import inspect
from types import TracebackType
from typing import Dict, List, Optional, Generator, Any, Tuple, Type, Union, cast

+from proxy.core.acceptor.work import Work
+
from .common.utils import bytes_, text_
from .common.types import IpAddress
from .common.version import __version__
@ -122,18 +124,24 @@ flags.add_argument(


class Proxy:
-    """Context manager for controlling core AcceptorPool server lifecycle.
+    """Context manager to control core AcceptorPool server lifecycle.

-    By default this context manager starts AcceptorPool with HttpProtocolHandler
-    worker class.
+    By default, AcceptorPool is started with HttpProtocolHandler worker class
+    i.e. we are only expecting HTTP traffic to flow between clients and server.
    """

    def __init__(self, input_args: Optional[List[str]], **opts: Any) -> None:
        self.flags = Proxy.initialize(input_args, **opts)
-        self.acceptors: Optional[AcceptorPool] = None
+        self.pool: Optional[AcceptorPool] = None
+        # TODO(abhinavsingh): Allow users to override the worker class itself
+        # e.g. A clear text protocol. Or imagine a TelnetProtocolHandler instead
+        # of default HttpProtocolHandler.
+        self.work_klass: Type[Work] = HttpProtocolHandler

    def write_pid_file(self) -> None:
        if self.flags.pid_file is not None:
+            # TODO(abhinavsingh): Multiple instances of proxy.py running on
+            # same host machine will currently result in overwriting the PID file
            with open(self.flags.pid_file, 'wb') as pid_file:
                pid_file.write(bytes_(os.getpid()))
@ -142,11 +150,11 @@ class Proxy:
            os.remove(self.flags.pid_file)

    def __enter__(self) -> 'Proxy':
-        self.acceptors = AcceptorPool(
+        self.pool = AcceptorPool(
            flags=self.flags,
-            work_klass=HttpProtocolHandler
+            work_klass=self.work_klass
        )
-        self.acceptors.setup()
+        self.pool.setup()
        self.write_pid_file()
        return self
@ -155,8 +163,8 @@ class Proxy:
            exc_type: Optional[Type[BaseException]],
            exc_val: Optional[BaseException],
            exc_tb: Optional[TracebackType]) -> None:
-        assert self.acceptors
-        self.acceptors.shutdown()
+        assert self.pool
+        self.pool.shutdown()
        self.delete_pid_file()

    @staticmethod
@ -199,7 +207,7 @@ class Proxy:
                'plugins', args.plugins.split(text_(COMMA)))]
        )

-        # proxy.py currently cannot serve over HTTPS and perform TLS interception
+        # proxy.py currently cannot serve over HTTPS and also perform TLS interception
        # at the same time. Check if user is trying to enable both features
        # at the same time.
        if (args.cert_file and args.key_file) and \
@ -213,6 +221,11 @@ class Proxy:
        if args.basic_auth:
            auth_code = base64.b64encode(bytes_(args.basic_auth))

+        # https://github.com/python/mypy/issues/5865
+        #
+        # def option(t: object, key: str, default: Any) -> Any:
+        #     return cast(t, opts.get(key, default))
+
        args.plugins = plugins
        args.auth_code = cast(
            Optional[bytes],
@ -436,7 +449,10 @@ def main(
        input_args: Optional[List[str]] = None,
        **opts: Any) -> None:
    try:
-        with Proxy(input_args=input_args, **opts):
+        with Proxy(input_args=input_args, **opts) as proxy:
+            assert proxy.pool is not None
+            logger.info('Listening on %s:%d' %
+                        (proxy.pool.flags.hostname, proxy.pool.flags.port))
            # TODO: Introduce cron feature
            # https://github.com/abhinavsingh/proxy.py/issues/392
            while True:
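
Taken together, the Proxy changes make embedding straightforward: `__enter__` now exposes the pool, and AcceptorPool (see the earlier hunk) writes the real port back into `flags.port`, so `--port 0` works with ephemeral ports. A sketch:

```python
import time

from proxy.proxy import Proxy

# Embed proxy.py with an OS-assigned ephemeral port.
with Proxy(input_args=['--port', '0']) as p:
    assert p.pool is not None
    # flags.port now holds the kernel-assigned port, not 0.
    print('proxy.py listening on port', p.pool.flags.port)
    time.sleep(1)  # real code would route traffic through the proxy here
```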

setup.py
@ -10,7 +10,7 @@
"""
from setuptools import setup, find_packages

-VERSION = (2, 3, 1)
+VERSION = (2, 4, 0)
__version__ = '.'.join(map(str, VERSION[0:3]))
__description__ = '''⚡⚡⚡Fast, Lightweight, Pluggable, TLS interception capable proxy server
focused on Network monitoring, controls & Application development, testing, debugging.'''
@ -78,6 +78,7 @@ if __name__ == '__main__':
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
        'Topic :: Internet',
        'Topic :: Internet :: Proxy Servers',
        'Topic :: Internet :: WWW/HTTP',
@ -49,7 +49,7 @@ class TestAcceptorPool(unittest.TestCase):
        sock.setsockopt.assert_called_with(
            socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind.assert_called_with(
-            (str(pool.flags.hostname), pool.flags.port))
+            (str(pool.flags.hostname), 8899))
        sock.listen.assert_called_with(pool.flags.backlog)
        sock.setblocking.assert_called_with(False)
@ -15,6 +15,7 @@ import json
from urllib import parse as urlparse
from unittest import mock
from typing import cast
+from pathlib import Path

from proxy.proxy import Proxy
from proxy.core.connection import TcpClientConnection
@ -38,7 +39,11 @@ class TestHttpProxyPluginExamples(unittest.TestCase):
            mock_selector: mock.Mock) -> None:
        self.fileno = 10
        self._addr = ('127.0.0.1', 54382)
-        self.flags = Proxy.initialize()
+        adblock_json_path = Path(
+            __file__).parent.parent.parent / "proxy" / "plugin" / "adblock.json"
+        self.flags = Proxy.initialize(
+            input_args=["--filtered-url-regex-config",
+                        str(adblock_json_path)])
        self.plugin = mock.MagicMock()

        self.mock_fromfd = mock_fromfd
@ -158,9 +163,9 @@ class TestHttpProxyPluginExamples(unittest.TestCase):
    def test_filter_by_upstream_host_plugin(
            self, mock_server_conn: mock.Mock) -> None:
        request = build_http_request(
-            b'GET', b'http://google.com/',
+            b'GET', b'http://facebook.com/',
            headers={
-                b'Host': b'google.com',
+                b'Host': b'facebook.com',
            }
        )
        self._conn.recv.return_value = request