diff --git a/Makefile b/Makefile index 3d5ffd8e..54912559 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ lint: autopep8 --recursive --in-place --aggressive proxy.py autopep8 --recursive --in-place --aggressive tests.py autopep8 --recursive --in-place --aggressive plugin_examples.py - flake8 --ignore=E501,W504 --builtins="unicode" proxy.py + flake8 --ignore=E501,W504 proxy.py flake8 --ignore=E501,W504 tests.py container: diff --git a/README.md b/README.md index 34c33fe4..e79c7866 100644 --- a/README.md +++ b/README.md @@ -38,10 +38,12 @@ Table of Contents * [CacheResponsesPlugin](#cacheresponsesplugin) * [ManInTheMiddlePlugin](#maninthemiddleplugin) * [Plugin Ordering](#plugin-ordering) -* [Plugin Developer Guide](#plugin-developer-guide) * [End-to-End Encryption](#end-to-end-encryption) * [TLS Encryption](#tls-interception) -* [Usage](#usage) +* [Plugin Developer and Contributor Guide](#plugin-developer-and-contributor-guide) + * [Everything is a plugin](#everything-is-a-plugin) + * [proxy.py Internals](#proxypy-internals) +* [Flags](#flags) Features ======== @@ -110,7 +112,7 @@ See [plugin_examples.py](https://github.com/abhinavsingh/proxy.py/blob/develop/p All the examples below also works with `https` traffic but require additional flags and certificate generation. See [TLS Interception](#tls-interception). -### RedirectToCustomServerPlugin +## RedirectToCustomServerPlugin Redirects all incoming `http` requests to custom web server. By default, it redirects client requests to inbuilt web server, @@ -145,7 +147,7 @@ Along with the proxy request log, you must also see a http web server request lo 2019-09-24 19:09:33,603 - INFO - pid:49995 - access_log:1157 - ::1:49524 - GET localhost:8899/ - 404 NOT FOUND - 70 bytes ``` -### FilterByUpstreamHostPlugin +## FilterByUpstreamHostPlugin Drops traffic by inspecting upstream host. By default, plugin drops traffic for `google.com` and `www.google.com`. 
@@ -179,7 +181,7 @@ Traceback (most recent call last): 2019-09-24 19:21:37,897 - INFO - pid:50074 - access_log:1157 - ::1:49911 - GET None:None/ - None None - 0 bytes ``` -### CacheResponsesPlugin +## CacheResponsesPlugin Caches Upstream Server Responses. @@ -255,7 +257,7 @@ Connection: keep-alive } ``` -### ManInTheMiddlePlugin +## ManInTheMiddlePlugin Modifies upstream server responses. @@ -279,7 +281,7 @@ Hello from man in the middle Response body `Hello from man in the middle` is sent by our plugin. -### Plugin Ordering +## Plugin Ordering When using multiple plugins, depending upon plugin functionality, it might be worth considering the order in which plugins are passed @@ -294,21 +296,14 @@ requests for `google.com` and `www.google.com` and redirect other Hence, in this scenario it is important to use `FilterByUpstreamHostPlugin` before `RedirectToCustomServerPlugin`. If we enable `RedirectToCustomServerPlugin` before `FilterByUpstreamHostPlugin`, -`google` requests will also get redirected to inbuilt web server. - -Plugin Developer Guide -====================== - -TODO, meanwhile read [plugin_examples.py](https://github.com/abhinavsingh/proxy.py/blob/develop/plugin_examples.py) -code. Most of the plugin hook names are self explanatory e.g. `handle_upstream_response`. - -Also, see documentation for `HttpProxyBasePlugin` abstract class for some insights. +`google` requests will also get redirected to inbuilt web server, +instead of being dropped. End-to-End Encryption ===================== By default, `proxy.py` uses `http` protocol for communication with clients e.g. `curl`, `browser`. 
-For enabling end-to-end encrypting using `TLS` / `HTTPS` first generate certificates using: +For enabling end-to-end encryption using `tls` / `https` first generate certificates: ``` make https-certificates @@ -340,7 +335,7 @@ Verify using `curl -x https://localhost:8899 --proxy-cacert https-cert.pem https TLS Interception ================= -By default, `proxy.py` doesn't tries to decrypt `https` traffic between client and server. +By default, `proxy.py` doesn't decrypt `https` traffic between client and server. To enable TLS interception first generate CA certificates: ``` @@ -371,10 +366,61 @@ Verify using `curl -x localhost:8899 --cacert ca-cert.pem https://httpbin.org/ge } ``` -Use CA flags with [plugin examples](#plugin-examples) to make them work with -`https` traffic. +Now you can use CA flags with +[plugin examples](#plugin-examples) to make them work for `https` traffic. -Usage +Plugin Developer and Contributor Guide +====================================== + +## Everything is a plugin + +As you might have guessed by now, in `proxy.py` everything is a plugin. + +- We enabled proxy server plugins using `--plugins` flag. + All the [plugin examples](#plugin-examples) were implementing + `HttpProxyBasePlugin`. See documentation of + [HttpProxyBasePlugin](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L894-L938) + for available lifecycle hooks. Use `HttpProxyBasePlugin` to modify + behavior of http(s) proxy protocol between client and upstream server. + Example, [FilterByUpstreamHostPlugin](#filterbyupstreamhostplugin). + +- We also enabled inbuilt web server using `--enable-web-server`. + Inbuilt web server implements `HttpProtocolBasePlugin` plugin. + See documentation of [HttpProtocolBasePlugin](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L793-L850) + for available lifecycle hooks. Use `HttpProtocolBasePlugin` to add + new features for http(s) clients. 
Example, + [HttpWebServerPlugin](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L1185-L1260). + +- There is also a `--disable-http-proxy` flag. It disables the inbuilt proxy server. + Use this flag with `--enable-web-server` flag to run `proxy.py` as a programmable + http(s) server. [HttpProxyPlugin](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L941-L1182) + also implements `HttpProtocolBasePlugin`. + +## proxy.py Internals + +- [HttpProtocolHandler](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L1263-L1440) +thread is started with the accepted [TcpClientConnection](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L230-L237). +`HttpProtocolHandler` is responsible for parsing incoming client request and invoking +`HttpProtocolBasePlugin` lifecycle hooks. + +- `HttpProxyPlugin` which implements `HttpProtocolBasePlugin` also has its own plugin +mechanism. Its responsibility is to establish connection between client and +upstream [TcpServerConnection](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L204-L227) +and invoke `HttpProxyBasePlugin` lifecycle hooks. + +- `HttpProtocolHandler` threads are started by [Worker](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L424-L472) + processes. + +- `--num-workers` `Worker` processes are started by + [MultiCoreRequestDispatcher](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L368-L421) + on start-up. `Worker` processes receive `TcpClientConnection` over a pipe from `MultiCoreRequestDispatcher`. + +- `MultiCoreRequestDispatcher` implements [TcpServer](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L240-L302) + abstract class. 
`TcpServer` accepts `TcpClientConnection`. `MultiCoreRequestDispatcher` + ensures full utilization of available CPU cores, for which it dispatches + accepted `TcpClientConnection` to `Worker` processes in a round-robin fashion. + +Flags ===== ``` diff --git a/plugin_examples.py b/plugin_examples.py index a2643453..24e34cfd 100644 --- a/plugin_examples.py +++ b/plugin_examples.py @@ -20,10 +20,6 @@ class RedirectToCustomServerPlugin(proxy.HttpProxyBasePlugin): UPSTREAM_SERVER = b'http://localhost:8899' - def __init__(self, config: proxy.HttpProtocolConfig, client: proxy.TcpClientConnection, - request: proxy.HttpParser) -> None: - super().__init__(config, client, request) - def before_upstream_connection(self) -> None: # Redirect all non-https requests to inbuilt WebServer. if self.request.method != b'CONNECT': @@ -45,13 +41,10 @@ class FilterByUpstreamHostPlugin(proxy.HttpProxyBasePlugin): FILTERED_DOMAINS = [b'google.com', b'www.google.com'] - def __init__(self, config: proxy.HttpProtocolConfig, client: proxy.TcpClientConnection, - request: proxy.HttpParser) -> None: - super().__init__(config, client, request) - def before_upstream_connection(self) -> None: if self.request.host in self.FILTERED_DOMAINS: - raise proxy.HttpRequestRejected(status_code=418, reason=b'I\'m a tea pot') + raise proxy.HttpRequestRejected( + status_code=418, reason=b'I\'m a tea pot') def on_upstream_connection(self) -> None: pass diff --git a/proxy.py b/proxy.py index 19ba0fdc..72a8c7a1 100755 --- a/proxy.py +++ b/proxy.py @@ -444,13 +444,17 @@ class Worker(multiprocessing.Process): fileno = recv_handle(self.work_queue) conn = socket.fromfd( fileno, family=self.config.family, type=socket.SOCK_STREAM) - # TODO(abhinavsingh): Move handshake logic within HttpProtocolHandler. + # TODO(abhinavsingh): Move handshake logic within + # HttpProtocolHandler. 
if self.config.certfile and self.config.keyfile: try: - ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) + ctx = ssl.create_default_context( + ssl.Purpose.CLIENT_AUTH) ctx.options |= ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 ctx.verify_mode = ssl.CERT_NONE - ctx.load_cert_chain(certfile=self.config.certfile, keyfile=self.config.keyfile) + ctx.load_cert_chain( + certfile=self.config.certfile, + keyfile=self.config.keyfile) conn = ctx.wrap_socket(conn, server_side=True) except OSError as e: logger.exception( @@ -565,7 +569,7 @@ class HttpParser: def is_chunked_encoded_response(self) -> bool: return self.type == httpParserTypes.RESPONSE_PARSER and b'transfer-encoding' in self.headers and \ - self.headers[b'transfer-encoding'][1].lower() == b'chunked' + self.headers[b'transfer-encoding'][1].lower() == b'chunked' def parse(self, raw: bytes) -> None: """Parses Http request out of raw bytes. @@ -592,8 +596,8 @@ class HttpParser: self.state = httpParserStates.RCVING_BODY self.body += raw if self.body and len( - self.body) >= int( - self.headers[b'content-length'][1]): + self.body) >= int( + self.headers[b'content-length'][1]): self.state = httpParserStates.COMPLETE elif self.is_chunked_encoded_response(): if not self.chunk_parser: @@ -1049,7 +1053,8 @@ class HttpProxyPlugin(HttpProtocolBasePlugin): if not os.path.isfile(cert_file_path): logger.debug('Generating certificates %s', cert_file_path) # TODO: Use ssl.get_server_certificate to populate generated certificate metadata - # Currently we only set CN=example.org on the generated certificates. + # Currently we only set CN=example.org on the generated + # certificates. 
gen_cert = subprocess.Popen( ['/usr/bin/openssl', 'req', '-new', '-key', self.config.ca_signing_key_file, '-subj', '/CN=%s' % text_(self.request.host)], @@ -1104,9 +1109,12 @@ class HttpProxyPlugin(HttpProtocolBasePlugin): keyfile=self.config.ca_signing_key_file, certfile=generated_cert) # Wrap our connection to upstream server connection - ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH) + ctx = ssl.create_default_context( + ssl.Purpose.SERVER_AUTH) ctx.options |= ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 - self.server.conn = ctx.wrap_socket(self.server.conn, server_hostname=text_(self.request.host)) + self.server.conn = ctx.wrap_socket( + self.server.conn, server_hostname=text_( + self.request.host)) logger.info( 'Intercepting traffic using %s', generated_cert) return self.client.conn @@ -1333,18 +1341,21 @@ class HttpProtocolHandler(threading.Thread): logger.debug( 'Updated client conn to %s', upgraded_sock) self.client.conn = upgraded_sock - # Update self.client.conn references for all plugins + # Update self.client.conn references for all + # plugins for plugin_ in self.plugins.values(): if plugin_ != plugin: plugin_.client.conn = upgraded_sock - logger.debug('Upgraded client conn for plugin %s', str(plugin_)) + logger.debug( + 'Upgraded client conn for plugin %s', str(plugin_)) elif isinstance(upgraded_sock, bool) and upgraded_sock: return True except Exception as e: if e.__class__.__name__ in ( ProxyAuthenticationFailed.__name__, ProxyConnectionFailed.__name__, HttpRequestRejected.__name__): - logger.exception('HttpProtocolException type raised', exc_info=e) + logger.exception( + 'HttpProtocolException type raised', exc_info=e) response = e.response(self.request) # type: ignore if response: self.client.queue(response) diff --git a/tests.py b/tests.py index 70fac039..8412cfe0 100644 --- a/tests.py +++ b/tests.py @@ -804,7 +804,9 @@ class TestHttpProtocolHandler(unittest.TestCase): proxy.CRLF ])) self.proxy.run_once() - 
self.assertEqual(self.proxy.client.conn.received, proxy.ProxyConnectionFailed.RESPONSE_PKT) + self.assertEqual( + self.proxy.client.conn.received, + proxy.ProxyConnectionFailed.RESPONSE_PKT) @mock.patch('select.select') def test_proxy_authentication_failed(self, mock_select): @@ -823,7 +825,9 @@ class TestHttpProtocolHandler(unittest.TestCase): proxy.CRLF ])) self.proxy.run_once() - self.assertEqual(self.proxy.client.conn.received, proxy.ProxyAuthenticationFailed.RESPONSE_PKT) + self.assertEqual( + self.proxy.client.conn.received, + proxy.ProxyAuthenticationFailed.RESPONSE_PKT) @mock.patch('select.select') @mock.patch('proxy.TcpServerConnection')