From fd838ca64ebd8e851ea3db31cae6768ac94f7534 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 7 Nov 2021 02:50:11 +0530 Subject: [PATCH] DEFAULT_CA_FILE is now certifi/cacert.pem (#691) * Add FAQ: OSError when wrapping client for TLS Interception * Silence exception log for several valid "cert verification failed" by client during tls interception * Lint checks * Move exception handling within wrap_server/wrap_client methods * Lint fixes * Use certifi/cacert.pem as default --ca-file flag value * Address tests after DEFAULT_CA_FILE change * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- README.md | 47 ++++++ proxy/common/constants.py | 9 +- proxy/core/acceptor/threadless.py | 18 ++- proxy/http/proxy/server.py | 137 ++++++++++++++---- proxy/plugin/cache/store/disk.py | 3 +- proxy/plugin/reverse_proxy.py | 9 +- proxy/proxy.py | 4 + .../http/test_http_proxy_tls_interception.py | 3 +- 8 files changed, 183 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index b906faf4..048741e3 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ - [Docker image not working on MacOS](#docker-image-not-working-on-macos) - [ValueError: filedescriptor out of range in select](#valueerror-filedescriptor-out-of-range-in-select) - [None:None in access logs](#nonenone-in-access-logs) + - [OSError when wrapping client for TLS Interception](#oserror-when-wrapping-client-for-tls-interception) - [Plugin Developer and Contributor Guide](#plugin-developer-and-contributor-guide) - [High level architecture](#high-level-architecture) - [Everything is a plugin](#everything-is-a-plugin) @@ -1721,6 +1722,52 @@ few obvious ones include: 1. Client established a connection but never completed the request. 2. A plugin returned a response prematurely, avoiding connection to upstream server. +## OSError when wrapping client for TLS Interception + +With `TLS Interception` on, you might occasionally see following exceptions: + +```console +2021-11-06 23:33:34,540 - pid:91032 [E] server.intercept:678 - OSError when wrapping client +Traceback (most recent call last): + ...[redacted]... + ...[redacted]... + ...[redacted]... +ssl.SSLError: [SSL: TLSV1_ALERT_UNKNOWN_CA] tlsv1 alert unknown ca (_ssl.c:997) +...[redacted]... - CONNECT oauth2.googleapis.com:443 - 0 bytes - 272.08 ms +``` + +Some clients can throw `TLSV1_ALERT_UNKNOWN_CA` if they cannot verify the certificate of the server +because it is signed by an unknown issuer CA. Which is the case when we are doing TLS interception. +This can be for a variety of reasons e.g. certificate pinning etc. + +Another exception you might see is `CERTIFICATE_VERIFY_FAILED`: + +```console +2021-11-06 23:36:02,002 - pid:91033 [E] handler.handle_readables:293 - Exception while receiving from client connection with reason SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:997)') +Traceback (most recent call last): + ...[redacted]... + ...[redacted]... + ...[redacted]... +ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:997) +...[redacted]... - CONNECT init.push.apple.com:443 - 0 bytes - 892.99 ms +``` + +In future, we might support serving original HTTPS content for such clients while still +performing TLS interception in the background. This will keep the clients happy without +impacting our ability to TLS intercept. Unfortunately, this feature is currently not available. + +Another example with `SSLEOFError` exception: + +```console +2021-11-06 23:46:40,446 - pid:91034 [E] server.intercept:678 - OSError when wrapping client +Traceback (most recent call last): + ...[redacted]... + ...[redacted]... + ...[redacted]... +ssl.SSLEOFError: EOF occurred in violation of protocol (_ssl.c:997) +...[redacted]... - CONNECT stock.adobe.io:443 - 0 bytes - 685.32 ms +``` + # Plugin Developer and Contributor Guide ## High level architecture diff --git a/proxy/common/constants.py b/proxy/common/constants.py index 25c6ee54..43c449be 100644 --- a/proxy/common/constants.py +++ b/proxy/common/constants.py @@ -12,6 +12,7 @@ import os import time import secrets import pathlib +import sysconfig import ipaddress from typing import List @@ -23,6 +24,10 @@ PROXY_PY_START_TIME = time.time() # /path/to/proxy.py/proxy folder PROXY_PY_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +# Path to virtualenv/lib/python3.X/site-packages +PROXY_PY_SITE_PACKAGES = sysconfig.get_path('purelib') +assert PROXY_PY_SITE_PACKAGES + CRLF = b'\r\n' COLON = b':' WHITESPACE = b' ' @@ -46,7 +51,9 @@ DEFAULT_CA_CERT_FILE = None DEFAULT_CA_KEY_FILE = None DEFAULT_CA_SIGNING_KEY_FILE = None DEFAULT_CERT_FILE = None -DEFAULT_CA_FILE = None +DEFAULT_CA_FILE = pathlib.Path( + PROXY_PY_SITE_PACKAGES, +) / 'certifi' / 'cacert.pem' DEFAULT_CLIENT_RECVBUF_SIZE = DEFAULT_BUFFER_SIZE DEFAULT_DEVTOOLS_WS_PATH = b'/devtools' DEFAULT_DISABLE_HEADERS: List[bytes] = [] diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 2e25673a..d50d311d 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -74,12 +74,22 @@ class Threadless(multiprocessing.Process): Tuple[Readables, Writables], None, None, ]: + assert self.selector is not None events: Dict[socket.socket, int] = {} for work in self.works.values(): - events.update(work.get_events()) - assert self.selector is not None - for fd in events: - self.selector.register(fd, events[fd]) + worker_events = work.get_events() + events.update(worker_events) + for fd in worker_events: + # Can throw ValueError: Invalid file descriptor: -1 + # + # Work classes must handle the exception and shutdown + # gracefully otherwise this will result in bringing down the + # entire threadless process + # + # This is only possible when work.get_events pass + # an invalid file descriptor. Example, because of bad + # exception handling within the work implementation class. + self.selector.register(fd, worker_events[fd]) ev = self.selector.select(timeout=1) readables = [] writables = [] diff --git a/proxy/http/proxy/server.py b/proxy/http/proxy/server.py index 67f79898..113fe659 100644 --- a/proxy/http/proxy/server.py +++ b/proxy/http/proxy/server.py @@ -66,9 +66,9 @@ flags.add_argument( flags.add_argument( '--ca-file', type=str, - default=DEFAULT_CA_FILE, - help='Default: None. Provide path to custom CA file for peer certificate validation. ' - 'Specially useful on MacOS.', + default=str(DEFAULT_CA_FILE), + help='Default: ' + str(DEFAULT_CA_FILE) + + '. Provide path to custom CA bundle for peer certificate verification', ) flags.add_argument( '--ca-signing-key-file', @@ -658,49 +658,122 @@ class HttpProxyPlugin(HttpProtocolHandlerPlugin): def intercept(self) -> Union[socket.socket, bool]: # Perform SSL/TLS handshake with upstream - self.wrap_server() + teardown = self.wrap_server() + if teardown: + raise HttpProtocolException( + 'Exception when wrapping server for interception', + ) # Generate certificate and perform handshake with client - try: - # wrap_client also flushes client data before wrapping - # sending to client can raise, handle expected exceptions - self.wrap_client() - except subprocess.TimeoutExpired as e: # Popen communicate timeout - logger.exception( - 'TimeoutExpired during certificate generation', exc_info=e, + # wrap_client also flushes client data before wrapping + # sending to client can raise, handle expected exceptions + teardown = self.wrap_client() + if teardown: + raise HttpProtocolException( + 'Exception when wrapping client for interception', ) - return True - except BrokenPipeError: - logger.error( - 'BrokenPipeError when wrapping client', - ) - return True - except OSError as e: - logger.exception( - 'OSError when wrapping client', exc_info=e, - ) - return True # Update all plugin connection reference # TODO(abhinavsingh): Is this required? for plugin in self.plugins.values(): plugin.client._conn = self.client.connection return self.client.connection - def wrap_server(self) -> None: + def wrap_server(self) -> bool: assert self.upstream is not None assert isinstance(self.upstream.connection, socket.socket) - self.upstream.wrap(text_(self.request.host), self.flags.ca_file) + try: + self.upstream.wrap(text_(self.request.host), self.flags.ca_file) + except ssl.SSLCertVerificationError: # Server raised certificate verification error + # When --disable-interception-on-ssl-cert-verification-error flag is on, + # we will cache such upstream hosts and avoid intercepting them for future + # requests. + logger.warning( + 'ssl.SSLCertVerificationError: ' + + 'Server raised cert verification error for upstream: {0}'.format( + self.upstream.addr[0], + ), + ) + return True + except ssl.SSLError as e: + if e.reason == 'SSLV3_ALERT_HANDSHAKE_FAILURE': + logger.warning( + '{0}: '.format(e.reason) + + 'Server raised handshake alert failure for upstream: {0}'.format( + self.upstream.addr[0], + ), + ) + else: + logger.exception( + 'SSLError when wrapping client for upstream: {0}'.format( + self.upstream.addr[0], + ), exc_info=e, + ) + return True assert isinstance(self.upstream.connection, ssl.SSLSocket) + return False - def wrap_client(self) -> None: + def wrap_client(self) -> bool: assert self.upstream is not None and self.flags.ca_signing_key_file is not None assert isinstance(self.upstream.connection, ssl.SSLSocket) - generated_cert = self.generate_upstream_certificate( - cast(Dict[str, Any], self.upstream.connection.getpeercert()), - ) - self.client.wrap(self.flags.ca_signing_key_file, generated_cert) - logger.debug( - 'TLS interception using %s', generated_cert, - ) + try: + # TODO: Perform async certificate generation + generated_cert = self.generate_upstream_certificate( + cast(Dict[str, Any], self.upstream.connection.getpeercert()), + ) + self.client.wrap(self.flags.ca_signing_key_file, generated_cert) + except subprocess.TimeoutExpired as e: # Popen communicate timeout + logger.exception( + 'TimeoutExpired during certificate generation', exc_info=e, + ) + return True + except ssl.SSLCertVerificationError: # Client raised certificate verification error + # When --disable-interception-on-ssl-cert-verification-error flag is on, + # we will cache such upstream hosts and avoid intercepting them for future + # requests. + logger.warning( + 'ssl.SSLCertVerificationError: ' + + 'Client raised cert verification error for upstream: {0}'.format( + self.upstream.addr[0], + ), + ) + return True + except ssl.SSLEOFError as e: + logger.warning( + 'ssl.SSLEOFError {0} when wrapping client for upstream: {1}'.format( + str(e), self.upstream.addr[0], + ), + ) + return True + except ssl.SSLError as e: + if e.reason in ('TLSV1_ALERT_UNKNOWN_CA', 'UNSUPPORTED_PROTOCOL'): + logger.warning( + '{0}: '.format(e.reason) + + 'Client raised cert verification error for upstream: {0}'.format( + self.upstream.addr[0], + ), + ) + else: + logger.exception( + 'OSError when wrapping client for upstream: {0}'.format( + self.upstream.addr[0], + ), exc_info=e, + ) + return True + except BrokenPipeError: + logger.error( + 'BrokenPipeError when wrapping client for upstream: {0}'.format( + self.upstream.addr[0], + ), + ) + return True + except OSError as e: + logger.exception( + 'OSError when wrapping client for upstream: {0}'.format( + self.upstream.addr[0], + ), exc_info=e, + ) + return True + logger.debug('TLS intercepting using %s', generated_cert) + return False # # Event emitter callbacks diff --git a/proxy/plugin/cache/store/disk.py b/proxy/plugin/cache/store/disk.py index f50565f8..41429809 100644 --- a/proxy/plugin/cache/store/disk.py +++ b/proxy/plugin/cache/store/disk.py @@ -27,7 +27,8 @@ flags.add_argument( '--cache-dir', type=str, default=tempfile.gettempdir(), - help='Default: A temporary directory. Flag only applicable when cache plugin is used with on-disk storage.', + help='Default: A temporary directory. ' + + 'Flag only applicable when cache plugin is used with on-disk storage.', ) diff --git a/proxy/plugin/reverse_proxy.py b/proxy/plugin/reverse_proxy.py index 19cd13e7..13f5b80c 100644 --- a/proxy/plugin/reverse_proxy.py +++ b/proxy/plugin/reverse_proxy.py @@ -12,9 +12,7 @@ import ssl import random import socket import logging -import sysconfig -from pathlib import Path from typing import List, Optional, Tuple, Any from urllib import parse as urlparse @@ -29,11 +27,6 @@ from ..http.server import HttpWebServerBasePlugin, httpProtocolTypes logger = logging.getLogger(__name__) -# We need CA bundle to verify TLS connection to upstream servers -PURE_LIB = sysconfig.get_path('purelib') -assert PURE_LIB -CACERT_PEM_PATH = Path(PURE_LIB) / 'certifi' / 'cacert.pem' - # TODO: ReverseProxyPlugin and ProxyPoolPlugin are implementing # a similar behavior. Abstract that particular logic out into its @@ -135,7 +128,7 @@ class ReverseProxyPlugin(HttpWebServerBasePlugin): self.upstream.wrap( text_( url.hostname, - ), ca_file=str(CACERT_PEM_PATH), + ), ca_file=str(self.flags.ca_file), ) self.upstream.queue(memoryview(request.build())) except ConnectionRefusedError: diff --git a/proxy/proxy.py b/proxy/proxy.py index 988dec45..6f9d2078 100644 --- a/proxy/proxy.py +++ b/proxy/proxy.py @@ -514,6 +514,10 @@ def main( # configuration etc. # # TODO: Python shell within running proxy.py environment? + # + # TODO: Pid watcher which watches for processes started + # by proxy.py core. May be alert or restart those processes + # on failure. while True: time.sleep(1) except KeyboardInterrupt: diff --git a/tests/http/test_http_proxy_tls_interception.py b/tests/http/test_http_proxy_tls_interception.py index a413a3fe..1c030685 100644 --- a/tests/http/test_http_proxy_tls_interception.py +++ b/tests/http/test_http_proxy_tls_interception.py @@ -16,6 +16,7 @@ import selectors from typing import Any from unittest import mock +from proxy.common.constants import DEFAULT_CA_FILE from proxy.core.connection import TcpClientConnection, TcpServerConnection from proxy.http.handler import HttpProtocolHandler @@ -168,7 +169,7 @@ class TestHttpProxyTlsInterception(unittest.TestCase): ) self.mock_ssl_context.assert_called_with( - ssl.Purpose.SERVER_AUTH, cafile=None, + ssl.Purpose.SERVER_AUTH, cafile=str(DEFAULT_CA_FILE), ) # self.assertEqual(self.mock_ssl_context.return_value.options, # ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 |