160 lines
5.8 KiB
Python
160 lines
5.8 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
proxy.py
|
|
~~~~~~~~
|
|
⚡⚡⚡ Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on
|
|
Network monitoring, controls & Application development, testing, debugging.
|
|
|
|
:copyright: (c) 2013-present by Abhinav Singh and contributors.
|
|
:license: BSD, see LICENSE for more details.
|
|
|
|
.. spelling::
|
|
|
|
http
|
|
url
|
|
"""
|
|
from typing import List, Tuple, Optional
|
|
|
|
from .exception import HttpProtocolException
|
|
from ..common.utils import text_
|
|
from ..common.constants import AT, COLON, SLASH, DEFAULT_ALLOWED_URL_SCHEMES
|
|
|
|
|
|
class Url:
|
|
"""``urllib.urlparse`` doesn't work for proxy.py, so we wrote a simple URL.
|
|
|
|
Currently, URL only implements what is necessary for HttpParser to work.
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
scheme: Optional[bytes] = None,
|
|
username: Optional[bytes] = None,
|
|
password: Optional[bytes] = None,
|
|
hostname: Optional[bytes] = None,
|
|
port: Optional[int] = None,
|
|
remainder: Optional[bytes] = None,
|
|
) -> None:
|
|
self.scheme: Optional[bytes] = scheme
|
|
self.username: Optional[bytes] = username
|
|
self.password: Optional[bytes] = password
|
|
self.hostname: Optional[bytes] = hostname
|
|
self.port: Optional[int] = port
|
|
self.remainder: Optional[bytes] = remainder
|
|
|
|
@property
|
|
def has_credentials(self) -> bool:
|
|
"""Returns true if both username and password components are present."""
|
|
return self.username is not None and self.password is not None
|
|
|
|
def __str__(self) -> str:
|
|
url = ''
|
|
if self.scheme:
|
|
url += '{0}://'.format(text_(self.scheme))
|
|
if self.hostname:
|
|
url += text_(self.hostname)
|
|
if self.port:
|
|
url += ':{0}'.format(self.port)
|
|
if self.remainder:
|
|
url += text_(self.remainder)
|
|
return url
|
|
|
|
@classmethod
|
|
def from_bytes(cls, raw: bytes, allowed_url_schemes: Optional[List[bytes]] = None) -> 'Url':
|
|
"""A URL within proxy.py core can have several styles,
|
|
because proxy.py supports both proxy and web server use cases.
|
|
|
|
Example:
|
|
For a Web server, url is like ``/`` or ``/get`` or ``/get?key=value``
|
|
For a HTTPS connect tunnel, url is like ``httpbin.org:443``
|
|
For a HTTP proxy request, url is like ``http://httpbin.org/get``
|
|
|
|
proxy.py internally never expects a https scheme in the request line.
|
|
But `Url` class provides support for parsing any scheme present in the URLs.
|
|
e.g. ftp, icap etc.
|
|
|
|
If a url with no scheme is parsed, e.g. ``//host/abc.js``, then scheme
|
|
defaults to `http`.
|
|
|
|
Further:
|
|
1) URL may contain unicode characters
|
|
2) URL may contain IPv4 and IPv6 format addresses instead of domain names
|
|
"""
|
|
# SLASH == 47, check if URL starts with single slash but not double slash
|
|
starts_with_single_slash = raw[0] == 47
|
|
starts_with_double_slash = starts_with_single_slash and \
|
|
len(raw) >= 2 and \
|
|
raw[1] == 47
|
|
if starts_with_single_slash and \
|
|
not starts_with_double_slash:
|
|
return cls(remainder=raw)
|
|
scheme = None
|
|
rest = None
|
|
if not starts_with_double_slash:
|
|
# Find scheme
|
|
parts = raw.split(b'://', 1)
|
|
if len(parts) == 2:
|
|
scheme = parts[0]
|
|
rest = parts[1]
|
|
if scheme not in (allowed_url_schemes or DEFAULT_ALLOWED_URL_SCHEMES):
|
|
raise HttpProtocolException(
|
|
'Invalid scheme received in the request line %r' % raw,
|
|
)
|
|
else:
|
|
rest = raw[len(SLASH + SLASH):]
|
|
if scheme is not None or starts_with_double_slash:
|
|
assert rest is not None
|
|
parts = rest.split(SLASH, 1)
|
|
username, password, host, port = Url._parse(parts[0])
|
|
return cls(
|
|
scheme=scheme if not starts_with_double_slash else b'http',
|
|
username=username,
|
|
password=password,
|
|
hostname=host,
|
|
port=port,
|
|
remainder=None if len(parts) == 1 else (
|
|
SLASH + parts[1]
|
|
),
|
|
)
|
|
username, password, host, port = Url._parse(raw)
|
|
return cls(username=username, password=password, hostname=host, port=port)
|
|
|
|
@staticmethod
|
|
def _parse(raw: bytes) -> Tuple[
|
|
Optional[bytes],
|
|
Optional[bytes],
|
|
bytes,
|
|
Optional[int],
|
|
]:
|
|
split_at = raw.split(AT, 1)
|
|
username, password = None, None
|
|
if len(split_at) == 2:
|
|
username, password = split_at[0].split(COLON)
|
|
parts = split_at[-1].split(COLON, 2)
|
|
num_parts = len(parts)
|
|
port: Optional[int] = None
|
|
# No port found
|
|
if num_parts == 1:
|
|
return username, password, parts[0], None
|
|
# Host and port found
|
|
if num_parts == 2:
|
|
return username, password, COLON.join(parts[:-1]), int(parts[-1])
|
|
# More than a single COLON i.e. IPv6 scenario
|
|
try:
|
|
# Try to resolve last part as an int port
|
|
last_token = parts[-1].split(COLON)
|
|
port = int(last_token[-1])
|
|
host = COLON.join(parts[:-1]) + COLON + \
|
|
COLON.join(last_token[:-1])
|
|
except ValueError:
|
|
# If unable to convert last part into port,
|
|
# treat entire data as host
|
|
host, port = raw, None
|
|
# patch up invalid ipv6 scenario
|
|
rhost = host.decode('utf-8')
|
|
if COLON.decode('utf-8') in rhost and \
|
|
rhost[0] != '[' and \
|
|
rhost[-1] != ']':
|
|
host = b'[' + host + b']'
|
|
return username, password, host, port
|