Memory optimizations (#189)

* Avoid persisting raw content in memory within parser, simply parse and throw-away. Addresses #187

* Clarity in test comments
This commit is contained in:
Abhinav Singh 2019-11-24 11:59:41 -08:00 committed by GitHub
parent 4fa4b99476
commit 9767010425
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 20 deletions

View File

@ -45,8 +45,7 @@ class HttpParser:
self.type: int = parser_type
self.state: int = httpParserStates.INITIALIZED
# Raw bytes as passed to parse(raw) method and its total size
self.bytes: bytes = b''
# Total size of raw bytes passed for parsing
self.total_size: int = 0
# Buffer to hold unprocessed bytes
@ -118,7 +117,7 @@ class HttpParser:
self.host, self.port = self.url.hostname, self.url.port \
if self.url.port else 80
else:
raise KeyError(b'Invalid request\n%b' % self.bytes)
raise KeyError('Invalid request. Method: %r, Url: %r' % (self.method, self.url))
self.path = self.build_url()
def is_chunked_encoded(self) -> bool:
@ -129,13 +128,11 @@ class HttpParser:
"""Parses Http request out of raw bytes.
Check HttpParser state after parse has successfully returned."""
self.bytes += raw
self.total_size += len(raw)
# Prepend past buffer
raw = self.buffer + raw
self.buffer = b''
# TODO(abhinavsingh): Someday clean this up.
more = True if len(raw) > 0 else False
while more and self.state != httpParserStates.COMPLETE:
if self.state in (
@ -182,21 +179,25 @@ class HttpParser:
else:
self.process_header(line)
# When connect request is received without a following host header
# See
# `TestHttpParser.test_connect_request_without_host_header_request_parse`
# for details
# TODO(abhinavsingh): Can these be generalized instead of per-case handling?
# When server sends a response line without any header or body e.g.
# HTTP/1.1 200 Connection established\r\n\r\n
if self.state == httpParserStates.LINE_RCVD and \
self.type == httpParserTypes.RESPONSE_PARSER and \
raw == CRLF:
self.state = httpParserStates.COMPLETE
# When connect request is received without a following host header
# See
# `TestHttpParser.test_connect_request_without_host_header_request_parse`
# for details
#
# When raw request has ended with \r\n\r\n and no more http headers are expected
# See `TestHttpParser.test_request_parse_without_content_length` and
# `TestHttpParser.test_response_parse_without_content_length` for details
elif self.state == httpParserStates.HEADERS_COMPLETE and \
self.type == httpParserTypes.REQUEST_PARSER and \
self.method != httpMethods.POST and \
self.bytes.endswith(CRLF * 2):
raw == b'':
self.state = httpParserStates.COMPLETE
elif self.state == httpParserStates.HEADERS_COMPLETE and \
self.type == httpParserTypes.REQUEST_PARSER and \
@ -205,7 +206,7 @@ class HttpParser:
(b'content-length' not in self.headers or
(b'content-length' in self.headers and
int(self.headers[b'content-length'][1]) == 0)) and \
self.bytes.endswith(CRLF * 2):
raw == b'':
self.state = httpParserStates.COMPLETE
return len(raw) > 0, raw

View File

@ -322,20 +322,16 @@ class TestHttpProtocolHandler(unittest.TestCase):
self._conn.send.call_args[0][0],
HttpProxyPlugin.PROXY_TUNNEL_ESTABLISHED_RESPONSE_PKT)
self._conn.recv.return_value = CRLF.join([
b'GET / HTTP/1.1',
b'Host: localhost:%d' % self.http_server_port,
b'User-Agent: proxy.py/%s' % bytes_(__version__),
CRLF
])
self.protocol_handler.run_once()
pkt = CRLF.join([
b'GET / HTTP/1.1',
b'Host: localhost:%d' % self.http_server_port,
b'User-Agent: proxy.py/%s' % bytes_(__version__),
CRLF
])
self._conn.recv.return_value = pkt
self.protocol_handler.run_once()
server.queue.assert_called_once_with(pkt)
server.buffer_size.return_value = len(pkt)
server.flush.assert_not_called()