From 09da1febbd9beac5ef5650274899439f5ce10e98 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Thu, 2 Jun 2016 13:03:37 +1200 Subject: [PATCH] Shift a bunch more string-related functions to strutils --- examples/custom_contentviews.py | 5 ++-- mitmproxy/contentviews.py | 9 +++--- mitmproxy/utils.py | 38 ------------------------ netlib/strutils.py | 51 +++++++++++++++++++++++++++++++++ netlib/utils.py | 15 ---------- pathod/log.py | 5 +--- test/mitmproxy/test_utils.py | 19 ------------ test/netlib/test_strutils.py | 23 +++++++++++++++ test/netlib/test_utils.py | 4 --- 9 files changed, 82 insertions(+), 87 deletions(-) diff --git a/examples/custom_contentviews.py b/examples/custom_contentviews.py index 6cc9314c5..034f356c0 100644 --- a/examples/custom_contentviews.py +++ b/examples/custom_contentviews.py @@ -1,7 +1,8 @@ import string import lxml.html import lxml.etree -from mitmproxy import utils, contentviews +from mitmproxy import contentviews +from netlib import strutils class ViewPigLatin(contentviews.View): @@ -10,7 +11,7 @@ class ViewPigLatin(contentviews.View): content_types = ["text/html"] def __call__(self, data, **metadata): - if utils.isXML(data): + if strutils.isXML(data): parser = lxml.etree.HTMLParser( strip_cdata=True, remove_blank_text=True diff --git a/mitmproxy/contentviews.py b/mitmproxy/contentviews.py index 0ddf7c647..42061a8c4 100644 --- a/mitmproxy/contentviews.py +++ b/mitmproxy/contentviews.py @@ -37,7 +37,6 @@ from netlib import http from netlib import odict from netlib.http import url from netlib import strutils -import netlib.utils try: import pyamf @@ -130,11 +129,11 @@ class ViewAuto(View): ct = "%s/%s" % (ct[0], ct[1]) if ct in content_types_map: return content_types_map[ct][0](data, **metadata) - elif mitmproxy.utils.isXML(data): + elif strutils.isXML(data): return get("XML")(data, **metadata) if metadata.get("query"): return get("Query")(data, **metadata) - if data and mitmproxy.utils.isMostlyBin(data): + if data and strutils.isMostlyBin(data): return get("Hex")(data) if not data: return "No content", [] @@ -157,7 +156,7 @@ class ViewHex(View): @staticmethod def _format(data): - for offset, hexa, s in netlib.utils.hexdump(data): + for offset, hexa, s in strutils.hexdump(data): yield [ ("offset", offset + " "), ("text", hexa + " "), @@ -227,7 +226,7 @@ class ViewHTML(View): content_types = ["text/html"] def __call__(self, data, **metadata): - if mitmproxy.utils.isXML(data): + if strutils.isXML(data): parser = lxml.etree.HTMLParser( strip_cdata=True, remove_blank_text=True diff --git a/mitmproxy/utils.py b/mitmproxy/utils.py index 672805d01..680bc495b 100644 --- a/mitmproxy/utils.py +++ b/mitmproxy/utils.py @@ -25,32 +25,6 @@ def format_timestamp_with_milli(s): return d.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] -def isBin(s): - """ - Does this string have any non-ASCII characters? - """ - for i in s: - i = ord(i) - if i < 9 or 13 < i < 32 or 126 < i: - return True - return False - - -def isMostlyBin(s): - s = s[:100] - return sum(isBin(ch) for ch in s) / len(s) > 0.3 - - -def isXML(s): - for i in s: - if i in "\n \t": - continue - elif i == "<": - return True - else: - return False - - def pretty_json(s): try: p = json.loads(s) @@ -92,15 +66,3 @@ class LRUCache: d = self.cacheList.pop() self.cache.pop(d) return ret - - -def clean_hanging_newline(t): - """ - Many editors will silently add a newline to the final line of a - document (I'm looking at you, Vim). This function fixes this common - problem at the risk of removing a hanging newline in the rare cases - where the user actually intends it. - """ - if t and t[-1] == "\n": - return t[:-1] - return t diff --git a/netlib/strutils.py b/netlib/strutils.py index 7a62185bb..03b371f56 100644 --- a/netlib/strutils.py +++ b/netlib/strutils.py @@ -101,3 +101,54 @@ def escaped_str_to_bytes(data): # This one is difficult - we use an undocumented Python API here # as per http://stackoverflow.com/a/23151714/934719 return codecs.escape_decode(data)[0] + + +def isBin(s): + """ + Does this string have any non-ASCII characters? + """ + for i in s: + i = ord(i) + if i < 9 or 13 < i < 32 or 126 < i: + return True + return False + + +def isMostlyBin(s): + s = s[:100] + return sum(isBin(ch) for ch in s) / len(s) > 0.3 + + +def isXML(s): + for i in s: + if i in "\n \t": + continue + elif i == "<": + return True + else: + return False + + +def clean_hanging_newline(t): + """ + Many editors will silently add a newline to the final line of a + document (I'm looking at you, Vim). This function fixes this common + problem at the risk of removing a hanging newline in the rare cases + where the user actually intends it. + """ + if t and t[-1] == "\n": + return t[:-1] + return t + + +def hexdump(s): + """ + Returns: + A generator of (offset, hex, str) tuples + """ + for i in range(0, len(s), 16): + offset = "{:0=10x}".format(i).encode() + part = s[i:i + 16] + x = b" ".join("{:0=2x}".format(i).encode() for i in six.iterbytes(part)) + x = x.ljust(47) # 16*2 + 15 + yield (offset, x, clean_bin(part, False)) diff --git a/netlib/utils.py b/netlib/utils.py index 00e7e5d9b..b4b99679f 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -6,21 +6,6 @@ import inspect import six -from netlib import strutils - - -def hexdump(s): - """ - Returns: - A generator of (offset, hex, str) tuples - """ - for i in range(0, len(s), 16): - offset = "{:0=10x}".format(i).encode() - part = s[i:i + 16] - x = b" ".join("{:0=2x}".format(i).encode() for i in six.iterbytes(part)) - x = x.ljust(47) # 16*2 + 15 - yield (offset, x, strutils.clean_bin(part, False)) - def setbit(byte, offset, value): """ diff --git a/pathod/log.py b/pathod/log.py index 85006ba87..5bf55de49 100644 --- a/pathod/log.py +++ b/pathod/log.py @@ -2,9 +2,6 @@ import datetime import six -import netlib.utils -import netlib.tcp -import netlib.http from netlib import strutils TIMEFMT = '%d-%m-%y %H:%M:%S' @@ -63,7 +60,7 @@ class LogCtx(object): def dump(self, data, hexdump): if hexdump: - for line in netlib.utils.hexdump(data): + for line in strutils.hexdump(data): self("\t%s %s %s" % line) else: for i in strutils.clean_bin(data).split("\n"): diff --git a/test/mitmproxy/test_utils.py b/test/mitmproxy/test_utils.py index 2af7a3325..c01b5f2af 100644 --- a/test/mitmproxy/test_utils.py +++ b/test/mitmproxy/test_utils.py @@ -13,25 +13,6 @@ def test_format_timestamp_with_milli(): assert utils.format_timestamp_with_milli(utils.timestamp()) -def test_isBin(): - assert not utils.isBin("testing\n\r") - assert utils.isBin("testing\x01") - assert utils.isBin("testing\x0e") - assert utils.isBin("testing\x7f") - - -def test_isXml(): - assert not utils.isXML("foo") - assert utils.isXML("