From 8972250167cfd55dcfcb93b2d3d7b33e0546629d Mon Sep 17 00:00:00 2001 From: Henrique Date: Fri, 15 Nov 2019 13:07:12 -0500 Subject: [PATCH] Removed the custom lexer in favor of using pyparsing. --- mitmproxy/command.py | 75 ++++++--- mitmproxy/lexer.py | 99 ------------ .../tools/console/commander/commander.py | 16 +- test/mitmproxy/test_command.py | 153 ++++++++++++++---- test/mitmproxy/test_lexer.py | 75 --------- .../mitmproxy/tools/console/test_commander.py | 5 - .../tools/console/test_defaultkeys.py | 12 +- 7 files changed, 193 insertions(+), 242 deletions(-) delete mode 100644 mitmproxy/lexer.py delete mode 100644 test/mitmproxy/test_lexer.py diff --git a/mitmproxy/command.py b/mitmproxy/command.py index d9ba40550..4aa6fdb2f 100644 --- a/mitmproxy/command.py +++ b/mitmproxy/command.py @@ -3,14 +3,13 @@ """ import inspect import types -import io import typing import textwrap import functools import sys +import pyparsing from mitmproxy import exceptions -from mitmproxy import lexer import mitmproxy.types @@ -22,10 +21,6 @@ def verify_arg_signature(f: typing.Callable, args: list, kwargs: dict) -> None: raise exceptions.CommandError("command argument mismatch: %s" % v.args[0]) -def get_lexer(s): - return lexer.Lexer(s) - - def typename(t: type) -> str: """ Translates a type to an explanatory string. @@ -79,6 +74,20 @@ class Command: return "%s %s%s" % (self.path, params, ret) def prepare_args(self, args: typing.Sequence[str]) -> typing.List[typing.Any]: + + # Arguments that are just blank spaces aren't really arguments + # We need to get rid of those. If the user intended to pass a sequence + # of spaces, it would come between quotes + clean_args = [] + for a in args: + if isinstance(a, str): + if a.strip() != '': + clean_args.append(a) + else: + clean_args.append(a) + + args = clean_args + verify_arg_signature(self.func, list(args), {}) remainder: typing.Sequence[str] = [] @@ -152,24 +161,36 @@ class CommandManager(mitmproxy.types._CommandBase): """ Parse a possibly partial command. Return a sequence of ParseResults and a sequence of remainder type help items. """ - buf = io.StringIO(cmdstr) parts: typing.List[str] = [] - lex = get_lexer(buf) - while 1: - remainder = cmdstr[buf.tell():] - try: - t = lex.get_token() - except ValueError: - parts.append(remainder) - break - if not t: - break - parts.append(t) + + rex = pyparsing.QuotedString("\"", escChar='\\', unquoteResults=False) |\ + pyparsing.QuotedString("'", escChar='\\', unquoteResults=False) |\ + pyparsing.Combine(pyparsing.Literal('"') + pyparsing.Word(pyparsing.printables + " ") + pyparsing.StringEnd()) |\ + pyparsing.Word(pyparsing.printables) |\ + pyparsing.Word(' ') + + rex = rex.copy().leaveWhitespace() + + remainder = cmdstr + + for t, start, end in rex.scanString(cmdstr): + + remainder = cmdstr[end:] + parts.append(t[0]) + + if remainder != '': + parts.append(remainder) if not parts: parts = [] - elif cmdstr.endswith(" "): - parts.append("") + + # First item in parts has always to be the command + # so we remove any blank tokens from the start of it + while True: + if parts and parts[0].strip() == '': + del parts[0] + else: + break parse: typing.List[ParseResult] = [] params: typing.List[type] = [] @@ -180,10 +201,15 @@ class CommandManager(mitmproxy.types._CommandBase): if parts[i] in self.commands: params.extend(self.commands[parts[i]].paramtypes) elif params: - typ = params.pop(0) - if typ == mitmproxy.types.Cmd and params and params[0] == mitmproxy.types.Arg: - if parts[i] in self.commands: - params[:] = self.commands[parts[i]].paramtypes + if parts[i].strip() != '': + typ = params.pop(0) + if typ == mitmproxy.types.Cmd and params and params[0] == mitmproxy.types.Arg: + if parts[i] in self.commands: + params[:] = self.commands[parts[i]].paramtypes + else: + # If the token is just a bunch of spaces, then we don't + # want to count it against the arguments of the command + typ = mitmproxy.types.Unknown else: typ = mitmproxy.types.Unknown @@ -228,6 +254,7 @@ class CommandManager(mitmproxy.types._CommandBase): """ if path not in self.commands: raise exceptions.CommandError("Unknown command: %s" % path) + return self.commands[path].call(args) def execute(self, cmdstr: str): diff --git a/mitmproxy/lexer.py b/mitmproxy/lexer.py deleted file mode 100644 index f123a8384..000000000 --- a/mitmproxy/lexer.py +++ /dev/null @@ -1,99 +0,0 @@ -from enum import Enum -import io -from typing import Union, List - - -class State(Enum): - QUOTE = 1 - ESCAPE = 2 - TEXT = 3 - - -class Lexer: - - def __init__(self, text: Union[str, io.StringIO]) -> None: - self._count = 0 - self._parsed = False - - self._state = State.TEXT - self._states: List[State] = [] - self._text_pos = 0 - self._quote_start_pos = 0 - - if isinstance(text, str): - self.text = io.StringIO(text) - else: - self.text = text - - def __iter__(self): - return self - - def __next__(self): - t = self.get_token() - - if t == '': - raise StopIteration - - return t - - def get_token(self): - try: - return self.parse() - except ValueError: - raise - - def parse(self): - acc = '' - quote = '' - self._state = State.TEXT - - whitespace = "\r\n\t " - - self.text.seek(self._text_pos) - - while True: - ch = self.text.read(1) - self._text_pos += 1 - - # If this is the last char of the string, let's save the token - if ch == '' or ch is None: - break - - if self._state == State.QUOTE: - if ch == '\\': - self._states.append(self._state) - self._state = State.ESCAPE - acc += ch - elif ch == quote: - self._state = self._states.pop() - acc += ch - else: - acc += ch - - elif self._state == State.ESCAPE: - acc += ch - self._state = self._states.pop() - - elif self._state == State.TEXT: - if ch in whitespace: - if acc != '': - break - elif ch == '"' or ch == "'": - quote = ch - self._quote_start_pos = self._text_pos - self._states.append(self._state) - self._state = State.QUOTE - acc += ch - elif ch == '\\': - self._states.append(self._state) - self._state = State.ESCAPE - acc += ch - else: - acc += ch - - self._token = acc - - if self._state == State.QUOTE: - raise ValueError("No closing quotation for quote in position %d" % self._quote_start_pos) - - return self._token diff --git a/mitmproxy/tools/console/commander/commander.py b/mitmproxy/tools/console/commander/commander.py index ee31e1e9e..fa67407ec 100644 --- a/mitmproxy/tools/console/commander/commander.py +++ b/mitmproxy/tools/console/commander/commander.py @@ -52,7 +52,7 @@ CompletionState = typing.NamedTuple( class CommandBuffer: def __init__(self, master: mitmproxy.master.Master, start: str = "") -> None: self.master = master - self.text = self.flatten(start) + self.text = start # Cursor is always within the range [0:len(buffer)]. self._cursor = len(self.text) self.completion: CompletionState = None @@ -105,7 +105,7 @@ class CommandBuffer: ret.append(("commander_invalid", p.value)) else: ret.append(("text", "")) - ret.append(("text", " ")) + if remhelp: ret.append(("text", " ")) for v in remhelp: @@ -113,11 +113,6 @@ class CommandBuffer: return ret - def flatten(self, txt): - parts, _ = self.parse_quoted(txt) - ret = [x.value for x in parts] - return " ".join(ret) - def left(self) -> None: self.cursor = self.cursor - 1 @@ -141,7 +136,7 @@ class CommandBuffer: nxt = self.completion.completer.cycle() buf = " ".join([i.value for i in self.completion.parse[:-1]]) + " " + nxt buf = buf.strip() - self.text = self.flatten(buf) + self.text = buf self.cursor = len(self.text) def backspace(self) -> None: @@ -155,6 +150,11 @@ class CommandBuffer: """ Inserts text at the cursor. """ + + # We don't want to insert a space before the command + if k == ' ' and self.text[0:self.cursor].strip() == '': + return + self.text = self.text[:self.cursor] + k + self.text[self.cursor:] self.cursor += len(k) self.completion = None diff --git a/test/mitmproxy/test_command.py b/test/mitmproxy/test_command.py index ad475fba9..ae4c400c8 100644 --- a/test/mitmproxy/test_command.py +++ b/test/mitmproxy/test_command.py @@ -115,12 +115,9 @@ class TestCommand: [ "foo bar", [ - command.ParseResult( - value = "foo", type = mitmproxy.types.Cmd, valid = False - ), - command.ParseResult( - value = "bar", type = mitmproxy.types.Unknown, valid = False - ) + command.ParseResult(value = "foo", type = mitmproxy.types.Cmd, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = "bar", type = mitmproxy.types.Unknown, valid = False) ], [], ], @@ -128,6 +125,7 @@ class TestCommand: "cmd1 'bar", [ command.ParseResult(value = "cmd1", type = mitmproxy.types.Cmd, valid = True), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), command.ParseResult(value = "'bar", type = str, valid = True) ], [], @@ -146,6 +144,7 @@ class TestCommand: "cmd3 1", [ command.ParseResult(value = "cmd3", type = mitmproxy.types.Cmd, valid = True), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), command.ParseResult(value = "1", type = int, valid = True), ], [] @@ -154,28 +153,27 @@ class TestCommand: "cmd3 ", [ command.ParseResult(value = "cmd3", type = mitmproxy.types.Cmd, valid = True), - command.ParseResult(value = "", type = int, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), ], - [] + ['int'] ], [ "subcommand ", [ - command.ParseResult( - value = "subcommand", type = mitmproxy.types.Cmd, valid = True, - ), - command.ParseResult(value = "", type = mitmproxy.types.Cmd, valid = False), + command.ParseResult(value = "subcommand", type = mitmproxy.types.Cmd, valid = True,), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), ], - ["arg"], + ["cmd", "arg"], ], [ "subcommand cmd3 ", [ command.ParseResult(value = "subcommand", type = mitmproxy.types.Cmd, valid = True), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), command.ParseResult(value = "cmd3", type = mitmproxy.types.Cmd, valid = True), - command.ParseResult(value = "", type = int, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), ], - [] + ["int"] ], [ "cmd4", @@ -188,22 +186,15 @@ class TestCommand: "cmd4 ", [ command.ParseResult(value = "cmd4", type = mitmproxy.types.Cmd, valid = True), - command.ParseResult(value = "", type = int, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), ], - ["str", "path"] - ], - [ - "cmd4 1", - [ - command.ParseResult(value = "cmd4", type = mitmproxy.types.Cmd, valid = True), - command.ParseResult(value = "1", type = int, valid = True), - ], - ["str", "path"] + ["int", "str", "path"] ], [ "cmd4 1", [ command.ParseResult(value = "cmd4", type = mitmproxy.types.Cmd, valid = True), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), command.ParseResult(value = "1", type = int, valid = True), ], ["str", "path"] @@ -219,14 +210,15 @@ class TestCommand: "flow ", [ command.ParseResult(value = "flow", type = mitmproxy.types.Cmd, valid = True), - command.ParseResult(value = "", type = flow.Flow, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), ], - ["str"] + ["flow", "str"] ], [ "flow x", [ command.ParseResult(value = "flow", type = mitmproxy.types.Cmd, valid = True), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), command.ParseResult(value = "x", type = flow.Flow, valid = False), ], ["str"] @@ -235,15 +227,17 @@ class TestCommand: "flow x ", [ command.ParseResult(value = "flow", type = mitmproxy.types.Cmd, valid = True), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), command.ParseResult(value = "x", type = flow.Flow, valid = False), - command.ParseResult(value = "", type = str, valid = True), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), ], - [] + ["str"] ], [ "flow \"one two", [ command.ParseResult(value = "flow", type = mitmproxy.types.Cmd, valid = True), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), command.ParseResult(value = "\"one two", type = flow.Flow, valid = False), ], ["str"] @@ -252,11 +246,112 @@ class TestCommand: "flow \"three four\"", [ command.ParseResult(value = "flow", type = mitmproxy.types.Cmd, valid = True), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), command.ParseResult(value = '"three four"', type = flow.Flow, valid = False), ], ["str"] ], + [ + "spaces ' '", + [ + command.ParseResult(value = "spaces", type = mitmproxy.types.Cmd, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = "' '", type = mitmproxy.types.Unknown, valid = False) + ], + [], + ], + [ + 'spaces2 " "', + [ + command.ParseResult(value = "spaces2", type = mitmproxy.types.Cmd, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = '" "', type = mitmproxy.types.Unknown, valid = False) + ], + [], + ], + [ + '"abc"', + [ + command.ParseResult(value = '"abc"', type = mitmproxy.types.Cmd, valid = False), + ], + [], + ], + [ + "'def'", + [ + command.ParseResult(value = "'def'", type = mitmproxy.types.Cmd, valid = False), + ], + [], + ], + [ + "cmd10 'a' \"b\" c", + [ + command.ParseResult(value = "cmd10", type = mitmproxy.types.Cmd, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = "'a'", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = '"b"', type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = "c", type = mitmproxy.types.Unknown, valid = False), + ], + [], + ], + [ + "cmd11 'a \"b\" c'", + [ + command.ParseResult(value = "cmd11", type = mitmproxy.types.Cmd, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = "'a \"b\" c'", type = mitmproxy.types.Unknown, valid = False), + ], + [], + ], + [ + 'cmd12 "a \'b\' c"', + [ + command.ParseResult(value = "cmd12", type = mitmproxy.types.Cmd, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = '"a \'b\' c"', type = mitmproxy.types.Unknown, valid = False), + ], + [], + ], + [ + r'cmd13 "a \"b\" c"', + [ + command.ParseResult(value = "cmd13", type = mitmproxy.types.Cmd, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = r'"a \"b\" c"', type = mitmproxy.types.Unknown, valid = False), + ], + [], + ], + [ + r"cmd14 'a \'b\' c'", + [ + command.ParseResult(value = "cmd14", type = mitmproxy.types.Cmd, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = r"'a \'b\' c'", type = mitmproxy.types.Unknown, valid = False), + ], + [], + ], + [ + " spaces_at_the_begining_are_stripped", + [ + command.ParseResult(value = "spaces_at_the_begining_are_stripped", type = mitmproxy.types.Cmd, valid = False), + ], + [], + ], + [ + " spaces_at_the_begining_are_stripped but_not_at_the_end ", + [ + command.ParseResult(value = "spaces_at_the_begining_are_stripped", type = mitmproxy.types.Cmd, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = "but_not_at_the_end", type = mitmproxy.types.Unknown, valid = False), + command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False), + ], + [], + ], + ] + with taddons.context() as tctx: tctx.master.addons.add(TAddon()) for s, expected, expectedremain in tests: diff --git a/test/mitmproxy/test_lexer.py b/test/mitmproxy/test_lexer.py deleted file mode 100644 index 19ef155b4..000000000 --- a/test/mitmproxy/test_lexer.py +++ /dev/null @@ -1,75 +0,0 @@ -from mitmproxy import lexer -import pytest -import io - - -class TestScripts: - - def test_simple(self): - - cases = [ - { - "text": r'abc', - "result": ['abc'] - }, - { - "text": r'"Hello \" Double Quotes"', - "result": ['"Hello \\" Double Quotes"'] - }, - { - "text": r"'Hello \' Single Quotes'", - "result": ["'Hello \\' Single Quotes'"] - }, - { - "text": r'"\""', - "result": ['"\\""'] - }, - { - "text": r'abc "def\" \x bla \z \\ \e \ " xpto', - "result": ['abc', '"def\\" \\x bla \\z \\\\ \\e \\ "', 'xpto'] - }, - { - "text": r'', - "result": [] - }, - { - "text": r' ', - "result": [] - }, - { - "text": r' ', - "result": [] - }, - { - "text": r'Space in the end ', - "result": ['Space', 'in', 'the', 'end'] - }, - { - "text": '\n\n\rHello\n World With Spaces\n\n', - "result": ['Hello', 'World', 'With', 'Spaces'] - }, - { - "text": r'\" Escaping characters without reason', - "result": ['\\"', 'Escaping', 'characters', 'without', 'reason'] - }, - ] - - for t in cases: - - lex = lexer.Lexer(t['text']) - tokens = list(lex) - result = t['result'] - assert(tokens == result) - - def test_fail(self): - text = r'"should fail with missing closing quote' - lex = lexer.Lexer(text) - with pytest.raises(ValueError, match="No closing quotation"): - assert list(lex) - - def test_stringio_text(self): - text = io.StringIO(r'Increase test coverage') - lex = lexer.Lexer(text) - tokens = list(lex) - result = ['Increase', 'test', 'coverage'] - assert(tokens == result) diff --git a/test/mitmproxy/tools/console/test_commander.py b/test/mitmproxy/tools/console/test_commander.py index 81e007f09..798ca5fe5 100644 --- a/test/mitmproxy/tools/console/test_commander.py +++ b/test/mitmproxy/tools/console/test_commander.py @@ -165,8 +165,3 @@ class TestCommandBuffer: cb = commander.CommandBuffer(tctx.master) cb.text = "foo" assert cb.render() - - def test_flatten(self): - with taddons.context() as tctx: - cb = commander.CommandBuffer(tctx.master) - assert cb.flatten("foo bar") == "foo bar" diff --git a/test/mitmproxy/tools/console/test_defaultkeys.py b/test/mitmproxy/tools/console/test_defaultkeys.py index 035f71f7f..7e8df6b60 100644 --- a/test/mitmproxy/tools/console/test_defaultkeys.py +++ b/test/mitmproxy/tools/console/test_defaultkeys.py @@ -3,12 +3,14 @@ from mitmproxy.tools.console import defaultkeys from mitmproxy.tools.console import keymap from mitmproxy.tools.console import master from mitmproxy import command - +from mitmproxy import ctx import pytest @pytest.mark.asyncio async def test_commands_exist(): + command_manager = command.CommandManager(ctx) + km = keymap.Keymap(None) defaultkeys.map(km) assert km.bindings @@ -16,7 +18,10 @@ async def test_commands_exist(): await m.load_flow(tflow()) for binding in km.bindings: - cmd, *args = command.get_lexer(binding.command) + results = command_manager.parse_partial(binding.command) + + cmd = results[0][0].value + args = [a.value for a in results[0][1:]] assert cmd in m.commands.commands @@ -24,4 +29,7 @@ async def test_commands_exist(): try: cmd_obj.prepare_args(args) except Exception as e: + + import pdb + pdb.set_trace() raise ValueError("Invalid command: {}".format(binding.command)) from e