Removed the custom lexer in favor of using pyparsing.

This commit is contained in:
Henrique 2019-11-15 13:07:12 -05:00
parent f2b118817e
commit 8972250167
7 changed files with 193 additions and 242 deletions

View File

@ -3,14 +3,13 @@
"""
import inspect
import types
import io
import typing
import textwrap
import functools
import sys
import pyparsing
from mitmproxy import exceptions
from mitmproxy import lexer
import mitmproxy.types
@ -22,10 +21,6 @@ def verify_arg_signature(f: typing.Callable, args: list, kwargs: dict) -> None:
raise exceptions.CommandError("command argument mismatch: %s" % v.args[0])
def get_lexer(s: typing.Union[str, io.StringIO]) -> lexer.Lexer:
    """
        Return a lexer.Lexer that tokenizes s.

        s is handed to lexer.Lexer unchanged.
    """
    return lexer.Lexer(s)
def typename(t: type) -> str:
"""
Translates a type to an explanatory string.
@ -79,6 +74,20 @@ class Command:
return "%s %s%s" % (self.path, params, ret)
def prepare_args(self, args: typing.Sequence[str]) -> typing.List[typing.Any]:
# Arguments that are just blank spaces aren't really arguments
# We need to get rid of those. If the user intended to pass a sequence
# of spaces, it would come between quotes
clean_args = []
for a in args:
if isinstance(a, str):
if a.strip() != '':
clean_args.append(a)
else:
clean_args.append(a)
args = clean_args
verify_arg_signature(self.func, list(args), {})
remainder: typing.Sequence[str] = []
@ -152,24 +161,36 @@ class CommandManager(mitmproxy.types._CommandBase):
"""
Parse a possibly partial command. Return a sequence of ParseResults and a sequence of remainder type help items.
"""
buf = io.StringIO(cmdstr)
parts: typing.List[str] = []
lex = get_lexer(buf)
while 1:
remainder = cmdstr[buf.tell():]
try:
t = lex.get_token()
except ValueError:
parts.append(remainder)
break
if not t:
break
parts.append(t)
rex = pyparsing.QuotedString("\"", escChar='\\', unquoteResults=False) |\
pyparsing.QuotedString("'", escChar='\\', unquoteResults=False) |\
pyparsing.Combine(pyparsing.Literal('"') + pyparsing.Word(pyparsing.printables + " ") + pyparsing.StringEnd()) |\
pyparsing.Word(pyparsing.printables) |\
pyparsing.Word(' ')
rex = rex.copy().leaveWhitespace()
remainder = cmdstr
for t, start, end in rex.scanString(cmdstr):
remainder = cmdstr[end:]
parts.append(t[0])
if remainder != '':
parts.append(remainder)
if not parts:
parts = []
elif cmdstr.endswith(" "):
parts.append("")
# First item in parts has always to be the command
# so we remove any blank tokens from the start of it
while True:
if parts and parts[0].strip() == '':
del parts[0]
else:
break
parse: typing.List[ParseResult] = []
params: typing.List[type] = []
@ -180,10 +201,15 @@ class CommandManager(mitmproxy.types._CommandBase):
if parts[i] in self.commands:
params.extend(self.commands[parts[i]].paramtypes)
elif params:
typ = params.pop(0)
if typ == mitmproxy.types.Cmd and params and params[0] == mitmproxy.types.Arg:
if parts[i] in self.commands:
params[:] = self.commands[parts[i]].paramtypes
if parts[i].strip() != '':
typ = params.pop(0)
if typ == mitmproxy.types.Cmd and params and params[0] == mitmproxy.types.Arg:
if parts[i] in self.commands:
params[:] = self.commands[parts[i]].paramtypes
else:
# If the token is just a bunch of spaces, then we don't
# want to count it against the arguments of the command
typ = mitmproxy.types.Unknown
else:
typ = mitmproxy.types.Unknown
@ -228,6 +254,7 @@ class CommandManager(mitmproxy.types._CommandBase):
"""
if path not in self.commands:
raise exceptions.CommandError("Unknown command: %s" % path)
return self.commands[path].call(args)
def execute(self, cmdstr: str):

View File

@ -1,99 +0,0 @@
from enum import Enum
import io
from typing import Union, List
class State(Enum):
    """Scanner states used by Lexer while it reads a single token."""
    QUOTE = 1   # inside a '...' or "..." section
    ESCAPE = 2  # the previous character was a backslash
    TEXT = 3    # plain, unquoted text


class Lexer:
    """
        Split a command string into whitespace-separated tokens.

        Quoted sections (single or double quotes) may contain whitespace and
        are returned with their quotes and backslashes intact; nothing is
        unescaped. A backslash makes the following character part of the
        token verbatim, both inside and outside quotes.

        The lexer is an iterator: iterating yields tokens until the input is
        exhausted. An unterminated quote raises ValueError.
    """

    def __init__(self, text: Union[str, io.StringIO]) -> None:
        """
            text is either the string to tokenize or an io.StringIO that
            already wraps it; a plain string is wrapped in a new StringIO.
        """
        # Not read anywhere in this class; kept so that existing attribute
        # access on Lexer instances keeps working.
        self._count = 0
        self._parsed = False

        self._state = State.TEXT
        self._states: List[State] = []
        self._text_pos = 0
        self._quote_start_pos = 0

        if isinstance(text, str):
            self.text = io.StringIO(text)
        else:
            self.text = text

    def __iter__(self):
        return self

    def __next__(self):
        token = self.get_token()
        # parse() signals "no more input" with an empty token.
        if token == '':
            raise StopIteration
        return token

    def get_token(self):
        """
            Return the next token, or '' once the input is exhausted.

            Raises ValueError if the token contains an unterminated quote.
        """
        return self.parse()

    def parse(self):
        """
            Scan one token starting at the current text position.

            Returns the token (also stored in self._token), or '' when only
            whitespace or nothing is left. Raises ValueError when the input
            ends while a quote is still open, including the case where the
            very last character is a backslash inside that quote.
        """
        whitespace = "\r\n\t "
        chars: List[str] = []
        quote = ''

        self._state = State.TEXT
        # Start every token with an empty state stack so that entries left
        # behind by a previously aborted token cannot leak into this one.
        self._states.clear()

        # Read one character at a time from our own offset: the underlying
        # stream may be shared with a caller that inspects its position.
        self.text.seek(self._text_pos)

        while True:
            ch = self.text.read(1)
            if not ch:
                # End of input: keep whatever has been collected so far.
                break
            self._text_pos += 1

            if self._state == State.QUOTE:
                chars.append(ch)
                if ch == '\\':
                    self._states.append(self._state)
                    self._state = State.ESCAPE
                elif ch == quote:
                    self._state = self._states.pop()

            elif self._state == State.ESCAPE:
                # The escaped character is taken verbatim.
                chars.append(ch)
                self._state = self._states.pop()

            elif self._state == State.TEXT:
                if ch in whitespace:
                    # Leading whitespace is skipped; trailing whitespace
                    # terminates the token.
                    if chars:
                        break
                    continue

                chars.append(ch)
                if ch == '"' or ch == "'":
                    quote = ch
                    # 1-based position of the opening quote, for the error.
                    self._quote_start_pos = self._text_pos
                    self._states.append(self._state)
                    self._state = State.QUOTE
                elif ch == '\\':
                    self._states.append(self._state)
                    self._state = State.ESCAPE

        self._token = ''.join(chars)

        # The quote is still open if we stopped inside it, or inside an
        # escape sequence that was started from within it.
        if self._state == State.QUOTE or State.QUOTE in self._states:
            raise ValueError("No closing quotation for quote in position %d" % self._quote_start_pos)

        return self._token

View File

@ -52,7 +52,7 @@ CompletionState = typing.NamedTuple(
class CommandBuffer:
def __init__(self, master: mitmproxy.master.Master, start: str = "") -> None:
self.master = master
self.text = self.flatten(start)
self.text = start
# Cursor is always within the range [0:len(buffer)].
self._cursor = len(self.text)
self.completion: CompletionState = None
@ -105,7 +105,7 @@ class CommandBuffer:
ret.append(("commander_invalid", p.value))
else:
ret.append(("text", ""))
ret.append(("text", " "))
if remhelp:
ret.append(("text", " "))
for v in remhelp:
@ -113,11 +113,6 @@ class CommandBuffer:
return ret
def flatten(self, txt):
    """
        Parse txt with self.parse_quoted and return the values of the
        resulting parts joined by single spaces.
    """
    parsed, _ = self.parse_quoted(txt)
    return " ".join(item.value for item in parsed)
def left(self) -> None:
self.cursor = self.cursor - 1
@ -141,7 +136,7 @@ class CommandBuffer:
nxt = self.completion.completer.cycle()
buf = " ".join([i.value for i in self.completion.parse[:-1]]) + " " + nxt
buf = buf.strip()
self.text = self.flatten(buf)
self.text = buf
self.cursor = len(self.text)
def backspace(self) -> None:
@ -155,6 +150,11 @@ class CommandBuffer:
"""
Inserts text at the cursor.
"""
# We don't want to insert a space before the command
if k == ' ' and self.text[0:self.cursor].strip() == '':
return
self.text = self.text[:self.cursor] + k + self.text[self.cursor:]
self.cursor += len(k)
self.completion = None

View File

@ -115,12 +115,9 @@ class TestCommand:
[
"foo bar",
[
command.ParseResult(
value = "foo", type = mitmproxy.types.Cmd, valid = False
),
command.ParseResult(
value = "bar", type = mitmproxy.types.Unknown, valid = False
)
command.ParseResult(value = "foo", type = mitmproxy.types.Cmd, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "bar", type = mitmproxy.types.Unknown, valid = False)
],
[],
],
@ -128,6 +125,7 @@ class TestCommand:
"cmd1 'bar",
[
command.ParseResult(value = "cmd1", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "'bar", type = str, valid = True)
],
[],
@ -146,6 +144,7 @@ class TestCommand:
"cmd3 1",
[
command.ParseResult(value = "cmd3", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "1", type = int, valid = True),
],
[]
@ -154,28 +153,27 @@ class TestCommand:
"cmd3 ",
[
command.ParseResult(value = "cmd3", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = "", type = int, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
],
[]
['int']
],
[
"subcommand ",
[
command.ParseResult(
value = "subcommand", type = mitmproxy.types.Cmd, valid = True,
),
command.ParseResult(value = "", type = mitmproxy.types.Cmd, valid = False),
command.ParseResult(value = "subcommand", type = mitmproxy.types.Cmd, valid = True,),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
],
["arg"],
["cmd", "arg"],
],
[
"subcommand cmd3 ",
[
command.ParseResult(value = "subcommand", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "cmd3", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = "", type = int, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
],
[]
["int"]
],
[
"cmd4",
@ -188,22 +186,15 @@ class TestCommand:
"cmd4 ",
[
command.ParseResult(value = "cmd4", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = "", type = int, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
],
["str", "path"]
],
[
"cmd4 1",
[
command.ParseResult(value = "cmd4", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = "1", type = int, valid = True),
],
["str", "path"]
["int", "str", "path"]
],
[
"cmd4 1",
[
command.ParseResult(value = "cmd4", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "1", type = int, valid = True),
],
["str", "path"]
@ -219,14 +210,15 @@ class TestCommand:
"flow ",
[
command.ParseResult(value = "flow", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = "", type = flow.Flow, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
],
["str"]
["flow", "str"]
],
[
"flow x",
[
command.ParseResult(value = "flow", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "x", type = flow.Flow, valid = False),
],
["str"]
@ -235,15 +227,17 @@ class TestCommand:
"flow x ",
[
command.ParseResult(value = "flow", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "x", type = flow.Flow, valid = False),
command.ParseResult(value = "", type = str, valid = True),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
],
[]
["str"]
],
[
"flow \"one two",
[
command.ParseResult(value = "flow", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "\"one two", type = flow.Flow, valid = False),
],
["str"]
@ -252,11 +246,112 @@ class TestCommand:
"flow \"three four\"",
[
command.ParseResult(value = "flow", type = mitmproxy.types.Cmd, valid = True),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = '"three four"', type = flow.Flow, valid = False),
],
["str"]
],
[
"spaces ' '",
[
command.ParseResult(value = "spaces", type = mitmproxy.types.Cmd, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "' '", type = mitmproxy.types.Unknown, valid = False)
],
[],
],
[
'spaces2 " "',
[
command.ParseResult(value = "spaces2", type = mitmproxy.types.Cmd, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = '" "', type = mitmproxy.types.Unknown, valid = False)
],
[],
],
[
'"abc"',
[
command.ParseResult(value = '"abc"', type = mitmproxy.types.Cmd, valid = False),
],
[],
],
[
"'def'",
[
command.ParseResult(value = "'def'", type = mitmproxy.types.Cmd, valid = False),
],
[],
],
[
"cmd10 'a' \"b\" c",
[
command.ParseResult(value = "cmd10", type = mitmproxy.types.Cmd, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "'a'", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = '"b"', type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "c", type = mitmproxy.types.Unknown, valid = False),
],
[],
],
[
"cmd11 'a \"b\" c'",
[
command.ParseResult(value = "cmd11", type = mitmproxy.types.Cmd, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "'a \"b\" c'", type = mitmproxy.types.Unknown, valid = False),
],
[],
],
[
'cmd12 "a \'b\' c"',
[
command.ParseResult(value = "cmd12", type = mitmproxy.types.Cmd, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = '"a \'b\' c"', type = mitmproxy.types.Unknown, valid = False),
],
[],
],
[
r'cmd13 "a \"b\" c"',
[
command.ParseResult(value = "cmd13", type = mitmproxy.types.Cmd, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = r'"a \"b\" c"', type = mitmproxy.types.Unknown, valid = False),
],
[],
],
[
r"cmd14 'a \'b\' c'",
[
command.ParseResult(value = "cmd14", type = mitmproxy.types.Cmd, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = r"'a \'b\' c'", type = mitmproxy.types.Unknown, valid = False),
],
[],
],
[
" spaces_at_the_begining_are_stripped",
[
command.ParseResult(value = "spaces_at_the_begining_are_stripped", type = mitmproxy.types.Cmd, valid = False),
],
[],
],
[
" spaces_at_the_begining_are_stripped but_not_at_the_end ",
[
command.ParseResult(value = "spaces_at_the_begining_are_stripped", type = mitmproxy.types.Cmd, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = "but_not_at_the_end", type = mitmproxy.types.Unknown, valid = False),
command.ParseResult(value = " ", type = mitmproxy.types.Unknown, valid = False),
],
[],
],
]
with taddons.context() as tctx:
tctx.master.addons.add(TAddon())
for s, expected, expectedremain in tests:

View File

@ -1,75 +0,0 @@
from mitmproxy import lexer
import pytest
import io
class TestScripts:
    """Tokenization tests for lexer.Lexer."""

    def test_simple(self):
        """Every input text must lex into exactly the listed tokens."""
        cases = [
            {"text": r'abc', "result": ['abc']},
            {"text": r'"Hello \" Double Quotes"', "result": ['"Hello \\" Double Quotes"']},
            {"text": r"'Hello \' Single Quotes'", "result": ["'Hello \\' Single Quotes'"]},
            {"text": r'"\""', "result": ['"\\""']},
            {
                "text": r'abc "def\" \x bla \z \\ \e \ " xpto',
                "result": ['abc', '"def\\" \\x bla \\z \\\\ \\e \\ "', 'xpto'],
            },
            {"text": r'', "result": []},
            {"text": r' ', "result": []},
            {"text": r' ', "result": []},
            {"text": r'Space in the end ', "result": ['Space', 'in', 'the', 'end']},
            {
                "text": '\n\n\rHello\n World With Spaces\n\n',
                "result": ['Hello', 'World', 'With', 'Spaces'],
            },
            {
                "text": r'\" Escaping characters without reason',
                "result": ['\\"', 'Escaping', 'characters', 'without', 'reason'],
            },
        ]
        for case in cases:
            produced = list(lexer.Lexer(case['text']))
            assert(produced == case['result'])

    def test_fail(self):
        """An opening quote that is never closed must raise ValueError."""
        unterminated = r'"should fail with missing closing quote'
        lex = lexer.Lexer(unterminated)
        with pytest.raises(ValueError, match="No closing quotation"):
            assert list(lex)

    def test_stringio_text(self):
        """The lexer accepts an io.StringIO as well as a plain string."""
        stream = io.StringIO(r'Increase test coverage')
        produced = list(lexer.Lexer(stream))
        assert(produced == ['Increase', 'test', 'coverage'])

View File

@ -165,8 +165,3 @@ class TestCommandBuffer:
cb = commander.CommandBuffer(tctx.master)
cb.text = "foo"
assert cb.render()
def test_flatten(self):
    """Check the result of CommandBuffer.flatten for the input "foo bar"."""
    with taddons.context() as tctx:
        buf = commander.CommandBuffer(tctx.master)
        flattened = buf.flatten("foo bar")
        assert flattened == "foo bar"

View File

@ -3,12 +3,14 @@ from mitmproxy.tools.console import defaultkeys
from mitmproxy.tools.console import keymap
from mitmproxy.tools.console import master
from mitmproxy import command
from mitmproxy import ctx
import pytest
@pytest.mark.asyncio
async def test_commands_exist():
command_manager = command.CommandManager(ctx)
km = keymap.Keymap(None)
defaultkeys.map(km)
assert km.bindings
@ -16,7 +18,10 @@ async def test_commands_exist():
await m.load_flow(tflow())
for binding in km.bindings:
cmd, *args = command.get_lexer(binding.command)
results = command_manager.parse_partial(binding.command)
cmd = results[0][0].value
args = [a.value for a in results[0][1:]]
assert cmd in m.commands.commands
@ -24,4 +29,7 @@ async def test_commands_exist():
try:
cmd_obj.prepare_args(args)
except Exception as e:
import pdb
pdb.set_trace()
raise ValueError("Invalid command: {}".format(binding.command)) from e