Fixed reconstruct

This commit is contained in:
Erez Shinan 2018-06-27 16:56:07 +03:00
parent 847870fdc8
commit d11c67fea0
2 changed files with 30 additions and 25 deletions

View File

@ -23,9 +23,9 @@ test_json = '''
}
'''
def test_scanless():
def test_earley():
json_parser = Lark(json_grammar, lexer=None)
json_parser = Lark(json_grammar)
tree = json_parser.parse(test_json)
# print ('@@', tree.pretty())
@ -48,5 +48,5 @@ def test_lalr():
print (new_json)
print (json.loads(new_json) == json.loads(test_json))
test_scanless()
test_earley()
test_lalr()

View File

@ -1,15 +1,16 @@
from collections import defaultdict
from .tree import Tree, Transformer_NoRecurse
from .common import is_terminal, ParserConf, PatternStr
from .tree import Tree
from .visitors import Transformer_InPlace
from .common import ParserConf, PatternStr
from .lexer import Token
from .parsers import earley, resolve_ambig
from .grammar import Rule
from .grammar import Rule, Terminal, NonTerminal
# Predicate: tells whether grammar symbol `t` is a terminal that is dropped
# from the parse tree.
# NOTE(review): this text looks like a rendered diff whose +/- markers and
# indentation were lost. The two `return` lines below appear to be the
# before/after versions of a single statement (the import of `is_terminal`
# is likewise present on one import line and absent from its neighbour) —
# confirm against the real file before relying on either.
def is_discarded_terminal(t):
# String-based variant: `t` passes `is_terminal` and starts with '_'.
return is_terminal(t) and t.startswith('_')
# Attribute-based variant: `t` exposes `is_term` and `filter_out`.
return t.is_term and t.filter_out
def is_iter_empty(i):
try:
@ -18,19 +19,21 @@ def is_iter_empty(i):
except StopIteration:
return True
class WriteTokensTransformer(Transformer_NoRecurse):
class WriteTokensTransformer(Transformer_InPlace):
def __init__(self, tokens):
self.tokens = tokens
def __default__(self, t):
if not isinstance(t, MatchTree):
return t
def __default__(self, data, children, meta):
# if not isinstance(t, MatchTree):
# return t
if not getattr(meta, 'match_tree', False):
return Tree(data, children)
iter_args = iter(t.children)
iter_args = iter(children)
to_write = []
for sym in t.orig_expansion:
for sym in meta.orig_expansion:
if is_discarded_terminal(sym):
t = self.tokens[sym]
t = self.tokens[sym.name]
assert isinstance(t.pattern, PatternStr)
to_write.append(t.pattern.value)
else:
@ -39,9 +42,9 @@ class WriteTokensTransformer(Transformer_NoRecurse):
to_write += x
else:
if isinstance(x, Token):
assert x.type == sym, x
assert Terminal(x.type) == sym, x
else:
assert x.data == sym, (sym, x)
assert NonTerminal(x.data) == sym, (sym, x)
to_write.append(x)
assert is_iter_empty(iter_args)
@ -58,13 +61,14 @@ class MakeMatchTree:
# Wraps the matched children `args` in a MatchTree named `self.name` and
# attaches `self.expansion` to it before returning it.
# NOTE(review): this text looks like a rendered diff with +/- markers and
# indentation stripped; the attribute-on-tree line and the two `t.meta`
# lines are presumably the before/after forms of the same bookkeeping —
# confirm which one the real file keeps.
def __call__(self, args):
t = MatchTree(self.name, args)
# Variant storing the expansion directly on the tree object.
t.orig_expansion = self.expansion
# Variant storing a `match_tree` flag and the expansion on `t.meta`;
# WriteTokensTransformer.__default__ in this file reads both names from `meta`.
t.meta.match_tree = True
t.meta.orig_expansion = self.expansion
return t
class Reconstructor:
def __init__(self, parser):
# Recreate the rules to assume a standard lexer
_tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')
_tokens, rules, _grammar_extra = parser.grammar.compile()
expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}
@ -72,25 +76,26 @@ class Reconstructor:
for r in rules:
# Rules can match their alias
if r.alias:
d[r.alias].append(r.expansion)
d[r.origin].append([r.alias])
alias = NonTerminal(r.alias)
d[alias].append(r.expansion)
d[r.origin].append([alias])
else:
d[r.origin].append(r.expansion)
# Expanded rules can match their own terminal
for sym in r.expansion:
if sym in expand1s:
d[sym].append([sym.upper()])
d[sym].append([Terminal(sym.name)])
reduced_rules = defaultdict(list)
for name, expansions in d.items():
for expansion in expansions:
reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper()
reduced = [sym if sym.name.startswith('_') or sym in expand1s else Terminal(sym.name)
for sym in expansion if not is_discarded_terminal(sym)]
reduced_rules[name, tuple(reduced)].append(expansion)
self.rules = [Rule(name, list(reduced), MakeMatchTree(name, expansions[0]), None)
self.rules = [Rule(name, list(reduced), MakeMatchTree(name.name, expansions[0]), None)
for (name, reduced), expansions in reduced_rules.items()]
self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens})
@ -98,9 +103,9 @@ class Reconstructor:
# Decides whether `token` (a Tree or a Token) corresponds to the symbol `term`.
# NOTE(review): this text looks like a rendered diff with +/- markers and
# indentation stripped; each branch shows two consecutive `return` lines that
# are presumably the before/after versions of one statement — confirm against
# the real file.
def _match(self, term, token):
if isinstance(token, Tree):
# String-based variant: compares the upper-cased tree name with `term`.
return token.data.upper() == term
# Symbol-based variant: wraps the tree name in Terminal before comparing.
return Terminal(token.data) == term
elif isinstance(token, Token):
# String-based variant: compares `term` with the token's type name.
return term == token.type
# Symbol-based variant: wraps the token type in Terminal before comparing.
return term == Terminal(token.type)
# Reached only when `token` is neither a Tree nor a Token. Python removes
# `assert` statements under -O, in which case the function falls through
# and returns None.
assert False
def _reconstruct(self, tree):