mirror of https://github.com/lark-parser/lark.git
Fixed reconstruct
This commit is contained in:
parent 847870fdc8
commit d11c67fea0
@@ -23,9 +23,9 @@ test_json = '''
 }
 '''

-def test_scanless():
+def test_earley():

-    json_parser = Lark(json_grammar, lexer=None)
+    json_parser = Lark(json_grammar)
     tree = json_parser.parse(test_json)

     # print ('@@', tree.pretty())
@@ -48,5 +48,5 @@ def test_lalr():
     print (new_json)
     print (json.loads(new_json) == json.loads(test_json))

-test_scanless()
+test_earley()
 test_lalr()
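The hunks above rename test_scanless to test_earley and drop the lexer=None argument: Earley with a standard lexer is Lark's default mode, so a plain Lark(json_grammar) call is all the test needs. A minimal sketch of the renamed test's shape; this json_grammar is illustrative, not the one defined in the example file:

from lark import Lark

# Illustrative JSON grammar (the example file defines its own):
json_grammar = r"""
    ?start: value
    ?value: dict | list | ESCAPED_STRING | SIGNED_NUMBER
          | "true" | "false" | "null"
    list : "[" [value ("," value)*] "]"
    dict : "{" [pair ("," pair)*] "}"
    pair : ESCAPED_STRING ":" value
    %import common.ESCAPED_STRING
    %import common.SIGNED_NUMBER
    %import common.WS
    %ignore WS
"""
test_json = '{"a": [1, 2, "three"]}'

def test_earley():
    json_parser = Lark(json_grammar)    # parser='earley' is the default
    tree = json_parser.parse(test_json)
    print(tree.pretty())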
@@ -1,15 +1,16 @@
 from collections import defaultdict

-from .tree import Tree, Transformer_NoRecurse
-from .common import is_terminal, ParserConf, PatternStr
+from .tree import Tree
+from .visitors import Transformer_InPlace
+from .common import ParserConf, PatternStr
 from .lexer import Token
 from .parsers import earley, resolve_ambig
-from .grammar import Rule
+from .grammar import Rule, Terminal, NonTerminal



 def is_discarded_terminal(t):
-    return is_terminal(t) and t.startswith('_')
+    return t.is_term and t.filter_out

 def is_iter_empty(i):
     try:
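The rewritten is_discarded_terminal reflects the change this commit builds on: grammar symbols are now Terminal/NonTerminal objects from lark.grammar rather than bare strings with naming conventions (uppercase = terminal, leading underscore = filtered). A sketch of how these objects behave; the filter_out keyword follows this diff's usage and should be treated as an assumption about the constructor:

from lark.grammar import Terminal, NonTerminal

comma = Terminal('COMMA', filter_out=True)   # marked to be dropped from trees
pair = NonTerminal('pair')

assert comma.is_term and comma.filter_out    # a discarded terminal
assert not pair.is_term                      # nonterminals are never discarded
assert comma.name == 'COMMA' and pair.name == 'pair'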
@@ -18,19 +19,21 @@ def is_iter_empty(i):
     except StopIteration:
         return True

-class WriteTokensTransformer(Transformer_NoRecurse):
+class WriteTokensTransformer(Transformer_InPlace):
     def __init__(self, tokens):
         self.tokens = tokens

-    def __default__(self, t):
-        if not isinstance(t, MatchTree):
-            return t
+    def __default__(self, data, children, meta):
+        # if not isinstance(t, MatchTree):
+        #     return t
+        if not getattr(meta, 'match_tree', False):
+            return Tree(data, children)

-        iter_args = iter(t.children)
+        iter_args = iter(children)
         to_write = []
-        for sym in t.orig_expansion:
+        for sym in meta.orig_expansion:
             if is_discarded_terminal(sym):
-                t = self.tokens[sym]
+                t = self.tokens[sym.name]
                 assert isinstance(t.pattern, PatternStr)
                 to_write.append(t.pattern.value)
             else:
@@ -39,9 +42,9 @@ class WriteTokensTransformer(Transformer_NoRecurse):
                     to_write += x
                 else:
                     if isinstance(x, Token):
-                        assert x.type == sym, x
+                        assert Terminal(x.type) == sym, x
                     else:
-                        assert x.data == sym, (sym, x)
+                        assert NonTerminal(x.data) == sym, (sym, x)
                     to_write.append(x)

         assert is_iter_empty(iter_args)
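WriteTokensTransformer now subclasses Transformer_InPlace, whose __default__ hook receives the node's pieces (data, children, meta) instead of a single tree object, which is why the old isinstance(t, MatchTree) check becomes a lookup on meta. A bare-bones sketch of the new callback shape:

from lark import Tree
from lark.visitors import Transformer_InPlace

class Identity(Transformer_InPlace):
    def __default__(self, data, children, meta):
        # Called for every rule without a dedicated method. This version
        # rebuilds the node unchanged; WriteTokensTransformer instead checks
        # meta.match_tree before deciding what to write out.
        return Tree(data, children)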
@@ -58,13 +61,14 @@ class MakeMatchTree:

     def __call__(self, args):
         t = MatchTree(self.name, args)
-        t.orig_expansion = self.expansion
+        t.meta.match_tree = True
+        t.meta.orig_expansion = self.expansion
         return t

 class Reconstructor:
     def __init__(self, parser):
         # Recreate the rules to assume a standard lexer
-        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')
+        _tokens, rules, _grammar_extra = parser.grammar.compile()

         expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}
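MakeMatchTree accordingly stashes its bookkeeping on the node's meta object (a match_tree flag plus the original expansion) instead of setting ad-hoc attributes on the tree, which is what lets __default__ above recognize match trees. A sketch of the pattern; the expansion value is illustrative only:

from lark import Tree

t = Tree('pair', [])
t.meta.match_tree = True
t.meta.orig_expansion = ['ESCAPED_STRING', '_COLON', 'value']   # illustrative
assert getattr(t.meta, 'match_tree', False)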
@@ -72,25 +76,26 @@ class Reconstructor:
         for r in rules:
             # Rules can match their alias
             if r.alias:
-                d[r.alias].append(r.expansion)
-                d[r.origin].append([r.alias])
+                alias = NonTerminal(r.alias)
+                d[alias].append(r.expansion)
+                d[r.origin].append([alias])
             else:
                 d[r.origin].append(r.expansion)

             # Expanded rules can match their own terminal
             for sym in r.expansion:
                 if sym in expand1s:
-                    d[sym].append([sym.upper()])
+                    d[sym].append([Terminal(sym.name)])

         reduced_rules = defaultdict(list)
         for name, expansions in d.items():
             for expansion in expansions:
-                reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper()
+                reduced = [sym if sym.name.startswith('_') or sym in expand1s else Terminal(sym.name)
                            for sym in expansion if not is_discarded_terminal(sym)]

                 reduced_rules[name, tuple(reduced)].append(expansion)

-        self.rules = [Rule(name, list(reduced), MakeMatchTree(name, expansions[0]), None)
+        self.rules = [Rule(name, list(reduced), MakeMatchTree(name.name, expansions[0]), None)
                       for (name, reduced), expansions in reduced_rules.items()]

         self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens})
@@ -98,9 +103,9 @@ class Reconstructor:

     def _match(self, term, token):
         if isinstance(token, Tree):
-            return token.data.upper() == term
+            return Terminal(token.data) == term
         elif isinstance(token, Token):
-            return term == token.type
+            return term == Terminal(token.type)
         assert False

     def _reconstruct(self, tree):
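Taken together, these changes restore the round-trip that the example script exercises: parse a document, reconstruct its text from the tree, and compare. A sketch of that flow, reusing the illustrative json_grammar and test_json from the sketch after the first file's diff:

import json
from lark import Lark
from lark.reconstruct import Reconstructor

json_parser = Lark(json_grammar)   # json_grammar as defined in the earlier sketch
tree = json_parser.parse(test_json)
new_json = Reconstructor(json_parser).reconstruct(tree)
print(json.loads(new_json) == json.loads(test_json))   # expected: True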