diff --git a/examples/reconstruct_json.py b/examples/reconstruct_json.py index 4695698..07df86c 100644 --- a/examples/reconstruct_json.py +++ b/examples/reconstruct_json.py @@ -23,9 +23,9 @@ test_json = ''' } ''' -def test_scanless(): +def test_earley(): - json_parser = Lark(json_grammar, lexer=None) + json_parser = Lark(json_grammar) tree = json_parser.parse(test_json) # print ('@@', tree.pretty()) @@ -48,5 +48,5 @@ def test_lalr(): print (new_json) print (json.loads(new_json) == json.loads(test_json)) -test_scanless() +test_earley() test_lalr() diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 08fcdc6..aafc5b3 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -1,15 +1,16 @@ from collections import defaultdict -from .tree import Tree, Transformer_NoRecurse -from .common import is_terminal, ParserConf, PatternStr +from .tree import Tree +from .visitors import Transformer_InPlace +from .common import ParserConf, PatternStr from .lexer import Token from .parsers import earley, resolve_ambig -from .grammar import Rule +from .grammar import Rule, Terminal, NonTerminal def is_discarded_terminal(t): - return is_terminal(t) and t.startswith('_') + return t.is_term and t.filter_out def is_iter_empty(i): try: @@ -18,19 +19,21 @@ def is_iter_empty(i): except StopIteration: return True -class WriteTokensTransformer(Transformer_NoRecurse): +class WriteTokensTransformer(Transformer_InPlace): def __init__(self, tokens): self.tokens = tokens - def __default__(self, t): - if not isinstance(t, MatchTree): - return t + def __default__(self, data, children, meta): + # if not isinstance(t, MatchTree): + # return t + if not getattr(meta, 'match_tree', False): + return Tree(data, children) - iter_args = iter(t.children) + iter_args = iter(children) to_write = [] - for sym in t.orig_expansion: + for sym in meta.orig_expansion: if is_discarded_terminal(sym): - t = self.tokens[sym] + t = self.tokens[sym.name] assert isinstance(t.pattern, PatternStr) to_write.append(t.pattern.value) else: @@ -39,9 +42,9 @@ class WriteTokensTransformer(Transformer_NoRecurse): to_write += x else: if isinstance(x, Token): - assert x.type == sym, x + assert Terminal(x.type) == sym, x else: - assert x.data == sym, (sym, x) + assert NonTerminal(x.data) == sym, (sym, x) to_write.append(x) assert is_iter_empty(iter_args) @@ -58,13 +61,14 @@ class MakeMatchTree: def __call__(self, args): t = MatchTree(self.name, args) - t.orig_expansion = self.expansion + t.meta.match_tree = True + t.meta.orig_expansion = self.expansion return t class Reconstructor: def __init__(self, parser): # Recreate the rules to assume a standard lexer - _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever') + _tokens, rules, _grammar_extra = parser.grammar.compile() expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1} @@ -72,25 +76,26 @@ class Reconstructor: for r in rules: # Rules can match their alias if r.alias: - d[r.alias].append(r.expansion) - d[r.origin].append([r.alias]) + alias = NonTerminal(r.alias) + d[alias].append(r.expansion) + d[r.origin].append([alias]) else: d[r.origin].append(r.expansion) # Expanded rules can match their own terminal for sym in r.expansion: if sym in expand1s: - d[sym].append([sym.upper()]) + d[sym].append([Terminal(sym.name)]) reduced_rules = defaultdict(list) for name, expansions in d.items(): for expansion in expansions: - reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper() + reduced = [sym if sym.name.startswith('_') or sym in expand1s else Terminal(sym.name) for sym in expansion if not is_discarded_terminal(sym)] reduced_rules[name, tuple(reduced)].append(expansion) - self.rules = [Rule(name, list(reduced), MakeMatchTree(name, expansions[0]), None) + self.rules = [Rule(name, list(reduced), MakeMatchTree(name.name, expansions[0]), None) for (name, reduced), expansions in reduced_rules.items()] self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens}) @@ -98,9 +103,9 @@ class Reconstructor: def _match(self, term, token): if isinstance(token, Tree): - return token.data.upper() == term + return Terminal(token.data) == term elif isinstance(token, Token): - return term == token.type + return term == Terminal(token.type) assert False def _reconstruct(self, tree):