Fixed bug in Earley: A tree builder optimization clashed with explicit ambiguity

This commit is contained in:
Erez Shinan 2018-04-05 15:40:33 +03:00
parent 255ef0d973
commit 25c3c51b1c
3 changed files with 43 additions and 5 deletions

View File

@@ -172,7 +172,7 @@ class Lark:
def _build_parser(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr')
callback = self._parse_tree_builder.create_callback(self.options.transformer)
if self.profiler:
for f in dir(callback):

View File

@@ -57,6 +57,19 @@ class ChildFilter:
self.node_builder = node_builder
self.to_include = to_include
# Select and flatten the children named by `self.to_include`, then pass the
# resulting list to the wrapped `self.node_builder` and return its result.
# `self.to_include` is iterated as (index, to_expand) pairs.
def __call__(self, children):
# Accumulate into a fresh list so that no existing `.children` list is modified.
filtered = []
for i, to_expand in self.to_include:
if to_expand:
# Splice in the child's own children in place of the child itself.
filtered += children[i].children
else:
# Keep the child as a single element.
filtered.append(children[i])
return self.node_builder(filtered)
class ChildFilterLALR(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
def __call__(self, children):
filtered = []
for i, to_expand in self.to_include:
@@ -73,21 +86,22 @@ class ChildFilter:
# Return True when `sym` is not a terminal (as reported by `is_terminal`) and
# its name begins with an underscore; such symbols are marked for expansion.
def _should_expand(sym):
return not is_terminal(sym) and sym.startswith('_')
# Return a partially-applied child-filter class for `expansion` when filtering
# or expansion is required.
# NOTE(review): this is a diff view -- each pair of near-identical lines below
# appears to be the pre-commit line followed by its post-commit replacement.
def maybe_create_child_filter(expansion, filter_out):
def maybe_create_child_filter(expansion, filter_out, ambiguous):
# One (position, should-expand) pair per symbol of `expansion` that is not in `filter_out`.
to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) if sym not in filter_out]
# A filter is only needed if some symbol was dropped or some kept symbol must be expanded.
# NOTE(review): no other return is visible, so the function presumably returns None otherwise -- confirm.
if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
return partial(ChildFilter, to_include)
# When `ambiguous` is truthy use the plain ChildFilter; otherwise use ChildFilterLALR.
return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include)
# Empty class: defines no attributes or methods of its own.
class Callback(object):
pass
class ParseTreeBuilder:
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False):
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False):
self.tree_class = tree_class
self.propagate_positions = propagate_positions
self.always_keep_all_tokens = keep_all_tokens
self.ambiguous = ambiguous
self.rule_builders = list(self._init_builders(rules))
@@ -107,7 +121,7 @@ class ParseTreeBuilder:
wrapper_chain = filter(None, [
create_token and partial(CreateToken, create_token),
(expand_single_child and not rule.alias) and ExpandSingleChild,
maybe_create_child_filter(rule.expansion, () if keep_all_tokens else filter_out),
maybe_create_child_filter(rule.expansion, () if keep_all_tokens else filter_out, self.ambiguous),
self.propagate_positions and PropagatePositions,
])

View File

@@ -293,6 +293,30 @@ def _make_full_earley_test(LEXER):
self.assertEqual(res, expected)
# Check that parsing with ambiguity='explicit' yields an '_ambig' root whose
# child trees cover every derivation of the input.
def test_explicit_ambiguity(self):
# `start` is one or more NAME tokens and NAME matches any run of word
# characters, so a single word can be tokenized in several ways.
grammar = r"""
start: NAME+
NAME: /\w+/
%ignore " "
"""
text = """cat"""
parser = Lark(grammar, start='start', ambiguity='explicit')
tree = parser.parse(text)
# The root node is expected to be the ambiguity marker.
self.assertEqual(tree.data, '_ambig')
# Reduce each alternative tree to the tuple of its children's string values.
combinations = {tuple(str(s) for s in t.children) for t in tree.children}
# All four ways of splitting "cat" into consecutive non-empty pieces must appear.
self.assertEqual(combinations, {
('cat',),
('ca', 't'),
('c', 'at'),
('c', 'a' ,'t')
})