mirror of https://github.com/lark-parser/lark.git
Fixed bug in Earley: A tree builder optimization clashed with explicit ambiguity
This commit is contained in:
parent
255ef0d973
commit
25c3c51b1c
|
@@ -172,7 +172,7 @@ class Lark:
|
|||
def _build_parser(self):
|
||||
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
|
||||
|
||||
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
|
||||
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr')
|
||||
callback = self._parse_tree_builder.create_callback(self.options.transformer)
|
||||
if self.profiler:
|
||||
for f in dir(callback):
|
||||
|
|
|
@@ -57,6 +57,19 @@ class ChildFilter:
|
|||
self.node_builder = node_builder
|
||||
self.to_include = to_include
|
||||
|
||||
def __call__(self, children):
|
||||
filtered = []
|
||||
for i, to_expand in self.to_include:
|
||||
if to_expand:
|
||||
filtered += children[i].children
|
||||
else:
|
||||
filtered.append(children[i])
|
||||
|
||||
return self.node_builder(filtered)
|
||||
|
||||
class ChildFilterLALR(ChildFilter):
|
||||
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
|
||||
|
||||
def __call__(self, children):
|
||||
filtered = []
|
||||
for i, to_expand in self.to_include:
|
||||
|
@@ -73,21 +86,22 @@ class ChildFilter:
|
|||
def _should_expand(sym):
|
||||
return not is_terminal(sym) and sym.startswith('_')
|
||||
|
||||
def maybe_create_child_filter(expansion, filter_out):
|
||||
def maybe_create_child_filter(expansion, filter_out, ambiguous):
|
||||
to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) if sym not in filter_out]
|
||||
|
||||
if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
|
||||
return partial(ChildFilter, to_include)
|
||||
return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include)
|
||||
|
||||
|
||||
class Callback(object):
|
||||
pass
|
||||
|
||||
class ParseTreeBuilder:
|
||||
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False):
|
||||
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False):
|
||||
self.tree_class = tree_class
|
||||
self.propagate_positions = propagate_positions
|
||||
self.always_keep_all_tokens = keep_all_tokens
|
||||
self.ambiguous = ambiguous
|
||||
|
||||
self.rule_builders = list(self._init_builders(rules))
|
||||
|
||||
|
@@ -107,7 +121,7 @@ class ParseTreeBuilder:
|
|||
wrapper_chain = filter(None, [
|
||||
create_token and partial(CreateToken, create_token),
|
||||
(expand_single_child and not rule.alias) and ExpandSingleChild,
|
||||
maybe_create_child_filter(rule.expansion, () if keep_all_tokens else filter_out),
|
||||
maybe_create_child_filter(rule.expansion, () if keep_all_tokens else filter_out, self.ambiguous),
|
||||
self.propagate_positions and PropagatePositions,
|
||||
])
|
||||
|
||||
|
|
|
@@ -293,6 +293,30 @@ def _make_full_earley_test(LEXER):
|
|||
self.assertEqual(res, expected)
|
||||
|
||||
|
||||
def test_explicit_ambiguity(self):
|
||||
grammar = r"""
|
||||
start: NAME+
|
||||
|
||||
NAME: /\w+/
|
||||
|
||||
%ignore " "
|
||||
"""
|
||||
|
||||
text = """cat"""
|
||||
|
||||
parser = Lark(grammar, start='start', ambiguity='explicit')
|
||||
tree = parser.parse(text)
|
||||
self.assertEqual(tree.data, '_ambig')
|
||||
|
||||
combinations = {tuple(str(s) for s in t.children) for t in tree.children}
|
||||
self.assertEqual(combinations, {
|
||||
('cat',),
|
||||
('ca', 't'),
|
||||
('c', 'at'),
|
||||
('c', 'a' ,'t')
|
||||
})
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue