mirror of https://github.com/lark-parser/lark.git
Improvements to the Earley parser ambiguity resolution
This commit is contained in:
parent
9570918005
commit
48efa6500a
|
@ -29,6 +29,9 @@ class Derivation(Tree):
|
||||||
Tree.__init__(self, 'drv', items or [])
|
Tree.__init__(self, 'drv', items or [])
|
||||||
self.rule = rule
|
self.rule = rule
|
||||||
|
|
||||||
|
def _pretty_label(self): # Nicer pretty for debugging the parser
|
||||||
|
return self.rule.origin if self.rule else self.data
|
||||||
|
|
||||||
END_TOKEN = EndToken()
|
END_TOKEN = EndToken()
|
||||||
|
|
||||||
class Item(object):
|
class Item(object):
|
||||||
|
@ -106,8 +109,11 @@ class Column:
|
||||||
new_tree = old_tree.copy()
|
new_tree = old_tree.copy()
|
||||||
new_tree.rule = old_tree.rule
|
new_tree.rule = old_tree.rule
|
||||||
old_tree.set('_ambig', [new_tree])
|
old_tree.set('_ambig', [new_tree])
|
||||||
|
old_tree.rule = None # No longer a 'drv' node
|
||||||
|
|
||||||
if item.tree.children[0] is old_tree: # XXX a little hacky!
|
if item.tree.children[0] is old_tree: # XXX a little hacky!
|
||||||
raise ParseError("Infinite recursion in grammar! (Rule %s)" % item.rule)
|
raise ParseError("Infinite recursion in grammar! (Rule %s)" % item.rule)
|
||||||
|
|
||||||
old_tree.children.append(item.tree)
|
old_tree.children.append(item.tree)
|
||||||
else:
|
else:
|
||||||
self.completed[item] = item
|
self.completed[item] = item
|
||||||
|
@ -234,6 +240,14 @@ def _compare_drv(tree1, tree2):
|
||||||
# Probably trees that don't take part in this parse (better way to distinguish?)
|
# Probably trees that don't take part in this parse (better way to distinguish?)
|
||||||
return compare(tree1, tree2)
|
return compare(tree1, tree2)
|
||||||
|
|
||||||
|
# XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
|
||||||
|
# when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
|
||||||
|
# computationally inefficient. So we handle it here.
|
||||||
|
if tree1.data == '_ambig':
|
||||||
|
_resolve_ambig(tree1)
|
||||||
|
if tree2.data == '_ambig':
|
||||||
|
_resolve_ambig(tree2)
|
||||||
|
|
||||||
c = _compare_rules(tree1.rule, tree2.rule)
|
c = _compare_rules(tree1.rule, tree2.rule)
|
||||||
if c:
|
if c:
|
||||||
return c
|
return c
|
||||||
|
@ -247,13 +261,20 @@ def _compare_drv(tree1, tree2):
|
||||||
return compare(len(tree1.children), len(tree2.children))
|
return compare(len(tree1.children), len(tree2.children))
|
||||||
|
|
||||||
|
|
||||||
class ResolveAmbig(Visitor_NoRecurse):
|
def _resolve_ambig(tree):
|
||||||
def _ambig(self, tree):
|
assert tree.data == '_ambig'
|
||||||
|
|
||||||
best = min(tree.children, key=cmp_to_key(_compare_drv))
|
best = min(tree.children, key=cmp_to_key(_compare_drv))
|
||||||
assert best.data == 'drv'
|
assert best.data == 'drv'
|
||||||
tree.set('drv', best.children)
|
tree.set('drv', best.children)
|
||||||
tree.rule = best.rule # needed for applying callbacks
|
tree.rule = best.rule # needed for applying callbacks
|
||||||
|
|
||||||
|
assert tree.data != '_ambig'
|
||||||
|
|
||||||
|
class ResolveAmbig(Visitor_NoRecurse):
|
||||||
|
def _ambig(self, tree):
|
||||||
|
_resolve_ambig(tree)
|
||||||
|
|
||||||
|
|
||||||
# RULES = [
|
# RULES = [
|
||||||
# ('a', ['d']),
|
# ('a', ['d']),
|
||||||
|
|
Loading…
Reference in New Issue