Improvements to the Earley parser ambiguity resolution

This commit is contained in:
Erez Shinan 2017-07-17 17:11:43 +03:00
parent 9570918005
commit 48efa6500a
1 changed file with 25 additions and 4 deletions

View File

@ -29,6 +29,9 @@ class Derivation(Tree):
Tree.__init__(self, 'drv', items or []) Tree.__init__(self, 'drv', items or [])
self.rule = rule self.rule = rule
# Returns the label to display for this node: the origin of the attached rule when
# self.rule is set (truthy), otherwise the node's own data.
# NOTE(review): presumably consumed by Tree's pretty-printer - confirm against Tree.
def _pretty_label(self): # Nicer pretty-print label for debugging the parser
return self.rule.origin if self.rule else self.data
END_TOKEN = EndToken() END_TOKEN = EndToken()
class Item(object): class Item(object):
@ -106,8 +109,11 @@ class Column:
new_tree = old_tree.copy() new_tree = old_tree.copy()
new_tree.rule = old_tree.rule new_tree.rule = old_tree.rule
old_tree.set('_ambig', [new_tree]) old_tree.set('_ambig', [new_tree])
old_tree.rule = None # No longer a 'drv' node
if item.tree.children[0] is old_tree: # XXX a little hacky! if item.tree.children[0] is old_tree: # XXX a little hacky!
raise ParseError("Infinite recursion in grammar! (Rule %s)" % item.rule) raise ParseError("Infinite recursion in grammar! (Rule %s)" % item.rule)
old_tree.children.append(item.tree) old_tree.children.append(item.tree)
else: else:
self.completed[item] = item self.completed[item] = item
@ -234,6 +240,14 @@ def _compare_drv(tree1, tree2):
# Probably trees that don't take part in this parse (better way to distinguish?) # Probably trees that don't take part in this parse (better way to distinguish?)
return compare(tree1, tree2) return compare(tree1, tree2)
# XXX Unresolved '_ambig' nodes can still reach this comparison, due to imperfections in the ordering
# of Visitor_NoRecurse when it is confronted with duplicate (same-id) nodes. Fixing that ordering is
# possible, but would be computationally inefficient, so we resolve such nodes here instead.
if tree1.data == '_ambig':
_resolve_ambig(tree1)
if tree2.data == '_ambig':
_resolve_ambig(tree2)
c = _compare_rules(tree1.rule, tree2.rule) c = _compare_rules(tree1.rule, tree2.rule)
if c: if c:
return c return c
@ -247,13 +261,20 @@ def _compare_drv(tree1, tree2):
return compare(len(tree1.children), len(tree2.children)) return compare(len(tree1.children), len(tree2.children))
# _resolve_ambig(tree): collapse an '_ambig' node in place. The child that is smallest under
# _compare_drv is chosen, and the node is then re-set to 'drv' with that child's children and
# given that child's rule. The asserts check that the input is an '_ambig' node, that the chosen
# child is a 'drv' node, and that the node is no longer '_ambig' afterwards.
# NOTE(review): assumes tree.set(data, children) replaces both the node's data and its children - confirm against Tree.
class ResolveAmbig(Visitor_NoRecurse): def _resolve_ambig(tree):
def _ambig(self, tree): assert tree.data == '_ambig'
best = min(tree.children, key=cmp_to_key(_compare_drv)) best = min(tree.children, key=cmp_to_key(_compare_drv))
assert best.data == 'drv' assert best.data == 'drv'
tree.set('drv', best.children) tree.set('drv', best.children)
tree.rule = best.rule # needed for applying callbacks tree.rule = best.rule # needed for applying callbacks
assert tree.data != '_ambig'
# Visitor whose _ambig handler delegates to the module-level _resolve_ambig(), so the same
# resolution logic can also be called directly on a single node.
# NOTE(review): presumably Visitor_NoRecurse dispatches nodes whose data is '_ambig' to the
# method of the same name - confirm against Visitor_NoRecurse.
class ResolveAmbig(Visitor_NoRecurse):
def _ambig(self, tree):
_resolve_ambig(tree)
# RULES = [ # RULES = [
# ('a', ['d']), # ('a', ['d']),