BUGFIX: Fixed an elusive bug in the Earley parser that occurred when empty rules repeat in the same column

This commit is contained in:
Erez Shinan 2017-04-05 17:32:56 +03:00
parent a7f99dd8c6
commit 1685f94ea3
2 changed files with 18 additions and 2 deletions

View File

@ -98,7 +98,7 @@ class Column:
for item in items:
if item.is_complete:
# XXX TODO Potential bug: What happens if there's ambiguity in an empty rule?
# XXX Potential bug: What happens if there's ambiguity in an empty rule?
if item.rule.expansion and item in self.completed:
old_tree = self.completed[item].tree
if old_tree.data != 'ambig':
@ -110,7 +110,7 @@ class Column:
old_tree.children.append(item.tree)
else:
self.completed[item] = item
self.to_reduce.append(item)
self.to_reduce.append(item)
else:
if item not in added:
added.add(item)

View File

@ -19,6 +19,7 @@ logging.basicConfig(level=logging.INFO)
from lark.lark import Lark
from lark.common import GrammarError, ParseError
from lark.lexer import LexError
from lark.tree import Tree
__path__ = os.path.dirname(__file__)
def _read(n, *args):
@ -104,6 +105,21 @@ class TestEarley(unittest.TestCase):
res = l.parse("aaa")
self.assertEqual(res.children, ['aaa'])
def test_earley_repeating_empty(self):
    """Regression test: the same empty rule used twice in a row.

    The grammar places two consecutive ``empty`` references between the
    literals "a" and "b". ``empty`` expands to ``empty2``, which has an
    empty expansion, so neither reference consumes any input. Parsing
    'ab' must still produce one ``empty -> empty2`` subtree per reference
    rather than dropping or merging one of them.
    """
    # The grammar text is kept flush-left so the literal stays unchanged.
    earley = Lark("""
!start: "a" empty empty "b"
empty: empty2
empty2:
""", parser='earley', lexer=None)
    parsed = earley.parse('ab')

    # One expected subtree, listed once per ``empty`` reference in ``start``.
    nothing = Tree('empty', [Tree('empty2', [])])
    expected_children = ['a', nothing, nothing, 'b']
    self.assertSequenceEqual(parsed.children, expected_children)
def _make_parser_test(LEXER, PARSER):
def _Lark(grammar, **kwargs):
return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)