mirror of https://github.com/lark-parser/lark.git
Feature: Added explicit ambiguity option for Earley
This commit is contained in:
parent
0c0d210872
commit
f374e70b2c
|
@ -0,0 +1,39 @@
|
|||
#
|
||||
# This example shows how to get explicit ambiguity from Lark's Earley parser.
|
||||
#
|
||||
|
||||
from lark import Lark
|
||||
|
||||
g = """
|
||||
sentence: noun verb noun -> simple
|
||||
| noun verb "like" noun -> comparative
|
||||
|
||||
noun: ADJ? NOUN
|
||||
verb: VERB
|
||||
|
||||
NOUN: "flies" | "bananas" | "fruit"
|
||||
VERB: "like" | "flies"
|
||||
ADJ: "fruit"
|
||||
|
||||
%import common.WS
|
||||
%ignore WS
|
||||
"""
|
||||
|
||||
lark = Lark(g, start='sentence', ambiguity='explicit')
|
||||
|
||||
print(lark.parse('fruit flies like bananas').pretty())
|
||||
|
||||
# Outputs:
|
||||
#
|
||||
# _ambig
|
||||
# comparative
|
||||
# noun fruit
|
||||
# verb flies
|
||||
# noun bananas
|
||||
# simple
|
||||
# noun
|
||||
# fruit
|
||||
# flies
|
||||
# verb like
|
||||
# noun bananas
|
||||
|
|
@ -3,4 +3,4 @@ from .common import ParseError, GrammarError
|
|||
from .lark import Lark
|
||||
from .utils import inline_args
|
||||
|
||||
__version__ = "0.2.6"
|
||||
__version__ = "0.2.7"
|
||||
|
|
25
lark/lark.py
25
lark/lark.py
|
@ -27,6 +27,11 @@ class LarkOptions(object):
|
|||
"contextual": Stronger lexer (only works with parser="lalr")
|
||||
"auto" (default): Choose for me based on grammar and parser
|
||||
|
||||
ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
|
||||
"resolve": The parser will automatically choose the simplest derivation
|
||||
(it chooses consistently: greedy for tokens, non-greedy for rules)
|
||||
"explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
|
||||
|
||||
transformer - Applies the transformer to every parse tree
|
||||
debug - Affects verbosity (default: False)
|
||||
keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
|
||||
|
@ -49,6 +54,7 @@ class LarkOptions(object):
|
|||
self.transformer = o.pop('transformer', None)
|
||||
self.start = o.pop('start', 'start')
|
||||
self.profile = o.pop('profile', False)
|
||||
self.ambiguity = o.pop('ambiguity', 'auto')
|
||||
|
||||
assert self.parser in ('earley', 'lalr', None)
|
||||
|
||||
|
@ -119,13 +125,20 @@ class Lark:
|
|||
assert not self.options.profile, "Feature temporarily disabled"
|
||||
self.profiler = Profiler() if self.options.profile else None
|
||||
|
||||
lexer = self.options.lexer
|
||||
if lexer == 'auto':
|
||||
if self.options.lexer == 'auto':
|
||||
if self.options.parser == 'lalr':
|
||||
lexer = 'standard'
|
||||
self.options.lexer = 'standard'
|
||||
elif self.options.parser == 'earley':
|
||||
lexer = None
|
||||
self.options.lexer = lexer
|
||||
self.options.lexer = None
|
||||
lexer = self.options.lexer
|
||||
assert lexer in ('standard', 'contextual', None)
|
||||
|
||||
if self.options.ambiguity == 'auto':
|
||||
if self.options.parser == 'earley':
|
||||
self.options.ambiguity = 'resolve'
|
||||
else:
|
||||
assert self.options.parser == 'earley'
|
||||
assert self.options.ambiguity in ('resolve', 'explicit', 'auto')
|
||||
|
||||
self.grammar = load_grammar(grammar, source)
|
||||
tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer), start=self.options.start)
|
||||
|
@ -155,7 +168,7 @@ class Lark:
|
|||
setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f)))
|
||||
parser_conf = ParserConf(rules, callback, self.options.start)
|
||||
|
||||
return self.parser_class(self.lexer_conf, parser_conf)
|
||||
return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
|
||||
|
||||
|
||||
def lex(self, text):
|
||||
|
|
|
@ -20,7 +20,7 @@ class WithLexer:
|
|||
return stream
|
||||
|
||||
class LALR(WithLexer):
|
||||
def __init__(self, lexer_conf, parser_conf):
|
||||
def __init__(self, lexer_conf, parser_conf, options=None):
|
||||
WithLexer.__init__(self, lexer_conf)
|
||||
|
||||
self.parser_conf = parser_conf
|
||||
|
@ -31,7 +31,7 @@ class LALR(WithLexer):
|
|||
return self.parser.parse(tokens)
|
||||
|
||||
class LALR_ContextualLexer:
|
||||
def __init__(self, lexer_conf, parser_conf):
|
||||
def __init__(self, lexer_conf, parser_conf, options=None):
|
||||
self.lexer_conf = lexer_conf
|
||||
self.parser_conf = parser_conf
|
||||
|
||||
|
@ -126,12 +126,16 @@ class OldEarley_NoLex:
|
|||
return res[0]
|
||||
|
||||
class Earley_NoLex:
|
||||
def __init__(self, lexer_conf, parser_conf):
|
||||
def __init__(self, lexer_conf, parser_conf, options=None):
|
||||
self.token_by_name = {t.name:t for t in lexer_conf.tokens}
|
||||
|
||||
rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules]
|
||||
|
||||
self.parser = earley.Parser(rules, parser_conf.start, parser_conf.callback)
|
||||
resolve_ambiguity = (options.ambiguity=='resolve') if options else True
|
||||
self.parser = earley.Parser(rules,
|
||||
parser_conf.start,
|
||||
parser_conf.callback,
|
||||
resolve_ambiguity=resolve_ambiguity)
|
||||
|
||||
def _prepare_expansion(self, expansion):
|
||||
for sym in expansion:
|
||||
|
@ -149,12 +153,16 @@ class Earley_NoLex:
|
|||
return self.parser.parse(new_text)
|
||||
|
||||
class Earley(WithLexer):
|
||||
def __init__(self, lexer_conf, parser_conf):
|
||||
def __init__(self, lexer_conf, parser_conf, options=None):
|
||||
WithLexer.__init__(self, lexer_conf)
|
||||
|
||||
rules = [(n, self._prepare_expansion(x), a) for n,x,a in parser_conf.rules]
|
||||
|
||||
self.parser = earley.Parser(rules, parser_conf.start, parser_conf.callback)
|
||||
resolve_ambiguity = (options.ambiguity=='resolve') if options else True
|
||||
self.parser = earley.Parser(rules,
|
||||
parser_conf.start,
|
||||
parser_conf.callback,
|
||||
resolve_ambiguity=resolve_ambiguity)
|
||||
|
||||
def _prepare_expansion(self, expansion):
|
||||
return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion]
|
||||
|
|
|
@ -101,10 +101,10 @@ class Column:
|
|||
# XXX Potential bug: What happens if there's ambiguity in an empty rule?
|
||||
if item.rule.expansion and item in self.completed:
|
||||
old_tree = self.completed[item].tree
|
||||
if old_tree.data != 'ambig':
|
||||
if old_tree.data != '_ambig':
|
||||
new_tree = old_tree.copy()
|
||||
new_tree.rule = old_tree.rule
|
||||
old_tree.set('ambig', [new_tree])
|
||||
old_tree.set('_ambig', [new_tree])
|
||||
if item.tree.children[0] is old_tree: # XXX a little hacky!
|
||||
raise ParseError("Infinite recursion in grammar!")
|
||||
old_tree.children.append(item.tree)
|
||||
|
@ -125,9 +125,10 @@ class Column:
|
|||
return bool(self.item_count)
|
||||
|
||||
class Parser:
|
||||
def __init__(self, rules, start, callback):
|
||||
def __init__(self, rules, start, callback, resolve_ambiguity=True):
|
||||
self.analysis = GrammarAnalyzer(rules, start)
|
||||
self.start = start
|
||||
self.resolve_ambiguity = resolve_ambiguity
|
||||
|
||||
self.postprocess = {}
|
||||
self.predictions = {}
|
||||
|
@ -197,9 +198,11 @@ class Parser:
|
|||
elif len(solutions) == 1:
|
||||
tree = solutions[0]
|
||||
else:
|
||||
tree = Tree('ambig', solutions)
|
||||
tree = Tree('_ambig', solutions)
|
||||
|
||||
if self.resolve_ambiguity:
|
||||
ResolveAmbig().visit(tree)
|
||||
|
||||
ResolveAmbig().visit(tree)
|
||||
return ApplyCallbacks(self.postprocess).transform(tree)
|
||||
|
||||
|
||||
|
@ -220,9 +223,8 @@ def _compare_rules(rule1, rule2):
|
|||
assert rule1.origin == rule2.origin
|
||||
c = compare( len(rule1.expansion), len(rule2.expansion))
|
||||
if rule1.origin.startswith('__'): # XXX hack! We need to set priority in parser, not here
|
||||
return c
|
||||
else:
|
||||
return -c
|
||||
c = -c
|
||||
return c
|
||||
|
||||
def _compare_drv(tree1, tree2):
|
||||
if not (isinstance(tree1, Tree) and isinstance(tree2, Tree)):
|
||||
|
@ -242,8 +244,8 @@ def _compare_drv(tree1, tree2):
|
|||
|
||||
|
||||
class ResolveAmbig(Visitor_NoRecurse):
|
||||
def ambig(self, tree):
|
||||
best = max(tree.children, key=cmp_to_key(_compare_drv))
|
||||
def _ambig(self, tree):
|
||||
best = min(tree.children, key=cmp_to_key(_compare_drv))
|
||||
assert best.data == 'drv'
|
||||
tree.set('drv', best.children)
|
||||
tree.rule = best.rule # needed for applying callbacks
|
||||
|
|
|
@ -120,6 +120,23 @@ class TestEarley(unittest.TestCase):
|
|||
empty_tree = Tree('empty', [Tree('empty2', [])])
|
||||
self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])
|
||||
|
||||
def test_earley_explicit_ambiguity(self):
|
||||
# This was a sneaky bug!
|
||||
|
||||
grammar = """
|
||||
start: a b | ab
|
||||
a: "a"
|
||||
b: "b"
|
||||
ab: "ab"
|
||||
"""
|
||||
|
||||
parser = Lark(grammar, parser='earley', lexer=None, ambiguity='explicit')
|
||||
res = parser.parse('ab')
|
||||
|
||||
self.assertEqual( res.data, '_ambig')
|
||||
self.assertEqual( len(res.children), 2)
|
||||
|
||||
|
||||
def _make_parser_test(LEXER, PARSER):
|
||||
def _Lark(grammar, **kwargs):
|
||||
return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)
|
||||
|
|
Loading…
Reference in New Issue