mirror of https://github.com/lark-parser/lark.git
Mid work. Not promising
This commit is contained in:
parent 39a17f1d56
commit f1e844accd
@@ -20,6 +20,7 @@ class LexerConf(Serialize):

 class ParserConf:
     def __init__(self, rules, callbacks, start):
+        assert isinstance(start, list)
         self.rules = rules
         self.callbacks = callbacks
         self.start = start

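Note: after this hunk, ParserConf requires start to be a list of start-symbol names rather than a single string. A minimal sketch of the new calling convention; the rule and callback values here are empty placeholders, not real lark objects:

    conf_rules = []                        # placeholder for lark Rule objects
    conf_callbacks = {}                    # placeholder for rule callbacks
    start = ['start']                      # must now be a list, even for one symbol
    assert isinstance(start, list)         # the check this hunk introduces
    # Passing the old plain string, start='start', would now trip the assert.
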
@@ -52,7 +52,7 @@ class UnexpectedInput(LarkError):


 class UnexpectedCharacters(LexError, UnexpectedInput):
-    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None):
+    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):
         message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column)

         self.line = line

@@ -65,6 +65,8 @@ class UnexpectedCharacters(LexError, UnexpectedInput):
         message += '\n\n' + self.get_context(seq)
         if allowed:
             message += '\nExpecting: %s\n' % allowed
+        if token_history:
+            message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in token_history)

         super(UnexpectedCharacters, self).__init__(message)

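For illustration, the new 'Previous tokens' line is built with repr() and a comma join; here plain strings stand in for lark Token instances:

    token_history = ['NAME', '=']          # stand-ins for Token objects
    line = '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in token_history)
    print(line)                            # Previous tokens: 'NAME', '='
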
@@ -85,6 +85,9 @@ class LarkOptions(Serialize):

             options[name] = value

+        if isinstance(options['start'], str):
+            options['start'] = [options['start']]
+
         self.__dict__['options'] = options

         assert self.parser in ('earley', 'lalr', 'cyk', None)

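This normalization lets callers keep passing start='start' as a plain string while everything downstream sees a list. The same logic, sketched outside of LarkOptions:

    options = {'start': 'start'}               # user passed a single string
    if isinstance(options['start'], str):
        options['start'] = [options['start']]
    print(options['start'])                    # ['start'] -- always a list downstream
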
@@ -149,6 +149,7 @@ class _Lex:
         newline_types = frozenset(newline_types)
         ignore_types = frozenset(ignore_types)
         line_ctr = LineCounter()
+        last_token = None

         while line_ctr.char_pos < len(stream):
             lexer = self.lexer

@@ -166,6 +167,7 @@ class _Lex:
                         t = lexer.callback[t.type](t)
                         if not isinstance(t, Token):
                             raise ValueError("Callbacks must return a token (returned %r)" % t)
+                    last_token = t
                     yield t
                 else:
                     if type_ in lexer.callback:

@@ -180,7 +182,7 @@ class _Lex:
                     break
             else:
                 allowed = {v for m, tfi in lexer.mres for v in tfi.values()}
-                raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state)
+                raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token])


 class UnlessCallback:

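The token_history=last_token and [last_token] argument uses short-circuit and: it passes None before any token has been produced, and a one-element history list afterwards. A sketch:

    last_token = None
    print(last_token and [last_token])     # None -- no history on the first error
    last_token = 'TOKEN'                   # stand-in for a real Token
    print(last_token and [last_token])     # ['TOKEN']
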
@@ -554,7 +554,8 @@ class Grammar:
                             for s in r.expansion
                             if isinstance(s, NonTerminal)
                             and s != r.origin}
-            compiled_rules = [r for r in compiled_rules if r.origin.name==start or r.origin in used_rules]
+            used_rules |= {NonTerminal(s) for s in start}
+            compiled_rules = [r for r in compiled_rules if r.origin in used_rules]
             if len(compiled_rules) == c:
                 break

@@ -690,7 +691,7 @@ class GrammarLoader:
         callback = ParseTreeBuilder(rules, ST).create_callback()
         lexer_conf = LexerConf(terminals, ['WS', 'COMMENT'])

-        parser_conf = ParserConf(rules, callback, 'start')
+        parser_conf = ParserConf(rules, callback, ['start'])
         self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf)

         self.canonize_tree = CanonizeTree()

@@ -89,7 +89,7 @@ class Parser(object):
         self.orig_rules = {rule: rule for rule in rules}
         rules = [self._to_rule(rule) for rule in rules]
         self.grammar = to_cnf(Grammar(rules))
-        self.start = NT(start)
+        self.start = NT(start[0])

     def _to_rule(self, lark_rule):
         """Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""

@@ -274,7 +274,7 @@ class Parser:
             assert i == len(columns)-1

     def parse(self, stream, start_symbol=None):
-        start_symbol = NonTerminal(start_symbol or self.parser_conf.start)
+        start_symbol = NonTerminal(start_symbol or self.parser_conf.start[0])

         columns = [set()]
         to_scan = set()     # The scan buffer. 'Q' in E.Scott's paper.

@@ -109,7 +109,7 @@ class GrammarAnalyzer(object):
     def __init__(self, parser_conf, debug=False):
         self.debug = debug

-        rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')])]
+        rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(s), Terminal('$END')]) for s in parser_conf.start]
         self.rules_by_origin = classify(rules, lambda r: r.origin)

         if len(rules) != len(set(rules)):

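With multiple start symbols, the analyzer now augments the grammar with one synthetic $root rule per start symbol instead of a single one. A stand-in sketch using tuples in place of lark's Rule/NonTerminal/Terminal classes:

    start = ['a', 'b']                     # parser_conf.start after this change
    root_rules = [('$root', [s, '$END']) for s in start]
    print(root_rules)    # [('$root', ['a', '$END']), ('$root', ['b', '$END'])]
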
@@ -29,10 +29,10 @@ Shift = Action('Shift')
 Reduce = Action('Reduce')

 class ParseTable:
-    def __init__(self, states, start_state, end_state):
+    def __init__(self, states, start_state, end_states):
         self.states = states
         self.start_state = start_state
-        self.end_state = end_state
+        self.end_states = end_states

     def serialize(self, memo):
         tokens = Enumerator()

@@ -48,7 +48,7 @@ class ParseTable:
             'tokens': tokens.reversed(),
             'states': states,
             'start_state': self.start_state,
-            'end_state': self.end_state,
+            'end_states': self.end_states,
         }

     @classmethod

@@ -59,7 +59,7 @@ class ParseTable:
                           for token, (action, arg) in actions.items()}
                   for state, actions in data['states'].items()
         }
-        return cls(states, data['start_state'], data['end_state'])
+        return cls(states, data['start_state'], data['end_states'])


 class IntParseTable(ParseTable):

@@ -77,8 +77,8 @@ class IntParseTable(ParseTable):


         start_state = state_to_idx[parse_table.start_state]
-        end_state = state_to_idx[parse_table.end_state]
-        return cls(int_states, start_state, end_state)
+        end_states = [state_to_idx[s] for s in parse_table.end_states]
+        return cls(int_states, start_state, end_states)

 ###}

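IntParseTable now renumbers every accepting state through state_to_idx rather than a single end state. A minimal sketch with made-up state names:

    state_to_idx = {'I0': 0, 'I7': 7, 'I9': 9}    # hypothetical itemset-to-int map
    end_states = [state_to_idx[s] for s in ['I7', 'I9']]
    print(end_states)    # [7, 9] -- one accepting state per start symbol
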
@@ -130,9 +130,7 @@ class LALR_Analyzer(GrammarAnalyzer):
         for _ in bfs([self.start_state], step):
             pass

-        self.end_state ,= self.end_states
-
-        self._parse_table = ParseTable(self.states, self.start_state, self.end_state)
+        self._parse_table = ParseTable(self.states, self.start_state, self.end_states)

         if self.debug:
             self.parse_table = self._parse_table

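The removed self.end_state ,= self.end_states line used single-element tuple unpacking, which asserts that exactly one end state exists; deleting it is the heart of the multi-start change. The idiom, sketched:

    end_states = ['I7']        # exactly one accepting state
    end_state ,= end_states    # single-element unpack; more than one raises ValueError
    print(end_state)           # 'I7'
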
@@ -40,7 +40,7 @@ class _Parser:
     def __init__(self, parse_table, callbacks):
         self.states = parse_table.states
         self.start_state = parse_table.start_state
-        self.end_state = parse_table.end_state
+        self.end_states = parse_table.end_states
         self.callbacks = callbacks

     def parse(self, seq, set_state=None):

@@ -81,7 +81,7 @@ class _Parser:
         for token in stream:
             while True:
                 action, arg = get_action(token)
-                assert arg != self.end_state
+                assert arg not in self.end_states

                 if action is Shift:
                     state_stack.append(arg)

@@ -95,7 +95,7 @@ class _Parser:
         while True:
             _action, arg = get_action(token)
             if _action is Shift:
-                assert arg == self.end_state
+                assert arg in self.end_states
                 val ,= value_stack
                 return val
             else:

@@ -1523,6 +1523,15 @@ def _make_parser_test(LEXER, PARSER):
             parser3 = Lark.deserialize(d, namespace, m)
             self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) )

+        def test_multi_start(self):
+            parser = _Lark('''
+                a: "x"
+                b: "x" "b"?
+                ''', start=['a', 'b'])
+
+            # parser.parse('acab')
+            # parser.parse('bcab')
+

     _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
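The new test is still stubbed out (both parse calls are commented), consistent with the 'Mid work' commit message. For reference, later lark releases expose the finished feature roughly as below; treating the parse-time start argument as available is an assumption at this commit:

    from lark import Lark

    parser = Lark('''
        a: "x"
        b: "x" "b"?
        ''', start=['a', 'b'])

    print(parser.parse('x', start='a'))     # select the start rule per call
    print(parser.parse('xb', start='b'))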