mirror of https://github.com/lark-parser/lark.git
Improved error reporting in Earley
This commit is contained in:
parent
7ba98c46f6
commit
1602482608
|
@ -43,7 +43,7 @@ Notice punctuation doesn't appear in the resulting tree. It's automatically filt
|
|||
|
||||
## Learn more about using Lark
|
||||
|
||||
- Read the [tutorial](/docs/json_tutorial.md), which shows how to write a JSON parser in Lark.
|
||||
- **Read the [tutorial](/docs/json_tutorial.md)**, which shows how to write a JSON parser in Lark.
|
||||
- Read the [reference](/docs/reference.md)
|
||||
- Browse the [examples](/examples), which include a calculator, and a Python-code parser.
|
||||
- Check out the [tests](/tests/test_parser.py) for more examples.
|
||||
|
@ -72,6 +72,7 @@ These features are planned to be implemented in the near future:
|
|||
- Grammar composition (in cases where the tokens can reliably signify a grammar change)
|
||||
- Parser generator - create a small parser, independent of Lark, to embed in your project.
|
||||
- Optimizations in both the parsers and the lexer
|
||||
- Better handling of ambiguity
|
||||
|
||||
## Comparison to other parsers
|
||||
|
||||
|
|
|
@ -6,6 +6,22 @@ class ParseError(Exception):
|
|||
pass
|
||||
|
||||
|
||||
class UnexpectedToken(ParseError):
    """Parse error raised for a token the parser cannot accept.

    Attributes:
        token: the offending token.
        expected: the expected symbols, exactly as supplied by the caller.
        line, column: position read from the token, or '?' when the token
            does not carry that attribute.
    """

    def __init__(self, token, expected, seq, index):
        """Build the error message from the token and its surroundings.

        token    -- the unexpected token; its ``value`` attribute is shown
        expected -- what the parser expected instead (formatted with %s)
        seq      -- the token sequence being parsed
        index    -- position in ``seq`` where the context starts; up to
                    five tokens from there are listed as value(type)
        """
        self.token = token
        self.expected = expected
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # Name this class (not ParseError) so the MRO lookup starts right
        # after UnexpectedToken and ParseError's initializer is not skipped.
        super(UnexpectedToken, self).__init__(message)
|
||||
|
||||
|
||||
|
||||
def is_terminal(sym):
    """Return True if the symbol name `sym` denotes a terminal.

    A name counts as a terminal when it is all upper-case or when it
    begins with '$'. An empty string is not a terminal.
    """
    # startswith() instead of sym[0]: indexing raised IndexError on ''.
    return sym.isupper() or sym.startswith('$')
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
"My name is Earley"
|
||||
|
||||
from ..utils import classify, STRING_TYPE
|
||||
from ..common import ParseError
|
||||
from ..common import ParseError, UnexpectedToken
|
||||
|
||||
try:
|
||||
xrange
|
||||
|
@ -14,6 +14,7 @@ class MatchFailed(object):
|
|||
class AbortParseMatch(Exception):
    """Exception type named for aborting a parse match.

    NOTE(review): where it is raised and caught is not visible in this
    excerpt -- confirm against the rest of the Earley module.
    """
|
||||
|
||||
|
||||
class Rule(object):
|
||||
def __init__(self, name, symbols, postprocess):
|
||||
self.name = name
|
||||
|
@ -34,6 +35,8 @@ class State(object):
|
|||
if self.is_literal:
|
||||
self.expect_symbol = self.expect_symbol['literal']
|
||||
assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol
|
||||
else:
|
||||
self.is_literal = False
|
||||
|
||||
def next_state(self, data):
    """Return a new State for the same rule, one position further on.

    The new state keeps this state's rule and reference, has ``expect``
    increased by one, and carries a fresh data list made of this state's
    data followed by `data` (this state's own list is not modified).
    """
    advanced_to = self.expect + 1
    collected = self.data + [data]
    return State(self.rule, advanced_to, self.reference, collected)
|
||||
|
@ -136,7 +139,8 @@ class Parser(object):
|
|||
self.advance_to(table, pos + 1, set())
|
||||
|
||||
if not table[-1]:
|
||||
raise ParseError('Error at line {t.line}:{t.column}'.format(t=stream[pos]))
|
||||
expected = {s.expect_symbol for s in table[-2] if s.is_literal}
|
||||
raise UnexpectedToken(stream[pos], expected, stream, pos)
|
||||
|
||||
res = list(self.finish(table))
|
||||
if not res:
|
||||
|
|
|
@ -1,21 +1,5 @@
|
|||
from .lalr_analysis import ACTION_SHIFT
|
||||
from ..common import ParseError
|
||||
|
||||
class UnexpectedToken(ParseError):
    """Parse error raised for input the parser cannot accept.

    Attributes:
        token: the offending token.
        expected: the expected symbols, exactly as supplied by the caller.
        line, column: position read from the token, or '?' when the token
            does not carry that attribute.
    """

    def __init__(self, token, expected, seq, index):
        """Build the error message from the token and its surroundings.

        token    -- the unexpected token; its ``value`` attribute is shown
        expected -- what the parser expected instead (formatted with %s)
        seq      -- the token sequence being parsed
        index    -- position in ``seq`` where the context starts; up to
                    five tokens from there are listed as value(type)
        """
        self.token = token
        self.expected = expected
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected input %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # Name this class (not ParseError) so the MRO lookup starts right
        # after UnexpectedToken and ParseError's initializer is not skipped.
        super(UnexpectedToken, self).__init__(message)
|
||||
|
||||
|
||||
from ..common import ParseError, UnexpectedToken
|
||||
|
||||
|
||||
class Parser(object):
|
||||
|
|
Loading…
Reference in New Issue