Improved error reporting in Earley

This commit is contained in:
Erez Shinan 2017-02-12 23:31:09 +02:00
parent 7ba98c46f6
commit 1602482608
4 changed files with 25 additions and 20 deletions

View File

@ -43,7 +43,7 @@ Notice punctuation doesn't appear in the resulting tree. It's automatically filt
## Learn more about using Lark
- Read the [tutorial](/docs/json_tutorial.md), which shows how to write a JSON parser in Lark.
- **Read the [tutorial](/docs/json_tutorial.md)**, which shows how to write a JSON parser in Lark.
- Read the [reference](/docs/reference.md)
- Browse the [examples](/examples), which include a calculator, and a Python-code parser.
- Check out the [tests](/tests/test_parser.py) for more examples.
@ -72,6 +72,7 @@ These features are planned to be implemented in the near future:
- Grammar composition (in cases that the tokens can reliably signify a grammar change)
- Parser generator - create a small parser, independent of Lark, to embed in your project.
- Optimizations in both the parsers and the lexer
- Better handling of ambiguity
## Comparison to other parsers

View File

@ -6,6 +6,22 @@ class ParseError(Exception):
pass
class UnexpectedToken(ParseError):
    """Parse error describing a token the parser could not accept.

    Attributes set on the instance:
        token    -- the offending token, exactly as passed in.
        expected -- the caller-supplied description of acceptable input.
        line     -- ``token.line``, or '?' if the token has no such attribute.
        column   -- ``token.column``, or '?' if the token has no such attribute.
    """

    def __init__(self, token, expected, seq, index):
        """Build the error message from the token and its surroundings.

        token    -- the unexpected token; must expose ``.value``.
        expected -- what would have been accepted; interpolated with %s.
        seq      -- sliceable token sequence; items must expose ``.value``
                    and ``.type``.
        index    -- position in ``seq`` where the context excerpt starts.
        """
        self.token = token
        self.expected = expected
        # Position attributes are optional on the token, so fall back to '?'.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        # Excerpt of at most five tokens, starting at the failure point.
        context = ' '.join('%r(%s)' % (t.value, t.type)
                           for t in seq[index:index + 5])
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column,
                                    expected, context))
        # super() must name *this* class; naming ParseError would start the
        # MRO lookup after ParseError and bypass any __init__ it defines.
        super(UnexpectedToken, self).__init__(message)
def is_terminal(sym):
    """Return True if the symbol name ``sym`` denotes a terminal.

    A name counts as a terminal when ``sym.isupper()`` is true or when it
    begins with '$'. An empty name is not a terminal.
    """
    # startswith() rather than sym[0]: an empty name yields False instead of
    # raising IndexError.
    return sym.isupper() or sym.startswith('$')

View File

@ -1,7 +1,7 @@
"My name is Earley"
from ..utils import classify, STRING_TYPE
from ..common import ParseError
from ..common import ParseError, UnexpectedToken
try:
xrange
@ -14,6 +14,7 @@ class MatchFailed(object):
class AbortParseMatch(Exception):
    """Exception type with no behaviour beyond ``Exception`` itself.

    NOTE(review): presumably raised to abandon a match during Earley
    parsing; the raise/catch sites are not visible here -- confirm.
    """
class Rule(object):
def __init__(self, name, symbols, postprocess):
self.name = name
@ -34,6 +35,8 @@ class State(object):
if self.is_literal:
self.expect_symbol = self.expect_symbol['literal']
assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol
else:
self.is_literal = False
def next_state(self, data):
    """Return a new State one step further along the same rule.

    The new State keeps this state's ``rule`` and ``reference``, has its
    ``expect`` index increased by one, and receives a fresh list made of
    ``self.data`` followed by ``data`` (``self.data`` itself is not mutated).
    """
    following_index = self.expect + 1
    extended_data = self.data + [data]
    return State(self.rule, following_index, self.reference, extended_data)
@ -136,7 +139,8 @@ class Parser(object):
self.advance_to(table, pos + 1, set())
if not table[-1]:
raise ParseError('Error at line {t.line}:{t.column}'.format(t=stream[pos]))
expected = {s.expect_symbol for s in table[-2] if s.is_literal}
raise UnexpectedToken(stream[pos], expected, stream, pos)
res = list(self.finish(table))
if not res:

View File

@ -1,21 +1,5 @@
from .lalr_analysis import ACTION_SHIFT
from ..common import ParseError
class UnexpectedToken(ParseError):
    """Parse error describing input the parser could not accept.

    Attributes set on the instance:
        token    -- the offending token, exactly as passed in.
        expected -- the caller-supplied description of acceptable input.
        line     -- ``token.line``, or '?' if the token has no such attribute.
        column   -- ``token.column``, or '?' if the token has no such attribute.
    """

    def __init__(self, token, expected, seq, index):
        """Build the error message from the token and its surroundings.

        token    -- the unexpected token; must expose ``.value``.
        expected -- what would have been accepted; interpolated with %s.
        seq      -- sliceable token sequence; items must expose ``.value``
                    and ``.type``.
        index    -- position in ``seq`` where the context excerpt starts.
        """
        self.token = token
        self.expected = expected
        # Position attributes are optional on the token, so fall back to '?'.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        # Excerpt of at most five tokens, starting at the failure point.
        context = ' '.join('%r(%s)' % (t.value, t.type)
                           for t in seq[index:index + 5])
        message = ("Unexpected input %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column,
                                    expected, context))
        # super() must name *this* class; naming ParseError would start the
        # MRO lookup after ParseError and bypass any __init__ it defines.
        super(UnexpectedToken, self).__init__(message)
from ..common import ParseError, UnexpectedToken
class Parser(object):