mirror of https://github.com/lark-parser/lark.git
Syntax: symbol~number | symbol~min..max

Example:
    HEXCOLOR: "#" (HEXDIGIT~3 | HEXDIGIT~6)
    short_sentence: word~4..20

Added range for tokens.
This commit is contained in:
parent
1cc8bc9848
commit
7d11dfa5cd
|
@ -70,6 +70,7 @@ TOKENS = {
|
|||
'_COLON': ':',
|
||||
'_OR': r'\|',
|
||||
'_DOT': r'\.',
|
||||
'TILDE': '~',
|
||||
'RULE': '!?[_?]?[a-z][_a-z0-9]*',
|
||||
'TOKEN': '_?[A-Z][_A-Z0-9]*',
|
||||
'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
|
||||
|
@ -100,7 +101,10 @@ RULES = {
|
|||
'_expansion': ['', '_expansion expr'],
|
||||
|
||||
'?expr': ['atom',
|
||||
'atom OP'],
|
||||
'atom OP',
|
||||
'atom TILDE NUMBER',
|
||||
'atom TILDE NUMBER _DOT _DOT NUMBER',
|
||||
],
|
||||
|
||||
'?atom': ['_LPAR expansions _RPAR',
|
||||
'maybe',
|
||||
|
@ -146,7 +150,7 @@ class EBNF_to_BNF(InlineTransformer):
|
|||
self.rules_by_expr[expr] = t
|
||||
return t
|
||||
|
||||
def expr(self, rule, op):
|
||||
def expr(self, rule, op, *args):
|
||||
if op.value == '?':
|
||||
return T('expansions', [rule, T('expansion', [])])
|
||||
elif op.value == '+':
|
||||
|
@ -162,6 +166,14 @@ class EBNF_to_BNF(InlineTransformer):
|
|||
# _c : _c c | c;
|
||||
new_name = self._add_recurse_rule('star', rule)
|
||||
return T('expansions', [new_name, T('expansion', [])])
|
||||
elif op.value == '~':
|
||||
if len(args) == 1:
|
||||
mn = mx = int(args[0])
|
||||
else:
|
||||
mn, mx = map(int, args)
|
||||
if mx < mn:
|
||||
raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))
|
||||
return T('expansions', [T('expansion', [rule] * n) for n in range(mn, mx+1)])
|
||||
assert False, op
|
||||
|
||||
|
||||
|
@ -377,7 +389,17 @@ class TokenTreeToPattern(Transformer):
|
|||
return PatternRE('(?:%s)' % ('|'.join(i.to_regexp() for i in exps)), exps[0].flags)
|
||||
|
||||
def expr(self, args):
    """Build the pattern for a terminal expression followed by an operator.

    ``args`` starts with ``inner`` (an object providing ``to_regexp()`` and
    ``flags``) and the operator ``op``.  When ``op`` is ``'~'`` it is
    followed by either one count (exact repeat) or two counts (``min`` and
    ``max`` of a ranged repeat).  Any other operator must be the last
    element and is appended to the regexp verbatim.

    Returns a ``PatternRE`` wrapping ``inner``'s regexp in a non-capturing
    group followed by the operator or the generated ``{n}`` / ``{min,max}``
    quantifier, carrying over ``inner.flags``.

    Raises ``GrammarError`` if a ranged repeat has ``max < min``.
    """
    # Slice instead of unpacking the whole list: the '~' forms carry one or
    # two extra count arguments, so ``inner, op = args`` would raise
    # ValueError for them.
    inner, op = args[:2]
    if op == '~':
        if len(args) == 3:
            # Exact repeat: inner~n
            op = "{%d}" % int(args[2])
        else:
            # Ranged repeat: inner~min..max
            mn, mx = map(int, args[2:])
            if mx < mn:
                raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx))
            op = "{%d,%d}" % (mn, mx)
    else:
        assert len(args) == 2
    return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)
|
||||
|
||||
|
||||
|
|
|
@ -1074,6 +1074,60 @@ def _make_parser_test(LEXER, PARSER):
|
|||
_Lark(r'start: "\\\t"').parse('\\\t')
|
||||
|
||||
|
||||
def test_ranged_repeat_rules(self):
    """Exercise ``~n`` and ``~min..max`` repetition used inside rules."""
    parse_or_input_error = (ParseError, UnexpectedInput)

    # Exact repeat: only three consecutive "A" tokens are accepted.
    parser = _Lark(u'!start: "A"~3\n')
    self.assertEqual(parser.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
    self.assertRaises(ParseError, parser.parse, u'AA')
    self.assertRaises(parse_or_input_error, parser.parse, u'AAAA')

    # Range starting at zero: the empty input must be accepted as well.
    grammar = u'!start: "A"~0..2\n'
    if PARSER != 'cyk':  # XXX CYK currently doesn't support empty grammars
        parser = _Lark(grammar)
        accepted = (
            (u'', []),
            (u'A', ['A']),
            (u'AA', ['A', 'A']),
        )
        for text, children in accepted:
            self.assertEqual(parser.parse(text), Tree('start', children))
        self.assertRaises((UnexpectedToken, UnexpectedInput), parser.parse, u'AAA')

    # A range whose maximum is below its minimum fails when the grammar
    # is loaded.
    self.assertRaises(GrammarError, _Lark, u'!start: "A"~3..2\n')

    # A ranged repeat followed by an exact repeat in the same rule.
    parser = _Lark(u'!start: "A"~2..3 "B"~2\n')
    self.assertEqual(parser.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
    self.assertEqual(parser.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
    self.assertRaises(ParseError, parser.parse, u'AAAB')
    for text in (u'AAABBB', u'ABB', u'AAAABB'):
        self.assertRaises(parse_or_input_error, parser.parse, text)
|
||||
|
||||
|
||||
def test_ranged_repeat_terms(self):
    """Exercise ``~n`` and ``~min..max`` repetition used inside terminals."""
    parse_or_input_error = (ParseError, UnexpectedInput)

    # Exact repeat inside a terminal definition.
    parser = _Lark(u'!start: AAA\n'
                   u'AAA: "A"~3\n')
    self.assertEqual(parser.parse(u'AAA'), Tree('start', ["AAA"]))
    for text in (u'AA', u'AAAA'):
        self.assertRaises(parse_or_input_error, parser.parse, text)

    # Ranged and exact repeats combined across two terminals.
    parser = _Lark(u'!start: AABB CC\n'
                   u'AABB: "A"~0..2 "B"~2\n'
                   u'CC: "C"~1..2\n')
    accepted = (
        (u'AABBCC', ['AABB', 'CC']),
        (u'BBC', ['BB', 'C']),
        (u'ABBCC', ['ABB', 'CC']),
    )
    for text, children in accepted:
        self.assertEqual(parser.parse(text), Tree('start', children))
    for text in (u'AAAB', u'AAABBB', u'ABB', u'AAAABB'):
        self.assertRaises(parse_or_input_error, parser.parse, text)
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue