FEATURE: Added support for ranged-repeat for rules and terminals (Issues #75, #19)

Syntax: symbol~number
      | symbol~min..max

Example:

  HEXCOLOR: "#" (HEXDIGIT~3 | HEXDIGIT~6)
  short_sentence: word~4..20

Added ranged-repeat support for tokens
This commit is contained in:
Erez Shinan 2018-02-10 19:43:19 +02:00
parent 1cc8bc9848
commit 7d11dfa5cd
2 changed files with 79 additions and 3 deletions

View File

@@ -70,6 +70,7 @@ TOKENS = {
'_COLON': ':',
'_OR': r'\|',
'_DOT': r'\.',
'TILDE': '~',
'RULE': '!?[_?]?[a-z][_a-z0-9]*',
'TOKEN': '_?[A-Z][_A-Z0-9]*',
'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
@@ -100,7 +101,10 @@ RULES = {
'_expansion': ['', '_expansion expr'],
'?expr': ['atom',
'atom OP'],
'atom OP',
'atom TILDE NUMBER',
'atom TILDE NUMBER _DOT _DOT NUMBER',
],
'?atom': ['_LPAR expansions _RPAR',
'maybe',
@@ -146,7 +150,7 @@ class EBNF_to_BNF(InlineTransformer):
self.rules_by_expr[expr] = t
return t
def expr(self, rule, op):
def expr(self, rule, op, *args):
if op.value == '?':
return T('expansions', [rule, T('expansion', [])])
elif op.value == '+':
@@ -162,6 +166,14 @@ class EBNF_to_BNF(InlineTransformer):
# _c : _c c | c;
new_name = self._add_recurse_rule('star', rule)
return T('expansions', [new_name, T('expansion', [])])
elif op.value == '~':
if len(args) == 1:
mn = mx = int(args[0])
else:
mn, mx = map(int, args)
if mx < mn:
raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))
return T('expansions', [T('expansion', [rule] * n) for n in range(mn, mx+1)])
assert False, op
@@ -377,7 +389,17 @@ class TokenTreeToPattern(Transformer):
return PatternRE('(?:%s)' % ('|'.join(i.to_regexp() for i in exps)), exps[0].flags)
def expr(self, args):
    """Build the regexp pattern for a repetition expression inside a token.

    ``args`` is ``[inner, op]`` for an ordinary operator (appended to the
    regexp as-is), ``[inner, '~', n]`` for an exact repeat, or
    ``[inner, '~', mn, mx]`` for a ranged repeat.  The ``~`` forms are
    translated to the regexp quantifiers ``{n}`` and ``{mn,mx}``.

    Raises:
        GrammarError: if the range is reversed (``mx < mn``).
    """
    # Only the first two items are always present.  Unpacking the whole of
    # ``args`` into two names would raise ValueError for the 3- and 4-item
    # ``~`` forms, so slice before unpacking.
    inner, op = args[:2]
    if op == '~':
        if len(args) == 3:
            # symbol~n  ->  exactly n repetitions
            op = "{%d}" % int(args[2])
        else:
            # symbol~mn..mx  ->  between mn and mx repetitions
            mn, mx = map(int, args[2:])
            if mx < mn:
                raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx))
            op = "{%d,%d}" % (mn, mx)
    else:
        # Every other operator carries no extra operands.
        assert len(args) == 2
    return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)

View File

@@ -1074,6 +1074,60 @@ def _make_parser_test(LEXER, PARSER):
_Lark(r'start: "\\\t"').parse('\\\t')
def test_ranged_repeat_rules(self):
    """Check the ``~n`` and ``~min..max`` repeat operators when used in rules."""
    rejected = (ParseError, UnexpectedInput)

    # Exact repeat: only three "A"s are accepted.
    exact = _Lark(u'!start: "A"~3\n')
    self.assertEqual(exact.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
    self.assertRaises(ParseError, exact.parse, u'AA')
    self.assertRaises(rejected, exact.parse, u'AAAA')

    # Range starting at zero: zero, one or two "A"s are accepted.
    zero_to_two = u'!start: "A"~0..2\n'
    # NOTE(review): the guard is assumed to cover only the assertions for
    # this grammar -- confirm against the original indentation.
    if PARSER != 'cyk':  # XXX CYK currently doesn't support empty grammars
        ranged = _Lark(zero_to_two)
        for text, children in ((u'', []), (u'A', ['A']), (u'AA', ['A', 'A'])):
            self.assertEqual(ranged.parse(text), Tree('start', children))
        self.assertRaises((UnexpectedToken, UnexpectedInput), ranged.parse, u'AAA')

    # A reversed range must be rejected when the grammar is loaded.
    self.assertRaises(GrammarError, _Lark, u'!start: "A"~3..2\n')

    # Two repeats in sequence: two or three "A"s followed by exactly two "B"s.
    combined = _Lark(u'!start: "A"~2..3 "B"~2\n')
    self.assertEqual(combined.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
    self.assertEqual(combined.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
    self.assertRaises(ParseError, combined.parse, u'AAAB')
    for text in (u'AAABBB', u'ABB', u'AAAABB'):
        self.assertRaises(rejected, combined.parse, text)
def test_ranged_repeat_terms(self):
    """Check the ``~n`` and ``~min..max`` repeat operators inside terminals."""
    rejected = (ParseError, UnexpectedInput)

    # Exact repeat inside a terminal: the whole "AAA" comes back as one token.
    single = _Lark(u'!start: AAA\n'
                   u'AAA: "A"~3\n')
    self.assertEqual(single.parse(u'AAA'), Tree('start', ["AAA"]))
    for text in (u'AA', u'AAAA'):
        self.assertRaises(rejected, single.parse, text)

    # Ranged repeats combined inside terminals.
    multi = _Lark(u'!start: AABB CC\n'
                  u'AABB: "A"~0..2 "B"~2\n'
                  u'CC: "C"~1..2\n')
    accepted = (
        (u'AABBCC', ['AABB', 'CC']),
        (u'BBC', ['BB', 'C']),
        (u'ABBCC', ['ABB', 'CC']),
    )
    for text, tokens in accepted:
        self.assertEqual(multi.parse(text), Tree('start', tokens))

    # NOTE(review): none of these inputs contains a "C", which the grammar
    # requires, so they do not isolate the A/B repeat limits.
    for text in (u'AAAB', u'AAABBB', u'ABB', u'AAAABB'):
        self.assertRaises(rejected, multi.parse, text)