From 0f0776c0fa552aa74708570a7fa86f4bb6d54921 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 24 Apr 2018 15:36:53 +0300 Subject: [PATCH] BUGFIX in lexer: Embedding strings overwrote priority (Issue #121) --- lark/lexer.py | 2 ++ tests/test_parser.py | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/lark/lexer.py b/lark/lexer.py index 938d22b..19e1be4 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -143,6 +143,8 @@ def _create_unless(tokens): for retok in tokens_by_type.get(PatternRE, []): unless = [] # {} for strtok in tokens_by_type.get(PatternStr, []): + if strtok.priority > retok.priority: + continue s = strtok.pattern.value m = re.match(retok.pattern.to_regexp(), s) if m and m.group(0) == s: diff --git a/tests/test_parser.py b/tests/test_parser.py index d4d63ca..5c68bec 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1173,6 +1173,18 @@ def _make_parser_test(LEXER, PARSER): self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB') self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB') + @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX + def test_priority_vs_embedded(self): + g = """ + A.2: "a" + WORD: ("a".."z")+ + + start: (A | WORD)+ + """ + l = _Lark(g) + t = l.parse('abc') + self.assertEqual(t.children, ['a', 'bc']) + self.assertEqual(t.children[0].type, 'A')