lark/tests/test_parser.py

from __future__ import absolute_import

import unittest
import logging
import os
import sys
try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x, 3.x only has io.StringIO from below
    cStringIO = None
from io import (
        StringIO as uStringIO,
        open,
    )

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.common import GrammarError, ParseError
from lark.lexer import LexError

# Directory that holds this test module; _read() resolves file names against it.
__path__ = os.path.dirname(__file__)
def _read(n, *args):
    """Return the whole contents of the file named *n* next to this module.

    Any extra positional arguments are passed straight through to ``open``.
    """
    target = os.path.join(__path__, n)
    with open(target, *args) as handle:
        contents = handle.read()
    return contents

class TestParsers(unittest.TestCase):
    """Tests that construct ``Lark`` directly instead of via a generated lexer/parser pair."""

    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        # Raw string: the NAME regexp contains `\w`, which is an invalid
        # escape sequence inside an ordinary (non-raw) string literal.
        grammar = r"""start: "(" name_list ("," "*" NAME)? ")"
                    name_list: NAME | name_list "," NAME
                    NAME: /\w+/ """
        text = '(a,b,c,*x)'

        lalr_tree = Lark(grammar, parser='lalr').parse(text)
        # No `parser` argument here, so this uses whatever Lark defaults to
        # (presumably Earley, per the docstring -- confirm against Lark).
        default_tree = Lark(grammar).parse(text)

        # assertEqual instead of a bare `assert`, which is stripped under -O.
        self.assertEqual(
            lalr_tree, default_tree,
            '%s != %s' % (lalr_tree.pretty(), default_tree.pretty()))


    def test_earley_nolex(self):
        # Only checks that parsing with `lexer=None` completes without raising.
        g = Lark("""start: A "b" c
                    A: "a"+
                    c: "abc"
                    """, parser="earley", lexer=None)
        g.parse('aaaababc')


class TestEarley(unittest.TestCase):
    """Empty placeholder test case: it currently defines no tests."""


def _make_parser_test(LEXER, PARSER):
    """Create a TestCase class bound to one lexer/parser pair and register it.

    LEXER and PARSER are forwarded to every ``Lark`` built by the generated
    tests.  The class is named ``Test<Parser><Lexer>`` (each part capitalized;
    a falsy LEXER is rendered as ``None``) and stored in this module's
    ``globals()`` under that name.  Nothing is returned.
    """
    def _Lark(grammar, **kwargs):
        # All tests build their parser through this wrapper so that they run
        # against the lexer/parser combination under test.
        return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                        b: "b"
                        a: "a"
                     """)
            r = g.parse('aaabaab')
            self.assertEqual(''.join(x.data for x in r.children), 'aaabaa')
            r = g.parse('aaabaaba')
            self.assertEqual(''.join(x.data for x in r.children), 'aaabaaa')

            self.assertRaises(ParseError, g.parse, 'aaabaa')

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                        B: "12"
                        A: "1" """)
            g2 = _Lark("""start: B A
                         B: "12"
                         A: "2" """)
            # unittest assertions rather than bare `assert`, which is removed
            # when Python runs with -O.
            x = g.parse('121')
            self.assertEqual(x.data, 'start')
            self.assertEqual(x.children, ['12', '1'])
            x = g2.parse('122')
            self.assertEqual(x.data, 'start')
            self.assertEqual(x.children, ['12', '2'])


        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            # Non-raw literal: Python itself expands \xa3 and \u0101 here.
            g = _Lark(u"""start: UNIA UNIB UNIA
                        UNIA: /\xa3/
                        UNIB: /\u0101/
                        """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            # Raw literal: the escapes reach the grammar as backslash sequences.
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                        UNIA: /\xa3/
                        UNIB: "a\u0101b\ "
                        UNIC: /a?\u0101c\n/
                        """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')


        def test_recurse_expansion(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for expansion."""
            g = _Lark(r"""start: a | start a
                         a : "a" """)

            # The input length is derived from the interpreter's recursion
            # limit so that the left-recursive `start` rule nests deeply.
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                            ?list: item+
                            item : A
                            A: "a"
                        """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                            ?list: item+ "!"
                            item : A
                            A: "a"
                        """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                            ?list: item+
                            item : A
                            A: "a"
                        """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list_tree] = r.children
            self.assertSequenceEqual([item.data for item in list_tree.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                            ?list: item+ "!"
                            item : A
                            A: "a"
                        """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list_tree] = r.children
            self.assertSequenceEqual([item.data for item in list_tree.children], ('item', 'item'))


        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                            ?list: item*
                            item : A
                            A: "a"
                         """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list_tree] = r.children
            self.assertSequenceEqual([item.data for item in list_tree.children], ())

        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                            ?list: item* "!"?
                            item : A
                            A: "a"
                         """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list_tree] = r.children
            self.assertSequenceEqual([item.data for item in list_tree.children], ())


        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                            list: | item "," list
                            item : A
                            A: "a"
                         """)
            r = g.parse("")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list_tree] = r.children
            self.assertSequenceEqual([item.data for item in list_tree.children], ())

        @unittest.skip("Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                            list: | item "," list
                            item : A
                            A: "a"
                         """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list_tree] = r.children
            self.assertSequenceEqual([item.data for item in list_tree.children], ('item',))

        @unittest.skip("Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            # NOTE(review): unlike the sibling flatten tests, the 'list' rule
            # below is prefixed with '#'. The test is skipped, so the grammar
            # is left exactly as written -- confirm the intent before enabling.
            g = _Lark(r"""start: list
                            #list: | item "," list
                            item : A
                            A: "a"
                         """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list_tree] = r.children
            self.assertSequenceEqual([item.data for item in list_tree.children], ('item', 'item'))

        @unittest.skip("Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                         a : A
                         A : "a" """)

            # The input length is derived from the interpreter's recursion
            # limit so that the left-recursive `start` rule nests deeply.
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_token_collision(self):
            # Raw string: `\w` is an invalid escape sequence in a normal literal.
            g = _Lark(r"""start: "Hello" NAME
                        NAME: /\w+/
                        %ignore " "
                    """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a"  """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)

        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a"  """)
        #     self.assertRaises(LexError, g.parse, 'aab')

        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            # Passes as long as both inputs parse without raising.
            g = _Lark("""start: "a"+ "b"
                             | "a"+ """)
            g.parse('aaaa')
            g.parse('aaaab')

        def test_rule_collision2(self):
            # Passes as long as all inputs parse without raising.
            g = _Lark("""start: "a"* "b"
                             | "a"+ """)
            g.parse('aaaa')
            g.parse('aaaab')
            g.parse('b')

        def test_regex_embed(self):
            g = _Lark("""start: A B C
                        A: /a/
                        B: /${A}b/
                        C: /${B}c/
                        """)
            g.parse('aababc')

        def test_token_embed(self):
            g = _Lark("""start: A B C
                        A: "a"
                        B: A "b"
                        C: B "c"
                        """)
            g.parse('aababc')

        def test_token_not_anon(self):
            """Tests that "a" is matched as A, rather than an anonymous token.

            That means that "a" is not filtered out, despite being an 'immediate string'.
            Whether or not this is the intuitive behavior, I'm not sure yet.

            Perhaps the right thing to do is report a collision (if such is relevant)

            -Erez
            """

            g = _Lark("""start: "a"
                        A: "a" """)
            x = g.parse('a')

            self.assertEqual(len(x.children), 1, '"a" should not be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                        A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1, '/a/ should not be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            g.parse('a')
            g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            g.parse('a')
            g.parse('aa')
            g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                      A: "a"* ("b"? "c".."e")+
                      """)
            g.parse('abcde')
            g.parse('dd')

        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)

        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty: _empty2
                          _empty2: _empty3
                          _empty3:
                            """)
            g.parse('AB')

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            # 300 distinct string tokens; only construction of the parser is checked.
            tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
            _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))

        def test_float_without_lexer(self):
            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises(ParseError, g.parse, "+2e-9e")

    _NAME = "Test" + PARSER.capitalize() + (LEXER or 'None').capitalize()
    _TestParser.__name__ = _NAME
    # Also set __qualname__: on Python 3 it would otherwise stay
    # "_make_parser_test.<locals>._TestParser" for every generated class.
    _TestParser.__qualname__ = _NAME
    globals()[_NAME] = _TestParser

# (lexer, parser) combinations for which a concrete test class is generated.
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),
]

# Each call stores a generated TestCase class in this module's globals().
# Note that LEXER and PARSER remain bound at module level after the loop.
for LEXER, PARSER in _TO_TEST:
    _make_parser_test(LEXER, PARSER)


# Run the tests when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
Added tests and lots of fixes and refactoring 2017-02-07 15:40:46 +00:00			`from __future__ import absolute_import`

			`import unittest`
			`import logging`
			`import os`
			`import sys`
			`try:`
			`from cStringIO import StringIO as cStringIO`
			`except ImportError:`
			`# Available only in Python 2.x, 3.x only has io.StringIO from below`
			`cStringIO = None`
			`from io import (`
			`StringIO as uStringIO,`
			`open,`
			`)`

			`logging.basicConfig(level=logging.INFO)`

			`from lark.lark import Lark`
A little order and refactoring 2017-02-07 22:53:22 +00:00			`from lark.common import GrammarError, ParseError`
Fixed grammars 2017-02-23 22:45:34 +00:00			`from lark.lexer import LexError`
Added tests and lots of fixes and refactoring 2017-02-07 15:40:46 +00:00
			`__path__ = os.path.dirname(__file__)`
			`def _read(n, *args):`
			`with open(os.path.join(__path__, n), *args) as f:`
			`return f.read()`

Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`class TestParsers(unittest.TestCase):`
			`def test_same_ast(self):`
Added tests and lots of fixes and refactoring 2017-02-07 15:40:46 +00:00			`"Tests that Earley and LALR parsers produce equal trees"`
			`g = Lark("""start: "(" name_list ("," "*" NAME)? ")"`
			`name_list: NAME \| name_list "," NAME`
			`NAME: /\w+/ """, parser='lalr')`
			`l = g.parse('(a,b,c,*x)')`

			`g = Lark("""start: "(" name_list ("," "*" NAME)? ")"`
			`name_list: NAME \| name_list "," NAME`
			`NAME: /\w+/ """)`
			`l2 = g.parse('(a,b,c,*x)')`
			`assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())`

Better support for scanless parsing 2017-02-26 09:56:04 +00:00
			`def test_earley_nolex(self):`
			`g = Lark("""start: A "b" c`
			`A: "a"+`
			`c: "abc"`
			`""", parser="earley", lexer=None)`
			`x = g.parse('aaaababc')`


Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`class TestEarley(unittest.TestCase):`
			`pass`

Better support for scanless parsing 2017-02-26 09:56:04 +00:00
Changed parser/lexer interface in lark. Bumped minor version 2017-02-25 16:35:31 +00:00			`def _make_parser_test(LEXER, PARSER):`
Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`def _Lark(grammar, **kwargs):`
Changed parser/lexer interface in lark. Bumped minor version 2017-02-25 16:35:31 +00:00			`return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)`
Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`class _TestParser(unittest.TestCase):`
			`def test_basic1(self):`
			`g = _Lark("""start: a+ b a* "b" a*`
			`b: "b"`
			`a: "a"`
Added tests and lots of fixes and refactoring 2017-02-07 15:40:46 +00:00			`""")`
Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`r = g.parse('aaabaab')`
			`self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )`
			`r = g.parse('aaabaaba')`
			`self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )`

			`self.assertRaises(ParseError, g.parse, 'aaabaa')`

			`def test_basic2(self):`
			`# Multiple parsers and colliding tokens`
			`g = _Lark("""start: B A`
			`B: "12"`
			`A: "1" """)`
			`g2 = _Lark("""start: B A`
			`B: "12"`
			`A: "2" """)`
			`x = g.parse('121')`
			`assert x.data == 'start' and x.children == ['12', '1'], x`
			`x = g2.parse('122')`
			`assert x.data == 'start' and x.children == ['12', '2'], x`


			`@unittest.skipIf(cStringIO is None, "cStringIO not available")`
			`def test_stringio_bytes(self):`
			`"""Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""`
			`_Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))`

			`def test_stringio_unicode(self):`
			`"""Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""`
			`_Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))`

			`def test_unicode(self):`
			`g = _Lark(u"""start: UNIA UNIB UNIA`
			`UNIA: /\xa3/`
			`UNIB: /\u0101/`
			`""")`
			`g.parse(u'\xa3\u0101\u00a3')`

			`def test_unicode2(self):`
			`g = _Lark(r"""start: UNIA UNIB UNIA UNIC`
			`UNIA: /\xa3/`
			`UNIB: "a\u0101b\ "`
			`UNIC: /a?\u0101c\n/`
			`""")`
			`g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')`


			`def test_recurse_expansion(self):`
			`"""Verify that stack depth doesn't get exceeded on recursive rules marked for expansion."""`
			`g = _Lark(r"""start: a \| start a`
			`a : "a" """)`

			`# Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built`
			`# STree data structures, which uses recursion).`
			`g.parse("a" * (sys.getrecursionlimit() // 4))`

			`def test_expand1_lists_with_one_item(self):`
			`g = _Lark(r"""start: list`
			`?list: item+`
			`item : A`
			`A: "a"`
			`""")`
			`r = g.parse("a")`

			`# because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'`
			`self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))`

			`# regardless of the amount of items: there should be only one child in 'start' because 'list' isn't an expand-all rule`
			`self.assertEqual(len(r.children), 1)`

			`def test_expand1_lists_with_one_item_2(self):`
			`g = _Lark(r"""start: list`
			`?list: item+ "!"`
			`item : A`
			`A: "a"`
			`""")`
			`r = g.parse("a!")`

			`# because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'`
			`self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))`

			`# regardless of the amount of items: there should be only one child in 'start' because 'list' isn't an expand-all rule`
			`self.assertEqual(len(r.children), 1)`

			`def test_dont_expand1_lists_with_multiple_items(self):`
			`g = _Lark(r"""start: list`
			`?list: item+`
			`item : A`
			`A: "a"`
			`""")`
			`r = g.parse("aa")`

			`# because 'list' is an expand-if-contains-one rule and we've provided more than one element it should not have expanded`
			`self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))`

			`# regardless of the amount of items: there should be only one child in 'start' because 'list' isn't an expand-all rule`
			`self.assertEqual(len(r.children), 1)`

			`# Sanity check: verify that 'list' contains the two 'item's we've given it`
			`[list] = r.children`
			`self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))`

			`def test_dont_expand1_lists_with_multiple_items_2(self):`
			`g = _Lark(r"""start: list`
			`?list: item+ "!"`
			`item : A`
			`A: "a"`
			`""")`
			`r = g.parse("aa!")`

			`# because 'list' is an expand-if-contains-one rule and we've provided more than one element it should not have expanded`
			`self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))`

			`# regardless of the amount of items: there should be only one child in 'start' because 'list' isn't an expand-all rule`
			`self.assertEqual(len(r.children), 1)`

			`# Sanity check: verify that 'list' contains the two 'item's we've given it`
			`[list] = r.children`
			`self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))`



			`def test_empty_expand1_list(self):`
			`g = _Lark(r"""start: list`
			`?list: item*`
			`item : A`
			`A: "a"`
			`""")`
			`r = g.parse("")`

			`# because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should not have expanded`
			`self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))`

			`# regardless of the amount of items: there should be only one child in 'start' because 'list' isn't an expand-all rule`
			`self.assertEqual(len(r.children), 1)`

			`# Sanity check: verify that 'list' contains no 'item's as we've given it none`
			`[list] = r.children`
			`self.assertSequenceEqual([item.data for item in list.children], ())`

			`def test_empty_expand1_list_2(self):`
			`g = _Lark(r"""start: list`
			`?list: item* "!"?`
			`item : A`
			`A: "a"`
			`""")`
			`r = g.parse("")`

			`# because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should not have expanded`
			`self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))`

			`# regardless of the amount of items: there should be only one child in 'start' because 'list' isn't an expand-all rule`
			`self.assertEqual(len(r.children), 1)`

			`# Sanity check: verify that 'list' contains no 'item's as we've given it none`
			`[list] = r.children`
			`self.assertSequenceEqual([item.data for item in list.children], ())`


			`def test_empty_flatten_list(self):`
			`g = _Lark(r"""start: list`
			`list: \| item "," list`
			`item : A`
			`A: "a"`
			`""")`
			`r = g.parse("")`

			`# Because 'list' is a flatten rule it's top-level element should never be expanded`
			`self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))`

			`# Sanity check: verify that 'list' contains no 'item's as we've given it none`
			`[list] = r.children`
			`self.assertSequenceEqual([item.data for item in list.children], ())`

			`@unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")`
			`def test_single_item_flatten_list(self):`
			`g = _Lark(r"""start: list`
			`list: \| item "," list`
			`item : A`
			`A: "a"`
			`""")`
			`r = g.parse("a,")`

			`# Because 'list' is a flatten rule it's top-level element should never be expanded`
			`self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))`

			`# Sanity check: verify that 'list' contains exactly the one 'item' we've given it`
			`[list] = r.children`
			`self.assertSequenceEqual([item.data for item in list.children], ('item',))`

			`@unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")`
			`def test_multiple_item_flatten_list(self):`
			`g = _Lark(r"""start: list`
			`#list: \| item "," list`
			`item : A`
			`A: "a"`
			`""")`
			`r = g.parse("a,a,")`

			`# Because 'list' is a flatten rule it's top-level element should never be expanded`
			`self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))`

			`# Sanity check: verify that 'list' contains exactly the two 'item's we've given it`
			`[list] = r.children`
			`self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))`

			`@unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")`
			`def test_recurse_flatten(self):`
			`"""Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""`
			`g = _Lark(r"""start: a \| start a`
			`a : A`
			`A : "a" """)`

			`# Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built`
			`# STree data structures, which uses recursion).`
			`g.parse("a" * (sys.getrecursionlimit() // 4))`

			`def test_token_collision(self):`
			`g = _Lark("""start: "Hello" NAME`
			`NAME: /\w+/`
Initial support for EBNF in tokens (automatic compilation to regexps) 2017-02-23 11:00:16 +00:00			`%ignore " "`
Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`""")`
			`x = g.parse('Hello World')`
			`self.assertSequenceEqual(x.children, ['World'])`
			`x = g.parse('Hello HelloWorld')`
			`self.assertSequenceEqual(x.children, ['HelloWorld'])`

Fixed grammars 2017-02-23 22:45:34 +00:00			`# def test_string_priority(self):`
			`# g = _Lark("""start: (A \| /a?bb/)+`
			`# A: "a" """)`
			`# x = g.parse('abb')`
			`# self.assertEqual(len(x.children), 2)`

			`# # This parse raises an exception because the lexer will always try to consume`
			`# # "a" first and will never match the regular expression`
			`# # This behavior is subject to change!!`
			`# # Thie won't happen with ambiguity handling.`
			`# g = _Lark("""start: (A \| /a?ab/)+`
			`# A: "a" """)`
			`# self.assertRaises(LexError, g.parse, 'aab')`

Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`def test_undefined_rule(self):`
			`self.assertRaises(GrammarError, _Lark, """start: a""")`

			`def test_undefined_token(self):`
			`self.assertRaises(GrammarError, _Lark, """start: A""")`

			`def test_rule_collision(self):`
			`g = _Lark("""start: "a"+ "b"`
			`\| "a"+ """)`
			`x = g.parse('aaaa')`
			`x = g.parse('aaaab')`

			`def test_rule_collision2(self):`
			`g = _Lark("""start: "a"* "b"`
			`\| "a"+ """)`
			`x = g.parse('aaaa')`
			`x = g.parse('aaaab')`
			`x = g.parse('b')`

			`def test_regex_embed(self):`
			`g = _Lark("""start: A B C`
			`A: /a/`
			`B: /${A}b/`
			`C: /${B}c/`
			`""")`
			`x = g.parse('aababc')`

Initial support for EBNF in tokens (automatic compilation to regexps) 2017-02-23 11:00:16 +00:00			`def test_token_embed(self):`
			`g = _Lark("""start: A B C`
			`A: "a"`
			`B: A "b"`
			`C: B "c"`
			`""")`
			`x = g.parse('aababc')`

Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`def test_token_not_anon(self):`
			`"""Tests that "a" is matched as A, rather than an anonymous token.`

			`That means that "a" is not filtered out, despite being an 'immediate string'.`
			`Whether or not this is the intuitive behavior, I'm not sure yet.`

Refactored TokenDef to store Pattern(Str/RE) 2017-02-23 21:50:52 +00:00			`Perhaps the right thing to do is report a collision (if such is relevant)`

Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`-Erez`
			`"""`

			`g = _Lark("""start: "a"`
			`A: "a" """)`
			`x = g.parse('a')`
Refactored TokenDef to store Pattern(Str/RE) 2017-02-23 21:50:52 +00:00
Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`self.assertEqual(len(x.children), 1, '"a" should not be considered anonymous')`
			`self.assertEqual(x.children[0].type, "A")`

Refactored TokenDef to store Pattern(Str/RE) 2017-02-23 21:50:52 +00:00			`g = _Lark("""start: /a/`
			`A: /a/ """)`
			`x = g.parse('a')`
			`self.assertEqual(len(x.children), 1, '/a/ should not be considered anonymous')`
			`self.assertEqual(x.children[0].type, "A")`

Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`def test_maybe(self):`
			`g = _Lark("""start: ["a"] """)`
			`x = g.parse('a')`
			`x = g.parse('')`

			`def test_start(self):`
			`g = _Lark("""a: "a" a? """, start='a')`
			`x = g.parse('a')`
			`x = g.parse('aa')`
			`x = g.parse('aaa')`

			`def test_alias(self):`
			`g = _Lark("""start: "a" -> b """)`
			`x = g.parse('a')`
			`self.assertEqual(x.data, "b")`

Initial support for EBNF in tokens (automatic compilation to regexps) 2017-02-23 11:00:16 +00:00			`def test_token_ebnf(self):`
			`g = _Lark("""start: A`
			`A: "a"* ("b"? "c".."e")+`
			`""")`
			`x = g.parse('abcde')`
			`x = g.parse('dd')`

			`# def test_token_recurse(self):`
			`# g = _Lark("""start: A`
			`# A: B`
			`# B: A`
			`# """)`

Added a test for empty rules 2017-02-28 07:09:35 +00:00			`def test_empty(self):`
			`# Fails an Earley implementation without special handling for empty rules,`
			`# or re-processing of already completed rules.`
			`g = _Lark(r"""start: _empty a "B"`
			`a: _empty "A"`
			`_empty: _empty2`
			`_empty2: _empty3`
			`_empty3:`
			`""")`
			`x = g.parse('AB')`

Improved lexer, added profiler option to Lark 2017-02-10 09:50:50 +00:00			`def test_lexer_token_limit(self):`
			`"Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"`
			`tokens = {'A%d'%i:'"%d"'%i for i in range(300)}`
			`g = _Lark("""start: %s`
			`%s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))`

Fixed bug in lexer where unidentical tokens got the same name 2017-02-11 22:35:38 +00:00			`def test_float_without_lexer(self):`
			`g = _Lark("""start: ["+"\|"-"] float`
			`float: digit* "." digit+ exp?`
			`\| digit+ exp`
			`exp: ("e"\|"E") ["+"\|"-"] digit+`
			`digit: "0"\|"1"\|"2"\|"3"\|"4"\|"5"\|"6"\|"7"\|"8"\|"9"`
			`""")`
			`g.parse("1.2")`
			`g.parse("-.2e9")`
			`g.parse("+2e-9")`
			`self.assertRaises(ParseError, g.parse, "+2e-9e")`
Improved lexer, added profiler option to Lark 2017-02-10 09:50:50 +00:00
Changed parser/lexer interface in lark. Bumped minor version 2017-02-25 16:35:31 +00:00			`_NAME = "Test" + PARSER.capitalize() + (LEXER or 'None').capitalize()`
Fixed bug in lalr parser. Now testing both lalr & earley in test_parser 2017-02-07 23:19:33 +00:00			`_TestParser.__name__ = _NAME`
			`globals()[_NAME] = _TestParser`

Changed parser/lexer interface in lark. Bumped minor version 2017-02-25 16:35:31 +00:00			`_TO_TEST = [`
			`('standard', 'earley'),`
			`('standard', 'lalr'),`
			`('contextual', 'lalr'),`
			`]`

			`for LEXER, PARSER in _TO_TEST:`
			`_make_parser_test(LEXER, PARSER)`
Added tests and lots of fixes and refactoring 2017-02-07 15:40:46 +00:00

			`if __name__ == '__main__':`
			`unittest.main()`