propagate_positions & maybe_placeholders are now true by default, updated docs, tests & examples accordingly (Issue #449, #451)

This commit is contained in:
Erez Shinan 2019-11-16 08:52:06 +02:00
parent e39bfa1b18
commit 84f08a452f
8 changed files with 24 additions and 27 deletions

View File

@ -12,29 +12,31 @@ The Lark class is the main interface for the library. It's mostly a thin wrapper
The Lark class accepts a grammar string or file object, and keyword options:
* start - The symbol in the grammar that begins the parse (Default: `"start"`)
* **start** - A list of the rules in the grammar that begin the parse (Default: `["start"]`)
* parser - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`)
* **parser** - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`)
* lexer - Overrides default lexer.
* **lexer** - Overrides default lexer, depending on parser.
* transformer - Applies the transformer instead of building a parse tree (only allowed with parser="lalr")
* **transformer** - Applies the provided transformer instead of building a parse tree (only allowed with parser="lalr")
* postlex - Lexer post-processing (Default: None. only works when lexer is "standard" or "contextual")
* **postlex** - Lexer post-processing (Default: `None`. Only works when lexer is "standard" or "contextual")
* ambiguity (only relevant for earley and cyk)
* **ambiguity** (only relevant for earley and cyk)
* "explicit" - Return all derivations inside an "_ambig" data node.
* "resolve" - Let the parser choose the best derivation (greedy for tokens, non-greedy for rules. Default)
* debug - Display warnings (such as Shift-Reduce warnings for LALR)
* **debug** - Display warnings (such as Shift-Reduce warnings for LALR)
* keep_all_tokens - Don't throw away any terminals from the tree (Default=False)
* **keep_all_tokens** - Don't throw away any terminals from the tree (Default=`False`)
* propagate_positions - Propagate line/column count to tree nodes (default=False)
* **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`True`)
* lexer_callbacks - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information.
* **maybe_placeholders** - The `[]` operator returns `None` when not matched. Setting this to `False` makes it behave like the `?` operator, and return no value at all, which may be a little faster (default=`True`)
* **lexer_callbacks** - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information.
#### parse(self, text)
@ -50,7 +52,7 @@ The main tree class
* `data` - The name of the rule or alias
* `children` - List of matched sub-rules and terminals
* `meta` - Line & Column numbers, if using `propagate_positions`
* `meta` - Line & Column numbers (unless `propagate_positions` is disabled)
#### \_\_init\_\_(self, data, children)

View File

@ -147,7 +147,7 @@ Each item is one of:
* `TERMINAL`
* `"string literal"` or `/regexp literal/`
* `(item item ..)` - Group items
* `[item item ..]` - Maybe. Same as `(item item ..)?`
* `[item item ..]` - Maybe. Same as `(item item ..)?`, but generates `None` if there is no match
* `item?` - Zero or one instances of item ("maybe")
* `item*` - Zero or more instances of item
* `item+` - One or more instances of item
@ -157,7 +157,7 @@ Each item is one of:
**Examples:**
```perl
hello_world: "hello" "world"
mul: [mul "*"] number //# Left-recursion is allowed!
mul: (mul "*")? number //# Left-recursion is allowed and encouraged!
expr: expr operator expr
| value //# Multi-line, belongs to expr

View File

@ -29,7 +29,7 @@ parser = Lark("""
data_item: STR INT*
%declare STR INT
""", parser='lalr', lexer=TypeLexer)
""", parser='lalr', lexer=TypeLexer, propagate_positions=False)
class ParseToDict(Transformer):

View File

@ -25,15 +25,9 @@ test_json = '''
def test_earley():
json_parser = Lark(json_grammar)
json_parser = Lark(json_grammar, maybe_placeholders=False)
tree = json_parser.parse(test_json)
# print ('@@', tree.pretty())
# for x in tree.find_data('true'):
# x.data = 'false'
# # x.children[0].value = '"HAHA"'
new_json = Reconstructor(json_parser).reconstruct(tree)
print (new_json)
print (json.loads(new_json) == json.loads(test_json))
@ -41,7 +35,7 @@ def test_earley():
def test_lalr():
json_parser = Lark(json_grammar, parser='lalr')
json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
tree = json_parser.parse(test_json)
new_json = Reconstructor(json_parser).reconstruct(tree)

View File

@ -66,9 +66,9 @@ class LarkOptions(Serialize):
'profile': False,
'priority': 'auto',
'ambiguity': 'auto',
'propagate_positions': False,
'propagate_positions': True,
'lexer_callbacks': {},
'maybe_placeholders': False,
'maybe_placeholders': True,
'edit_terminals': None,
}

View File

@ -69,6 +69,7 @@ class MakeMatchTree:
class Reconstructor:
def __init__(self, parser):
# XXX TODO calling compile twice returns different results!
assert parser.options.maybe_placeholders == False
tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)
self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})

View File

@ -963,7 +963,7 @@ def _make_parser_test(LEXER, PARSER):
@unittest.skipIf(PARSER == 'cyk', "No empty rules")
def test_twice_empty(self):
g = """!start: [["A"]]
g = """!start: ("A"?)?
"""
l = _Lark(g)
tree = l.parse('A')

View File

@ -16,7 +16,7 @@ def _remove_ws(s):
class TestReconstructor(TestCase):
def assert_reconstruct(self, grammar, code):
parser = Lark(grammar, parser='lalr')
parser = Lark(grammar, parser='lalr', maybe_placeholders=False)
tree = parser.parse(code)
new = Reconstructor(parser).reconstruct(tree)
self.assertEqual(_remove_ws(code), _remove_ws(new))
@ -105,7 +105,7 @@ class TestReconstructor(TestCase):
%ignore WS
"""
json_parser = Lark(json_grammar, parser='lalr')
json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
tree = json_parser.parse(test_json)
new_json = Reconstructor(json_parser).reconstruct(tree)