From 84f08a452f6aded0530948757841e61e2a4a423d Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sat, 16 Nov 2019 08:52:06 +0200 Subject: [PATCH] propagate_positions & maybe_placeholders are now true by default, updated docs, tests & examples accordingly (Issue #449, #451) --- docs/classes.md | 24 +++++++++++++----------- docs/grammar.md | 4 ++-- examples/custom_lexer.py | 2 +- examples/reconstruct_json.py | 10 ++-------- lark/lark.py | 4 ++-- lark/reconstruct.py | 1 + tests/test_parser.py | 2 +- tests/test_reconstructor.py | 4 ++-- 8 files changed, 24 insertions(+), 27 deletions(-) diff --git a/docs/classes.md b/docs/classes.md index 1555a1f..1d59551 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -12,29 +12,31 @@ The Lark class is the main interface for the library. It's mostly a thin wrapper The Lark class accepts a grammar string or file object, and keyword options: -* start - The symbol in the grammar that begins the parse (Default: `"start"`) +* **start** - A list of the rules in the grammar that begin the parse (Default: `["start"]`) -* parser - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`) +* **parser** - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`) -* lexer - Overrides default lexer. +* **lexer** - Overrides default lexer, depending on parser. -* transformer - Applies the transformer instead of building a parse tree (only allowed with parser="lalr") +* **transformer** - Applies the provided transformer instead of building a parse tree (only allowed with parser="lalr") -* postlex - Lexer post-processing (Default: None. only works when lexer is "standard" or "contextual") +* **postlex** - Lexer post-processing (Default: `None`. only works when lexer is "standard" or "contextual") -* ambiguity (only relevant for earley and cyk) +* **ambiguity** (only relevant for earley and cyk) * "explicit" - Return all derivations inside an "_ambig" data node. 
* "resolve" - Let the parser choose the best derivation (greedy for tokens, non-greedy for rules. Default) -* debug - Display warnings (such as Shift-Reduce warnings for LALR) +* **debug** - Display warnings (such as Shift-Reduce warnings for LALR) -* keep_all_tokens - Don't throw away any terminals from the tree (Default=False) +* **keep_all_tokens** - Don't throw away any terminals from the tree (default=`False`) -* propagate_positions - Propagate line/column count to tree nodes (default=False) +* **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`True`) -* lexer_callbacks - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information. +* **maybe_placeholders** - When the `[]` operator does not match, it returns `None` in its place. Setting this to `False` makes `[]` behave like the `?` operator and return no value at all, which may be a little faster (default=`True`) + +* **lexer_callbacks** - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information. #### parse(self, text) @@ -50,7 +52,7 @@ The main tree class * `data` - The name of the rule or alias * `children` - List of matched sub-rules and terminals -* `meta` - Line & Column numbers, if using `propagate_positions` +* `meta` - Line & Column numbers (unless `propagate_positions` is disabled) #### \_\_init\_\_(self, data, children) diff --git a/docs/grammar.md b/docs/grammar.md index 8a8913b..cc518e9 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -147,7 +147,7 @@ Each item is one of: * `TERMINAL` * `"string literal"` or `/regexp literal/` * `(item item ..)` - Group items -* `[item item ..]` - Maybe. Same as `(item item ..)?` +* `[item item ..]` - Maybe. 
Same as `(item item ..)?`, but generates `None` if there is no match * `item?` - Zero or one instances of item ("maybe") * `item*` - Zero or more instances of item * `item+` - One or more instances of item @@ -157,7 +157,7 @@ Each item is one of: **Examples:** ```perl hello_world: "hello" "world" -mul: [mul "*"] number //# Left-recursion is allowed! +mul: (mul "*")? number //# Left-recursion is allowed and encouraged! expr: expr operator expr | value //# Multi-line, belongs to expr diff --git a/examples/custom_lexer.py b/examples/custom_lexer.py index 786bf4f..732e614 100644 --- a/examples/custom_lexer.py +++ b/examples/custom_lexer.py @@ -29,7 +29,7 @@ parser = Lark(""" data_item: STR INT* %declare STR INT - """, parser='lalr', lexer=TypeLexer) + """, parser='lalr', lexer=TypeLexer, propagate_positions=False) class ParseToDict(Transformer): diff --git a/examples/reconstruct_json.py b/examples/reconstruct_json.py index 07df86c..59c58b0 100644 --- a/examples/reconstruct_json.py +++ b/examples/reconstruct_json.py @@ -25,15 +25,9 @@ test_json = ''' def test_earley(): - json_parser = Lark(json_grammar) + json_parser = Lark(json_grammar, maybe_placeholders=False) tree = json_parser.parse(test_json) - # print ('@@', tree.pretty()) - # for x in tree.find_data('true'): - # x.data = 'false' - # # x.children[0].value = '"HAHA"' - - new_json = Reconstructor(json_parser).reconstruct(tree) print (new_json) print (json.loads(new_json) == json.loads(test_json)) @@ -41,7 +35,7 @@ def test_earley(): def test_lalr(): - json_parser = Lark(json_grammar, parser='lalr') + json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False) tree = json_parser.parse(test_json) new_json = Reconstructor(json_parser).reconstruct(tree) diff --git a/lark/lark.py b/lark/lark.py index 47c6fba..db1dfd2 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -66,9 +66,9 @@ class LarkOptions(Serialize): 'profile': False, 'priority': 'auto', 'ambiguity': 'auto', - 'propagate_positions': False, + 
'propagate_positions': True, 'lexer_callbacks': {}, - 'maybe_placeholders': False, + 'maybe_placeholders': True, 'edit_terminals': None, } diff --git a/lark/reconstruct.py b/lark/reconstruct.py index c446913..fb47b93 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -69,6 +69,7 @@ class MakeMatchTree: class Reconstructor: def __init__(self, parser): # XXX TODO calling compile twice returns different results! + assert parser.options.maybe_placeholders == False tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start) self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}) diff --git a/tests/test_parser.py b/tests/test_parser.py index e9d46e5..35b3015 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -963,7 +963,7 @@ def _make_parser_test(LEXER, PARSER): @unittest.skipIf(PARSER == 'cyk', "No empty rules") def test_twice_empty(self): - g = """!start: [["A"]] + g = """!start: ("A"?)? """ l = _Lark(g) tree = l.parse('A') diff --git a/tests/test_reconstructor.py b/tests/test_reconstructor.py index 526d2e2..ecab499 100644 --- a/tests/test_reconstructor.py +++ b/tests/test_reconstructor.py @@ -16,7 +16,7 @@ def _remove_ws(s): class TestReconstructor(TestCase): def assert_reconstruct(self, grammar, code): - parser = Lark(grammar, parser='lalr') + parser = Lark(grammar, parser='lalr', maybe_placeholders=False) tree = parser.parse(code) new = Reconstructor(parser).reconstruct(tree) self.assertEqual(_remove_ws(code), _remove_ws(new)) @@ -105,7 +105,7 @@ class TestReconstructor(TestCase): %ignore WS """ - json_parser = Lark(json_grammar, parser='lalr') + json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False) tree = json_parser.parse(test_json) new_json = Reconstructor(json_parser).reconstruct(tree)