propagate_positions & maybe_placeholders are now true by default, updated docs, tests & examples accordingly (Issue #449, #451)

This commit is contained in:
Erez Shinan 2019-11-16 08:52:06 +02:00
parent e39bfa1b18
commit 84f08a452f
8 changed files with 24 additions and 27 deletions

View File

@ -12,29 +12,31 @@ The Lark class is the main interface for the library. It's mostly a thin wrapper
The Lark class accepts a grammar string or file object, and keyword options:
* start - The symbol in the grammar that begins the parse (Default: `"start"`)
* **start** - A list of the rules in the grammar that begin the parse (Default: `["start"]`)
* parser - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`)
* **parser** - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`)
* lexer - Overrides default lexer.
* **lexer** - Overrides default lexer, depending on parser.
* transformer - Applies the transformer instead of building a parse tree (only allowed with parser="lalr")
* **transformer** - Applies the provided transformer instead of building a parse tree (only allowed with parser="lalr")
* postlex - Lexer post-processing (Default: None. only works when lexer is "standard" or "contextual")
* **postlex** - Lexer post-processing (Default: `None`. Only works when lexer is "standard" or "contextual")
* ambiguity (only relevant for earley and cyk)
* **ambiguity** (only relevant for earley and cyk)
* "explicit" - Return all derivations inside an "_ambig" data node.
* "resolve" - Let the parser choose the best derivation (greedy for tokens, non-greedy for rules. Default)
* debug - Display warnings (such as Shift-Reduce warnings for LALR)
* **debug** - Display warnings (such as Shift-Reduce warnings for LALR)
* keep_all_tokens - Don't throw away any terminals from the tree (Default=False)
* **keep_all_tokens** - Don't throw away any terminals from the tree (Default=`False`)
* propagate_positions - Propagate line/column count to tree nodes (default=False)
* **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`True`)
* lexer_callbacks - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information.
* **maybe_placeholders** - The `[]` operator returns `None` when not matched. Setting this to `False` makes it behave like the `?` operator, and return no value at all, which may be a little faster (default=`True`)
* **lexer_callbacks** - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information.
#### parse(self, text)
@ -50,7 +52,7 @@ The main tree class
* `data` - The name of the rule or alias
* `children` - List of matched sub-rules and terminals
* `meta` - Line & Column numbers, if using `propagate_positions`
* `meta` - Line & Column numbers (unless `propagate_positions` is disabled)
#### \_\_init\_\_(self, data, children)

View File

@ -147,7 +147,7 @@ Each item is one of:
* `TERMINAL`
* `"string literal"` or `/regexp literal/`
* `(item item ..)` - Group items
* `[item item ..]` - Maybe. Same as `(item item ..)?`
* `[item item ..]` - Maybe. Same as `(item item ..)?`, but generates `None` if there is no match
* `item?` - Zero or one instances of item ("maybe")
* `item*` - Zero or more instances of item
* `item+` - One or more instances of item
@ -157,7 +157,7 @@ Each item is one of:
**Examples:**
```perl
hello_world: "hello" "world"
mul: [mul "*"] number //# Left-recursion is allowed!
mul: (mul "*")? number //# Left-recursion is allowed and encouraged!
expr: expr operator expr
| value //# Multi-line, belongs to expr

View File

@ -29,7 +29,7 @@ parser = Lark("""
data_item: STR INT*
%declare STR INT
""", parser='lalr', lexer=TypeLexer)
""", parser='lalr', lexer=TypeLexer, propagate_positions=False)
class ParseToDict(Transformer):

View File

@ -25,15 +25,9 @@ test_json = '''
def test_earley():
json_parser = Lark(json_grammar)
json_parser = Lark(json_grammar, maybe_placeholders=False)
tree = json_parser.parse(test_json)
# print ('@@', tree.pretty())
# for x in tree.find_data('true'):
# x.data = 'false'
# # x.children[0].value = '"HAHA"'
new_json = Reconstructor(json_parser).reconstruct(tree)
print (new_json)
print (json.loads(new_json) == json.loads(test_json))
@ -41,7 +35,7 @@ def test_earley():
def test_lalr():
json_parser = Lark(json_grammar, parser='lalr')
json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
tree = json_parser.parse(test_json)
new_json = Reconstructor(json_parser).reconstruct(tree)

View File

@ -66,9 +66,9 @@ class LarkOptions(Serialize):
'profile': False,
'priority': 'auto',
'ambiguity': 'auto',
'propagate_positions': False,
'propagate_positions': True,
'lexer_callbacks': {},
'maybe_placeholders': False,
'maybe_placeholders': True,
'edit_terminals': None,
}

View File

@ -69,6 +69,7 @@ class MakeMatchTree:
class Reconstructor:
def __init__(self, parser):
# XXX TODO calling compile twice returns different results!
assert parser.options.maybe_placeholders == False
tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)
self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})

View File

@ -963,7 +963,7 @@ def _make_parser_test(LEXER, PARSER):
@unittest.skipIf(PARSER == 'cyk', "No empty rules")
def test_twice_empty(self):
g = """!start: [["A"]]
g = """!start: ("A"?)?
"""
l = _Lark(g)
tree = l.parse('A')

View File

@ -16,7 +16,7 @@ def _remove_ws(s):
class TestReconstructor(TestCase):
def assert_reconstruct(self, grammar, code):
parser = Lark(grammar, parser='lalr')
parser = Lark(grammar, parser='lalr', maybe_placeholders=False)
tree = parser.parse(code)
new = Reconstructor(parser).reconstruct(tree)
self.assertEqual(_remove_ws(code), _remove_ws(new))
@ -105,7 +105,7 @@ class TestReconstructor(TestCase):
%ignore WS
"""
json_parser = Lark(json_grammar, parser='lalr')
json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
tree = json_parser.parse(test_json)
new_json = Reconstructor(json_parser).reconstruct(tree)