mirror of https://github.com/lark-parser/lark.git
propagate_positions & maybe_placeholders are now true by default, updated docs, tests & examples accordingly (Issue #449, #451)
This commit is contained in:
parent
e39bfa1b18
commit
84f08a452f
|
@ -12,29 +12,31 @@ The Lark class is the main interface for the library. It's mostly a thin wrapper
|
|||
|
||||
The Lark class accepts a grammar string or file object, and keyword options:
|
||||
|
||||
* start - The symbol in the grammar that begins the parse (Default: `"start"`)
|
||||
* **start** - A list of the rules in the grammar that begin the parse (Default: `["start"]`)
|
||||
|
||||
* parser - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`)
|
||||
* **parser** - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`)
|
||||
|
||||
* lexer - Overrides default lexer.
|
||||
* **lexer** - Overrides default lexer, depending on parser.
|
||||
|
||||
* transformer - Applies the transformer instead of building a parse tree (only allowed with parser="lalr")
|
||||
* **transformer** - Applies the provided transformer instead of building a parse tree (only allowed with parser="lalr")
|
||||
|
||||
* postlex - Lexer post-processing (Default: None. only works when lexer is "standard" or "contextual")
|
||||
* **postlex** - Lexer post-processing (Default: `None`. Only works when lexer is "standard" or "contextual")
|
||||
|
||||
* ambiguity (only relevant for earley and cyk)
|
||||
* **ambiguity** (only relevant for earley and cyk)
|
||||
|
||||
* "explicit" - Return all derivations inside an "_ambig" data node.
|
||||
|
||||
* "resolve" - Let the parser choose the best derivation (greedy for tokens, non-greedy for rules). This is the default.
|
||||
|
||||
* debug - Display warnings (such as Shift-Reduce warnings for LALR)
|
||||
* **debug** - Display warnings (such as Shift-Reduce warnings for LALR)
|
||||
|
||||
* keep_all_tokens - Don't throw away any terminals from the tree (Default=False)
|
||||
* **keep_all_tokens** - Don't throw away any terminals from the tree (Default=`False`)
|
||||
|
||||
* propagate_positions - Propagate line/column count to tree nodes (default=False)
|
||||
* **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`True`)
|
||||
|
||||
* lexer_callbacks - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information.
|
||||
* **maybe_placeholders** - The `[]` operator returns `None` when not matched. Setting this to `False` makes it behave like the `?` operator, and return no value at all, which may be a little faster (default=`True`)
|
||||
|
||||
* **lexer_callbacks** - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information.
|
||||
|
||||
#### parse(self, text)
|
||||
|
||||
|
@ -50,7 +52,7 @@ The main tree class
|
|||
|
||||
* `data` - The name of the rule or alias
|
||||
* `children` - List of matched sub-rules and terminals
|
||||
* `meta` - Line & Column numbers, if using `propagate_positions`
|
||||
* `meta` - Line & Column numbers (unless `propagate_positions` is disabled)
|
||||
|
||||
#### \_\_init\_\_(self, data, children)
|
||||
|
||||
|
|
|
@ -147,7 +147,7 @@ Each item is one of:
|
|||
* `TERMINAL`
|
||||
* `"string literal"` or `/regexp literal/`
|
||||
* `(item item ..)` - Group items
|
||||
* `[item item ..]` - Maybe. Same as `(item item ..)?`
|
||||
* `[item item ..]` - Maybe. Same as `(item item ..)?`, but generates `None` if there is no match
|
||||
* `item?` - Zero or one instances of item ("maybe")
|
||||
* `item*` - Zero or more instances of item
|
||||
* `item+` - One or more instances of item
|
||||
|
@ -157,7 +157,7 @@ Each item is one of:
|
|||
**Examples:**
|
||||
```perl
|
||||
hello_world: "hello" "world"
|
||||
mul: [mul "*"] number //# Left-recursion is allowed!
|
||||
mul: (mul "*")? number //# Left-recursion is allowed and encouraged!
|
||||
expr: expr operator expr
|
||||
| value //# Multi-line, belongs to expr
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ parser = Lark("""
|
|||
data_item: STR INT*
|
||||
|
||||
%declare STR INT
|
||||
""", parser='lalr', lexer=TypeLexer)
|
||||
""", parser='lalr', lexer=TypeLexer, propagate_positions=False)
|
||||
|
||||
|
||||
class ParseToDict(Transformer):
|
||||
|
|
|
@ -25,15 +25,9 @@ test_json = '''
|
|||
|
||||
def test_earley():
|
||||
|
||||
json_parser = Lark(json_grammar)
|
||||
json_parser = Lark(json_grammar, maybe_placeholders=False)
|
||||
tree = json_parser.parse(test_json)
|
||||
|
||||
# print ('@@', tree.pretty())
|
||||
# for x in tree.find_data('true'):
|
||||
# x.data = 'false'
|
||||
# # x.children[0].value = '"HAHA"'
|
||||
|
||||
|
||||
new_json = Reconstructor(json_parser).reconstruct(tree)
|
||||
print (new_json)
|
||||
print (json.loads(new_json) == json.loads(test_json))
|
||||
|
@ -41,7 +35,7 @@ def test_earley():
|
|||
|
||||
def test_lalr():
|
||||
|
||||
json_parser = Lark(json_grammar, parser='lalr')
|
||||
json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
|
||||
tree = json_parser.parse(test_json)
|
||||
|
||||
new_json = Reconstructor(json_parser).reconstruct(tree)
|
||||
|
|
|
@ -66,9 +66,9 @@ class LarkOptions(Serialize):
|
|||
'profile': False,
|
||||
'priority': 'auto',
|
||||
'ambiguity': 'auto',
|
||||
'propagate_positions': False,
|
||||
'propagate_positions': True,
|
||||
'lexer_callbacks': {},
|
||||
'maybe_placeholders': False,
|
||||
'maybe_placeholders': True,
|
||||
'edit_terminals': None,
|
||||
}
|
||||
|
||||
|
|
|
@ -69,6 +69,7 @@ class MakeMatchTree:
|
|||
class Reconstructor:
|
||||
def __init__(self, parser):
|
||||
# XXX TODO calling compile twice returns different results!
|
||||
assert parser.options.maybe_placeholders == False
|
||||
tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)
|
||||
|
||||
self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
|
||||
|
|
|
@ -963,7 +963,7 @@ def _make_parser_test(LEXER, PARSER):
|
|||
|
||||
@unittest.skipIf(PARSER == 'cyk', "No empty rules")
|
||||
def test_twice_empty(self):
|
||||
g = """!start: [["A"]]
|
||||
g = """!start: ("A"?)?
|
||||
"""
|
||||
l = _Lark(g)
|
||||
tree = l.parse('A')
|
||||
|
|
|
@ -16,7 +16,7 @@ def _remove_ws(s):
|
|||
class TestReconstructor(TestCase):
|
||||
|
||||
def assert_reconstruct(self, grammar, code):
|
||||
parser = Lark(grammar, parser='lalr')
|
||||
parser = Lark(grammar, parser='lalr', maybe_placeholders=False)
|
||||
tree = parser.parse(code)
|
||||
new = Reconstructor(parser).reconstruct(tree)
|
||||
self.assertEqual(_remove_ws(code), _remove_ws(new))
|
||||
|
@ -105,7 +105,7 @@ class TestReconstructor(TestCase):
|
|||
%ignore WS
|
||||
"""
|
||||
|
||||
json_parser = Lark(json_grammar, parser='lalr')
|
||||
json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
|
||||
tree = json_parser.parse(test_json)
|
||||
|
||||
new_json = Reconstructor(json_parser).reconstruct(tree)
|
||||
|
|
Loading…
Reference in New Issue