#
# This example demonstrates the power of the contextual lexer, by parsing a config file.
#
# The tokens NAME and VALUE match the same input. A regular lexer would arbitrarily
# choose one over the other, which would lead to a (confusing) parse error.
# However, due to the unambiguous structure of the grammar, the LALR(1) algorithm knows
# which one of them to expect at each point during the parse.
# The lexer then only matches the tokens that the parser expects.
# The result is a correct parse, something that is impossible with a regular lexer.
#
# Another approach is to discard the lexer altogether and use the Earley algorithm.
# It will handle more cases than the contextual lexer, but at the cost of performance.
# See examples/conf_nolex.py for an example of that approach.
#

from lark import Lark

# Grammar for a minimal INI-style config file: an optional leading newline run,
# then one or more "[section]" headers, each followed by "name=value" items.
#
# NAME and VALUE deliberately overlap (anything NAME matches, VALUE matches too),
# so this grammar relies on the parser telling the lexer which terminal is
# expected at each position.
#
# NOTE(review): newer lark releases appear to select this mode with
# parser="lalr", lexer="contextual" instead of parser="lalr_contextual_lexer";
# confirm against the lark version this example is run with.
parser = Lark(r"""
start: _NL? section+
section: "[" NAME "]" _NL item+
item: NAME "=" VALUE _NL
NAME: /[a-zA-Z_]\w*/
VALUE: /.*/

_NL: /(\r?\n)+/

%ignore /[\t \f]+/
%ignore /\#[^\n]*/
""", parser="lalr_contextual_lexer")


# Sample input: one section with two items. The second value contains quotes
# and a comma, which only the catch-all VALUE terminal can match.
sample_conf = """
[bla]
a=Hello
this="that",4
"""

# Parse the sample and print the resulting tree in indented form.
print(parser.parse(sample_conf).pretty())