From 19b09438faaa9bdf1633342abd8cdc8875993705 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Thu, 20 Sep 2018 17:54:41 +0300 Subject: [PATCH] Added documentation for lexer_callbacks in recipes --- docs/classes.md | 2 ++ docs/recipes.md | 74 ++++++++++++++++++++++++++++++++++++++++++++++++ lark/__init__.py | 1 + mkdocs.yml | 1 + 4 files changed, 78 insertions(+) create mode 100644 docs/recipes.md diff --git a/docs/classes.md b/docs/classes.md index c509da4..971bf12 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -36,6 +36,8 @@ The Lark class accepts a grammar string or file object, and keyword options: * propagate_positions - Propagate line/column count to tree nodes (default=False) +* lexer_callbacks - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer's token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information. + #### parse(self, text) Return a complete parse tree for the text (of type Tree) diff --git a/docs/recipes.md b/docs/recipes.md new file mode 100644 index 0000000..68c2ee4 --- /dev/null +++ b/docs/recipes.md @@ -0,0 +1,74 @@ +# Recipes + +A collection of recipes for using Lark and its various features + + + +## lexer_callbacks + +Use it to interface with the lexer as it generates tokens. + +Accepts a dictionary of the form + + {TOKEN_TYPE: callback} + +Where callback is of type `f(Token) -> Token` + +It only works with the standard and contextual lexers. 
+ +### Example 1: Replace string values with ints for INT tokens + +```python +from lark import Lark, Token + +def tok_to_int(tok): + return Token.new_borrow_pos(tok.type, int(tok), tok) + +parser = Lark(""" +start: INT* +%import common.INT +%ignore " " +""", parser="lalr", lexer_callbacks = {'INT': tok_to_int}) + +print(parser.parse('3 14 159')) +``` + +Prints out: + +```python +Tree(start, [Token(INT, 3), Token(INT, 14), Token(INT, 159)]) +``` + + +### Example 2: Collect all comments +```python +from lark import Lark + +comments = [] + +parser = Lark(""" + start: INT* + + COMMENT: /#.*/ + + %import common (INT, WS) + %ignore COMMENT + %ignore WS +""", parser="lalr", lexer_callbacks={'COMMENT': comments.append}) + +parser.parse(""" +1 2 3 # hello +# world +4 5 6 +""") + +print(comments) +``` + +Prints out: + +```python +[Token(COMMENT, '# hello'), Token(COMMENT, '# world')] +``` + +*Note: We don't have to return a token, because comments are ignored* \ No newline at end of file diff --git a/lark/__init__.py b/lark/__init__.py index 56fa224..04f360f 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -2,6 +2,7 @@ from .tree import Tree from .visitors import Transformer, Visitor, v_args, Discard from .visitors import InlineTransformer, inline_args # XXX Deprecated from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters +from .lexer import Token from .lark import Lark __version__ = "0.6.4" diff --git a/mkdocs.yml b/mkdocs.yml index ff81164..57dc0f0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -8,3 +8,4 @@ pages: - Grammar Reference: grammar.md - Tree Construction Reference: tree_construction.md - Classes Reference: classes.md + - Recipes: recipes.md