#
# This example demonstrates using Lark with a custom lexer.
#
# You can use a custom lexer to tokenize text when the lexers offered by Lark
# are too slow, or not flexible enough.
#
# You can also use it (as shown in this example) to tokenize streams of objects.
#

from lark import Lark, Transformer, v_args
from lark.lexer import Lexer, Token

class TypeLexer(Lexer):
    """A custom lexer that tokenizes a stream of Python objects by their type."""

    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        for obj in data:
            if isinstance(obj, int):
                yield Token('INT', obj)
            elif isinstance(obj, str):
                yield Token('STR', obj)
            else:
                raise TypeError(obj)

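# For example, the stream ['alice', 1, 27] would be lexed into:
#     Token('STR', 'alice'), Token('INT', 1), Token('INT', 27)
# Note that a Token's .value can be any Python object, not just a string,
# which is how the original ints are recovered in the transformer below.
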
# %declare declares the STR and INT terminals without defining patterns for
# them; producing those tokens is the custom lexer's job.
parser = Lark("""
        start: data_item+
        data_item: STR INT*

        %declare STR INT
        """, parser='lalr', lexer=TypeLexer)

class ParseToDict(Transformer):
    @v_args(inline=True)
    def data_item(self, name, *numbers):
        # Each data_item becomes a (name, [numbers]) pair
        return name.value, [n.value for n in numbers]

    # The start rule's children (the pairs) are collected into a dict
    start = dict

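# To sketch the data flow: parsing ['alice', 1, 27] produces a tree with a
# single data_item, which ParseToDict reduces to ('alice', [1, 27]); dict()
# then gathers all such pairs, giving {'alice': [1, 27]}.
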
def test():
    data = ['alice', 1, 27, 3, 'bob', 4, 'carrie', 'dan', 8, 6]

    print(data)

    tree = parser.parse(data)
    res = ParseToDict().transform(tree)

    print('-->')
    print(res)  # prints {'alice': [1, 27, 3], 'bob': [4], 'carrie': [], 'dan': [8, 6]}


if __name__ == '__main__':
    test()