mirror of https://github.com/lark-parser/lark.git
Fixed bug in load_grammar. Improved nearley converter
This commit is contained in:
parent
52753cb3aa
commit
3d4ee92829
|
@ -219,7 +219,6 @@ class SimplifyTree(InlineTransformer):
|
|||
return tokenmods + [value]
|
||||
|
||||
def get_tokens(tree, token_set):
|
||||
tokens = []
|
||||
for t in tree.find_data('token'):
|
||||
x = t.children
|
||||
name = x[0].value
|
||||
|
@ -266,10 +265,13 @@ class ExtractAnonTokens(InlineTransformer):
|
|||
else:
|
||||
assert False, token
|
||||
|
||||
if value in self.token_reverse: # Kind of a wierd placement
|
||||
token_name = self.token_reverse[value]
|
||||
|
||||
if token_name not in self.token_set:
|
||||
self.token_set.add(token_name)
|
||||
self.tokens.append((token_name, token, []))
|
||||
assert value not in self.token_reverse
|
||||
assert value not in self.token_reverse, value
|
||||
self.token_reverse[value] = token_name
|
||||
|
||||
return Token('TOKEN', token_name, -1)
|
||||
|
|
|
@ -1,22 +1,32 @@
|
|||
"Converts between Lark and Nearley grammars. Work in progress!"
|
||||
|
||||
import os.path
|
||||
import sys
|
||||
|
||||
from lark import Lark, InlineTransformer
|
||||
|
||||
nearley_grammar = r"""
|
||||
start: (ruledef|directive)+
|
||||
|
||||
directive: "@" NAME STRING
|
||||
directive: "@" NAME (STRING|NAME)
|
||||
| "@" _JS -> js_code
|
||||
ruledef: NAME "->" expansions
|
||||
| NAME REGEXP "->" expansions -> macro
|
||||
expansions: expansion ("|" expansion)*
|
||||
|
||||
expansion: (rule|string|regexp)+ _JS?
|
||||
expansion: expr+ _JS?
|
||||
|
||||
?expr: item [":" /[+*?]/]
|
||||
|
||||
?item: rule|string|regexp
|
||||
| "(" expansions ")"
|
||||
|
||||
rule: NAME
|
||||
string: STRING
|
||||
regexp: REGEXP
|
||||
_JS: /(?s){%.*?%}/
|
||||
|
||||
NAME: /[a-zA-Z_]\w*/
|
||||
NAME: /[a-zA-Z_$]\w*/
|
||||
WS.ignore: /[\t \f\n]+/
|
||||
COMMENT.ignore: /\#[^\n]*/
|
||||
REGEXP: /\[.*?\]/
|
||||
|
@ -27,13 +37,20 @@ nearley_grammar = r"""
|
|||
|
||||
|
||||
class NearleyToLark(InlineTransformer):
|
||||
def __init__(self, builtin_path):
|
||||
self.builtin_path = builtin_path
|
||||
|
||||
def rule(self, name):
|
||||
return {'_': '_WS?', '__':'_WS'}.get(name, name)
|
||||
# return {'_': '_WS?', '__':'_WS'}.get(name, name)
|
||||
return {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
|
||||
|
||||
def ruledef(self, name, exps):
|
||||
name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
|
||||
return '%s: %s' % (name, exps)
|
||||
|
||||
def expr(self, item, op):
|
||||
return '(%s)%s' % (item, op)
|
||||
|
||||
def regexp(self, r):
|
||||
return '/%s/' % r
|
||||
|
||||
|
@ -45,35 +62,32 @@ class NearleyToLark(InlineTransformer):
|
|||
return ' '.join(x)
|
||||
|
||||
def expansions(self, *x):
|
||||
return '\n |'.join(x)
|
||||
return '(%s)' % ('\n |'.join(x))
|
||||
|
||||
def js_code(self):
|
||||
return ''
|
||||
|
||||
def macro(self, *args):
|
||||
return '' # TODO support macros?!
|
||||
|
||||
def directive(self, name, *args):
|
||||
if name == 'builtin':
|
||||
arg = args[0][1:-1]
|
||||
if arg == 'whitespace.ne':
|
||||
return r'_WS: /[ \t\n\v\f]/'
|
||||
elif arg == 'number.ne':
|
||||
return ('unsigned_int: DIGIT+\n'
|
||||
'DIGIT: /\d/\n'
|
||||
'decimal: "-"? DIGIT+ [/\./ DIGIT+] \n'
|
||||
'percentage: decimal "%"\n'
|
||||
)
|
||||
# TODO
|
||||
elif arg == 'postprocessors.ne':
|
||||
pass
|
||||
else:
|
||||
assert False, arg
|
||||
else:
|
||||
assert False
|
||||
pass
|
||||
with open(os.path.join(self.builtin_path, arg)) as f:
|
||||
text = f.read()
|
||||
return nearley_to_lark(text, self.builtin_path)
|
||||
elif name == 'preprocessor':
|
||||
return ''
|
||||
|
||||
raise Exception('Unknown directive: %s' % name)
|
||||
|
||||
def start(self, *rules):
|
||||
return '\n'.join(filter(None, rules))
|
||||
|
||||
def nearley_to_lark(g):
|
||||
def nearley_to_lark(g, builtin_path):
|
||||
parser = Lark(nearley_grammar)
|
||||
tree = parser.parse(g)
|
||||
return NearleyToLark().transform(tree)
|
||||
return NearleyToLark(builtin_path).transform(tree)
|
||||
|
||||
|
||||
def test():
|
||||
|
@ -112,7 +126,7 @@ def test():
|
|||
function(d) {return Math.floor(d[0]*255); }
|
||||
%}
|
||||
"""
|
||||
converted_grammar = nearley_to_lark(css_example_grammar)
|
||||
converted_grammar = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin')
|
||||
print(converted_grammar)
|
||||
|
||||
l = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
|
||||
|
@ -120,5 +134,17 @@ def test():
|
|||
print(l.parse('rgb(255, 70%, 3)').pretty())
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
nearley_lib = sys.argv[1]
|
||||
except IndexError:
|
||||
print("Reads Nearley grammar from stdin and outputs a lark grammar.")
|
||||
print("Usage: %s <nearley_lib_path>" % sys.argv[0])
|
||||
return
|
||||
|
||||
grammar = sys.stdin.read()
|
||||
print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin')))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
||||
main()
|
||||
|
|
Loading…
Reference in New Issue