diff --git a/lark/common.py b/lark/common.py
index c8336ec..a8f2975 100644
--- a/lark/common.py
+++ b/lark/common.py
@@ -13,10 +13,13 @@ class UnexpectedToken(ParseError):
         self.line = getattr(token, 'line', '?')
         self.column = getattr(token, 'column', '?')
 
-        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
+        try:
+            context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
+        except AttributeError:
+            context = seq[index:index+5]
         message = ("Unexpected token %r at line %s, column %s.\n"
                    "Expected: %s\n"
-                   "Context: %s" % (token.value, self.line, self.column, expected, context))
+                   "Context: %s" % (token, self.line, self.column, expected, context))
 
         super(ParseError, self).__init__(message)
 
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tools/nearley.py b/tools/nearley.py
new file mode 100644
index 0000000..3cc9d50
--- /dev/null
+++ b/tools/nearley.py
@@ -0,0 +1,140 @@
+"""Converts Nearley grammars to Lark grammars. Work in progress!"""
+
+from __future__ import print_function
+
+from lark import Lark, InlineTransformer
+
+# Lark grammar for the subset of Nearley syntax this tool understands.
+nearley_grammar = r"""
+    start: (ruledef|directive)+
+
+    directive: "@" NAME STRING
+    ruledef: NAME "->" expansions
+    expansions: expansion ("|" expansion)*
+
+    expansion: (rule|string|regexp)+ _JS?
+
+    rule: NAME
+    string: STRING
+    regexp: REGEXP
+    _JS: /(?s){%.*?%}/
+
+    NAME: /[a-zA-Z_]\w*/
+    WS.ignore: /[\t \f\n]+/
+    COMMENT.ignore: /\#[^\n]*/
+    REGEXP: /\[.*?\]/
+    STRING: /".*?"/
+
+    """
+
+
+class NearleyToLark(InlineTransformer):
+    """Rewrites a parsed Nearley grammar tree into Lark grammar text."""
+
+    def rule(self, name):
+        # Map the `_` / `__` rule names onto optional / mandatory _WS.
+        return {'_': '_WS?', '__': '_WS'}.get(name, name)
+
+    def ruledef(self, name, exps):
+        return '%s: %s' % (name, exps)
+
+    def regexp(self, r):
+        return '/%s/' % r
+
+    def string(self, s):
+        # TODO allow regular strings, and split them in the parser frontend
+        # Drop the surrounding quotes and emit one literal per character.
+        return ' '.join('"%s"' % ch for ch in s[1:-1])
+
+    def expansion(self, *x):
+        return ' '.join(x)
+
+    def expansions(self, *x):
+        return '\n |'.join(x)
+
+    def directive(self, name, *args):
+        """Translate a supported ``@builtin`` directive into Lark rules.
+
+        Returns None for a builtin that needs no Lark counterpart and
+        raises ValueError for anything unsupported.
+        """
+        # Raise rather than assert: asserts are removed under ``python -O``,
+        # which would silently drop an unsupported directive.
+        if name != 'builtin':
+            raise ValueError('Unsupported directive: @%s' % name)
+
+        arg = args[0][1:-1]  # strip the surrounding quotes
+        if arg == 'whitespace.ne':
+            return r'_WS: /[ \t\n\v\f]/'
+        if arg == 'number.ne':
+            # Backslashes are doubled so the emitted text is unchanged
+            # without relying on invalid escape sequences (\d, \.).
+            return ('unsigned_int: DIGIT+\n'
+                    'DIGIT: /\\d/\n'
+                    'decimal: "-"? DIGIT+ [/\\./ DIGIT+] \n'
+                    'percentage: decimal "%"\n'
+                    )
+        if arg == 'postprocessors.ne':
+            # TODO: postprocessors are not converted yet.
+            return None
+        raise ValueError('Unsupported builtin: %s' % arg)
+
+    def start(self, *rules):
+        # Directives may return None; keep only the entries with text.
+        return '\n'.join(filter(None, rules))
+
+
+def nearley_to_lark(g):
+    """Return Lark grammar text converted from the Nearley grammar *g*."""
+    parser = Lark(nearley_grammar)
+    tree = parser.parse(g)
+    return NearleyToLark().transform(tree)
+
+
+def test():
+    """Convert a sample CSS colour grammar and parse two inputs with it."""
+    css_example_grammar = """
+# http://www.w3.org/TR/css3-color/#colorunits
+
+    @builtin "whitespace.ne"
+    @builtin "number.ne"
+    @builtin "postprocessors.ne"
+
+    csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
+        function(d) {
+            return {
+                "r": parseInt(d[1]+d[2], 16),
+                "g": parseInt(d[3]+d[4], 16),
+                "b": parseInt(d[5]+d[6], 16),
+            }
+        }
+    %}
+    | "#" hexdigit hexdigit hexdigit {%
+        function(d) {
+            return {
+                "r": parseInt(d[1]+d[1], 16),
+                "g": parseInt(d[2]+d[2], 16),
+                "b": parseInt(d[3]+d[3], 16),
+            }
+        }
+    %}
+    | "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
+    | "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
+    | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
+    | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
+
+    hexdigit -> [a-fA-F0-9]
+    colnum -> unsigned_int {% id %} | percentage {%
+        function(d) {return Math.floor(d[0]*255); }
+    %}
+    """
+    converted_grammar = nearley_to_lark(css_example_grammar)
+    print(converted_grammar)
+
+    parser = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
+    print(parser.parse('#a199ff').pretty())
+    print(parser.parse('rgb(255, 70%, 3)').pretty())
+
+
+if __name__ == '__main__':
+    test()