From ad30c324f52de33e697745cddc41ff734d0af2b1 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Thu, 16 Nov 2017 09:13:14 +0200 Subject: [PATCH] Added support for all RE flags. --- examples/python2.g | 4 ++-- examples/python3.g | 2 +- lark/load_grammar.py | 8 +++++--- lark/tools/nearley.py | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/examples/python2.g b/examples/python2.g index e8ebda8..a429512 100644 --- a/examples/python2.g +++ b/examples/python2.g @@ -145,11 +145,11 @@ number: DEC_NUMBER | HEX_NUMBER | OCT_NUMBER | FLOAT | IMAG_NUMBER string: STRING | LONG_STRING // Tokens -COMMENT: /\#[^\n]*/ +COMMENT: /#[^\n]*/ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ %ignore /[\t \f]+/ // WS -%ignore /\\\\[\t \f]*\r?\n/ // LINE_CONT +%ignore /\\[\t \f]*\r?\n/ // LINE_CONT %ignore COMMENT diff --git a/examples/python3.g b/examples/python3.g index bb5a4e8..c27b7ec 100644 --- a/examples/python3.g +++ b/examples/python3.g @@ -170,7 +170,7 @@ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ %ignore /[\t \f]+/ // WS -%ignore /\\\\[\t \f]*\r?\n/ // LINE_CONT +%ignore /\\[\t \f]*\r?\n/ // LINE_CONT %ignore COMMENT diff --git a/lark/load_grammar.py b/lark/load_grammar.py index f04e3c0..6f2e102 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -18,6 +18,8 @@ from .tree import Tree as T, Transformer, InlineTransformer, Visitor __path__ = os.path.dirname(__file__) IMPORT_PATHS = [os.path.join(__path__, 'grammars')] +_RE_FLAGS = 'imslux' + _TOKEN_NAMES = { '.' : 'DOT', ',' : 'COMMA', @@ -70,7 +72,7 @@ TOKENS = { 'RULE': '!?[_?]?[a-z][_a-z0-9]*', 'TOKEN': '_?[A-Z][_A-Z0-9]*', 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', - 'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/i?', + 'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]?' % _RE_FLAGS, '_NL': r'(\r?\n)+\s*', 'WS': r'[ \t]+', 'COMMENT': r'//[^\n]*', @@ -287,7 +289,7 @@ class ExtractAnonTokens(InlineTransformer): def _literal_to_pattern(literal): v = literal.value - if v[-1] in 'i': + if v[-1] in _RE_FLAGS: flags = v[-1] v = v[:-1] else: @@ -295,7 +297,7 @@ def _literal_to_pattern(literal): assert v[0] == v[-1] and v[0] in '"/' x = v[1:-1] - x = re.sub(r'(\\[wd/]|\\\[|\\\])', r'\\\1', x) + x = re.sub(r'(\\[wd/ ]|\\\[|\\\])', r'\\\1', x) x = x.replace("'", r"\'") s = literal_eval("u'''%s'''" % x) return { 'STRING': PatternStr, diff --git a/lark/tools/nearley.py b/lark/tools/nearley.py index 107b8ae..5e277a1 100644 --- a/lark/tools/nearley.py +++ b/lark/tools/nearley.py @@ -26,7 +26,7 @@ nearley_grammar = r""" rule: NAME string: STRING regexp: REGEXP - JS: /(?s){%.*?%}/ + JS: /{%.*?%}/s js: JS? NAME: /[a-zA-Z_$]\w*/