mirror of https://github.com/lark-parser/lark.git
Added %declare
parent 2b4ef11ebf
commit ac0d49e7ab
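
This commit adds a %declare statement to Lark's grammar syntax and, along the way, renames the meta-grammar's TOKEN terminal type to TERMINAL. A %declare'd terminal is named but given no pattern: it is recorded with an empty (None, None) definition, skipped by terminal compilation and reference resolution, and filtered out of the generated lexers, on the understanding that a postlexer will produce it at runtime. The indented-tree example below uses it for _INDENT/_DEDENT.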
@@ -18,11 +18,10 @@ tree_grammar = r"""
     %import common.CNAME -> NAME
     %import common.WS_INLINE
+    %declare _INDENT _DEDENT
     %ignore WS_INLINE

     _NL: /(\r?\n[\t ]*)+/
-    _INDENT: "<INDENT>"
-    _DEDENT: "<DEDENT>"
 """

 class TreeIndenter(Indenter):

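As a usage sketch (not part of the diff): the updated example relies on Lark's Indenter postlexer to emit the declared terminals. Assuming the Indenter interface from lark.indenter (NL_type, INDENT_type, DEDENT_type, and so on), a complete runnable version looks roughly like this:

from lark import Lark
from lark.indenter import Indenter

tree_grammar = r"""
    ?start: _NL* tree

    tree: NAME _NL [_INDENT tree+ _DEDENT]

    %import common.CNAME -> NAME
    %import common.WS_INLINE
    %declare _INDENT _DEDENT
    %ignore WS_INLINE

    _NL: /(\r?\n[\t ]*)+/
"""

class TreeIndenter(Indenter):
    # The postlexer tracks indentation on each _NL token and emits
    # the %declare'd _INDENT/_DEDENT terminals into the token stream.
    NL_type = '_NL'
    OPEN_PAREN_types = []
    CLOSE_PAREN_types = []
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'
    tab_len = 8

parser = Lark(tree_grammar, parser='lalr', postlex=TreeIndenter())

test_tree = """
a
    b
    c
        d
        e
    f
        g
"""

print(parser.parse(test_tree).pretty())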
@@ -234,7 +234,7 @@ class ContextualLexer:
                 lexer = lexer_by_tokens[key]
             except KeyError:
                 accepts = set(accepts) | set(ignore) | set(always_accept)
-                state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$END']
+                state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name]
                 lexer = Lexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks)
                 lexer_by_tokens[key] = lexer

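Why the filter changed, as a hypothetical illustration: a %declare'd terminal such as _INDENT has no pattern, so it never appears in tokens_by_name; testing membership there skips both declared terminals and '$END', where the old is_terminal check would have led to a KeyError:

# Hypothetical stand-in values; the real dict maps names to TokenDef objects.
tokens_by_name = {'NAME': 'NAME-def', 'WS_INLINE': 'WS-def'}  # no '_INDENT'
accepts = ['NAME', '_INDENT', '$END']
state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name]
assert state_tokens == ['NAME-def']  # _INDENT and $END are filtered out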
@@ -22,7 +22,7 @@ IMPORT_PATHS = [os.path.join(__path__, 'grammars')]

 _RE_FLAGS = 'imslux'

-_TOKEN_NAMES = {
+_TERMINAL_NAMES = {
     '.' : 'DOT',
     ',' : 'COMMA',
     ':' : 'COLON',
@@ -62,7 +62,7 @@ _TOKEN_NAMES = {
 }

 # Grammar Parser
-TOKENS = {
+TERMINALS = {
     '_LPAR': r'\(',
     '_RPAR': r'\)',
     '_LBRA': r'\[',
@@ -73,7 +73,7 @@ TOKENS = {
     '_DOT': r'\.',
     'TILDE': '~',
     'RULE': '!?[_?]?[a-z][_a-z0-9]*',
-    'TOKEN': '_?[A-Z][_A-Z0-9]*',
+    'TERMINAL': '_?[A-Z][_A-Z0-9]*',
     'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
     'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]*' % _RE_FLAGS,
     '_NL': r'(\r?\n)+\s*',
@@ -81,6 +81,7 @@ TOKENS = {
     'COMMENT': r'//[^\n]*',
     '_TO': '->',
     '_IGNORE': r'%ignore',
+    '_DECLARE': r'%declare',
     '_IMPORT': r'%import',
     'NUMBER': r'\d+',
 }
@@ -116,22 +117,24 @@ RULES = {
               'literal',
               'range'],

-    'terminal': ['TOKEN'],
+    'terminal': ['TERMINAL'],
     'nonterminal': ['RULE'],

-    '?name': ['RULE', 'TOKEN'],
+    '?name': ['RULE', 'TERMINAL'],

     'maybe': ['_LBRA expansions _RBRA'],
     'range': ['STRING _DOT _DOT STRING'],

-    'token': ['TOKEN _COLON expansions _NL',
-              'TOKEN _DOT NUMBER _COLON expansions _NL'],
-    'statement': ['ignore', 'import'],
+    'token': ['TERMINAL _COLON expansions _NL',
+              'TERMINAL _DOT NUMBER _COLON expansions _NL'],
+    'statement': ['ignore', 'import', 'declare'],
     'ignore': ['_IGNORE expansions _NL'],
+    'declare': ['_DECLARE _declare_args _NL'],
     'import': ['_IMPORT import_args _NL',
-               '_IMPORT import_args _TO TOKEN'],
+               '_IMPORT import_args _TO TERMINAL _NL'],
     'import_args': ['_import_args'],
     '_import_args': ['name', '_import_args _DOT name'],
+    '_declare_args': ['name', '_declare_args name'],

     'literal': ['REGEXP', 'STRING'],
 }
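With these rules, a statement line in a user grammar can now take a declare form alongside ignore and import, for example:

%ignore WS_INLINE
%declare _INDENT _DEDENT
%import common.CNAME -> NAME

Note also that the aliasing form of %import now requires a terminating newline (_NL was added to its second alternative), and that _declare_args accepts one or more names on a single %declare line.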
@@ -278,7 +281,7 @@ class PrepareAnonTerminals(InlineTransformer):
         except KeyError:
             # Try to assign an indicative anon-token name
             try:
-                token_name = _TOKEN_NAMES[value]
+                token_name = _TERMINAL_NAMES[value]
             except KeyError:
                 if value.isalnum() and value[0].isalpha() and value.upper() not in self.token_set:
                     with suppress(UnicodeEncodeError):
@@ -302,7 +305,7 @@ class PrepareAnonTerminals(InlineTransformer):
         self.token_reverse[p] = tokendef
         self.tokens.append(tokendef)

-        return Terminal(Token('TOKEN', token_name, -1), filter_out=isinstance(p, PatternStr))
+        return Terminal(Token('TERMINAL', token_name, -1), filter_out=isinstance(p, PatternStr))


 def _rfind(s, choices):
@@ -376,7 +379,7 @@ class TokenTreeToPattern(Transformer):
             return items[0]
         if len({i.flags for i in items}) > 1:
             raise GrammarError("Lark doesn't support joining tokens with conflicting flags!")
-        return PatternRE(''.join(i.to_regexp() for i in items), items[0].flags)
+        return PatternRE(''.join(i.to_regexp() for i in items), items[0].flags if items else ())

     def expansions(self, exps):
         if len(exps) == 1:
@@ -412,7 +415,7 @@ class PrepareSymbols(Transformer):
             return v
         elif v.type == 'RULE':
             return NonTerminal(v.value)
-        elif v.type == 'TOKEN':
+        elif v.type == 'TERMINAL':
             return Terminal(v.value, filter_out=v.startswith('_'))
         assert False

@@ -435,8 +438,15 @@ class Grammar:

         # Convert token-trees to strings/regexps
         transformer = PrepareLiterals() * TokenTreeToPattern()
+        for name, (token_tree, priority) in token_defs:
+            if token_tree is None:  # Terminal added through %declare
+                continue
+            expansions = list(token_tree.find_data('expansion'))
+            if len(expansions) == 1 and not expansions[0].children:
+                raise GrammarError("Terminals cannot be empty (%s)" % name)
+
         tokens = [TokenDef(name, transformer.transform(token_tree), priority)
-                  for name, (token_tree, priority) in token_defs]
+                  for name, (token_tree, priority) in token_defs if token_tree]

         # =================
         # Compile Rules
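A small sketch (hypothetical values) of what this means for token_defs: declared terminals carry a (None, None) definition and are dropped before TokenDef construction, leaving the postlexer to supply them:

# Hypothetical contents after loading a grammar with '%declare _INDENT':
token_defs = [
    ('NAME', ('<token-tree placeholder>', 1)),  # normal terminal: (tree, priority)
    ('_INDENT', (None, None)),                  # %declare'd: no tree, no priority
]
compiled = [name for name, (token_tree, _p) in token_defs if token_tree]
assert compiled == ['NAME']  # _INDENT never becomes a TokenDef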
@@ -500,12 +510,14 @@ def resolve_token_references(token_defs):
     while True:
         changed = False
         for name, (token_tree, _p) in token_defs:
+            if token_tree is None:  # Terminal added through %declare
+                continue
             for exp in token_tree.find_data('value'):
                 item ,= exp.children
                 if isinstance(item, Token):
                     if item.type == 'RULE':
-                        raise GrammarError("Rules aren't allowed inside tokens (%s in %s)" % (item, name))
-                    if item.type == 'TOKEN':
+                        raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name))
+                    if item.type == 'TERMINAL':
                         exp.children[0] = token_dict[item]
                         changed = True
         if not changed:
@@ -539,7 +551,7 @@ class PrepareGrammar(InlineTransformer):

 class GrammarLoader:
     def __init__(self):
-        tokens = [TokenDef(name, PatternRE(value)) for name, value in TOKENS.items()]
+        tokens = [TokenDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

         rules = [options_from_rule(name, x) for name, x in RULES.items()]
         rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), None, o) for r, xs, o in rules for x in xs]
@@ -591,6 +603,7 @@ class GrammarLoader:

         # Execute statements
         ignore = []
+        declared = []
         for (stmt,) in statements:
             if stmt.data == 'ignore':
                 t ,= stmt.children
@@ -603,6 +616,9 @@
                 token_options = dict(g.token_defs)[dotted_path[-1]]
                 assert isinstance(token_options, tuple) and len(token_options)==2
                 token_defs.append([name.value, token_options])
+            elif stmt.data == 'declare':
+                for t in stmt.children:
+                    token_defs.append([t.value, (None, None)])
             else:
                 assert False, stmt

@@ -613,7 +629,7 @@
                 raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)

         # Handle ignore tokens
-        # XXX A slightly hacky solution. Recognition of %ignore TOKEN as separate comes from the lexer's
+        # XXX A slightly hacky solution. Recognition of %ignore TERMINAL as separate comes from the lexer's
         # inability to handle duplicate tokens (two names, one value)
         ignore_names = []
         for t in ignore:
@@ -623,7 +639,7 @@
             item ,= t2.children
             if item.data == 'value':
                 item ,= item.children
-                if isinstance(item, Token) and item.type == 'TOKEN':
+                if isinstance(item, Token) and item.type == 'TERMINAL':
                     ignore_names.append(item.value)
                     continue

@@ -656,7 +672,7 @@

         for name, expansions, _o in rules:
             used_symbols = {t for x in expansions.find_data('expansion')
-                            for t in x.scan_values(lambda t: t.type in ('RULE', 'TOKEN'))}
+                            for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}
             for sym in used_symbols:
                 if is_terminal(sym):
                     if sym not in token_names: