mirror of https://github.com/lark-parser/lark.git
Began refactoring of lalr_analysis -> grammar_analysis
This commit is contained in:
parent
07df4b80eb
commit
972034fd2d
|
@ -57,58 +57,11 @@ def update_set(set1, set2):
|
||||||
set1 |= set2
|
set1 |= set2
|
||||||
return set1 != copy
|
return set1 != copy
|
||||||
|
|
||||||
class GrammarAnalyzer(object):
|
def calculate_sets(rules):
|
||||||
def __init__(self, rule_tuples, start_symbol, debug=False):
|
|
||||||
self.start_symbol = start_symbol
|
|
||||||
self.debug = debug
|
|
||||||
rule_tuples = list(rule_tuples)
|
|
||||||
rule_tuples.append(('$root', [start_symbol, '$end']))
|
|
||||||
rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]
|
|
||||||
|
|
||||||
self.rules = set()
|
|
||||||
self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
|
|
||||||
for origin, exp, alias in rule_tuples:
|
|
||||||
r = Rule( origin, exp, alias )
|
|
||||||
self.rules.add(r)
|
|
||||||
self.rules_by_origin[origin].append(r)
|
|
||||||
|
|
||||||
for r in self.rules:
|
|
||||||
for sym in r.expansion:
|
|
||||||
if not (is_terminal(sym) or sym in self.rules_by_origin):
|
|
||||||
raise GrammarError("Using an undefined rule: %s" % sym)
|
|
||||||
|
|
||||||
self.init_state = self.expand_rule(start_symbol)
|
|
||||||
|
|
||||||
def expand_rule(self, rule):
|
|
||||||
"Returns all init_ptrs accessible by rule (recursive)"
|
|
||||||
init_ptrs = set()
|
|
||||||
def _expand_rule(rule):
|
|
||||||
assert not is_terminal(rule)
|
|
||||||
|
|
||||||
for r in self.rules_by_origin[rule]:
|
|
||||||
init_ptr = RulePtr(r, 0)
|
|
||||||
init_ptrs.add(init_ptr)
|
|
||||||
|
|
||||||
if r.expansion: # if not empty rule
|
|
||||||
new_r = init_ptr.next
|
|
||||||
if not is_terminal(new_r):
|
|
||||||
yield new_r
|
|
||||||
|
|
||||||
_ = list(bfs([rule], _expand_rule))
|
|
||||||
|
|
||||||
return fzset(init_ptrs)
|
|
||||||
|
|
||||||
def _first(self, r):
|
|
||||||
if is_terminal(r):
|
|
||||||
return {r}
|
|
||||||
else:
|
|
||||||
return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}
|
|
||||||
|
|
||||||
def _calc(self):
|
|
||||||
"""Calculate FOLLOW sets.
|
"""Calculate FOLLOW sets.
|
||||||
|
|
||||||
Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
|
Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
|
||||||
symbols = {sym for rule in self.rules for sym in rule.expansion} | {rule.origin for rule in self.rules}
|
symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
|
||||||
symbols.add('$root') # what about other unused rules?
|
symbols.add('$root') # what about other unused rules?
|
||||||
|
|
||||||
# foreach grammar rule X ::= Y(1) ... Y(k)
|
# foreach grammar rule X ::= Y(1) ... Y(k)
|
||||||
|
@ -135,7 +88,7 @@ class GrammarAnalyzer(object):
|
||||||
while changed:
|
while changed:
|
||||||
changed = False
|
changed = False
|
||||||
|
|
||||||
for rule in self.rules:
|
for rule in rules:
|
||||||
if set(rule.expansion) <= NULLABLE:
|
if set(rule.expansion) <= NULLABLE:
|
||||||
if update_set(NULLABLE, {rule.origin}):
|
if update_set(NULLABLE, {rule.origin}):
|
||||||
changed = True
|
changed = True
|
||||||
|
@ -153,10 +106,59 @@ class GrammarAnalyzer(object):
|
||||||
if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
|
if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
|
||||||
changed = True
|
changed = True
|
||||||
|
|
||||||
self.FOLLOW = FOLLOW
|
return FIRST, FOLLOW, NULLABLE
|
||||||
|
|
||||||
|
|
||||||
|
class GrammarAnalyzer(object):
|
||||||
|
def __init__(self, rule_tuples, start_symbol, debug=False):
|
||||||
|
self.start_symbol = start_symbol
|
||||||
|
self.debug = debug
|
||||||
|
rule_tuples = list(rule_tuples)
|
||||||
|
rule_tuples.append(('$root', [start_symbol, '$end']))
|
||||||
|
rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]
|
||||||
|
|
||||||
|
self.rules = set()
|
||||||
|
self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
|
||||||
|
for origin, exp, alias in rule_tuples:
|
||||||
|
r = Rule( origin, exp, alias )
|
||||||
|
self.rules.add(r)
|
||||||
|
self.rules_by_origin[origin].append(r)
|
||||||
|
|
||||||
|
for r in self.rules:
|
||||||
|
for sym in r.expansion:
|
||||||
|
if not (is_terminal(sym) or sym in self.rules_by_origin):
|
||||||
|
raise GrammarError("Using an undefined rule: %s" % sym)
|
||||||
|
|
||||||
|
self.init_state = self.expand_rule(start_symbol)
|
||||||
|
|
||||||
|
self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules)
|
||||||
|
|
||||||
|
def expand_rule(self, rule):
|
||||||
|
"Returns all init_ptrs accessible by rule (recursive)"
|
||||||
|
init_ptrs = set()
|
||||||
|
def _expand_rule(rule):
|
||||||
|
assert not is_terminal(rule)
|
||||||
|
|
||||||
|
for r in self.rules_by_origin[rule]:
|
||||||
|
init_ptr = RulePtr(r, 0)
|
||||||
|
init_ptrs.add(init_ptr)
|
||||||
|
|
||||||
|
if r.expansion: # if not empty rule
|
||||||
|
new_r = init_ptr.next
|
||||||
|
if not is_terminal(new_r):
|
||||||
|
yield new_r
|
||||||
|
|
||||||
|
_ = list(bfs([rule], _expand_rule))
|
||||||
|
|
||||||
|
return fzset(init_ptrs)
|
||||||
|
|
||||||
|
def _first(self, r):
|
||||||
|
if is_terminal(r):
|
||||||
|
return {r}
|
||||||
|
else:
|
||||||
|
return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}
|
||||||
|
|
||||||
def analyze(self):
|
def analyze(self):
|
||||||
self._calc()
|
|
||||||
|
|
||||||
self.states = {}
|
self.states = {}
|
||||||
def step(state):
|
def step(state):
|
||||||
|
|
Loading…
Reference in New Issue