From 972034fd2d584b3571ef923fd6700a41f9bd8d92 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 20 Feb 2017 00:11:56 +0200 Subject: [PATCH] Began refactoring of lalr_analysis -> grammar_analysis --- lark/parsers/lalr_analysis.py | 106 +++++++++++++++++----------------- 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index e50de18..bdd6c73 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -57,6 +57,58 @@ def update_set(set1, set2): set1 |= set2 return set1 != copy +def calculate_sets(rules): + """Calculate FOLLOW sets. + + Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets""" + symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules} + symbols.add('$root') # what about other unused rules? + + # foreach grammar rule X ::= Y(1) ... Y(k) + # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then + # NULLABLE = NULLABLE union {X} + # for i = 1 to k + # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then + # FIRST(X) = FIRST(X) union FIRST(Y(i)) + # for j = i+1 to k + # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then + # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X) + # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then + # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j)) + # until none of NULLABLE,FIRST,FOLLOW changed in last iteration + + NULLABLE = set() + FIRST = {} + FOLLOW = {} + for sym in symbols: + FIRST[sym]={sym} if is_terminal(sym) else set() + FOLLOW[sym]=set() + + changed = True + while changed: + changed = False + + for rule in rules: + if set(rule.expansion) <= NULLABLE: + if update_set(NULLABLE, {rule.origin}): + changed = True + + for i, sym in enumerate(rule.expansion): + if set(rule.expansion[:i]) <= NULLABLE: + if update_set(FIRST[rule.origin], FIRST[sym]): + changed = True + if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE: + if update_set(FOLLOW[sym], FOLLOW[rule.origin]): + changed = True + + for j in range(i+1, len(rule.expansion)): + if set(rule.expansion[i+1:j]) <= NULLABLE: + if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]): + changed = True + + return FIRST, FOLLOW, NULLABLE + + class GrammarAnalyzer(object): def __init__(self, rule_tuples, start_symbol, debug=False): self.start_symbol = start_symbol @@ -79,6 +131,8 @@ class GrammarAnalyzer(object): self.init_state = self.expand_rule(start_symbol) + self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules) + def expand_rule(self, rule): "Returns all init_ptrs accessible by rule (recursive)" init_ptrs = set() @@ -104,59 +158,7 @@ class GrammarAnalyzer(object): else: return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)} - def _calc(self): - """Calculate FOLLOW sets. - - Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets""" - symbols = {sym for rule in self.rules for sym in rule.expansion} | {rule.origin for rule in self.rules} - symbols.add('$root') # what about other unused rules? - - # foreach grammar rule X ::= Y(1) ... Y(k) - # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then - # NULLABLE = NULLABLE union {X} - # for i = 1 to k - # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then - # FIRST(X) = FIRST(X) union FIRST(Y(i)) - # for j = i+1 to k - # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then - # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X) - # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then - # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j)) - # until none of NULLABLE,FIRST,FOLLOW changed in last iteration - - NULLABLE = set() - FIRST = {} - FOLLOW = {} - for sym in symbols: - FIRST[sym]={sym} if is_terminal(sym) else set() - FOLLOW[sym]=set() - - changed = True - while changed: - changed = False - - for rule in self.rules: - if set(rule.expansion) <= NULLABLE: - if update_set(NULLABLE, {rule.origin}): - changed = True - - for i, sym in enumerate(rule.expansion): - if set(rule.expansion[:i]) <= NULLABLE: - if update_set(FIRST[rule.origin], FIRST[sym]): - changed = True - if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE: - if update_set(FOLLOW[sym], FOLLOW[rule.origin]): - changed = True - - for j in range(i+1, len(rule.expansion)): - if set(rule.expansion[i+1:j]) <= NULLABLE: - if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]): - changed = True - - self.FOLLOW = FOLLOW - def analyze(self): - self._calc() self.states = {} def step(state):