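"""LALR(1) parser generator.

Reads a context-free grammar from a text file, computes FIRST sets, builds the
canonical LR(1) item-set collection, merges states with equal cores into LALR
states, constructs the parsing table, and then parses an input string while
printing the shift/reduce trace and the resulting parse tree.
"""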
from collections import defaultdict
from copy import deepcopy


def convert_grammar_to_productions(file_path):
    """Read a grammar file and return (productions, nonterminals, terminals)."""
    with open(file_path, 'r') as file:
        grammar_content = file.read().strip()
    lines = [line.strip() for line in grammar_content.split('\n') if line.strip()]

    productions = []
    nonterminals = []
    # Every symbol that appears on a left-hand side is a nonterminal.
    for line in lines:
        left_side = line.split('->')[0].strip()
        if left_side not in nonterminals:
            nonterminals.append(left_side)

    all_symbols = set()
    for line in lines:
        left_side, right_side = [part.strip() for part in line.split('->')]
        alternatives = [alt.strip() for alt in right_side.split('|')]
        for alt in alternatives:
            symbols = alt.split()
            for symbol in symbols:
                if symbol != "":
                    all_symbols.add(symbol)
            # One production per '|'-separated alternative.
            productions.append((left_side, symbols))

    # Symbols that never appear on a left-hand side are terminals.
    terminals = [symbol for symbol in all_symbols if symbol not in nonterminals]
    terminals.append('$')
    return productions, nonterminals, terminals

file_path = "text.txt"
productions, nonterminals, terminals = convert_grammar_to_productions(file_path)
print(productions)
print(nonterminals)
print(terminals)

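# Assumed example of the grammar file format that convert_grammar_to_productions()
# expects in "text.txt" (not taken from the original paste): one nonterminal per
# line, '->' between the sides, '|' between alternatives, symbols separated by
# spaces, and '^' for the empty string:
#
#   E -> E + T | T
#   T -> T * F | F
#   F -> ( E ) | id
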
def print_grammar(prods, title="Original Grammar"):
    print(f"\n{title}:")
    for lhs, rhs in prods:
        print(f" {lhs} -> {' '.join(rhs)}")


def format_item(item):
    # An LR(1) item is stored as (lhs, rhs_tuple, dot_position, frozenset_of_lookaheads).
    lhs, rhs, dot, lookahead = item
    rhs_list = list(rhs)
    rhs_with_dot = rhs_list[:]
    rhs_with_dot.insert(dot, "•")
    lookahead_str = "/".join(sorted(lookahead))
    return f"{lhs} -> {' '.join(rhs_with_dot)}, {{{lookahead_str}}}"


def print_state(state, state_id):
    print(f"State I{state_id}:")
    for item in state:
        print(" " + format_item(item))
    print()


def print_states(states, title="States"):
    print(f"\n{title}:")
    for i, state in enumerate(states):
        print_state(state, i)


def print_first_sets(first):
    print("\nFIRST Sets:")
    for sym in sorted(first.keys()):
        print(f" FIRST({sym}) = {{ {', '.join(sorted(first[sym]))} }}")
    print()

def print_goto_transitions(merged_states, prods, first, nonterms, terms, core_to_index):
    print("\nGOTO Transitions (Merged LALR States):")

    def compute_merged_goto(state, symbol):
        g = set()
        for item in state:
            lhs, rhs, dot, lookahead = item
            if dot < len(rhs) and rhs[dot] == symbol:
                g.add((lhs, rhs, dot + 1, lookahead))
        if not g:
            return None
        g = closure(g, prods, first, nonterms)
        core = frozenset((it[0], it[1], it[2]) for it in g)
        return core_to_index.get(core, None)

    all_symbols = nonterms + terms
    for i, state in enumerate(merged_states):
        for sym in all_symbols:
            tgt = compute_merged_goto(state, sym)
            if tgt is not None:
                print(f" GOTO(I{i}, {sym}) = I{tgt}")
    print()

def print_parsing_table(table, terms, nonterms):
    # terms already contains '$' (appended when the grammar file is read),
    # so build the header without duplicating that column.
    headers = [t for t in terms if t != '$'] + ['$'] + nonterms
    header_line = "{:>8}" * (len(headers) + 1)
    print("\nLALR Parsing Table:")
    print(header_line.format("", *headers))
    for state in sorted(table.keys()):
        row = []
        for sym in headers:
            row.append(table[state].get(sym, ""))
        print(header_line.format(f"I{state}", *row))

def augment_grammar(prods, start_sym):
    # Add a fresh start symbol S' -> S so acceptance has a unique production.
    new_start = start_sym + "'"
    while any(prod[0] == new_start for prod in prods):
        new_start += "'"
    augmented = [(new_start, [start_sym])] + prods
    return augmented, new_start

def compute_first(prods, nonterms, terms):
    # '^' is used as the epsilon (empty string) marker throughout.
    first = {sym: set() for sym in nonterms}
    for t in terms:
        first[t] = {t}
    changed = True
    while changed:
        changed = False
        for lhs, rhs in prods:
            before = len(first[lhs])
            if rhs == ["^"]:
                first[lhs].add("^")
            else:
                for symbol in rhs:
                    first[lhs].update(first[symbol] - {"^"})
                    if "^" not in first[symbol]:
                        break
                else:
                    # Every symbol of rhs can derive epsilon.
                    first[lhs].add("^")
            if len(first[lhs]) > before:
                changed = True
    return first

def first_of_string(symbols, first):
    result = set()
    for symbol in symbols:
        if symbol not in first:
            first[symbol] = {symbol}
        result.update(first[symbol] - {"^"})
        if "^" not in first[symbol]:
            break
    else:
        result.add("^")
    return result

def closure(items, prods, first, nonterms):
    closure_set = set(items)
    added = True
    while added:
        added = False
        new_items = set()
        for item in closure_set:
            lhs, rhs, dot, lookahead = item
            if dot < len(rhs):
                symbol = rhs[dot]
                if symbol in nonterms:
                    beta = list(rhs[dot + 1:])
                    for prod in prods:
                        if prod[0] == symbol:
                            # Lookaheads = FIRST(beta); if beta can derive epsilon,
                            # inherit the parent item's lookaheads as well. (Appending
                            # the lookahead list directly to beta would keep only one
                            # of them once the first non-nullable symbol is reached.)
                            la = first_of_string(beta, first)
                            if "^" in la:
                                la = (la - {"^"}) | set(lookahead)
                            new_item = (symbol, tuple(prod[1]), 0, frozenset(la))
                            if new_item not in closure_set:
                                new_items.add(new_item)
        if new_items:
            closure_set |= new_items
            added = True
    return frozenset(closure_set)

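# Sketch of what closure() yields for an assumed toy grammar (not from the paste):
# with S' -> S, S -> C C, C -> c C | d, the closure of {("S'", ('S',), 0, frozenset({'$'}))}
# contains, among others:
#   ('S', ('C', 'C'), 0, frozenset({'$'}))
#   ('C', ('c', 'C'), 0, frozenset({'c', 'd'}))
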
def goto(items, symbol, prods, first, nonterms):
    goto_set = set()
    for item in items:
        lhs, rhs, dot, lookahead = item
        if dot < len(rhs) and rhs[dot] == symbol:
            goto_set.add((lhs, rhs, dot + 1, lookahead))
    if not goto_set:
        return frozenset()
    return closure(goto_set, prods, first, nonterms)

def canonical_collection(prods, first, nonterms, terms, start_sym):
    C = []
    initial = closure({(start_sym, tuple(prods[0][1]), 0, frozenset({'$'}))},
                      prods, first, nonterms)
    C.append(initial)
    added = True
    while added:
        added = False
        for I in C.copy():
            for X in (nonterms + terms):
                goto_I = goto(I, X, prods, first, nonterms)
                if goto_I and goto_I not in C:
                    C.append(goto_I)
                    added = True
    return C

def merge_states(collection):
    # LALR(1): merge LR(1) states whose cores (items without lookaheads) coincide,
    # unioning their lookahead sets.
    merged = {}
    state_mapping = {}
    for i, I in enumerate(collection):
        core = frozenset((item[0], item[1], item[2]) for item in I)
        if core in merged:
            merged[core] = merged[core] | I
        else:
            merged[core] = I
        state_mapping[i] = core
    merged_states = list(merged.values())
    return merged_states, state_mapping

def construct_parsing_table(merged_states, prods, first, nonterms, terms, start_sym):
    core_to_index = {}
    for i, state in enumerate(merged_states):
        core = frozenset((item[0], item[1], item[2]) for item in state)
        core_to_index[core] = i

    table = {i: {} for i in range(len(merged_states))}

    def merged_goto(state, symbol):
        g = set()
        for item in state:
            lhs, rhs, dot, lookahead = item
            if dot < len(rhs) and rhs[dot] == symbol:
                g.add((lhs, rhs, dot + 1, lookahead))
        if not g:
            return None
        g = closure(g, prods, first, nonterms)
        core = frozenset((it[0], it[1], it[2]) for it in g)
        return core_to_index.get(core, None)

    for i, state in enumerate(merged_states):
        for item in state:
            lhs, rhs, dot, lookahead = item
            if dot < len(rhs):
                a = rhs[dot]
                if a in terms:
                    # Shift entry.
                    j = merged_goto(state, a)
                    if j is not None:
                        table[i][a] = f"S{j}"
                elif a in nonterms:
                    # Goto entry, stored as a bare state number.
                    j = merged_goto(state, a)
                    if j is not None:
                        table[i][a] = str(j)
            else:
                if lhs == start_sym and rhs == tuple(prods[0][1]):
                    for la in lookahead:
                        table[i][la] = "Acc"
                else:
                    # Reduce entry: find this production's index in the augmented grammar.
                    prod_index = None
                    for index, prod in enumerate(prods):
                        if index == 0:
                            continue
                        if lhs == prod[0] and list(rhs) == prod[1]:
                            prod_index = index
                            break
                    if prod_index is None:
                        continue
                    for la in lookahead:
                        if la in table[i]:
                            # Conflicting actions are kept, joined with '/'.
                            table[i][la] += f"/R{prod_index}"
                        else:
                            table[i][la] = f"R{prod_index}"
    return table, core_to_index

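# Illustrative only (no particular grammar): a finished table row might look like
#   table[3] == {'+': 'S6', '$': 'R2', 'E': '8'}
# i.e. shift to I6 on '+', reduce by production 2 on '$', and goto I8 after E,
# with conflicting entries joined by '/'.
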
class Node:
    def __init__(self, symbol, children=None):
        self.symbol = symbol
        self.children = children if children is not None else []

    def _str(self, level=0):
        ret = " " * level + self.symbol + "\n"
        for child in self.children:
            ret += child._str(level + 1)
        return ret

def parse_input_string(input_string, table, augmented_productions, new_start):
    tokens = input_string.split() + ["$"]
    state_stack = [0]
    node_stack = []
    i = 0
    step = 1
    header = f"{'Step':<5}{'Stack':<20}{'Input':<30}{'Action':<15}"
    print("\n" + header)
    print("-" * len(header))
    while True:
        current_state = state_stack[-1]
        current_token = tokens[i]
        action = table[current_state].get(current_token, "")
        print(f"{step:<5}{str(state_stack):<20}{' '.join(tokens[i:]):<30}{action:<15}")
        if action == "":
            print(f"\nError: No action defined for state {current_state} and token {current_token}.")
            return
        if action.startswith("S"):
            # On a conflict entry such as "S5/R3", take the first (shift) alternative.
            next_state = int(action.split("/")[0][1:])
            state_stack.append(next_state)
            node_stack.append(Node(current_token))
            i += 1
        elif action.startswith("R"):
            # On a conflict entry such as "R2/R4", take the first reduce alternative.
            prod_index = int(action.split("/")[0][1:])
            lhs, rhs = augmented_productions[prod_index]
            num_to_pop = len(rhs)
            children = []
            for _ in range(num_to_pop):
                state_stack.pop()
                children.append(node_stack.pop())
            children.reverse()
            new_node = Node(lhs, children)
            node_stack.append(new_node)
            goto_action = table[state_stack[-1]].get(lhs, "")
            if goto_action == "":
                print(f"\nError: No goto action for state {state_stack[-1]} and nonterminal {lhs}.")
                return
            state_stack.append(int(goto_action))
        elif action.startswith("Acc"):
            print("\nParsing Successful!")
            break
        step += 1

    # After parsing is complete, print the parse tree.
    print("\nParse Tree:")
    print(node_stack[-1]._str())

def main():
    global productions, nonterminals, terminals
    print_grammar(productions, "Original Grammar")

    augmented_productions, new_start = augment_grammar(productions, nonterminals[0])
    print_grammar(augmented_productions, "Augmented Grammar")
    if new_start not in nonterminals:
        nonterminals.insert(0, new_start)

    first_sets = compute_first(augmented_productions, nonterminals, terminals)
    if '$' not in first_sets:
        first_sets['$'] = {'$'}
    print_first_sets(first_sets)

    C = canonical_collection(augmented_productions, first_sets, nonterminals, terminals, new_start)
    print_states(C, "Canonical LR(1) Item Sets")

    merged_states, state_mapping = merge_states(C)
    print_states(merged_states, "Merged LALR States")

    table, core_to_index = construct_parsing_table(merged_states, augmented_productions, first_sets,
                                                   nonterminals, terminals, new_start)
    print_goto_transitions(merged_states, augmented_productions, first_sets, nonterminals, terminals, core_to_index)
    print_parsing_table(table, terminals, nonterminals)

    input_str = input("Input string for parsing: ")
    print("\nParsing Input String:", input_str)
    parse_input_string(input_str, table, augmented_productions, new_start)


if __name__ == "__main__":
    main()
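
# Example session, assuming "text.txt" holds the expression grammar sketched near
# the top of this file: run the script, then enter a space-separated token string
# such as
#   id + id * id
# at the prompt. The script prints the LR(1) item sets, the merged LALR states,
# the parsing table, the shift/reduce trace, and finally the parse tree.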