Rev 201 | Blame | Compare with Previous | Last modification | View Log | RSS feed
#!/usr/bin/env python## Copyright: 2006 Ira W. Snyder# Start Date: 2006-02-01# License: GNU General Public License v2 (or at your option, any later version)### CS411: Compilers and Interpreters# Project #1# Due: 2006-02-03## Check for <Python-2.3 compatibility (boolean values)try:True, Falseexcept NameError:(True, False) = (1, 0)import sysclass TOYParser:"""A parser for the TOY language"""def __init__(self):"""Constructor for the TOYParser"""# Instance Variablesself.__s = Stack()self.__i = InputParser()self.END_SYMBOL = '$'self.PARSING_TABLE = (('', 'BEGIN', 'END', 'IF', 'ELSE', 'WHILE', 'ID', 'READ', 'WRITE', 'NUM', '+', '(', ')', ';', '=', '$'),('program', 1, '', '', '', '', '', '', '', '', '', '', '', '', '', ''),('stmtseq', '', '', 2, '', 2, 2, 2, 2, '', '', '', '', '', '', ''),('stmtseq1', '', 4, '', '', '', '', '', '', '', '', '', '', 3, '', ''),('stmt', '', '', 5, '', 6, 7, 8, 9, '', '', '', '', '', '', ''),('exp', '', '', '', '', '', 10, '', '', 10, '', 10, '', '', '', ''),('exp1', '', 12, '', 12, '', '', '', '', '', 11, '', 12, 12, '', ''),('term', '', '', '', '', '', 15, '', '', 14, '', 13, '', '', '', ''))self.GRAMMAR_SPEC = (('program', 'BEGIN', 'stmtseq', 'END'),('stmtseq', 'stmt', 'stmtseq1'),('stmtseq1', ';', 'stmt', 'stmtseq1'),('stmtseq1', ),('stmt', 'IF', '(', 'exp', ')', 'stmt', 'ELSE', 'stmt'),('stmt', 'WHILE', '(', 'exp', ')', 'stmt'),('stmt', 'ID', '=', 'exp'),('stmt', 'READ', 'ID'),('stmt', 'WRITE', 'exp'),('exp', 'term', 'exp1'),('exp1', '+', 'term', 'exp1'),('exp1', ),('term', '(', 'exp', ')'),('term', 'NUM'),('term', 'ID'))self.START_SYMBOL = 'program'def terminals(self):"""Return a list of the terminals in the table"""return [n for n in self.PARSING_TABLE[0] if n != '']def nonterminals(self):"""Return a list of non-terminals in the table"""return [n[0] for n in self.PARSING_TABLE if n[0] != '']def __getInput(self):"""Get the input (program) to parse.NOTE: This performs no checking by itself, since we'll do that later"""return raw_input('Please input the program to parse: ')def main_menu(self):"""Run the menu interface of the program"""done = Falseinput = ''have_input = Falsewhile not done:print 'Menu:'print '========================================'print '1. Enter an input string'print '2. Parse input'print '3. Quit's = raw_input('Choice >>> ')if s == '1':input = self.__getInput()have_input = Trueelif s == '2':if have_input:self.__i.setInput(input)self.LL1_Parsing_Algo()else:print 'Enter an input string first'elif s == '3':done = Trueelse:print 'Bad Selection, try again'def simpleParserRun(self):"""Run the parser on a single input. Very simple"""# Print headerprint 'Simple TOY Language Parser'print 'Copyright (c) 2006 Ira W. Snyder (devel@irasnyder.com)'print '================================================================'# Get input (program)input = self.__getInput()print 'Parsing...'# Parse it!self.__i.setInput(input)self.LL1_Parsing_Algo()def getParsingTableEntry(self, nonterm, term):"""Get the correct production rule out of the parsing table"""nt_num = 0t_num = 0# Find the index of the nonterminal in the tablefor t in self.PARSING_TABLE:if t[0] == nonterm:breaknt_num += 1# Find the index of the terminal in the tablefor t in self.PARSING_TABLE[0]:if t == term:breakt_num += 1# Check for bad terminal in the input stringif t_num >= len(self.PARSING_TABLE[0]):self.__error(3, self.__s.getStack(), self.__i.getInput())return self.PARSING_TABLE[nt_num][t_num]def print_production_rule(self, num):"""Nicely print the production for a given rule number"""prod_list = self.GRAMMAR_SPEC[num]nt = prod_list[0]term_str = ' '.join(prod_list[1:])# Put 'epsilon' in term_str if nothing is thereif term_str == '':term_str = 'epsilon's = 'PRODUCTION: %s -> %s' % (nt, term_str)print sdef LL1_Parsing_Algo (self):"""This implements the table-based LL(1) parsing algorithm from Figure 4.2(Page 155) in Compiler Construction Principles and Practices.NOTE: There is a correction in the while() statement, from 'and' to 'or'."""# No error has occurred yet, so set to Falseerror_occurred = False# Prime the parserself.__s.push(self.END_SYMBOL)self.__s.push(self.START_SYMBOL)# The parsing algorithm itself# Keep going until we have no input OR nothing left in the stackwhile self.__s.peek() != self.END_SYMBOL or self.__i.getNextToken() != self.END_SYMBOL:# Try to match the stack and the input (terminal match)if self.__s.peek() == self.__i.getNextToken():print 'MATCH: %s' % (self.__i.getNextToken(), )self.__s.pop()self.__i.match()# Try to use a production rule from the tableelif self.__s.peek() in self.nonterminals() \and self.getParsingTableEntry(self.__s.peek(), self.__i.getNextToken()) != '':# Extract the entry from the parsing tablent = self.__s.pop()t = self.__i.getNextToken()pte = self.getParsingTableEntry(nt, t)# Print the production rule we just usedself.print_production_rule(pte-1)# Push the production back to the stack, in the correct (reversed) orderstack_additions = list(self.GRAMMAR_SPEC[pte-1][1:])stack_additions.reverse()for a in stack_additions:self.__s.push(a)# An error must have occurred. We either have an unknown symbol in the inputelse:error_occurred = Trueself.__error(1, self.__s.getStack(), self.__i.getInput())break# endif# wend# If an error already happened, no need to print out the error messages againif not error_occurred:if self.__s.peek() == self.END_SYMBOL and self.__i.getNextToken() == self.END_SYMBOL:print 'ACCEPTED: Valid program'else:self.__error(2, self.__s.getStack(), self.__i.getInput())def __error(self, errornum=0, stack=None, input=None):"""Try to print a decent error message"""if errornum == 1:if input[0] in self.terminals():print 'Error: \'%s\' unexpected here (syntax error)' % input[0]else:print 'Error: unknown symbol: \'%s\'' % input[0]elif errornum == 2:if stack[-1] == self.END_SYMBOL:print 'Error: ran out of input'else:print 'Error: extraneous input: \'%s\'' % inputelif errornum == 3:print 'Error: Invalid terminal: %s' % input[0]else:print 'Error: An unknown error occurred'# Always print the remaining stack and input stringsprint 'Stack: %s' % stackprint 'Input: %s' % input# Always exit the program, since I don't know how to go back to the menusys.exit(1)class InputParser:"""Implements a simple tokenizer for the TOY language"""def __init__(self):self.__inputstr = []def setInput(self, inputstr):"""Sets the private variable self.__inputstr to the parameter passed in"""self.__inputstr = inputstr.split()self.__inputstr.append('$') # Append '$' to the inputdef getInput(self):"""Return a list representation of whatever is remaining of the input"""return self.__inputstr[:]def getNextToken(self):"""Gets the first token off of the front of the input string, butDOES NOT remove any input."""try:return self.__inputstr[0]except IndexError:print 'CAUGHT IndexError in getNextToken()'return ''def match(self):"""A match has happened, so remove the front of the input string."""try:del self.__inputstr[0]except IndexError:print 'CAUGHT IndexError in match()'class Stack:"""Implements a simple stack using a list"""def __init__(self):self.__data = []def clear(self):"""Empty the stack"""self.__data = []def push(self, val):"""Push a value onto the top of the stack"""self.__data.append(val)def pop(self):"""Pop a value off the top of the stack.WARNING: throws IndexError if there is nothing left in the stack"""return self.__data.pop()def peek(self):"""Peek at the top of the stack. Basically a pop() without removing theobject from the top.WARNING: will throw an IndexError if there is nothing left in the stack."""temp = self.pop()self.push(temp)return tempdef getStack(self):"""Get whatever is in the stack, from bottom to top, as a list"""return self.__data[:]################################################################################def main():"""The main function for this program"""t = TOYParser()#t.main_menu()t.simpleParserRun()if __name__ == '__main__':main()