Rev 201 | Blame | Compare with Previous | Last modification | View Log | RSS feed
#!/usr/bin/env python
#
# Copyright: 2006 Ira W. Snyder
# Start Date: 2006-02-01
# License: GNU General Public License v2 (or at your option, any later version)
#
#
# CS411: Compilers and Interpreters
# Project #1
# Due: 2006-02-03
#
# Check for <Python-2.3 compatibility (boolean values)
try:
True, False
except NameError:
(True, False) = (1, 0)
import sys
class TOYParser:
"""A parser for the TOY language"""
def __init__(self):
"""Constructor for the TOYParser"""
# Instance Variables
self.__s = Stack()
self.__i = InputParser()
self.END_SYMBOL = '$'
self.PARSING_TABLE = (('', 'BEGIN', 'END', 'IF', 'ELSE', 'WHILE', 'ID', 'READ', 'WRITE', 'NUM', '+', '(', ')', ';', '=', '$'),
('program', 1, '', '', '', '', '', '', '', '', '', '', '', '', '', ''),
('stmtseq', '', '', 2, '', 2, 2, 2, 2, '', '', '', '', '', '', ''),
('stmtseq1', '', 4, '', '', '', '', '', '', '', '', '', '', 3, '', ''),
('stmt', '', '', 5, '', 6, 7, 8, 9, '', '', '', '', '', '', ''),
('exp', '', '', '', '', '', 10, '', '', 10, '', 10, '', '', '', ''),
('exp1', '', 12, '', 12, '', '', '', '', '', 11, '', 12, 12, '', ''),
('term', '', '', '', '', '', 15, '', '', 14, '', 13, '', '', '', ''))
self.GRAMMAR_SPEC = (('program', 'BEGIN', 'stmtseq', 'END'),
('stmtseq', 'stmt', 'stmtseq1'),
('stmtseq1', ';', 'stmt', 'stmtseq1'),
('stmtseq1', ),
('stmt', 'IF', '(', 'exp', ')', 'stmt', 'ELSE', 'stmt'),
('stmt', 'WHILE', '(', 'exp', ')', 'stmt'),
('stmt', 'ID', '=', 'exp'),
('stmt', 'READ', 'ID'),
('stmt', 'WRITE', 'exp'),
('exp', 'term', 'exp1'),
('exp1', '+', 'term', 'exp1'),
('exp1', ),
('term', '(', 'exp', ')'),
('term', 'NUM'),
('term', 'ID'))
self.START_SYMBOL = 'program'
def terminals(self):
"""Return a list of the terminals in the table"""
return [n for n in self.PARSING_TABLE[0] if n != '']
def nonterminals(self):
"""Return a list of non-terminals in the table"""
return [n[0] for n in self.PARSING_TABLE if n[0] != '']
def __getInput(self):
"""Get the input (program) to parse.
NOTE: This performs no checking by itself, since we'll do that later"""
return raw_input('Please input the program to parse: ')
def main_menu(self):
"""Run the menu interface of the program"""
done = False
input = ''
have_input = False
while not done:
print 'Menu:'
print '========================================'
print '1. Enter an input string'
print '2. Parse input'
print '3. Quit'
s = raw_input('Choice >>> ')
if s == '1':
input = self.__getInput()
have_input = True
elif s == '2':
if have_input:
self.__i.setInput(input)
self.LL1_Parsing_Algo()
else:
print 'Enter an input string first'
elif s == '3':
done = True
else:
print 'Bad Selection, try again'
def simpleParserRun(self):
"""Run the parser on a single input. Very simple"""
# Print header
print 'Simple TOY Language Parser'
print 'Copyright (c) 2006 Ira W. Snyder (devel@irasnyder.com)'
print '================================================================'
# Get input (program)
input = self.__getInput()
print 'Parsing...'
# Parse it!
self.__i.setInput(input)
self.LL1_Parsing_Algo()
def getParsingTableEntry(self, nonterm, term):
"""Get the correct production rule out of the parsing table"""
nt_num = 0
t_num = 0
# Find the index of the nonterminal in the table
for t in self.PARSING_TABLE:
if t[0] == nonterm:
break
nt_num += 1
# Find the index of the terminal in the table
for t in self.PARSING_TABLE[0]:
if t == term:
break
t_num += 1
# Check for bad terminal in the input string
if t_num >= len(self.PARSING_TABLE[0]):
self.__error(3, self.__s.getStack(), self.__i.getInput())
return self.PARSING_TABLE[nt_num][t_num]
def print_production_rule(self, num):
"""Nicely print the production for a given rule number"""
prod_list = self.GRAMMAR_SPEC[num]
nt = prod_list[0]
term_str = ' '.join(prod_list[1:])
# Put 'epsilon' in term_str if nothing is there
if term_str == '':
term_str = 'epsilon'
s = 'PRODUCTION: %s -> %s' % (nt, term_str)
print s
def LL1_Parsing_Algo (self):
"""This implements the table-based LL(1) parsing algorithm from Figure 4.2
(Page 155) in Compiler Construction Principles and Practices.
NOTE: There is a correction in the while() statement, from 'and' to 'or'."""
# No error has occurred yet, so set to False
error_occurred = False
# Prime the parser
self.__s.push(self.END_SYMBOL)
self.__s.push(self.START_SYMBOL)
# The parsing algorithm itself
# Keep going until we have no input OR nothing left in the stack
while self.__s.peek() != self.END_SYMBOL or self.__i.getNextToken() != self.END_SYMBOL:
# Try to match the stack and the input (terminal match)
if self.__s.peek() == self.__i.getNextToken():
print 'MATCH: %s' % (self.__i.getNextToken(), )
self.__s.pop()
self.__i.match()
# Try to use a production rule from the table
elif self.__s.peek() in self.nonterminals() \
and self.getParsingTableEntry(self.__s.peek(), self.__i.getNextToken()) != '':
# Extract the entry from the parsing table
nt = self.__s.pop()
t = self.__i.getNextToken()
pte = self.getParsingTableEntry(nt, t)
# Print the production rule we just used
self.print_production_rule(pte-1)
# Push the production back to the stack, in the correct (reversed) order
stack_additions = list(self.GRAMMAR_SPEC[pte-1][1:])
stack_additions.reverse()
for a in stack_additions:
self.__s.push(a)
# An error must have occurred. We either have an unknown symbol in the input
else:
error_occurred = True
self.__error(1, self.__s.getStack(), self.__i.getInput())
break
# endif
# wend
# If an error already happened, no need to print out the error messages again
if not error_occurred:
if self.__s.peek() == self.END_SYMBOL and self.__i.getNextToken() == self.END_SYMBOL:
print 'ACCEPTED: Valid program'
else:
self.__error(2, self.__s.getStack(), self.__i.getInput())
def __error(self, errornum=0, stack=None, input=None):
"""Try to print a decent error message"""
if errornum == 1:
if input[0] in self.terminals():
print 'Error: \'%s\' unexpected here (syntax error)' % input[0]
else:
print 'Error: unknown symbol: \'%s\'' % input[0]
elif errornum == 2:
if stack[-1] == self.END_SYMBOL:
print 'Error: ran out of input'
else:
print 'Error: extraneous input: \'%s\'' % input
elif errornum == 3:
print 'Error: Invalid terminal: %s' % input[0]
else:
print 'Error: An unknown error occurred'
# Always print the remaining stack and input strings
print 'Stack: %s' % stack
print 'Input: %s' % input
# Always exit the program, since I don't know how to go back to the menu
sys.exit(1)
class InputParser:
"""Implements a simple tokenizer for the TOY language"""
def __init__(self):
self.__inputstr = []
def setInput(self, inputstr):
"""Sets the private variable self.__inputstr to the parameter passed in"""
self.__inputstr = inputstr.split()
self.__inputstr.append('$') # Append '$' to the input
def getInput(self):
"""Return a list representation of whatever is remaining of the input"""
return self.__inputstr[:]
def getNextToken(self):
"""Gets the first token off of the front of the input string, but
DOES NOT remove any input."""
try:
return self.__inputstr[0]
except IndexError:
print 'CAUGHT IndexError in getNextToken()'
return ''
def match(self):
"""A match has happened, so remove the front of the input string."""
try:
del self.__inputstr[0]
except IndexError:
print 'CAUGHT IndexError in match()'
class Stack:
"""Implements a simple stack using a list"""
def __init__(self):
self.__data = []
def clear(self):
"""Empty the stack"""
self.__data = []
def push(self, val):
"""Push a value onto the top of the stack"""
self.__data.append(val)
def pop(self):
"""Pop a value off the top of the stack.
WARNING: throws IndexError if there is nothing left in the stack"""
return self.__data.pop()
def peek(self):
"""Peek at the top of the stack. Basically a pop() without removing the
object from the top.
WARNING: will throw an IndexError if there is nothing left in the stack."""
temp = self.pop()
self.push(temp)
return temp
def getStack(self):
"""Get whatever is in the stack, from bottom to top, as a list"""
return self.__data[:]
################################################################################
def main():
"""The main function for this program"""
t = TOYParser()
#t.main_menu()
t.simpleParserRun()
if __name__ == '__main__':
main()