Rev 200 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
#!/usr/bin/env python
#
# Copyright: 2006 Ira W. Snyder
# Start Date: 2006-02-01
# License: GNU General Public License v2 (or at your option, any later version)
#
class TOYParser:
"""A parser for the TOY language"""
def __init__(self):
self.__s = Stack()
self.__i = InputParser()
self.END_SYMBOL = '$'
self.PARSING_TABLE = (('', 'BEGIN', 'END', 'IF', 'ELSE', 'WHILE', 'ID', 'READ', 'WRITE', 'NUM', '+', '(', ')', ';', '=', '$'),
('program', 1, '', '', '', '', '', '', '', '', '', '', '', '', '', ''),
('stmtseq', '', '', 2, '', 2, 2, 2, 2, '', '', '', '', '', '', ''),
('stmtseq1', '', 4, '', '', '', '', '', '', '', '', '', '', 3, '', ''),
('stmt', '', '', 5, '', 6, 7, 8, 9, '', '', '', '', '', '', ''),
('exp', '', '', '', '', '', 10, '', '', 10, '', 10, '', '', '', ''),
('exp1', '', 12, '', 12, '', '', '', '', '', 11, '', 12, 12, '', ''),
('term', '', '', '', '', '', 15, '', '', 14, '', 13, '', '', '', ''))
self.GRAMMAR_SPEC = (('program', 'BEGIN', 'stmtseq', 'END'),
('stmtseq', 'stmt', 'stmtseq1'),
('stmtseq1', ';', 'stmt', 'stmtseq1'),
('stmtseq1', ),
('stmt', 'IF', '(', 'exp', ')', 'stmt', 'ELSE', 'stmt'),
('stmt', 'WHILE', '(', 'exp', ')', 'stmt'),
('stmt', 'ID', '=', 'exp'),
('stmt', 'READ', 'ID'),
('stmt', 'WRITE', 'exp'),
('exp', 'term', 'exp1'),
('exp1', '+', 'term', 'exp1'),
('exp1', ),
('term', '(', 'exp', ')'),
('term', 'NUM'),
('term', 'ID'))
self.START_SYMBOL = 'program'
def terminals(self):
"""Return a list of the terminals in the table"""
return [n for n in self.PARSING_TABLE[0] if n != '']
def nonterminals(self):
"""Return a list of non-terminals in the table"""
return [n[0] for n in self.PARSING_TABLE if n[0] != '']
def getInput(self):
self.__i.setInput('BEGIN READ ID ; WHILE ( ID ) ID = ID + NUM ; WRITE ID END')
self.LL1_Parsing_Algo()
def getParsingTableEntry(self, nonterm, term):
"""Get the correct production rule out of the parsing table"""
nt_num = 0
t_num = 0
for t in self.PARSING_TABLE:
if t[0] == nonterm:
break
nt_num += 1
for t in self.PARSING_TABLE[0]:
if t == term:
break
t_num += 1
return self.PARSING_TABLE[nt_num][t_num]
def print_production_rule(self, num):
prod_list = self.GRAMMAR_SPEC[num]
nt = prod_list[0]
term_str = ' '.join(prod_list[1:])
s = 'PRODUCTION: %s -> %s' % (nt, term_str)
print s
def LL1_Parsing_Algo (self):
"""This implements the table-based LL(1) parsing algorithm from Figure 4.2
(Page 155) in Compiler Construction Principles and Practices.
NOTE: There is a correction in the while() statement, from 'and' to 'or'."""
self.__s.push(self.END_SYMBOL)
self.__s.push(self.START_SYMBOL)
while self.__s.peek() != self.END_SYMBOL or self.__i.getNextToken() != self.END_SYMBOL:
if self.__s.peek() == self.__i.getNextToken():
print 'MATCH: %s' % (self.__i.getNextToken(), )
self.__s.pop()
self.__i.match()
elif self.__s.peek() in self.nonterminals() and self.getParsingTableEntry(self.__s.peek(), self.__i.getNextToken()) != '':
nt = self.__s.pop()
t = self.__i.getNextToken()
pte = self.getParsingTableEntry(nt, t)
self.print_production_rule(pte-1)
# Basically pop the stack, and push back the correct stuff
# in reverse order
stack_additions = list(self.GRAMMAR_SPEC[pte-1][1:])
stack_additions.reverse()
for a in stack_additions:
self.__s.push(a)
else:
print 'An error happened'
# endif
# wend
if self.__s.peek() == self.END_SYMBOL and self.__i.getNextToken() == self.END_SYMBOL:
print 'ACCEPT'
else:
print 'ERROR'
class InputParser:
"""Implements a simple tokenizer for the TOY language"""
def __init__(self):
self.__inputstr = []
def setInput(self, inputstr):
self.__inputstr = inputstr.split()
self.__inputstr.append('$') # Append '$' to the input
def getInput(self):
"""Return a list representation of whatever is remaining of the input"""
return self.__inputstr[:]
def getNextToken(self):
"""Gets the first token off of the front of the input string, but
DOES NOT remove any input."""
try:
return self.__inputstr[0]
except IndexError:
print 'CAUGHT IndexError in getNextToken()'
return ''
def match(self):
"""A match has happened, so remove the front of the input string."""
try:
del self.__inputstr[0]
except IndexError:
print 'CAUGHT IndexError in match()'
class Stack:
"""Implements a simple stack using a list"""
def __init__(self):
self.__data = []
def clear(self):
"""Empty the stack"""
self.__data = []
def push(self, val):
"""Push a value onto the top of the stack"""
self.__data.append(val)
def pop(self):
"""Pop a value off the top of the stack.
WARNING: throws IndexError if there is nothing left in the stack"""
return self.__data.pop()
def peek(self):
"""Peek at the top of the stack. Basically a pop() without removing the
object from the top.
WARNING: will throw an IndexError if there is nothing left in the stack."""
temp = self.pop()
self.push(temp)
return temp
def getStack(self):
"""Get whatever is in the stack, from bottom to top, as a list"""
return self.__data[:]
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
def main():
"""The main function for this program"""
t = TOYParser()
print t.getInput()
if __name__ == '__main__':
main()