Subversion Repositories programming

Rev

Rev 200 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

#!/usr/bin/env python

#
# Copyright: 2006 Ira W. Snyder
# Start Date: 2006-02-01
# License: GNU General Public License v2 (or at your option, any later version)
#

class TOYParser:
    """A parser for the TOY language"""

    def __init__(self):
        self.__s = Stack()
        self.__i = InputParser()
        self.END_SYMBOL = '$'
        self.PARSING_TABLE = (('', 'BEGIN', 'END', 'IF', 'ELSE', 'WHILE', 'ID', 'READ', 'WRITE', 'NUM', '+', '(', ')', ';', '=', '$'),
                ('program', 1, '', '', '', '', '', '', '', '', '', '', '', '', '', ''),
                ('stmtseq', '', '', 2, '', 2, 2, 2, 2, '', '', '', '', '', '', ''),
                ('stmtseq1', '', 4, '', '', '', '', '', '', '', '', '', '', 3, '', ''),
                ('stmt', '', '', 5, '', 6, 7, 8, 9, '', '', '', '', '', '', ''),
                ('exp', '', '', '', '', '', 10, '', '', 10, '', 10, '', '', '', ''),
                ('exp1', '', 12, '', 12, '', '', '', '', '', 11, '', 12, 12, '', ''),
                ('term', '', '', '', '', '', 15, '', '', 14, '', 13, '', '', '', ''))

        self.GRAMMAR_SPEC = (('program', 'BEGIN', 'stmtseq', 'END'),
                ('stmtseq', 'stmt', 'stmtseq1'),
                ('stmtseq1', ';', 'stmt', 'stmtseq1'),
                ('stmtseq1', ),
                ('stmt', 'IF', '(', 'exp', ')', 'stmt', 'ELSE', 'stmt'),
                ('stmt', 'WHILE', '(', 'exp', ')', 'stmt'),
                ('stmt', 'ID', '=', 'exp'),
                ('stmt', 'READ', 'ID'),
                ('stmt', 'WRITE', 'exp'),
                ('exp', 'term', 'exp1'),
                ('exp1', '+', 'term', 'exp1'),
                ('exp1', ),
                ('term', '(', 'exp', ')'),
                ('term', 'NUM'),
                ('term', 'ID'))

        self.START_SYMBOL = 'program'

    def terminals(self):
        """Return a list of the terminals in the table"""

        return [n for n in self.PARSING_TABLE[0] if n != '']

    def nonterminals(self):
        """Return a list of non-terminals in the table"""

        return [n[0] for n in self.PARSING_TABLE if n[0] != '']
    
    def getInput(self):
        self.__i.setInput('BEGIN  READ ID ; WHILE ( ID ) ID  = ID + NUM ;  WRITE ID END')
        self.LL1_Parsing_Algo()

    def getParsingTableEntry(self, nonterm, term):
        """Get the correct production rule out of the parsing table"""

        nt_num = 0
        t_num = 0
        
        for t in self.PARSING_TABLE:
            if t[0] == nonterm:
                break

            nt_num += 1

        for t in self.PARSING_TABLE[0]:
            if t == term:
                break

            t_num += 1

        return self.PARSING_TABLE[nt_num][t_num]

    def print_production_rule(self, num):
        prod_list = self.GRAMMAR_SPEC[num]
        
        nt = prod_list[0]
        term_str = ' '.join(prod_list[1:])

        s = 'PRODUCTION: %s -> %s' % (nt, term_str)

        print s
        
    def LL1_Parsing_Algo (self):
        """This implements the table-based LL(1) parsing algorithm from Figure 4.2
        (Page 155) in Compiler Construction Principles and Practices.
        NOTE: There is a correction in the while() statement, from 'and' to 'or'."""

        self.__s.push(self.END_SYMBOL)
        self.__s.push(self.START_SYMBOL)

        while self.__s.peek() != self.END_SYMBOL or self.__i.getNextToken() != self.END_SYMBOL:

            if self.__s.peek() == self.__i.getNextToken():
                print 'MATCH: %s' % (self.__i.getNextToken(), )
                self.__s.pop()
                self.__i.match()

            elif self.__s.peek() in self.nonterminals() and self.getParsingTableEntry(self.__s.peek(), self.__i.getNextToken()) != '':

                    nt = self.__s.pop()
                    t = self.__i.getNextToken()
                    pte = self.getParsingTableEntry(nt, t)

                    self.print_production_rule(pte-1)
    
                    # Basically pop the stack, and push back the correct stuff
                    # in reverse order
                    stack_additions = list(self.GRAMMAR_SPEC[pte-1][1:])
                    stack_additions.reverse()

                    for a in stack_additions:
                        self.__s.push(a)

            else:
                print 'An error happened'
            # endif
        # wend

        if self.__s.peek() == self.END_SYMBOL and self.__i.getNextToken() == self.END_SYMBOL:
            print 'ACCEPT'
        else:
            print 'ERROR'

class InputParser:
    """Implements a simple tokenizer for the TOY language"""

    def __init__(self):
        self.__inputstr = []

    def setInput(self, inputstr):
        self.__inputstr = inputstr.split()
        self.__inputstr.append('$') # Append '$' to the input

    def getInput(self):
        """Return a list representation of whatever is remaining of the input"""

        return self.__inputstr[:]
        
    def getNextToken(self):
        """Gets the first token off of the front of the input string, but
        DOES NOT remove any input."""
        
        try:
            return self.__inputstr[0]
        except IndexError:
            print 'CAUGHT IndexError in getNextToken()'
            return ''

    def match(self):
        """A match has happened, so remove the front of the input string."""
        
        try:
            del self.__inputstr[0]
        except IndexError:
            print 'CAUGHT IndexError in match()'

class Stack:
    """Implements a simple stack using a list"""

    def __init__(self):
        self.__data = []

    def clear(self):
        """Empty the stack"""
        self.__data = []
        
    def push(self, val):
        """Push a value onto the top of the stack"""
        self.__data.append(val)

    def pop(self):
        """Pop a value off the top of the stack.
        WARNING: throws IndexError if there is nothing left in the stack"""
        return self.__data.pop()

    def peek(self):
        """Peek at the top of the stack. Basically a pop() without removing the
        object from the top.
        WARNING: will throw an IndexError if there is nothing left in the stack."""

        temp = self.pop()
        self.push(temp)
        return temp

    def getStack(self):
        """Get whatever is in the stack, from bottom to top, as a list"""

        return self.__data[:]

################################################################################
################################################################################
################################################################################
################################################################################
################################################################################

def main():
    """The main function for this program"""

    t = TOYParser()
    print t.getInput()

if __name__ == '__main__':
    main()