Subversion Repositories programming

Rev

Rev 201 | Blame | Compare with Previous | Last modification | View Log | RSS feed

#!/usr/bin/env python

#
# Copyright: 2006 Ira W. Snyder
# Start Date: 2006-02-01
# License: GNU General Public License v2 (or at your option, any later version)
#

#
# CS411: Compilers and Interpreters
# Project #1
# Due: 2006-02-03
#

# Check for <Python-2.3 compatibility (boolean values)
try:
  True, False
except NameError:
  (True, False) = (1, 0)

import sys

class TOYParser:
    """A parser for the TOY language"""

    def __init__(self):
        """Constructor for the TOYParser"""

        # Instance Variables
        self.__s = Stack()
        self.__i = InputParser()
        self.END_SYMBOL = '$'
        self.PARSING_TABLE = (('', 'BEGIN', 'END', 'IF', 'ELSE', 'WHILE', 'ID', 'READ', 'WRITE', 'NUM', '+', '(', ')', ';', '=', '$'),
                ('program', 1, '', '', '', '', '', '', '', '', '', '', '', '', '', ''),
                ('stmtseq', '', '', 2, '', 2, 2, 2, 2, '', '', '', '', '', '', ''),
                ('stmtseq1', '', 4, '', '', '', '', '', '', '', '', '', '', 3, '', ''),
                ('stmt', '', '', 5, '', 6, 7, 8, 9, '', '', '', '', '', '', ''),
                ('exp', '', '', '', '', '', 10, '', '', 10, '', 10, '', '', '', ''),
                ('exp1', '', 12, '', 12, '', '', '', '', '', 11, '', 12, 12, '', ''),
                ('term', '', '', '', '', '', 15, '', '', 14, '', 13, '', '', '', ''))

        self.GRAMMAR_SPEC = (('program', 'BEGIN', 'stmtseq', 'END'),
                ('stmtseq', 'stmt', 'stmtseq1'),
                ('stmtseq1', ';', 'stmt', 'stmtseq1'),
                ('stmtseq1', ),
                ('stmt', 'IF', '(', 'exp', ')', 'stmt', 'ELSE', 'stmt'),
                ('stmt', 'WHILE', '(', 'exp', ')', 'stmt'),
                ('stmt', 'ID', '=', 'exp'),
                ('stmt', 'READ', 'ID'),
                ('stmt', 'WRITE', 'exp'),
                ('exp', 'term', 'exp1'),
                ('exp1', '+', 'term', 'exp1'),
                ('exp1', ),
                ('term', '(', 'exp', ')'),
                ('term', 'NUM'),
                ('term', 'ID'))

        self.START_SYMBOL = 'program'

    def terminals(self):
        """Return a list of the terminals in the table"""

        return [n for n in self.PARSING_TABLE[0] if n != '']

    def nonterminals(self):
        """Return a list of non-terminals in the table"""

        return [n[0] for n in self.PARSING_TABLE if n[0] != '']

    def __getInput(self):
        """Get the input (program) to parse.
        NOTE: This performs no checking by itself, since we'll do that later"""

        return raw_input('Please input the program to parse: ')

    def main_menu(self):
        """Run the menu interface of the program"""

        done = False
        input = ''
        have_input = False

        while not done:
            print 'Menu:'
            print '========================================'
            print '1. Enter an input string'
            print '2. Parse input'
            print '3. Quit'
            print
            s = raw_input('Choice >>> ')
            print

            if s == '1':
                input = self.__getInput()
                have_input = True
                print
            elif s == '2':
                if have_input:
                    self.__i.setInput(input)
                    self.LL1_Parsing_Algo()
                else:
                    print 'Enter an input string first'
                print
            elif s == '3':
                done = True
            else:
                print 'Bad Selection, try again'
                print

    def simpleParserRun(self):
        """Run the parser on a single input. Very simple"""

        # Print header
        print 'Simple TOY Language Parser'
        print 'Copyright (c) 2006 Ira W. Snyder (devel@irasnyder.com)'
        print '================================================================'
        print

        # Get input (program)
        input = self.__getInput()

        print
        print 'Parsing...'
        print

        # Parse it!
        self.__i.setInput(input)
        self.LL1_Parsing_Algo()

    def getParsingTableEntry(self, nonterm, term):
        """Get the correct production rule out of the parsing table"""

        nt_num = 0
        t_num = 0

        # Find the index of the nonterminal in the table
        for t in self.PARSING_TABLE:
            if t[0] == nonterm:
                break

            nt_num += 1

        # Find the index of the terminal in the table
        for t in self.PARSING_TABLE[0]:
            if t == term:
                break

            t_num += 1

        # Check for bad terminal in the input string
        if t_num >= len(self.PARSING_TABLE[0]):
            self.__error(3, self.__s.getStack(), self.__i.getInput())

        return self.PARSING_TABLE[nt_num][t_num]

    def print_production_rule(self, num):
        """Nicely print the production for a given rule number"""

        prod_list = self.GRAMMAR_SPEC[num]

        nt = prod_list[0]
        term_str = ' '.join(prod_list[1:])

        # Put 'epsilon' in term_str if nothing is there
        if term_str == '':
            term_str = 'epsilon'

        s = 'PRODUCTION: %s -> %s' % (nt, term_str)

        print s

    def LL1_Parsing_Algo (self):
        """This implements the table-based LL(1) parsing algorithm from Figure 4.2
        (Page 155) in Compiler Construction Principles and Practices.
        NOTE: There is a correction in the while() statement, from 'and' to 'or'."""

        # No error has occurred yet, so set to False
        error_occurred = False

        # Prime the parser
        self.__s.push(self.END_SYMBOL)
        self.__s.push(self.START_SYMBOL)

        # The parsing algorithm itself
        # Keep going until we have no input OR nothing left in the stack
        while self.__s.peek() != self.END_SYMBOL or self.__i.getNextToken() != self.END_SYMBOL:

            # Try to match the stack and the input (terminal match)
            if self.__s.peek() == self.__i.getNextToken():
                print 'MATCH: %s' % (self.__i.getNextToken(), )
                self.__s.pop()
                self.__i.match()

            # Try to use a production rule from the table
            elif self.__s.peek() in self.nonterminals() \
            and self.getParsingTableEntry(self.__s.peek(), self.__i.getNextToken()) != '':

                    # Extract the entry from the parsing table
                    nt = self.__s.pop()
                    t = self.__i.getNextToken()
                    pte = self.getParsingTableEntry(nt, t)

                    # Print the production rule we just used
                    self.print_production_rule(pte-1)

                    # Push the production back to the stack, in the correct (reversed) order
                    stack_additions = list(self.GRAMMAR_SPEC[pte-1][1:])
                    stack_additions.reverse()

                    for a in stack_additions:
                        self.__s.push(a)

            # An error must have occurred. We either have an unknown symbol in the input
            else:
                error_occurred = True
                self.__error(1, self.__s.getStack(), self.__i.getInput())
                break
            # endif
        # wend

        # If an error already happened, no need to print out the error messages again
        if not error_occurred:
            if self.__s.peek() == self.END_SYMBOL and self.__i.getNextToken() == self.END_SYMBOL:
                print 'ACCEPTED: Valid program'
            else:
                self.__error(2, self.__s.getStack(), self.__i.getInput())

    def __error(self, errornum=0, stack=None, input=None):
        """Try to print a decent error message"""


        if errornum == 1:
            if input[0] in self.terminals():
                print 'Error: \'%s\' unexpected here (syntax error)' % input[0]
            else:
                print 'Error: unknown symbol: \'%s\'' % input[0]
        elif errornum == 2:
            if stack[-1] == self.END_SYMBOL:
                print 'Error: ran out of input'
            else:
                print 'Error: extraneous input: \'%s\'' % input
        elif errornum == 3:
            print 'Error: Invalid terminal: %s' % input[0]
        else:
            print 'Error: An unknown error occurred'

        # Always print the remaining stack and input strings
        print 'Stack: %s' % stack
        print 'Input: %s' % input

        # Always exit the program, since I don't know how to go back to the menu
        sys.exit(1)

class InputParser:
    """Implements a simple tokenizer for the TOY language"""

    def __init__(self):
        self.__inputstr = []

    def setInput(self, inputstr):
        """Sets the private variable self.__inputstr to the parameter passed in"""
        self.__inputstr = inputstr.split()
        self.__inputstr.append('$') # Append '$' to the input

    def getInput(self):
        """Return a list representation of whatever is remaining of the input"""

        return self.__inputstr[:]

    def getNextToken(self):
        """Gets the first token off of the front of the input string, but
        DOES NOT remove any input."""

        try:
            return self.__inputstr[0]
        except IndexError:
            print 'CAUGHT IndexError in getNextToken()'
            return ''

    def match(self):
        """A match has happened, so remove the front of the input string."""

        try:
            del self.__inputstr[0]
        except IndexError:
            print 'CAUGHT IndexError in match()'

class Stack:
    """Implements a simple stack using a list"""

    def __init__(self):
        self.__data = []

    def clear(self):
        """Empty the stack"""
        self.__data = []

    def push(self, val):
        """Push a value onto the top of the stack"""
        self.__data.append(val)

    def pop(self):
        """Pop a value off the top of the stack.
        WARNING: throws IndexError if there is nothing left in the stack"""
        return self.__data.pop()

    def peek(self):
        """Peek at the top of the stack. Basically a pop() without removing the
        object from the top.
        WARNING: will throw an IndexError if there is nothing left in the stack."""

        temp = self.pop()
        self.push(temp)
        return temp

    def getStack(self):
        """Get whatever is in the stack, from bottom to top, as a list"""

        return self.__data[:]

################################################################################

def main():
    """The main function for this program"""

    t = TOYParser()
    #t.main_menu()
    t.simpleParserRun()

if __name__ == '__main__':
    main()