Subversion Repositories programming

Rev

Rev 201 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
199 ira 1
#!/usr/bin/env python
2
 
200 ira 3
#
199 ira 4
# Copyright: 2006 Ira W. Snyder
5
# Start Date: 2006-02-01
6
# License: GNU General Public License v2 (or at your option, any later version)
7
#
8
 
202 ira 9
#
10
# CS411: Compilers and Interpreters
11
# Project #1
12
# Due: 2006-02-03
13
#
14
 
15
# Check for <Python-2.3 compatibility (boolean values)
16
try:
17
  True, False
18
except NameError:
19
  (True, False) = (1, 0)
20
 
21
import sys
22
 
201 ira 23
class TOYParser:
24
    """A parser for the TOY language"""
199 ira 25
 
201 ira 26
    def __init__(self):
202 ira 27
        """Constructor for the TOYParser"""
28
 
29
        # Instance Variables
201 ira 30
        self.__s = Stack()
31
        self.__i = InputParser()
32
        self.END_SYMBOL = '$'
33
        self.PARSING_TABLE = (('', 'BEGIN', 'END', 'IF', 'ELSE', 'WHILE', 'ID', 'READ', 'WRITE', 'NUM', '+', '(', ')', ';', '=', '$'),
34
                ('program', 1, '', '', '', '', '', '', '', '', '', '', '', '', '', ''),
35
                ('stmtseq', '', '', 2, '', 2, 2, 2, 2, '', '', '', '', '', '', ''),
36
                ('stmtseq1', '', 4, '', '', '', '', '', '', '', '', '', '', 3, '', ''),
37
                ('stmt', '', '', 5, '', 6, 7, 8, 9, '', '', '', '', '', '', ''),
38
                ('exp', '', '', '', '', '', 10, '', '', 10, '', 10, '', '', '', ''),
39
                ('exp1', '', 12, '', 12, '', '', '', '', '', 11, '', 12, 12, '', ''),
40
                ('term', '', '', '', '', '', 15, '', '', 14, '', 13, '', '', '', ''))
41
 
42
        self.GRAMMAR_SPEC = (('program', 'BEGIN', 'stmtseq', 'END'),
200 ira 43
                ('stmtseq', 'stmt', 'stmtseq1'),
44
                ('stmtseq1', ';', 'stmt', 'stmtseq1'),
45
                ('stmtseq1', ),
46
                ('stmt', 'IF', '(', 'exp', ')', 'stmt', 'ELSE', 'stmt'),
47
                ('stmt', 'WHILE', '(', 'exp', ')', 'stmt'),
48
                ('stmt', 'ID', '=', 'exp'),
49
                ('stmt', 'READ', 'ID'),
50
                ('stmt', 'WRITE', 'exp'),
51
                ('exp', 'term', 'exp1'),
52
                ('exp1', '+', 'term', 'exp1'),
53
                ('exp1', ),
54
                ('term', '(', 'exp', ')'),
55
                ('term', 'NUM'),
56
                ('term', 'ID'))
57
 
201 ira 58
        self.START_SYMBOL = 'program'
200 ira 59
 
201 ira 60
    def terminals(self):
61
        """Return a list of the terminals in the table"""
199 ira 62
 
201 ira 63
        return [n for n in self.PARSING_TABLE[0] if n != '']
199 ira 64
 
201 ira 65
    def nonterminals(self):
66
        """Return a list of non-terminals in the table"""
199 ira 67
 
201 ira 68
        return [n[0] for n in self.PARSING_TABLE if n[0] != '']
202 ira 69
 
70
    def __getInput(self):
71
        """Get the input (program) to parse.
72
        NOTE: This performs no checking by itself, since we'll do that later"""
73
 
74
        return raw_input('Please input the program to parse: ')
75
 
76
    def main_menu(self):
77
        """Run the menu interface of the program"""
78
 
79
        done = False
80
        input = ''
81
        have_input = False
82
 
83
        while not done:
84
            print 'Menu:'
85
            print '========================================'
86
            print '1. Enter an input string'
87
            print '2. Parse input'
88
            print '3. Quit'
89
            print
90
            s = raw_input('Choice >>> ')
91
            print
92
 
93
            if s == '1':
94
                input = self.__getInput()
95
                have_input = True
96
                print
97
            elif s == '2':
98
                if have_input:
99
                    self.__i.setInput(input)
100
                    self.LL1_Parsing_Algo()
101
                else:
102
                    print 'Enter an input string first'
103
                print
104
            elif s == '3':
105
                done = True
106
            else:
107
                print 'Bad Selection, try again'
108
                print
109
 
110
    def simpleParserRun(self):
111
        """Run the parser on a single input. Very simple"""
112
 
113
        # Print header
114
        print 'Simple TOY Language Parser'
115
        print 'Copyright (c) 2006 Ira W. Snyder (devel@irasnyder.com)'
116
        print '================================================================'
117
        print
118
 
119
        # Get input (program)
120
        input = self.__getInput()
121
 
122
        print
123
        print 'Parsing...'
124
        print
125
 
126
        # Parse it!
127
        self.__i.setInput(input)
201 ira 128
        self.LL1_Parsing_Algo()
199 ira 129
 
201 ira 130
    def getParsingTableEntry(self, nonterm, term):
131
        """Get the correct production rule out of the parsing table"""
199 ira 132
 
201 ira 133
        nt_num = 0
134
        t_num = 0
202 ira 135
 
136
        # Find the index of the nonterminal in the table
201 ira 137
        for t in self.PARSING_TABLE:
138
            if t[0] == nonterm:
139
                break
199 ira 140
 
201 ira 141
            nt_num += 1
200 ira 142
 
202 ira 143
        # Find the index of the terminal in the table
201 ira 144
        for t in self.PARSING_TABLE[0]:
145
            if t == term:
146
                break
147
 
148
            t_num += 1
149
 
202 ira 150
        # Check for bad terminal in the input string
151
        if t_num >= len(self.PARSING_TABLE[0]):
152
            self.__error(3, self.__s.getStack(), self.__i.getInput())
153
 
201 ira 154
        return self.PARSING_TABLE[nt_num][t_num]
155
 
156
    def print_production_rule(self, num):
202 ira 157
        """Nicely print the production for a given rule number"""
158
 
201 ira 159
        prod_list = self.GRAMMAR_SPEC[num]
202 ira 160
 
201 ira 161
        nt = prod_list[0]
162
        term_str = ' '.join(prod_list[1:])
163
 
202 ira 164
        # Put 'epsilon' in term_str if nothing is there
165
        if term_str == '':
166
            term_str = 'epsilon'
167
 
201 ira 168
        s = 'PRODUCTION: %s -> %s' % (nt, term_str)
169
 
170
        print s
202 ira 171
 
201 ira 172
    def LL1_Parsing_Algo (self):
173
        """This implements the table-based LL(1) parsing algorithm from Figure 4.2
174
        (Page 155) in Compiler Construction Principles and Practices.
175
        NOTE: There is a correction in the while() statement, from 'and' to 'or'."""
176
 
202 ira 177
        # No error has occurred yet, so set to False
178
        error_occurred = False
179
 
180
        # Prime the parser
201 ira 181
        self.__s.push(self.END_SYMBOL)
182
        self.__s.push(self.START_SYMBOL)
183
 
202 ira 184
        # The parsing algorithm itself
185
        # Keep going until we have no input OR nothing left in the stack
201 ira 186
        while self.__s.peek() != self.END_SYMBOL or self.__i.getNextToken() != self.END_SYMBOL:
187
 
202 ira 188
            # Try to match the stack and the input (terminal match)
201 ira 189
            if self.__s.peek() == self.__i.getNextToken():
190
                print 'MATCH: %s' % (self.__i.getNextToken(), )
191
                self.__s.pop()
192
                self.__i.match()
193
 
202 ira 194
            # Try to use a production rule from the table
195
            elif self.__s.peek() in self.nonterminals() \
196
            and self.getParsingTableEntry(self.__s.peek(), self.__i.getNextToken()) != '':
201 ira 197
 
202 ira 198
                    # Extract the entry from the parsing table
201 ira 199
                    nt = self.__s.pop()
200
                    t = self.__i.getNextToken()
201
                    pte = self.getParsingTableEntry(nt, t)
202
 
202 ira 203
                    # Print the production rule we just used
201 ira 204
                    self.print_production_rule(pte-1)
202 ira 205
 
206
                    # Push the production back to the stack, in the correct (reversed) order
201 ira 207
                    stack_additions = list(self.GRAMMAR_SPEC[pte-1][1:])
208
                    stack_additions.reverse()
209
 
210
                    for a in stack_additions:
211
                        self.__s.push(a)
212
 
202 ira 213
            # An error must have occurred. We either have an unknown symbol in the input
201 ira 214
            else:
202 ira 215
                error_occurred = True
216
                self.__error(1, self.__s.getStack(), self.__i.getInput())
217
                break
201 ira 218
            # endif
219
        # wend
220
 
202 ira 221
        # If an error already happened, no need to print out the error messages again
222
        if not error_occurred:
223
            if self.__s.peek() == self.END_SYMBOL and self.__i.getNextToken() == self.END_SYMBOL:
224
                print 'ACCEPTED: Valid program'
225
            else:
226
                self.__error(2, self.__s.getStack(), self.__i.getInput())
227
 
228
    def __error(self, errornum=0, stack=None, input=None):
229
        """Try to print a decent error message"""
230
 
231
 
232
        if errornum == 1:
233
            if input[0] in self.terminals():
234
                print 'Error: \'%s\' unexpected here (syntax error)' % input[0]
235
            else:
236
                print 'Error: unknown symbol: \'%s\'' % input[0]
237
        elif errornum == 2:
238
            if stack[-1] == self.END_SYMBOL:
239
                print 'Error: ran out of input'
240
            else:
241
                print 'Error: extraneous input: \'%s\'' % input
242
        elif errornum == 3:
243
            print 'Error: Invalid terminal: %s' % input[0]
201 ira 244
        else:
202 ira 245
            print 'Error: An unknown error occurred'
201 ira 246
 
202 ira 247
        # Always print the remaining stack and input strings
248
        print 'Stack: %s' % stack
249
        print 'Input: %s' % input
250
 
251
        # Always exit the program, since I don't know how to go back to the menu
252
        sys.exit(1)
253
 
199 ira 254
class InputParser:
255
    """Implements a simple tokenizer for the TOY language"""
256
 
201 ira 257
    def __init__(self):
258
        self.__inputstr = []
259
 
260
    def setInput(self, inputstr):
202 ira 261
        """Sets the private variable self.__inputstr to the parameter passed in"""
200 ira 262
        self.__inputstr = inputstr.split()
201 ira 263
        self.__inputstr.append('$') # Append '$' to the input
199 ira 264
 
201 ira 265
    def getInput(self):
266
        """Return a list representation of whatever is remaining of the input"""
267
 
268
        return self.__inputstr[:]
202 ira 269
 
199 ira 270
    def getNextToken(self):
271
        """Gets the first token off of the front of the input string, but
272
        DOES NOT remove any input."""
202 ira 273
 
200 ira 274
        try:
201 ira 275
            return self.__inputstr[0]
200 ira 276
        except IndexError:
277
            print 'CAUGHT IndexError in getNextToken()'
278
            return ''
199 ira 279
 
280
    def match(self):
281
        """A match has happened, so remove the front of the input string."""
202 ira 282
 
200 ira 283
        try:
284
            del self.__inputstr[0]
285
        except IndexError:
286
            print 'CAUGHT IndexError in match()'
199 ira 287
 
288
class Stack:
289
    """Implements a simple stack using a list"""
290
 
291
    def __init__(self):
292
        self.__data = []
293
 
201 ira 294
    def clear(self):
295
        """Empty the stack"""
296
        self.__data = []
202 ira 297
 
199 ira 298
    def push(self, val):
299
        """Push a value onto the top of the stack"""
300
        self.__data.append(val)
301
 
302
    def pop(self):
303
        """Pop a value off the top of the stack.
304
        WARNING: throws IndexError if there is nothing left in the stack"""
305
        return self.__data.pop()
306
 
307
    def peek(self):
308
        """Peek at the top of the stack. Basically a pop() without removing the
309
        object from the top.
310
        WARNING: will throw an IndexError if there is nothing left in the stack."""
311
 
312
        temp = self.pop()
313
        self.push(temp)
314
        return temp
315
 
201 ira 316
    def getStack(self):
317
        """Get whatever is in the stack, from bottom to top, as a list"""
318
 
319
        return self.__data[:]
320
 
199 ira 321
################################################################################
322
 
323
def main():
324
    """The main function for this program"""
325
 
201 ira 326
    t = TOYParser()
202 ira 327
    #t.main_menu()
328
    t.simpleParserRun()
200 ira 329
 
199 ira 330
if __name__ == '__main__':
331
    main()
332