Subversion Repositories programming

Rev

Rev 201 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 201 Rev 202
Line 4... Line 4...
4
# Copyright: 2006 Ira W. Snyder
4
# Copyright: 2006 Ira W. Snyder
5
# Start Date: 2006-02-01
5
# Start Date: 2006-02-01
6
# License: GNU General Public License v2 (or at your option, any later version)
6
# License: GNU General Public License v2 (or at your option, any later version)
7
#
7
#
8
 
8
 
-
 
9
#
-
 
10
# CS411: Compilers and Interpreters
-
 
11
# Project #1
-
 
12
# Due: 2006-02-03
-
 
13
#
-
 
14
 
-
 
15
# Check for <Python-2.3 compatibility (boolean values)
-
 
16
try:
-
 
17
  True, False
-
 
18
except NameError:
-
 
19
  (True, False) = (1, 0)
-
 
20
 
-
 
21
import sys
-
 
22
 
9
class TOYParser:
23
class TOYParser:
10
    """A parser for the TOY language"""
24
    """A parser for the TOY language"""
11
 
25
 
12
    def __init__(self):
26
    def __init__(self):
-
 
27
        """Constructor for the TOYParser"""
-
 
28
 
-
 
29
        # Instance Variables
13
        self.__s = Stack()
30
        self.__s = Stack()
14
        self.__i = InputParser()
31
        self.__i = InputParser()
15
        self.END_SYMBOL = '$'
32
        self.END_SYMBOL = '$'
16
        self.PARSING_TABLE = (('', 'BEGIN', 'END', 'IF', 'ELSE', 'WHILE', 'ID', 'READ', 'WRITE', 'NUM', '+', '(', ')', ';', '=', '$'),
33
        self.PARSING_TABLE = (('', 'BEGIN', 'END', 'IF', 'ELSE', 'WHILE', 'ID', 'READ', 'WRITE', 'NUM', '+', '(', ')', ';', '=', '$'),
17
                ('program', 1, '', '', '', '', '', '', '', '', '', '', '', '', '', ''),
34
                ('program', 1, '', '', '', '', '', '', '', '', '', '', '', '', '', ''),
Line 47... Line 64...
47
 
64
 
48
    def nonterminals(self):
65
    def nonterminals(self):
49
        """Return a list of non-terminals in the table"""
66
        """Return a list of non-terminals in the table"""
50
 
67
 
51
        return [n[0] for n in self.PARSING_TABLE if n[0] != '']
68
        return [n[0] for n in self.PARSING_TABLE if n[0] != '']
52
    
69
 
53
    def getInput(self):
70
    def __getInput(self):
-
 
71
        """Get the input (program) to parse.
54
        self.__i.setInput('BEGIN  READ ID ; WHILE ( ID ) ID  = ID + NUM ;  WRITE ID END')
72
        NOTE: This performs no checking by itself, since we'll do that later"""
-
 
73
 
-
 
74
        return raw_input('Please input the program to parse: ')
-
 
75
 
-
 
76
    def main_menu(self):
-
 
77
        """Run the menu interface of the program"""
-
 
78
 
-
 
79
        done = False
-
 
80
        input = ''
-
 
81
        have_input = False
-
 
82
 
-
 
83
        while not done:
-
 
84
            print 'Menu:'
-
 
85
            print '========================================'
-
 
86
            print '1. Enter an input string'
-
 
87
            print '2. Parse input'
-
 
88
            print '3. Quit'
-
 
89
            print
-
 
90
            s = raw_input('Choice >>> ')
-
 
91
            print
-
 
92
 
-
 
93
            if s == '1':
-
 
94
                input = self.__getInput()
-
 
95
                have_input = True
-
 
96
                print
-
 
97
            elif s == '2':
-
 
98
                if have_input:
-
 
99
                    self.__i.setInput(input)
-
 
100
                    self.LL1_Parsing_Algo()
-
 
101
                else:
-
 
102
                    print 'Enter an input string first'
-
 
103
                print
-
 
104
            elif s == '3':
-
 
105
                done = True
-
 
106
            else:
-
 
107
                print 'Bad Selection, try again'
-
 
108
                print
-
 
109
 
-
 
110
    def simpleParserRun(self):
-
 
111
        """Run the parser on a single input. Very simple"""
-
 
112
 
-
 
113
        # Print header
-
 
114
        print 'Simple TOY Language Parser'
-
 
115
        print 'Copyright (c) 2006 Ira W. Snyder (devel@irasnyder.com)'
-
 
116
        print '================================================================'
-
 
117
        print
-
 
118
 
-
 
119
        # Get input (program)
-
 
120
        input = self.__getInput()
-
 
121
 
-
 
122
        print
-
 
123
        print 'Parsing...'
-
 
124
        print
-
 
125
 
-
 
126
        # Parse it!
-
 
127
        self.__i.setInput(input)
55
        self.LL1_Parsing_Algo()
128
        self.LL1_Parsing_Algo()
56
 
129
 
57
    def getParsingTableEntry(self, nonterm, term):
130
    def getParsingTableEntry(self, nonterm, term):
58
        """Get the correct production rule out of the parsing table"""
131
        """Get the correct production rule out of the parsing table"""
59
 
132
 
60
        nt_num = 0
133
        nt_num = 0
61
        t_num = 0
134
        t_num = 0
62
        
135
 
-
 
136
        # Find the index of the nonterminal in the table
63
        for t in self.PARSING_TABLE:
137
        for t in self.PARSING_TABLE:
64
            if t[0] == nonterm:
138
            if t[0] == nonterm:
65
                break
139
                break
66
 
140
 
67
            nt_num += 1
141
            nt_num += 1
68
 
142
 
-
 
143
        # Find the index of the terminal in the table
69
        for t in self.PARSING_TABLE[0]:
144
        for t in self.PARSING_TABLE[0]:
70
            if t == term:
145
            if t == term:
71
                break
146
                break
72
 
147
 
73
            t_num += 1
148
            t_num += 1
74
 
149
 
-
 
150
        # Check for bad terminal in the input string
-
 
151
        if t_num >= len(self.PARSING_TABLE[0]):
-
 
152
            self.__error(3, self.__s.getStack(), self.__i.getInput())
-
 
153
 
75
        return self.PARSING_TABLE[nt_num][t_num]
154
        return self.PARSING_TABLE[nt_num][t_num]
76
 
155
 
77
    def print_production_rule(self, num):
156
    def print_production_rule(self, num):
-
 
157
        """Nicely print the production for a given rule number"""
-
 
158
 
78
        prod_list = self.GRAMMAR_SPEC[num]
159
        prod_list = self.GRAMMAR_SPEC[num]
79
        
160
 
80
        nt = prod_list[0]
161
        nt = prod_list[0]
81
        term_str = ' '.join(prod_list[1:])
162
        term_str = ' '.join(prod_list[1:])
82
 
163
 
-
 
164
        # Put 'epsilon' in term_str if nothing is there
-
 
165
        if term_str == '':
-
 
166
            term_str = 'epsilon'
-
 
167
 
83
        s = 'PRODUCTION: %s -> %s' % (nt, term_str)
168
        s = 'PRODUCTION: %s -> %s' % (nt, term_str)
84
 
169
 
85
        print s
170
        print s
86
        
171
 
87
    def LL1_Parsing_Algo (self):
172
    def LL1_Parsing_Algo (self):
88
        """This implements the table-based LL(1) parsing algorithm from Figure 4.2
173
        """This implements the table-based LL(1) parsing algorithm from Figure 4.2
89
        (Page 155) in Compiler Construction Principles and Practices.
174
        (Page 155) in Compiler Construction Principles and Practices.
90
        NOTE: There is a correction in the while() statement, from 'and' to 'or'."""
175
        NOTE: There is a correction in the while() statement, from 'and' to 'or'."""
91
 
176
 
-
 
177
        # No error has occurred yet, so set to False
-
 
178
        error_occurred = False
-
 
179
 
-
 
180
        # Prime the parser
92
        self.__s.push(self.END_SYMBOL)
181
        self.__s.push(self.END_SYMBOL)
93
        self.__s.push(self.START_SYMBOL)
182
        self.__s.push(self.START_SYMBOL)
94
 
183
 
-
 
184
        # The parsing algorithm itself
-
 
185
        # Keep going until we have no input OR nothing left in the stack
95
        while self.__s.peek() != self.END_SYMBOL or self.__i.getNextToken() != self.END_SYMBOL:
186
        while self.__s.peek() != self.END_SYMBOL or self.__i.getNextToken() != self.END_SYMBOL:
96
 
187
 
-
 
188
            # Try to match the stack and the input (terminal match)
97
            if self.__s.peek() == self.__i.getNextToken():
189
            if self.__s.peek() == self.__i.getNextToken():
98
                print 'MATCH: %s' % (self.__i.getNextToken(), )
190
                print 'MATCH: %s' % (self.__i.getNextToken(), )
99
                self.__s.pop()
191
                self.__s.pop()
100
                self.__i.match()
192
                self.__i.match()
101
 
193
 
-
 
194
            # Try to use a production rule from the table
-
 
195
            elif self.__s.peek() in self.nonterminals() \
102
            elif self.__s.peek() in self.nonterminals() and self.getParsingTableEntry(self.__s.peek(), self.__i.getNextToken()) != '':
196
            and self.getParsingTableEntry(self.__s.peek(), self.__i.getNextToken()) != '':
103
 
197
 
-
 
198
                    # Extract the entry from the parsing table
104
                    nt = self.__s.pop()
199
                    nt = self.__s.pop()
105
                    t = self.__i.getNextToken()
200
                    t = self.__i.getNextToken()
106
                    pte = self.getParsingTableEntry(nt, t)
201
                    pte = self.getParsingTableEntry(nt, t)
107
 
202
 
-
 
203
                    # Print the production rule we just used
108
                    self.print_production_rule(pte-1)
204
                    self.print_production_rule(pte-1)
109
    
205
 
110
                    # Basically pop the stack, and push back the correct stuff
206
                    # Push the production back to the stack, in the correct (reversed) order
111
                    # in reverse order
-
 
112
                    stack_additions = list(self.GRAMMAR_SPEC[pte-1][1:])
207
                    stack_additions = list(self.GRAMMAR_SPEC[pte-1][1:])
113
                    stack_additions.reverse()
208
                    stack_additions.reverse()
114
 
209
 
115
                    for a in stack_additions:
210
                    for a in stack_additions:
116
                        self.__s.push(a)
211
                        self.__s.push(a)
117
 
212
 
-
 
213
            # An error must have occurred. We either have an unknown symbol in the input
118
            else:
214
            else:
119
                print 'An error happened'
215
                error_occurred = True
-
 
216
                self.__error(1, self.__s.getStack(), self.__i.getInput())
-
 
217
                break
120
            # endif
218
            # endif
121
        # wend
219
        # wend
122
 
220
 
-
 
221
        # If an error already happened, no need to print out the error messages again
-
 
222
        if not error_occurred:
123
        if self.__s.peek() == self.END_SYMBOL and self.__i.getNextToken() == self.END_SYMBOL:
223
            if self.__s.peek() == self.END_SYMBOL and self.__i.getNextToken() == self.END_SYMBOL:
124
            print 'ACCEPT'
224
                print 'ACCEPTED: Valid program'
-
 
225
            else:
-
 
226
                self.__error(2, self.__s.getStack(), self.__i.getInput())
-
 
227
 
-
 
228
    def __error(self, errornum=0, stack=None, input=None):
-
 
229
        """Try to print a decent error message"""
-
 
230
 
-
 
231
 
-
 
232
        if errornum == 1:
-
 
233
            if input[0] in self.terminals():
-
 
234
                print 'Error: \'%s\' unexpected here (syntax error)' % input[0]
-
 
235
            else:
-
 
236
                print 'Error: unknown symbol: \'%s\'' % input[0]
-
 
237
        elif errornum == 2:
-
 
238
            if stack[-1] == self.END_SYMBOL:
-
 
239
                print 'Error: ran out of input'
-
 
240
            else:
-
 
241
                print 'Error: extraneous input: \'%s\'' % input
-
 
242
        elif errornum == 3:
-
 
243
            print 'Error: Invalid terminal: %s' % input[0]
125
        else:
244
        else:
-
 
245
            print 'Error: An unknown error occurred'
-
 
246
 
-
 
247
        # Always print the remaining stack and input strings
-
 
248
        print 'Stack: %s' % stack
-
 
249
        print 'Input: %s' % input
-
 
250
 
-
 
251
        # Always exit the program, since I don't know how to go back to the menu
126
            print 'ERROR'
252
        sys.exit(1)
127
 
253
 
128
class InputParser:
254
class InputParser:
129
    """Implements a simple tokenizer for the TOY language"""
255
    """Implements a simple tokenizer for the TOY language"""
130
 
256
 
131
    def __init__(self):
257
    def __init__(self):
132
        self.__inputstr = []
258
        self.__inputstr = []
133
 
259
 
134
    def setInput(self, inputstr):
260
    def setInput(self, inputstr):
-
 
261
        """Sets the private variable self.__inputstr to the parameter passed in"""
135
        self.__inputstr = inputstr.split()
262
        self.__inputstr = inputstr.split()
136
        self.__inputstr.append('$') # Append '$' to the input
263
        self.__inputstr.append('$') # Append '$' to the input
137
 
264
 
138
    def getInput(self):
265
    def getInput(self):
139
        """Return a list representation of whatever is remaining of the input"""
266
        """Return a list representation of whatever is remaining of the input"""
140
 
267
 
141
        return self.__inputstr[:]
268
        return self.__inputstr[:]
142
        
269
 
143
    def getNextToken(self):
270
    def getNextToken(self):
144
        """Gets the first token off of the front of the input string, but
271
        """Gets the first token off of the front of the input string, but
145
        DOES NOT remove any input."""
272
        DOES NOT remove any input."""
146
        
273
 
147
        try:
274
        try:
148
            return self.__inputstr[0]
275
            return self.__inputstr[0]
149
        except IndexError:
276
        except IndexError:
150
            print 'CAUGHT IndexError in getNextToken()'
277
            print 'CAUGHT IndexError in getNextToken()'
151
            return ''
278
            return ''
152
 
279
 
153
    def match(self):
280
    def match(self):
154
        """A match has happened, so remove the front of the input string."""
281
        """A match has happened, so remove the front of the input string."""
155
        
282
 
156
        try:
283
        try:
157
            del self.__inputstr[0]
284
            del self.__inputstr[0]
158
        except IndexError:
285
        except IndexError:
159
            print 'CAUGHT IndexError in match()'
286
            print 'CAUGHT IndexError in match()'
160
 
287
 
Line 165... Line 292...
165
        self.__data = []
292
        self.__data = []
166
 
293
 
167
    def clear(self):
294
    def clear(self):
168
        """Empty the stack"""
295
        """Empty the stack"""
169
        self.__data = []
296
        self.__data = []
170
        
297
 
171
    def push(self, val):
298
    def push(self, val):
172
        """Push a value onto the top of the stack"""
299
        """Push a value onto the top of the stack"""
173
        self.__data.append(val)
300
        self.__data.append(val)
174
 
301
 
175
    def pop(self):
302
    def pop(self):
Line 190... Line 317...
190
        """Get whatever is in the stack, from bottom to top, as a list"""
317
        """Get whatever is in the stack, from bottom to top, as a list"""
191
 
318
 
192
        return self.__data[:]
319
        return self.__data[:]
193
 
320
 
194
################################################################################
321
################################################################################
195
################################################################################
-
 
196
################################################################################
-
 
197
################################################################################
-
 
198
################################################################################
-
 
199
 
322
 
200
def main():
323
def main():
201
    """The main function for this program"""
324
    """The main function for this program"""
202
 
325
 
203
    t = TOYParser()
326
    t = TOYParser()
204
    print t.getInput()
327
    #t.main_menu()
-
 
328
    t.simpleParserRun()
205
 
329
 
206
if __name__ == '__main__':
330
if __name__ == '__main__':
207
    main()
331
    main()
208
 
332