| 199 |
ira |
1 |
#!/usr/bin/env python
|
|
|
2 |
|
| 200 |
ira |
3 |
#
|
| 199 |
ira |
4 |
# Copyright: 2006 Ira W. Snyder
|
|
|
5 |
# Start Date: 2006-02-01
|
|
|
6 |
# License: GNU General Public License v2 (or at your option, any later version)
|
|
|
7 |
#
|
|
|
8 |
|
| 202 |
ira |
9 |
#
|
|
|
10 |
# CS411: Compilers and Interpreters
|
|
|
11 |
# Project #1
|
|
|
12 |
# Due: 2006-02-03
|
|
|
13 |
#
|
|
|
14 |
|
|
|
15 |
# Check for <Python-2.3 compatibility (boolean values)
|
|
|
16 |
try:
|
|
|
17 |
True, False
|
|
|
18 |
except NameError:
|
|
|
19 |
(True, False) = (1, 0)
|
|
|
20 |
|
|
|
21 |
import sys
|
|
|
22 |
|
| 201 |
ira |
23 |
class TOYParser:
|
|
|
24 |
"""A parser for the TOY language"""
|
| 199 |
ira |
25 |
|
| 201 |
ira |
26 |
def __init__(self):
|
| 202 |
ira |
27 |
"""Constructor for the TOYParser"""
|
|
|
28 |
|
|
|
29 |
# Instance Variables
|
| 201 |
ira |
30 |
self.__s = Stack()
|
|
|
31 |
self.__i = InputParser()
|
|
|
32 |
self.END_SYMBOL = '$'
|
|
|
33 |
self.PARSING_TABLE = (('', 'BEGIN', 'END', 'IF', 'ELSE', 'WHILE', 'ID', 'READ', 'WRITE', 'NUM', '+', '(', ')', ';', '=', '$'),
|
|
|
34 |
('program', 1, '', '', '', '', '', '', '', '', '', '', '', '', '', ''),
|
|
|
35 |
('stmtseq', '', '', 2, '', 2, 2, 2, 2, '', '', '', '', '', '', ''),
|
|
|
36 |
('stmtseq1', '', 4, '', '', '', '', '', '', '', '', '', '', 3, '', ''),
|
|
|
37 |
('stmt', '', '', 5, '', 6, 7, 8, 9, '', '', '', '', '', '', ''),
|
|
|
38 |
('exp', '', '', '', '', '', 10, '', '', 10, '', 10, '', '', '', ''),
|
|
|
39 |
('exp1', '', 12, '', 12, '', '', '', '', '', 11, '', 12, 12, '', ''),
|
|
|
40 |
('term', '', '', '', '', '', 15, '', '', 14, '', 13, '', '', '', ''))
|
|
|
41 |
|
|
|
42 |
self.GRAMMAR_SPEC = (('program', 'BEGIN', 'stmtseq', 'END'),
|
| 200 |
ira |
43 |
('stmtseq', 'stmt', 'stmtseq1'),
|
|
|
44 |
('stmtseq1', ';', 'stmt', 'stmtseq1'),
|
|
|
45 |
('stmtseq1', ),
|
|
|
46 |
('stmt', 'IF', '(', 'exp', ')', 'stmt', 'ELSE', 'stmt'),
|
|
|
47 |
('stmt', 'WHILE', '(', 'exp', ')', 'stmt'),
|
|
|
48 |
('stmt', 'ID', '=', 'exp'),
|
|
|
49 |
('stmt', 'READ', 'ID'),
|
|
|
50 |
('stmt', 'WRITE', 'exp'),
|
|
|
51 |
('exp', 'term', 'exp1'),
|
|
|
52 |
('exp1', '+', 'term', 'exp1'),
|
|
|
53 |
('exp1', ),
|
|
|
54 |
('term', '(', 'exp', ')'),
|
|
|
55 |
('term', 'NUM'),
|
|
|
56 |
('term', 'ID'))
|
|
|
57 |
|
| 201 |
ira |
58 |
self.START_SYMBOL = 'program'
|
| 200 |
ira |
59 |
|
| 201 |
ira |
60 |
def terminals(self):
|
|
|
61 |
"""Return a list of the terminals in the table"""
|
| 199 |
ira |
62 |
|
| 201 |
ira |
63 |
return [n for n in self.PARSING_TABLE[0] if n != '']
|
| 199 |
ira |
64 |
|
| 201 |
ira |
65 |
def nonterminals(self):
|
|
|
66 |
"""Return a list of non-terminals in the table"""
|
| 199 |
ira |
67 |
|
| 201 |
ira |
68 |
return [n[0] for n in self.PARSING_TABLE if n[0] != '']
|
| 202 |
ira |
69 |
|
|
|
70 |
def __getInput(self):
|
|
|
71 |
"""Get the input (program) to parse.
|
|
|
72 |
NOTE: This performs no checking by itself, since we'll do that later"""
|
|
|
73 |
|
|
|
74 |
return raw_input('Please input the program to parse: ')
|
|
|
75 |
|
|
|
76 |
def main_menu(self):
|
|
|
77 |
"""Run the menu interface of the program"""
|
|
|
78 |
|
|
|
79 |
done = False
|
|
|
80 |
input = ''
|
|
|
81 |
have_input = False
|
|
|
82 |
|
|
|
83 |
while not done:
|
|
|
84 |
print 'Menu:'
|
|
|
85 |
print '========================================'
|
|
|
86 |
print '1. Enter an input string'
|
|
|
87 |
print '2. Parse input'
|
|
|
88 |
print '3. Quit'
|
|
|
89 |
print
|
|
|
90 |
s = raw_input('Choice >>> ')
|
|
|
91 |
print
|
|
|
92 |
|
|
|
93 |
if s == '1':
|
|
|
94 |
input = self.__getInput()
|
|
|
95 |
have_input = True
|
|
|
96 |
print
|
|
|
97 |
elif s == '2':
|
|
|
98 |
if have_input:
|
|
|
99 |
self.__i.setInput(input)
|
|
|
100 |
self.LL1_Parsing_Algo()
|
|
|
101 |
else:
|
|
|
102 |
print 'Enter an input string first'
|
|
|
103 |
print
|
|
|
104 |
elif s == '3':
|
|
|
105 |
done = True
|
|
|
106 |
else:
|
|
|
107 |
print 'Bad Selection, try again'
|
|
|
108 |
print
|
|
|
109 |
|
|
|
110 |
def simpleParserRun(self):
|
|
|
111 |
"""Run the parser on a single input. Very simple"""
|
|
|
112 |
|
|
|
113 |
# Print header
|
|
|
114 |
print 'Simple TOY Language Parser'
|
|
|
115 |
print 'Copyright (c) 2006 Ira W. Snyder (devel@irasnyder.com)'
|
|
|
116 |
print '================================================================'
|
|
|
117 |
print
|
|
|
118 |
|
|
|
119 |
# Get input (program)
|
|
|
120 |
input = self.__getInput()
|
|
|
121 |
|
|
|
122 |
print
|
|
|
123 |
print 'Parsing...'
|
|
|
124 |
print
|
|
|
125 |
|
|
|
126 |
# Parse it!
|
|
|
127 |
self.__i.setInput(input)
|
| 201 |
ira |
128 |
self.LL1_Parsing_Algo()
|
| 199 |
ira |
129 |
|
| 201 |
ira |
130 |
def getParsingTableEntry(self, nonterm, term):
|
|
|
131 |
"""Get the correct production rule out of the parsing table"""
|
| 199 |
ira |
132 |
|
| 201 |
ira |
133 |
nt_num = 0
|
|
|
134 |
t_num = 0
|
| 202 |
ira |
135 |
|
|
|
136 |
# Find the index of the nonterminal in the table
|
| 201 |
ira |
137 |
for t in self.PARSING_TABLE:
|
|
|
138 |
if t[0] == nonterm:
|
|
|
139 |
break
|
| 199 |
ira |
140 |
|
| 201 |
ira |
141 |
nt_num += 1
|
| 200 |
ira |
142 |
|
| 202 |
ira |
143 |
# Find the index of the terminal in the table
|
| 201 |
ira |
144 |
for t in self.PARSING_TABLE[0]:
|
|
|
145 |
if t == term:
|
|
|
146 |
break
|
|
|
147 |
|
|
|
148 |
t_num += 1
|
|
|
149 |
|
| 202 |
ira |
150 |
# Check for bad terminal in the input string
|
|
|
151 |
if t_num >= len(self.PARSING_TABLE[0]):
|
|
|
152 |
self.__error(3, self.__s.getStack(), self.__i.getInput())
|
|
|
153 |
|
| 201 |
ira |
154 |
return self.PARSING_TABLE[nt_num][t_num]
|
|
|
155 |
|
|
|
156 |
def print_production_rule(self, num):
|
| 202 |
ira |
157 |
"""Nicely print the production for a given rule number"""
|
|
|
158 |
|
| 201 |
ira |
159 |
prod_list = self.GRAMMAR_SPEC[num]
|
| 202 |
ira |
160 |
|
| 201 |
ira |
161 |
nt = prod_list[0]
|
|
|
162 |
term_str = ' '.join(prod_list[1:])
|
|
|
163 |
|
| 202 |
ira |
164 |
# Put 'epsilon' in term_str if nothing is there
|
|
|
165 |
if term_str == '':
|
|
|
166 |
term_str = 'epsilon'
|
|
|
167 |
|
| 201 |
ira |
168 |
s = 'PRODUCTION: %s -> %s' % (nt, term_str)
|
|
|
169 |
|
|
|
170 |
print s
|
| 202 |
ira |
171 |
|
| 201 |
ira |
172 |
def LL1_Parsing_Algo (self):
|
|
|
173 |
"""This implements the table-based LL(1) parsing algorithm from Figure 4.2
|
|
|
174 |
(Page 155) in Compiler Construction Principles and Practices.
|
|
|
175 |
NOTE: There is a correction in the while() statement, from 'and' to 'or'."""
|
|
|
176 |
|
| 202 |
ira |
177 |
# No error has occurred yet, so set to False
|
|
|
178 |
error_occurred = False
|
|
|
179 |
|
|
|
180 |
# Prime the parser
|
| 201 |
ira |
181 |
self.__s.push(self.END_SYMBOL)
|
|
|
182 |
self.__s.push(self.START_SYMBOL)
|
|
|
183 |
|
| 202 |
ira |
184 |
# The parsing algorithm itself
|
|
|
185 |
# Keep going until we have no input OR nothing left in the stack
|
| 201 |
ira |
186 |
while self.__s.peek() != self.END_SYMBOL or self.__i.getNextToken() != self.END_SYMBOL:
|
|
|
187 |
|
| 202 |
ira |
188 |
# Try to match the stack and the input (terminal match)
|
| 201 |
ira |
189 |
if self.__s.peek() == self.__i.getNextToken():
|
|
|
190 |
print 'MATCH: %s' % (self.__i.getNextToken(), )
|
|
|
191 |
self.__s.pop()
|
|
|
192 |
self.__i.match()
|
|
|
193 |
|
| 202 |
ira |
194 |
# Try to use a production rule from the table
|
|
|
195 |
elif self.__s.peek() in self.nonterminals() \
|
|
|
196 |
and self.getParsingTableEntry(self.__s.peek(), self.__i.getNextToken()) != '':
|
| 201 |
ira |
197 |
|
| 202 |
ira |
198 |
# Extract the entry from the parsing table
|
| 201 |
ira |
199 |
nt = self.__s.pop()
|
|
|
200 |
t = self.__i.getNextToken()
|
|
|
201 |
pte = self.getParsingTableEntry(nt, t)
|
|
|
202 |
|
| 202 |
ira |
203 |
# Print the production rule we just used
|
| 201 |
ira |
204 |
self.print_production_rule(pte-1)
|
| 202 |
ira |
205 |
|
|
|
206 |
# Push the production back to the stack, in the correct (reversed) order
|
| 201 |
ira |
207 |
stack_additions = list(self.GRAMMAR_SPEC[pte-1][1:])
|
|
|
208 |
stack_additions.reverse()
|
|
|
209 |
|
|
|
210 |
for a in stack_additions:
|
|
|
211 |
self.__s.push(a)
|
|
|
212 |
|
| 202 |
ira |
213 |
# An error must have occurred. We either have an unknown symbol in the input
|
| 201 |
ira |
214 |
else:
|
| 202 |
ira |
215 |
error_occurred = True
|
|
|
216 |
self.__error(1, self.__s.getStack(), self.__i.getInput())
|
|
|
217 |
break
|
| 201 |
ira |
218 |
# endif
|
|
|
219 |
# wend
|
|
|
220 |
|
| 202 |
ira |
221 |
# If an error already happened, no need to print out the error messages again
|
|
|
222 |
if not error_occurred:
|
|
|
223 |
if self.__s.peek() == self.END_SYMBOL and self.__i.getNextToken() == self.END_SYMBOL:
|
|
|
224 |
print 'ACCEPTED: Valid program'
|
|
|
225 |
else:
|
|
|
226 |
self.__error(2, self.__s.getStack(), self.__i.getInput())
|
|
|
227 |
|
|
|
228 |
def __error(self, errornum=0, stack=None, input=None):
|
|
|
229 |
"""Try to print a decent error message"""
|
|
|
230 |
|
|
|
231 |
|
|
|
232 |
if errornum == 1:
|
|
|
233 |
if input[0] in self.terminals():
|
|
|
234 |
print 'Error: \'%s\' unexpected here (syntax error)' % input[0]
|
|
|
235 |
else:
|
|
|
236 |
print 'Error: unknown symbol: \'%s\'' % input[0]
|
|
|
237 |
elif errornum == 2:
|
|
|
238 |
if stack[-1] == self.END_SYMBOL:
|
|
|
239 |
print 'Error: ran out of input'
|
|
|
240 |
else:
|
|
|
241 |
print 'Error: extraneous input: \'%s\'' % input
|
|
|
242 |
elif errornum == 3:
|
|
|
243 |
print 'Error: Invalid terminal: %s' % input[0]
|
| 201 |
ira |
244 |
else:
|
| 202 |
ira |
245 |
print 'Error: An unknown error occurred'
|
| 201 |
ira |
246 |
|
| 202 |
ira |
247 |
# Always print the remaining stack and input strings
|
|
|
248 |
print 'Stack: %s' % stack
|
|
|
249 |
print 'Input: %s' % input
|
|
|
250 |
|
|
|
251 |
# Always exit the program, since I don't know how to go back to the menu
|
|
|
252 |
sys.exit(1)
|
|
|
253 |
|
| 199 |
ira |
254 |
class InputParser:
|
|
|
255 |
"""Implements a simple tokenizer for the TOY language"""
|
|
|
256 |
|
| 201 |
ira |
257 |
def __init__(self):
|
|
|
258 |
self.__inputstr = []
|
|
|
259 |
|
|
|
260 |
def setInput(self, inputstr):
|
| 202 |
ira |
261 |
"""Sets the private variable self.__inputstr to the parameter passed in"""
|
| 200 |
ira |
262 |
self.__inputstr = inputstr.split()
|
| 201 |
ira |
263 |
self.__inputstr.append('$') # Append '$' to the input
|
| 199 |
ira |
264 |
|
| 201 |
ira |
265 |
def getInput(self):
|
|
|
266 |
"""Return a list representation of whatever is remaining of the input"""
|
|
|
267 |
|
|
|
268 |
return self.__inputstr[:]
|
| 202 |
ira |
269 |
|
| 199 |
ira |
270 |
def getNextToken(self):
|
|
|
271 |
"""Gets the first token off of the front of the input string, but
|
|
|
272 |
DOES NOT remove any input."""
|
| 202 |
ira |
273 |
|
| 200 |
ira |
274 |
try:
|
| 201 |
ira |
275 |
return self.__inputstr[0]
|
| 200 |
ira |
276 |
except IndexError:
|
|
|
277 |
print 'CAUGHT IndexError in getNextToken()'
|
|
|
278 |
return ''
|
| 199 |
ira |
279 |
|
|
|
280 |
def match(self):
|
|
|
281 |
"""A match has happened, so remove the front of the input string."""
|
| 202 |
ira |
282 |
|
| 200 |
ira |
283 |
try:
|
|
|
284 |
del self.__inputstr[0]
|
|
|
285 |
except IndexError:
|
|
|
286 |
print 'CAUGHT IndexError in match()'
|
| 199 |
ira |
287 |
|
|
|
288 |
class Stack:
|
|
|
289 |
"""Implements a simple stack using a list"""
|
|
|
290 |
|
|
|
291 |
def __init__(self):
|
|
|
292 |
self.__data = []
|
|
|
293 |
|
| 201 |
ira |
294 |
def clear(self):
|
|
|
295 |
"""Empty the stack"""
|
|
|
296 |
self.__data = []
|
| 202 |
ira |
297 |
|
| 199 |
ira |
298 |
def push(self, val):
|
|
|
299 |
"""Push a value onto the top of the stack"""
|
|
|
300 |
self.__data.append(val)
|
|
|
301 |
|
|
|
302 |
def pop(self):
|
|
|
303 |
"""Pop a value off the top of the stack.
|
|
|
304 |
WARNING: throws IndexError if there is nothing left in the stack"""
|
|
|
305 |
return self.__data.pop()
|
|
|
306 |
|
|
|
307 |
def peek(self):
|
|
|
308 |
"""Peek at the top of the stack. Basically a pop() without removing the
|
|
|
309 |
object from the top.
|
|
|
310 |
WARNING: will throw an IndexError if there is nothing left in the stack."""
|
|
|
311 |
|
|
|
312 |
temp = self.pop()
|
|
|
313 |
self.push(temp)
|
|
|
314 |
return temp
|
|
|
315 |
|
| 201 |
ira |
316 |
def getStack(self):
|
|
|
317 |
"""Get whatever is in the stack, from bottom to top, as a list"""
|
|
|
318 |
|
|
|
319 |
return self.__data[:]
|
|
|
320 |
|
| 199 |
ira |
321 |
################################################################################
|
|
|
322 |
|
|
|
323 |
def main():
|
|
|
324 |
"""The main function for this program"""
|
|
|
325 |
|
| 201 |
ira |
326 |
t = TOYParser()
|
| 202 |
ira |
327 |
#t.main_menu()
|
|
|
328 |
t.simpleParserRun()
|
| 200 |
ira |
329 |
|
| 199 |
ira |
330 |
if __name__ == '__main__':
|
|
|
331 |
main()
|
|
|
332 |
|