199 |
ira |
1 |
#!/usr/bin/env python
|
|
|
2 |
|
200 |
ira |
3 |
#
|
199 |
ira |
4 |
# Copyright: 2006 Ira W. Snyder
|
|
|
5 |
# Start Date: 2006-02-01
|
|
|
6 |
# License: GNU General Public License v2 (or at your option, any later version)
|
|
|
7 |
#
|
|
|
8 |
|
201 |
ira |
9 |
class TOYParser:
|
|
|
10 |
"""A parser for the TOY language"""
|
199 |
ira |
11 |
|
201 |
ira |
12 |
def __init__(self):
|
|
|
13 |
self.__s = Stack()
|
|
|
14 |
self.__i = InputParser()
|
|
|
15 |
self.END_SYMBOL = '$'
|
|
|
16 |
self.PARSING_TABLE = (('', 'BEGIN', 'END', 'IF', 'ELSE', 'WHILE', 'ID', 'READ', 'WRITE', 'NUM', '+', '(', ')', ';', '=', '$'),
|
|
|
17 |
('program', 1, '', '', '', '', '', '', '', '', '', '', '', '', '', ''),
|
|
|
18 |
('stmtseq', '', '', 2, '', 2, 2, 2, 2, '', '', '', '', '', '', ''),
|
|
|
19 |
('stmtseq1', '', 4, '', '', '', '', '', '', '', '', '', '', 3, '', ''),
|
|
|
20 |
('stmt', '', '', 5, '', 6, 7, 8, 9, '', '', '', '', '', '', ''),
|
|
|
21 |
('exp', '', '', '', '', '', 10, '', '', 10, '', 10, '', '', '', ''),
|
|
|
22 |
('exp1', '', 12, '', 12, '', '', '', '', '', 11, '', 12, 12, '', ''),
|
|
|
23 |
('term', '', '', '', '', '', 15, '', '', 14, '', 13, '', '', '', ''))
|
|
|
24 |
|
|
|
25 |
self.GRAMMAR_SPEC = (('program', 'BEGIN', 'stmtseq', 'END'),
|
200 |
ira |
26 |
('stmtseq', 'stmt', 'stmtseq1'),
|
|
|
27 |
('stmtseq1', ';', 'stmt', 'stmtseq1'),
|
|
|
28 |
('stmtseq1', ),
|
|
|
29 |
('stmt', 'IF', '(', 'exp', ')', 'stmt', 'ELSE', 'stmt'),
|
|
|
30 |
('stmt', 'WHILE', '(', 'exp', ')', 'stmt'),
|
|
|
31 |
('stmt', 'ID', '=', 'exp'),
|
|
|
32 |
('stmt', 'READ', 'ID'),
|
|
|
33 |
('stmt', 'WRITE', 'exp'),
|
|
|
34 |
('exp', 'term', 'exp1'),
|
|
|
35 |
('exp1', '+', 'term', 'exp1'),
|
|
|
36 |
('exp1', ),
|
|
|
37 |
('term', '(', 'exp', ')'),
|
|
|
38 |
('term', 'NUM'),
|
|
|
39 |
('term', 'ID'))
|
|
|
40 |
|
201 |
ira |
41 |
self.START_SYMBOL = 'program'
|
200 |
ira |
42 |
|
201 |
ira |
43 |
def terminals(self):
|
|
|
44 |
"""Return a list of the terminals in the table"""
|
199 |
ira |
45 |
|
201 |
ira |
46 |
return [n for n in self.PARSING_TABLE[0] if n != '']
|
199 |
ira |
47 |
|
201 |
ira |
48 |
def nonterminals(self):
|
|
|
49 |
"""Return a list of non-terminals in the table"""
|
199 |
ira |
50 |
|
201 |
ira |
51 |
return [n[0] for n in self.PARSING_TABLE if n[0] != '']
|
|
|
52 |
|
|
|
53 |
def getInput(self):
|
|
|
54 |
self.__i.setInput('BEGIN READ ID ; WHILE ( ID ) ID = ID + NUM ; WRITE ID END')
|
|
|
55 |
self.LL1_Parsing_Algo()
|
199 |
ira |
56 |
|
201 |
ira |
57 |
def getParsingTableEntry(self, nonterm, term):
|
|
|
58 |
"""Get the correct production rule out of the parsing table"""
|
199 |
ira |
59 |
|
201 |
ira |
60 |
nt_num = 0
|
|
|
61 |
t_num = 0
|
|
|
62 |
|
|
|
63 |
for t in self.PARSING_TABLE:
|
|
|
64 |
if t[0] == nonterm:
|
|
|
65 |
break
|
199 |
ira |
66 |
|
201 |
ira |
67 |
nt_num += 1
|
200 |
ira |
68 |
|
201 |
ira |
69 |
for t in self.PARSING_TABLE[0]:
|
|
|
70 |
if t == term:
|
|
|
71 |
break
|
|
|
72 |
|
|
|
73 |
t_num += 1
|
|
|
74 |
|
|
|
75 |
return self.PARSING_TABLE[nt_num][t_num]
|
|
|
76 |
|
|
|
77 |
def print_production_rule(self, num):
|
|
|
78 |
prod_list = self.GRAMMAR_SPEC[num]
|
|
|
79 |
|
|
|
80 |
nt = prod_list[0]
|
|
|
81 |
term_str = ' '.join(prod_list[1:])
|
|
|
82 |
|
|
|
83 |
s = 'PRODUCTION: %s -> %s' % (nt, term_str)
|
|
|
84 |
|
|
|
85 |
print s
|
|
|
86 |
|
|
|
87 |
def LL1_Parsing_Algo (self):
|
|
|
88 |
"""This implements the table-based LL(1) parsing algorithm from Figure 4.2
|
|
|
89 |
(Page 155) in Compiler Construction Principles and Practices.
|
|
|
90 |
NOTE: There is a correction in the while() statement, from 'and' to 'or'."""
|
|
|
91 |
|
|
|
92 |
self.__s.push(self.END_SYMBOL)
|
|
|
93 |
self.__s.push(self.START_SYMBOL)
|
|
|
94 |
|
|
|
95 |
while self.__s.peek() != self.END_SYMBOL or self.__i.getNextToken() != self.END_SYMBOL:
|
|
|
96 |
|
|
|
97 |
if self.__s.peek() == self.__i.getNextToken():
|
|
|
98 |
print 'MATCH: %s' % (self.__i.getNextToken(), )
|
|
|
99 |
self.__s.pop()
|
|
|
100 |
self.__i.match()
|
|
|
101 |
|
|
|
102 |
elif self.__s.peek() in self.nonterminals() and self.getParsingTableEntry(self.__s.peek(), self.__i.getNextToken()) != '':
|
|
|
103 |
|
|
|
104 |
nt = self.__s.pop()
|
|
|
105 |
t = self.__i.getNextToken()
|
|
|
106 |
pte = self.getParsingTableEntry(nt, t)
|
|
|
107 |
|
|
|
108 |
self.print_production_rule(pte-1)
|
|
|
109 |
|
|
|
110 |
# Basically pop the stack, and push back the correct stuff
|
|
|
111 |
# in reverse order
|
|
|
112 |
stack_additions = list(self.GRAMMAR_SPEC[pte-1][1:])
|
|
|
113 |
stack_additions.reverse()
|
|
|
114 |
|
|
|
115 |
for a in stack_additions:
|
|
|
116 |
self.__s.push(a)
|
|
|
117 |
|
|
|
118 |
else:
|
|
|
119 |
print 'An error happened'
|
|
|
120 |
# endif
|
|
|
121 |
# wend
|
|
|
122 |
|
|
|
123 |
if self.__s.peek() == self.END_SYMBOL and self.__i.getNextToken() == self.END_SYMBOL:
|
|
|
124 |
print 'ACCEPT'
|
|
|
125 |
else:
|
|
|
126 |
print 'ERROR'
|
|
|
127 |
|
199 |
ira |
128 |
class InputParser:
|
|
|
129 |
"""Implements a simple tokenizer for the TOY language"""
|
|
|
130 |
|
201 |
ira |
131 |
def __init__(self):
|
|
|
132 |
self.__inputstr = []
|
|
|
133 |
|
|
|
134 |
def setInput(self, inputstr):
|
200 |
ira |
135 |
self.__inputstr = inputstr.split()
|
201 |
ira |
136 |
self.__inputstr.append('$') # Append '$' to the input
|
199 |
ira |
137 |
|
201 |
ira |
138 |
def getInput(self):
|
|
|
139 |
"""Return a list representation of whatever is remaining of the input"""
|
|
|
140 |
|
|
|
141 |
return self.__inputstr[:]
|
|
|
142 |
|
199 |
ira |
143 |
def getNextToken(self):
|
|
|
144 |
"""Gets the first token off of the front of the input string, but
|
|
|
145 |
DOES NOT remove any input."""
|
200 |
ira |
146 |
|
|
|
147 |
try:
|
201 |
ira |
148 |
return self.__inputstr[0]
|
200 |
ira |
149 |
except IndexError:
|
|
|
150 |
print 'CAUGHT IndexError in getNextToken()'
|
|
|
151 |
return ''
|
199 |
ira |
152 |
|
|
|
153 |
def match(self):
|
|
|
154 |
"""A match has happened, so remove the front of the input string."""
|
200 |
ira |
155 |
|
|
|
156 |
try:
|
|
|
157 |
del self.__inputstr[0]
|
|
|
158 |
except IndexError:
|
|
|
159 |
print 'CAUGHT IndexError in match()'
|
199 |
ira |
160 |
|
|
|
161 |
class Stack:
|
|
|
162 |
"""Implements a simple stack using a list"""
|
|
|
163 |
|
|
|
164 |
def __init__(self):
|
|
|
165 |
self.__data = []
|
|
|
166 |
|
201 |
ira |
167 |
def clear(self):
|
|
|
168 |
"""Empty the stack"""
|
|
|
169 |
self.__data = []
|
|
|
170 |
|
199 |
ira |
171 |
def push(self, val):
|
|
|
172 |
"""Push a value onto the top of the stack"""
|
|
|
173 |
self.__data.append(val)
|
|
|
174 |
|
|
|
175 |
def pop(self):
|
|
|
176 |
"""Pop a value off the top of the stack.
|
|
|
177 |
WARNING: throws IndexError if there is nothing left in the stack"""
|
|
|
178 |
return self.__data.pop()
|
|
|
179 |
|
|
|
180 |
def peek(self):
|
|
|
181 |
"""Peek at the top of the stack. Basically a pop() without removing the
|
|
|
182 |
object from the top.
|
|
|
183 |
WARNING: will throw an IndexError if there is nothing left in the stack."""
|
|
|
184 |
|
|
|
185 |
temp = self.pop()
|
|
|
186 |
self.push(temp)
|
|
|
187 |
return temp
|
|
|
188 |
|
201 |
ira |
189 |
def getStack(self):
|
|
|
190 |
"""Get whatever is in the stack, from bottom to top, as a list"""
|
|
|
191 |
|
|
|
192 |
return self.__data[:]
|
|
|
193 |
|
199 |
ira |
194 |
################################################################################
|
|
|
195 |
################################################################################
|
|
|
196 |
################################################################################
|
|
|
197 |
################################################################################
|
|
|
198 |
################################################################################
|
|
|
199 |
|
|
|
200 |
def main():
|
|
|
201 |
"""The main function for this program"""
|
|
|
202 |
|
201 |
ira |
203 |
t = TOYParser()
|
|
|
204 |
print t.getInput()
|
200 |
ira |
205 |
|
199 |
ira |
206 |
if __name__ == '__main__':
|
|
|
207 |
main()
|
|
|
208 |
|