Subversion Repositories programming

Rev

Rev 200 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
199 ira 1
#!/usr/bin/env python
2
 
200 ira 3
#
199 ira 4
# Copyright: 2006 Ira W. Snyder
5
# Start Date: 2006-02-01
6
# License: GNU General Public License v2 (or at your option, any later version)
7
#
8
 
201 ira 9
class TOYParser:
10
    """A parser for the TOY language"""
199 ira 11
 
201 ira 12
    def __init__(self):
13
        self.__s = Stack()
14
        self.__i = InputParser()
15
        self.END_SYMBOL = '$'
16
        self.PARSING_TABLE = (('', 'BEGIN', 'END', 'IF', 'ELSE', 'WHILE', 'ID', 'READ', 'WRITE', 'NUM', '+', '(', ')', ';', '=', '$'),
17
                ('program', 1, '', '', '', '', '', '', '', '', '', '', '', '', '', ''),
18
                ('stmtseq', '', '', 2, '', 2, 2, 2, 2, '', '', '', '', '', '', ''),
19
                ('stmtseq1', '', 4, '', '', '', '', '', '', '', '', '', '', 3, '', ''),
20
                ('stmt', '', '', 5, '', 6, 7, 8, 9, '', '', '', '', '', '', ''),
21
                ('exp', '', '', '', '', '', 10, '', '', 10, '', 10, '', '', '', ''),
22
                ('exp1', '', 12, '', 12, '', '', '', '', '', 11, '', 12, 12, '', ''),
23
                ('term', '', '', '', '', '', 15, '', '', 14, '', 13, '', '', '', ''))
24
 
25
        self.GRAMMAR_SPEC = (('program', 'BEGIN', 'stmtseq', 'END'),
200 ira 26
                ('stmtseq', 'stmt', 'stmtseq1'),
27
                ('stmtseq1', ';', 'stmt', 'stmtseq1'),
28
                ('stmtseq1', ),
29
                ('stmt', 'IF', '(', 'exp', ')', 'stmt', 'ELSE', 'stmt'),
30
                ('stmt', 'WHILE', '(', 'exp', ')', 'stmt'),
31
                ('stmt', 'ID', '=', 'exp'),
32
                ('stmt', 'READ', 'ID'),
33
                ('stmt', 'WRITE', 'exp'),
34
                ('exp', 'term', 'exp1'),
35
                ('exp1', '+', 'term', 'exp1'),
36
                ('exp1', ),
37
                ('term', '(', 'exp', ')'),
38
                ('term', 'NUM'),
39
                ('term', 'ID'))
40
 
201 ira 41
        self.START_SYMBOL = 'program'
200 ira 42
 
201 ira 43
    def terminals(self):
44
        """Return a list of the terminals in the table"""
199 ira 45
 
201 ira 46
        return [n for n in self.PARSING_TABLE[0] if n != '']
199 ira 47
 
201 ira 48
    def nonterminals(self):
49
        """Return a list of non-terminals in the table"""
199 ira 50
 
201 ira 51
        return [n[0] for n in self.PARSING_TABLE if n[0] != '']
52
 
53
    def getInput(self):
54
        self.__i.setInput('BEGIN  READ ID ; WHILE ( ID ) ID  = ID + NUM ;  WRITE ID END')
55
        self.LL1_Parsing_Algo()
199 ira 56
 
201 ira 57
    def getParsingTableEntry(self, nonterm, term):
58
        """Get the correct production rule out of the parsing table"""
199 ira 59
 
201 ira 60
        nt_num = 0
61
        t_num = 0
62
 
63
        for t in self.PARSING_TABLE:
64
            if t[0] == nonterm:
65
                break
199 ira 66
 
201 ira 67
            nt_num += 1
200 ira 68
 
201 ira 69
        for t in self.PARSING_TABLE[0]:
70
            if t == term:
71
                break
72
 
73
            t_num += 1
74
 
75
        return self.PARSING_TABLE[nt_num][t_num]
76
 
77
    def print_production_rule(self, num):
78
        prod_list = self.GRAMMAR_SPEC[num]
79
 
80
        nt = prod_list[0]
81
        term_str = ' '.join(prod_list[1:])
82
 
83
        s = 'PRODUCTION: %s -> %s' % (nt, term_str)
84
 
85
        print s
86
 
87
    def LL1_Parsing_Algo (self):
88
        """This implements the table-based LL(1) parsing algorithm from Figure 4.2
89
        (Page 155) in Compiler Construction Principles and Practices.
90
        NOTE: There is a correction in the while() statement, from 'and' to 'or'."""
91
 
92
        self.__s.push(self.END_SYMBOL)
93
        self.__s.push(self.START_SYMBOL)
94
 
95
        while self.__s.peek() != self.END_SYMBOL or self.__i.getNextToken() != self.END_SYMBOL:
96
 
97
            if self.__s.peek() == self.__i.getNextToken():
98
                print 'MATCH: %s' % (self.__i.getNextToken(), )
99
                self.__s.pop()
100
                self.__i.match()
101
 
102
            elif self.__s.peek() in self.nonterminals() and self.getParsingTableEntry(self.__s.peek(), self.__i.getNextToken()) != '':
103
 
104
                    nt = self.__s.pop()
105
                    t = self.__i.getNextToken()
106
                    pte = self.getParsingTableEntry(nt, t)
107
 
108
                    self.print_production_rule(pte-1)
109
 
110
                    # Basically pop the stack, and push back the correct stuff
111
                    # in reverse order
112
                    stack_additions = list(self.GRAMMAR_SPEC[pte-1][1:])
113
                    stack_additions.reverse()
114
 
115
                    for a in stack_additions:
116
                        self.__s.push(a)
117
 
118
            else:
119
                print 'An error happened'
120
            # endif
121
        # wend
122
 
123
        if self.__s.peek() == self.END_SYMBOL and self.__i.getNextToken() == self.END_SYMBOL:
124
            print 'ACCEPT'
125
        else:
126
            print 'ERROR'
127
 
199 ira 128
class InputParser:
129
    """Implements a simple tokenizer for the TOY language"""
130
 
201 ira 131
    def __init__(self):
132
        self.__inputstr = []
133
 
134
    def setInput(self, inputstr):
200 ira 135
        self.__inputstr = inputstr.split()
201 ira 136
        self.__inputstr.append('$') # Append '$' to the input
199 ira 137
 
201 ira 138
    def getInput(self):
139
        """Return a list representation of whatever is remaining of the input"""
140
 
141
        return self.__inputstr[:]
142
 
199 ira 143
    def getNextToken(self):
144
        """Gets the first token off of the front of the input string, but
145
        DOES NOT remove any input."""
200 ira 146
 
147
        try:
201 ira 148
            return self.__inputstr[0]
200 ira 149
        except IndexError:
150
            print 'CAUGHT IndexError in getNextToken()'
151
            return ''
199 ira 152
 
153
    def match(self):
154
        """A match has happened, so remove the front of the input string."""
200 ira 155
 
156
        try:
157
            del self.__inputstr[0]
158
        except IndexError:
159
            print 'CAUGHT IndexError in match()'
199 ira 160
 
161
class Stack:
162
    """Implements a simple stack using a list"""
163
 
164
    def __init__(self):
165
        self.__data = []
166
 
201 ira 167
    def clear(self):
168
        """Empty the stack"""
169
        self.__data = []
170
 
199 ira 171
    def push(self, val):
172
        """Push a value onto the top of the stack"""
173
        self.__data.append(val)
174
 
175
    def pop(self):
176
        """Pop a value off the top of the stack.
177
        WARNING: throws IndexError if there is nothing left in the stack"""
178
        return self.__data.pop()
179
 
180
    def peek(self):
181
        """Peek at the top of the stack. Basically a pop() without removing the
182
        object from the top.
183
        WARNING: will throw an IndexError if there is nothing left in the stack."""
184
 
185
        temp = self.pop()
186
        self.push(temp)
187
        return temp
188
 
201 ira 189
    def getStack(self):
190
        """Get whatever is in the stack, from bottom to top, as a list"""
191
 
192
        return self.__data[:]
193
 
199 ira 194
################################################################################
195
################################################################################
196
################################################################################
197
################################################################################
198
################################################################################
199
 
200
def main():
201
    """The main function for this program"""
202
 
201 ira 203
    t = TOYParser()
204
    print t.getInput()
200 ira 205
 
199 ira 206
if __name__ == '__main__':
207
    main()
208