Subversion Repositories programming

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
151 ira 1
#!/usr/bin/env python
2
# Copyright: Ira W. Snyder
3
# Start Date: 2005-11-05
4
# End Date: 2005-11-05
5
# License: Public Domain
6
#
7
# Changelog Follows:
8
# - 2005-11-05
9
# - Ported over the necessary parts of the automaton class from Project #1.
10
#   This is now called DFA.
11
# - Added the functions read_dfa_table() and find_end_of_occurrence() to DFA.
12
# - Ported over the necessary parts of the nfa class from Project #2.
13
#   This is now called NFA.
14
# - Added the function from_pattern_to_dfa() to NFA.
15
# - Created the StringChecker class.
16
#
17
 
18
# Check for <Python-2.3 compatibility (boolean values)
19
try:
20
  True, False
21
except NameError:
22
  (True, False) = (1, 0)
23
 
24
import string, sys
25
 
26
class DFA:
27
    """Implements a Deterministic Finite-State Automaton"""
28
 
29
    def __init__(self):
30
        """Constructor"""
31
 
32
        self.clear()
33
        self.verbose = False
34
 
35
    def clear(self):
36
        """Clear all of the important private variables"""
37
 
38
        self.automaton_entered = False
39
        self.num_states = 0
40
        self.num_letters = 0
41
        self.final_states = []
42
        self.trans_function = []
43
 
44
    def check_state(self, s):
45
        """Checks the string representing a state to see if it's a valid state.
46
        Returns True if it is valid, False otherwise"""
47
 
48
        retVal = True
49
 
50
        try:
51
            state = int(s)
52
        except (TypeError, ValueError):
53
            retVal = False
54
            state = 99999999
55
 
56
        if state >= self.num_states:
57
            retVal = False
58
 
59
        return retVal
60
 
61
    def get_letter_num(self, s):
62
        """Get the number value representing the character s"""
63
 
64
        s = s.lower()
65
 
66
        for i in range(26):
67
            if s == string.letters[i]:
68
                return i
69
 
70
        return -1
71
 
72
    def next_state(self, cur_state, letter):
73
        """Return the next state (as an integer) given the current state, and
74
        the next letter of input"""
75
 
76
        letter_num = self.get_letter_num(letter)
77
 
78
        # There is a bad letter in the input
79
        if letter_num == -1 or letter_num >= self.num_letters:
80
            raise ValueError
81
 
82
        next_state = self.trans_function[cur_state][letter_num]
83
 
84
        if self.verbose:
85
            print 'state %d letter %s -> state %d' % (cur_state, letter, next_state)
86
 
87
        return next_state
88
 
89
    def find_end_of_occurrence(self, input_string):
90
        """Find and return the position of the end of the first occurrence of the
91
        pattern in input_string. Return -1 if there is no occurrence"""
92
 
93
        cur_state = 0 # initial state is always q0
94
 
95
        try:
96
            for i in range(len(input_string)):
97
                c = input_string[i]
98
                cur_state = self.next_state(cur_state, c)
99
 
100
                if self.is_final_state(cur_state):
101
                    return i
102
        except ValueError:
103
            print 'There was a bad letter in the input, stopping here!'
104
 
105
        return -1 # not found
106
 
107
    def is_final_state(self, s):
108
        """Returns True if s is a final state, False otherwise"""
109
 
110
        retVal = True
111
 
112
        try:
113
            self.final_states.index(s)
114
        except ValueError:
115
            retVal = False
116
 
117
        return retVal
118
 
119
    def read_dfa_table(self, dfa_table):
120
        """Take a dfa table, as it is represented by the NFA class,
121
        and set ourselves up as a DFA using that information."""
122
 
123
        self.clear()
124
 
125
        for entry in dfa_table:
126
            # Find the number of letters in the table
127
            self.num_letters = len(entry) - 3
128
 
129
            # Find the number of states in the table
130
            self.num_states += 1
131
 
132
            # Find all of the final states in the table
133
            if entry[0]:
134
                self.final_states.append(entry[1])
135
 
136
            # The transition function (as we need it) is stored from
137
            # position 3 to the end of every entry in the table
138
            self.trans_function.append(entry[3:])
139
 
140
        # We've fully entered the automaton now
141
        self.automaton_entered = True
142
 
143
class NFA:
144
    """Implements a Non-Deterministic Finite-State Automaton"""
145
 
146
    def __init__(self):
147
        """Constructor"""
148
        self.__clear()
149
        self.verbose = False
150
 
151
    def __clear(self):
152
        """Clear all instance variables of the NFA class"""
153
 
154
        self.trans_func = {}
155
        self.num_states = 0
156
        self.final_states = []
157
        self.initial_state = 0
158
        self.possible_letters = []
159
        self.dfa_table = []
160
 
161
    def __state_in_range(self, state):
162
        """Check if a given state is in the acceptable range"""
163
 
164
        # Check to make sure this can be converted to an int.
165
        # Return False if the conversion fails.
166
        try:
167
            temp = int(state)
168
        except (TypeError, ValueError):
169
            return False
170
 
171
        # Make sure this is a state in the range
172
        if temp >= 0 and temp < self.num_states:
173
            return True
174
 
175
        # We weren't in the correct range, so return False
176
        return False
177
 
178
    def __next_states(self, state, letter):
179
        """Return the next states for the key (state, letter)"""
180
 
181
        try:
182
            next = self.trans_func[(state, letter)]
183
        except KeyError:
184
            # Create a new empty list for this state if it doesn't exist yet
185
            next = self.trans_func[(state, letter)] = []
186
 
187
        # Make sure that we can go to ourselves by the empty letter
188
        if letter == '^' and state not in next:
189
            next.append(state)
190
 
191
        return next
192
 
193
    def __E(self, state, letter='^', storage=None, visited=None):
194
        """Calculate E(state) and return it. This handles circular E()
195
        calculations."""
196
 
197
        # Work around weird mutable default argument stuff
198
        if storage is None:
199
            storage = []
200
 
201
        # Work around weird mutable default argument stuff
202
        if visited is None:
203
            visited = []
204
 
205
        # Find all of the direct next states that we can get to by
206
        # the empty string, and append anything that is not already there
207
        for i in self.__next_states(state, letter):
208
            if i not in storage:
209
                storage.append(i)
210
 
211
        # Visit everything in storage that has not been visited already
212
        for i in storage:
213
            if i not in visited:
214
                visited.append(i)
215
                temp = self.__E(i, letter, storage, visited)
216
 
217
                # Avoid duplicating things in storage
218
                for j in temp:
219
                    if j not in storage:
220
                        storage.append(j)
221
 
222
        return storage
223
 
224
    def __unique_sort(self, li):
225
        """Make sure everything in a list is unique, and then sort it"""
226
 
227
        newlist = []
228
 
229
        for i in li:
230
            if i not in newlist:
231
                newlist.append(i)
232
 
233
        newlist.sort()
234
        return newlist
235
 
236
    def __is_final_state(self, state):
237
        """Check if a state is a final state."""
238
 
239
        if state in self.final_states:
240
            return True
241
 
242
        return False
243
 
244
 
245
    def __is_list_final(self, states):
246
        """Check if at least one state in the list "states" is final"""
247
 
248
        for s in states:
249
            if self.__is_final_state(s):
250
                return True
251
 
252
        return False
253
 
254
    def __get_temp_record(self, num_letters):
255
        """Create a record (for the DFA table) that has the proper number
256
        of slots"""
257
 
258
        blank = None
259
        return [blank for i in range(num_letters + 3)]
260
 
261
    def __get_possible_letters(self):
262
        """Create a list of all the possible letters for the NFA,
263
        and store it in possible_letters"""
264
 
265
        # Get the list of all letters
266
        possible_letters = []
267
 
268
        for (state, letter) in self.trans_func.keys():
269
            if letter not in possible_letters and letter != '^':
270
                possible_letters.append(letter)
271
 
272
        possible_letters = self.__unique_sort(possible_letters)
273
        self.possible_letters = possible_letters
274
 
275
    def __generate_dfa_table(self):
276
        """Generate a table for the DFA representation of the NFA entered earlier"""
277
 
278
        # Get all the possible letters
279
        self.__get_possible_letters()
280
 
281
        # Prime the dfa table with the first state
282
        self.make_basic_record(self.__E(0))
283
 
284
        # Loop until we resolve every letter in the table
285
        while self.find_unresolved_letter() != (-1, -1):
286
            (record, letter_pos) = self.find_unresolved_letter()
287
            self.resolve_letter(record, letter_pos)
288
 
289
    def resolve_letter(self, record, letter_pos):
290
        """Resolve a letter in the table, either adding a new entry if the
291
        required state doesn't already exist, or putting a link to
292
        an existing state"""
293
 
294
        # Get the NFA equivalent multi-state
295
        fromstates = self.dfa_table[record][2]
296
        tostates = []
297
 
298
        # Find all the states we can go to from our multi-state.
299
        # It is _EXTREMELY_ important that you call E(s) for every state that
300
        # you can get to, otherwise you miss certain situations.
301
        for from_s in fromstates:
302
            for next_s in self.__next_states(from_s, self.possible_letters[letter_pos-3]):
303
                tostates.extend(self.__E(next_s))
304
 
305
        tostates = self.__unique_sort(tostates)
306
 
307
        # Make the letter we are trying to resolve point to a record in the table.
308
        # make_basic_record() will return the correct record, even if it needs to
309
        # create a new one.
310
        self.dfa_table[record][letter_pos] = self.make_basic_record(tostates)
311
 
312
    def find_unresolved_letter(self):
313
        """Returns an index pair of an unresolved letter that exists in the dfa_table.
314
        If there are no more letters, return (-1, -1)"""
315
 
316
        for i in range(len(self.dfa_table)):
317
            for j in range(len(self.possible_letters)):
318
                if self.dfa_table[i][j+3] == None:
319
                    return (i, j+3)
320
 
321
        return (-1, -1)
322
 
323
    def make_basic_record(self, from_states):
324
        """Create a record, if necessary, for the states "from_states."
325
        If a corresponding state already exists, return it's index. A new state
326
        will have everything but the letters filled in."""
327
 
328
        if self.dfa_state_exists(from_states) == -1: #doesn't exist
329
            temp_record = self.__get_temp_record(len(self.possible_letters))
330
            recordnum = len(self.dfa_table)
331
 
332
            temp_record[1] = recordnum
333
            temp_record[2] = self.__unique_sort(from_states)
334
            temp_record[0] = self.__is_list_final(from_states)
335
 
336
            self.dfa_table.append(temp_record)
337
 
338
        # always return a re-call of the function. This will not fail, since a new
339
        # record is added above if a record does not already exist.
340
        return self.dfa_state_exists(from_states)
341
 
342
 
343
    def dfa_state_exists(self, from_states):
344
        """Find out if this state already exists in the dfa table.
345
        If it does exist, return it's dfa state name.
346
        If it does not exist, return -1"""
347
 
348
        sorted_states = self.__unique_sort(from_states)
349
 
350
        for i in range(len(self.dfa_table)):
351
            if self.dfa_table[i][2] == sorted_states:
352
                return self.dfa_table[i][1]
353
 
354
        return -1
355
 
356
    def from_pattern_to_dfa(self, possible_letters, pattern):
357
        """Convert a pattern to a DFA. This is done using an intermediate
358
        NFA representation of the form (all_letters)* pattern (all_letters)*"""
359
 
360
        self.__clear()
361
 
362
        # Find the number of states we'll need
363
        self.num_states = len(pattern) + 1
364
 
365
        # The last state is the only final state, always
366
        self.final_states = [self.num_states - 1]
367
 
368
        # Get all of the possible letters
369
        self.possible_letters = possible_letters
370
 
371
        # Create the initial state
372
        for l in self.possible_letters:
373
            if l not in self.__next_states(0, l):
374
                self.__next_states(0, l).append(0)
375
 
376
        # Create the final state
377
        for l in self.possible_letters:
378
            if l not in self.__next_states(self.num_states - 1, l):
379
                self.__next_states(self.num_states - 1, l).append(self.num_states - 1)
380
 
381
        # Create each state in between
382
        for s in range(len(pattern)):
383
            self.__next_states(s, pattern[s]).append(s+1)
384
 
385
        # Make sure that q0 is the initial state
386
        self.initial_state = 0
387
 
388
        # Generate and return the table
389
        self.__generate_dfa_table()
390
        return self.dfa_table
391
 
392
class StringChecker:
393
    """An object that will check a string against a simple pattern,
394
    and will find the first occurrence of the pattern."""
395
 
396
    def __init__(self):
397
        """Constructor"""
398
 
399
        self.__clear()
400
 
401
    def __clear(self):
402
        """Clear all of the private variables"""
403
 
404
        self.DFA = DFA()
405
        self.NFA = NFA()
406
        self.pattern = ''
407
        self.test_str = ''
408
        self.possible_letters = ''
409
 
410
    def __input_last_letter(self):
411
        """Get the last possible letter from the user"""
412
 
413
        done = False
414
 
415
        while not done:
416
            print 'Please input the last possible'
417
            s = raw_input('letter that will be in your input or pattern: ')
418
 
419
            if len(s) != 1 and s not in string.letters:
420
                print 'Incorrect input, try again'
421
                print
422
                continue
423
 
424
            done = True
425
            for i in range(len(string.letters)):
426
                if string.letters[i] == s:
427
                    self.possible_letters = string.letters[:i+1]
428
                    break
429
 
430
    def __input_pattern(self):
431
        """Get the pattern from the user"""
432
 
433
        done = False
434
 
435
        while not done:
436
            s = raw_input('Please input a pattern to find: ')
437
 
438
            if not self.__valid_string(s):
439
                continue
440
 
441
            done = True
442
            self.pattern = s
443
 
444
    def __input_test_str(self):
445
        """Get the string in which to find the pattern, from the user"""
446
 
447
        done = False
448
 
449
        while not done:
450
            s = raw_input('Please input the string to test against: ')
451
 
452
            if not self.__valid_string(s):
453
                continue
454
 
455
            done = True
456
            self.test_str = s
457
 
458
    def __valid_string(self, str):
459
        """Make sure all letters in str are within the valid range"""
460
 
461
        for c in str:
462
            if c not in self.possible_letters:
463
                print 'Bad letter (%s) in the string, try again' % (c, )
464
                print
465
                return False
466
 
467
        return True
468
 
469
    def __print_occurrence(self):
470
        """Find and print the (non)occurrence of the pattern in the test string"""
471
 
472
        end_occurrence = self.DFA.find_end_of_occurrence(self.test_str)
473
 
474
        if end_occurrence == -1:
475
            print 'There was no occurrence of "%s" in the string "%s"' % (
476
                    self.pattern, self.test_str)
477
        else:
478
            print 'There was an occurrence of "%s" in the string "%s",' % (
479
                    self.pattern, self.test_str)
480
            print 'which starts at position %s' % (
481
                    end_occurrence - len(self.pattern) + 1, )
482
 
483
    def main_menu(self):
484
        """Run the StringChecker, giving the user possible things to do"""
485
 
486
        done = False
487
        pattern_entered = False
488
 
489
        while not done:
490
            print 'Menu:'
491
            print '========================================'
492
            print '1. Enter a pattern'
493
            print '2. Test string'
494
            print '3. Quit'
495
            print
496
            s = raw_input('Choice >>> ')
497
            print
498
 
499
            if s == '1':
500
                self.__clear()
501
                self.__input_last_letter()
502
                self.__input_pattern()
503
                dfa_table = self.NFA.from_pattern_to_dfa(self.possible_letters, self.pattern)
504
                self.DFA.read_dfa_table(dfa_table)
505
                pattern_entered = True
506
                print
507
            elif s == '2':
508
                if pattern_entered:
509
                    self.__input_test_str()
510
                    self.__print_occurrence()
511
                else:
512
                    print 'Enter a pattern first'
513
                print
514
            elif s == '3':
515
                done = True
516
            else:
517
                print 'Bad Selection'
518
                print
519
 
520
 
521
if __name__ == '__main__':
522
    checker = StringChecker()
523
    checker.main_menu()
524