151 |
ira |
1 |
#!/usr/bin/env python
|
|
|
2 |
# Copyright: Ira W. Snyder
|
|
|
3 |
# Start Date: 2005-11-05
|
|
|
4 |
# End Date: 2005-11-05
|
|
|
5 |
# License: Public Domain
|
|
|
6 |
#
|
|
|
7 |
# Changelog Follows:
|
|
|
8 |
# - 2005-11-05
|
|
|
9 |
# - Ported over the necessary parts of the automaton class from Project #1.
|
|
|
10 |
# This is now called DFA.
|
|
|
11 |
# - Added the functions read_dfa_table() and find_end_of_occurrence() to DFA.
|
|
|
12 |
# - Ported over the necessary parts of the nfa class from Project #2.
|
|
|
13 |
# This is now called NFA.
|
|
|
14 |
# - Added the function from_pattern_to_dfa() to NFA.
|
|
|
15 |
# - Created the StringChecker class.
|
|
|
16 |
#
|
|
|
17 |
|
|
|
18 |
# Check for <Python-2.3 compatibility (boolean values)
|
|
|
19 |
try:
|
|
|
20 |
True, False
|
|
|
21 |
except NameError:
|
|
|
22 |
(True, False) = (1, 0)
|
|
|
23 |
|
|
|
24 |
import string, sys
|
|
|
25 |
|
|
|
26 |
class DFA:
|
|
|
27 |
"""Implements a Deterministic Finite-State Automaton"""
|
|
|
28 |
|
|
|
29 |
def __init__(self):
|
|
|
30 |
"""Constructor"""
|
|
|
31 |
|
|
|
32 |
self.clear()
|
|
|
33 |
self.verbose = False
|
|
|
34 |
|
|
|
35 |
def clear(self):
|
|
|
36 |
"""Clear all of the important private variables"""
|
|
|
37 |
|
|
|
38 |
self.automaton_entered = False
|
|
|
39 |
self.num_states = 0
|
|
|
40 |
self.num_letters = 0
|
|
|
41 |
self.final_states = []
|
|
|
42 |
self.trans_function = []
|
|
|
43 |
|
|
|
44 |
def check_state(self, s):
|
|
|
45 |
"""Checks the string representing a state to see if it's a valid state.
|
|
|
46 |
Returns True if it is valid, False otherwise"""
|
|
|
47 |
|
|
|
48 |
retVal = True
|
|
|
49 |
|
|
|
50 |
try:
|
|
|
51 |
state = int(s)
|
|
|
52 |
except (TypeError, ValueError):
|
|
|
53 |
retVal = False
|
|
|
54 |
state = 99999999
|
|
|
55 |
|
|
|
56 |
if state >= self.num_states:
|
|
|
57 |
retVal = False
|
|
|
58 |
|
|
|
59 |
return retVal
|
|
|
60 |
|
|
|
61 |
def get_letter_num(self, s):
|
|
|
62 |
"""Get the number value representing the character s"""
|
|
|
63 |
|
|
|
64 |
s = s.lower()
|
|
|
65 |
|
|
|
66 |
for i in range(26):
|
|
|
67 |
if s == string.letters[i]:
|
|
|
68 |
return i
|
|
|
69 |
|
|
|
70 |
return -1
|
|
|
71 |
|
|
|
72 |
def next_state(self, cur_state, letter):
|
|
|
73 |
"""Return the next state (as an integer) given the current state, and
|
|
|
74 |
the next letter of input"""
|
|
|
75 |
|
|
|
76 |
letter_num = self.get_letter_num(letter)
|
|
|
77 |
|
|
|
78 |
# There is a bad letter in the input
|
|
|
79 |
if letter_num == -1 or letter_num >= self.num_letters:
|
|
|
80 |
raise ValueError
|
|
|
81 |
|
|
|
82 |
next_state = self.trans_function[cur_state][letter_num]
|
|
|
83 |
|
|
|
84 |
if self.verbose:
|
|
|
85 |
print 'state %d letter %s -> state %d' % (cur_state, letter, next_state)
|
|
|
86 |
|
|
|
87 |
return next_state
|
|
|
88 |
|
|
|
89 |
def find_end_of_occurrence(self, input_string):
|
|
|
90 |
"""Find and return the position of the end of the first occurrence of the
|
|
|
91 |
pattern in input_string. Return -1 if there is no occurrence"""
|
|
|
92 |
|
|
|
93 |
cur_state = 0 # initial state is always q0
|
|
|
94 |
|
|
|
95 |
try:
|
|
|
96 |
for i in range(len(input_string)):
|
|
|
97 |
c = input_string[i]
|
|
|
98 |
cur_state = self.next_state(cur_state, c)
|
|
|
99 |
|
|
|
100 |
if self.is_final_state(cur_state):
|
|
|
101 |
return i
|
|
|
102 |
except ValueError:
|
|
|
103 |
print 'There was a bad letter in the input, stopping here!'
|
|
|
104 |
|
|
|
105 |
return -1 # not found
|
|
|
106 |
|
|
|
107 |
def is_final_state(self, s):
|
|
|
108 |
"""Returns True if s is a final state, False otherwise"""
|
|
|
109 |
|
|
|
110 |
retVal = True
|
|
|
111 |
|
|
|
112 |
try:
|
|
|
113 |
self.final_states.index(s)
|
|
|
114 |
except ValueError:
|
|
|
115 |
retVal = False
|
|
|
116 |
|
|
|
117 |
return retVal
|
|
|
118 |
|
|
|
119 |
def read_dfa_table(self, dfa_table):
|
|
|
120 |
"""Take a dfa table, as it is represented by the NFA class,
|
|
|
121 |
and set ourselves up as a DFA using that information."""
|
|
|
122 |
|
|
|
123 |
self.clear()
|
|
|
124 |
|
|
|
125 |
for entry in dfa_table:
|
|
|
126 |
# Find the number of letters in the table
|
|
|
127 |
self.num_letters = len(entry) - 3
|
|
|
128 |
|
|
|
129 |
# Find the number of states in the table
|
|
|
130 |
self.num_states += 1
|
|
|
131 |
|
|
|
132 |
# Find all of the final states in the table
|
|
|
133 |
if entry[0]:
|
|
|
134 |
self.final_states.append(entry[1])
|
|
|
135 |
|
|
|
136 |
# The transition function (as we need it) is stored from
|
|
|
137 |
# position 3 to the end of every entry in the table
|
|
|
138 |
self.trans_function.append(entry[3:])
|
|
|
139 |
|
|
|
140 |
# We've fully entered the automaton now
|
|
|
141 |
self.automaton_entered = True
|
|
|
142 |
|
|
|
143 |
class NFA:
|
|
|
144 |
"""Implements a Non-Deterministic Finite-State Automaton"""
|
|
|
145 |
|
|
|
146 |
def __init__(self):
|
|
|
147 |
"""Constructor"""
|
|
|
148 |
self.__clear()
|
|
|
149 |
self.verbose = False
|
|
|
150 |
|
|
|
151 |
def __clear(self):
|
|
|
152 |
"""Clear all instance variables of the NFA class"""
|
|
|
153 |
|
|
|
154 |
self.trans_func = {}
|
|
|
155 |
self.num_states = 0
|
|
|
156 |
self.final_states = []
|
|
|
157 |
self.initial_state = 0
|
|
|
158 |
self.possible_letters = []
|
|
|
159 |
self.dfa_table = []
|
|
|
160 |
|
|
|
161 |
def __state_in_range(self, state):
|
|
|
162 |
"""Check if a given state is in the acceptable range"""
|
|
|
163 |
|
|
|
164 |
# Check to make sure this can be converted to an int.
|
|
|
165 |
# Return False if the conversion fails.
|
|
|
166 |
try:
|
|
|
167 |
temp = int(state)
|
|
|
168 |
except (TypeError, ValueError):
|
|
|
169 |
return False
|
|
|
170 |
|
|
|
171 |
# Make sure this is a state in the range
|
|
|
172 |
if temp >= 0 and temp < self.num_states:
|
|
|
173 |
return True
|
|
|
174 |
|
|
|
175 |
# We weren't in the correct range, so return False
|
|
|
176 |
return False
|
|
|
177 |
|
|
|
178 |
def __next_states(self, state, letter):
|
|
|
179 |
"""Return the next states for the key (state, letter)"""
|
|
|
180 |
|
|
|
181 |
try:
|
|
|
182 |
next = self.trans_func[(state, letter)]
|
|
|
183 |
except KeyError:
|
|
|
184 |
# Create a new empty list for this state if it doesn't exist yet
|
|
|
185 |
next = self.trans_func[(state, letter)] = []
|
|
|
186 |
|
|
|
187 |
# Make sure that we can go to ourselves by the empty letter
|
|
|
188 |
if letter == '^' and state not in next:
|
|
|
189 |
next.append(state)
|
|
|
190 |
|
|
|
191 |
return next
|
|
|
192 |
|
|
|
193 |
def __E(self, state, letter='^', storage=None, visited=None):
|
|
|
194 |
"""Calculate E(state) and return it. This handles circular E()
|
|
|
195 |
calculations."""
|
|
|
196 |
|
|
|
197 |
# Work around weird mutable default argument stuff
|
|
|
198 |
if storage is None:
|
|
|
199 |
storage = []
|
|
|
200 |
|
|
|
201 |
# Work around weird mutable default argument stuff
|
|
|
202 |
if visited is None:
|
|
|
203 |
visited = []
|
|
|
204 |
|
|
|
205 |
# Find all of the direct next states that we can get to by
|
|
|
206 |
# the empty string, and append anything that is not already there
|
|
|
207 |
for i in self.__next_states(state, letter):
|
|
|
208 |
if i not in storage:
|
|
|
209 |
storage.append(i)
|
|
|
210 |
|
|
|
211 |
# Visit everything in storage that has not been visited already
|
|
|
212 |
for i in storage:
|
|
|
213 |
if i not in visited:
|
|
|
214 |
visited.append(i)
|
|
|
215 |
temp = self.__E(i, letter, storage, visited)
|
|
|
216 |
|
|
|
217 |
# Avoid duplicating things in storage
|
|
|
218 |
for j in temp:
|
|
|
219 |
if j not in storage:
|
|
|
220 |
storage.append(j)
|
|
|
221 |
|
|
|
222 |
return storage
|
|
|
223 |
|
|
|
224 |
def __unique_sort(self, li):
|
|
|
225 |
"""Make sure everything in a list is unique, and then sort it"""
|
|
|
226 |
|
|
|
227 |
newlist = []
|
|
|
228 |
|
|
|
229 |
for i in li:
|
|
|
230 |
if i not in newlist:
|
|
|
231 |
newlist.append(i)
|
|
|
232 |
|
|
|
233 |
newlist.sort()
|
|
|
234 |
return newlist
|
|
|
235 |
|
|
|
236 |
def __is_final_state(self, state):
|
|
|
237 |
"""Check if a state is a final state."""
|
|
|
238 |
|
|
|
239 |
if state in self.final_states:
|
|
|
240 |
return True
|
|
|
241 |
|
|
|
242 |
return False
|
|
|
243 |
|
|
|
244 |
|
|
|
245 |
def __is_list_final(self, states):
|
|
|
246 |
"""Check if at least one state in the list "states" is final"""
|
|
|
247 |
|
|
|
248 |
for s in states:
|
|
|
249 |
if self.__is_final_state(s):
|
|
|
250 |
return True
|
|
|
251 |
|
|
|
252 |
return False
|
|
|
253 |
|
|
|
254 |
def __get_temp_record(self, num_letters):
|
|
|
255 |
"""Create a record (for the DFA table) that has the proper number
|
|
|
256 |
of slots"""
|
|
|
257 |
|
|
|
258 |
blank = None
|
|
|
259 |
return [blank for i in range(num_letters + 3)]
|
|
|
260 |
|
|
|
261 |
def __get_possible_letters(self):
|
|
|
262 |
"""Create a list of all the possible letters for the NFA,
|
|
|
263 |
and store it in possible_letters"""
|
|
|
264 |
|
|
|
265 |
# Get the list of all letters
|
|
|
266 |
possible_letters = []
|
|
|
267 |
|
|
|
268 |
for (state, letter) in self.trans_func.keys():
|
|
|
269 |
if letter not in possible_letters and letter != '^':
|
|
|
270 |
possible_letters.append(letter)
|
|
|
271 |
|
|
|
272 |
possible_letters = self.__unique_sort(possible_letters)
|
|
|
273 |
self.possible_letters = possible_letters
|
|
|
274 |
|
|
|
275 |
def __generate_dfa_table(self):
|
|
|
276 |
"""Generate a table for the DFA representation of the NFA entered earlier"""
|
|
|
277 |
|
|
|
278 |
# Get all the possible letters
|
|
|
279 |
self.__get_possible_letters()
|
|
|
280 |
|
|
|
281 |
# Prime the dfa table with the first state
|
|
|
282 |
self.make_basic_record(self.__E(0))
|
|
|
283 |
|
|
|
284 |
# Loop until we resolve every letter in the table
|
|
|
285 |
while self.find_unresolved_letter() != (-1, -1):
|
|
|
286 |
(record, letter_pos) = self.find_unresolved_letter()
|
|
|
287 |
self.resolve_letter(record, letter_pos)
|
|
|
288 |
|
|
|
289 |
def resolve_letter(self, record, letter_pos):
|
|
|
290 |
"""Resolve a letter in the table, either adding a new entry if the
|
|
|
291 |
required state doesn't already exist, or putting a link to
|
|
|
292 |
an existing state"""
|
|
|
293 |
|
|
|
294 |
# Get the NFA equivalent multi-state
|
|
|
295 |
fromstates = self.dfa_table[record][2]
|
|
|
296 |
tostates = []
|
|
|
297 |
|
|
|
298 |
# Find all the states we can go to from our multi-state.
|
|
|
299 |
# It is _EXTREMELY_ important that you call E(s) for every state that
|
|
|
300 |
# you can get to, otherwise you miss certain situations.
|
|
|
301 |
for from_s in fromstates:
|
|
|
302 |
for next_s in self.__next_states(from_s, self.possible_letters[letter_pos-3]):
|
|
|
303 |
tostates.extend(self.__E(next_s))
|
|
|
304 |
|
|
|
305 |
tostates = self.__unique_sort(tostates)
|
|
|
306 |
|
|
|
307 |
# Make the letter we are trying to resolve point to a record in the table.
|
|
|
308 |
# make_basic_record() will return the correct record, even if it needs to
|
|
|
309 |
# create a new one.
|
|
|
310 |
self.dfa_table[record][letter_pos] = self.make_basic_record(tostates)
|
|
|
311 |
|
|
|
312 |
def find_unresolved_letter(self):
|
|
|
313 |
"""Returns an index pair of an unresolved letter that exists in the dfa_table.
|
|
|
314 |
If there are no more letters, return (-1, -1)"""
|
|
|
315 |
|
|
|
316 |
for i in range(len(self.dfa_table)):
|
|
|
317 |
for j in range(len(self.possible_letters)):
|
|
|
318 |
if self.dfa_table[i][j+3] == None:
|
|
|
319 |
return (i, j+3)
|
|
|
320 |
|
|
|
321 |
return (-1, -1)
|
|
|
322 |
|
|
|
323 |
def make_basic_record(self, from_states):
|
|
|
324 |
"""Create a record, if necessary, for the states "from_states."
|
|
|
325 |
If a corresponding state already exists, return it's index. A new state
|
|
|
326 |
will have everything but the letters filled in."""
|
|
|
327 |
|
|
|
328 |
if self.dfa_state_exists(from_states) == -1: #doesn't exist
|
|
|
329 |
temp_record = self.__get_temp_record(len(self.possible_letters))
|
|
|
330 |
recordnum = len(self.dfa_table)
|
|
|
331 |
|
|
|
332 |
temp_record[1] = recordnum
|
|
|
333 |
temp_record[2] = self.__unique_sort(from_states)
|
|
|
334 |
temp_record[0] = self.__is_list_final(from_states)
|
|
|
335 |
|
|
|
336 |
self.dfa_table.append(temp_record)
|
|
|
337 |
|
|
|
338 |
# always return a re-call of the function. This will not fail, since a new
|
|
|
339 |
# record is added above if a record does not already exist.
|
|
|
340 |
return self.dfa_state_exists(from_states)
|
|
|
341 |
|
|
|
342 |
|
|
|
343 |
def dfa_state_exists(self, from_states):
|
|
|
344 |
"""Find out if this state already exists in the dfa table.
|
|
|
345 |
If it does exist, return it's dfa state name.
|
|
|
346 |
If it does not exist, return -1"""
|
|
|
347 |
|
|
|
348 |
sorted_states = self.__unique_sort(from_states)
|
|
|
349 |
|
|
|
350 |
for i in range(len(self.dfa_table)):
|
|
|
351 |
if self.dfa_table[i][2] == sorted_states:
|
|
|
352 |
return self.dfa_table[i][1]
|
|
|
353 |
|
|
|
354 |
return -1
|
|
|
355 |
|
|
|
356 |
def from_pattern_to_dfa(self, possible_letters, pattern):
|
|
|
357 |
"""Convert a pattern to a DFA. This is done using an intermediate
|
|
|
358 |
NFA representation of the form (all_letters)* pattern (all_letters)*"""
|
|
|
359 |
|
|
|
360 |
self.__clear()
|
|
|
361 |
|
|
|
362 |
# Find the number of states we'll need
|
|
|
363 |
self.num_states = len(pattern) + 1
|
|
|
364 |
|
|
|
365 |
# The last state is the only final state, always
|
|
|
366 |
self.final_states = [self.num_states - 1]
|
|
|
367 |
|
|
|
368 |
# Get all of the possible letters
|
|
|
369 |
self.possible_letters = possible_letters
|
|
|
370 |
|
|
|
371 |
# Create the initial state
|
|
|
372 |
for l in self.possible_letters:
|
|
|
373 |
if l not in self.__next_states(0, l):
|
|
|
374 |
self.__next_states(0, l).append(0)
|
|
|
375 |
|
|
|
376 |
# Create the final state
|
|
|
377 |
for l in self.possible_letters:
|
|
|
378 |
if l not in self.__next_states(self.num_states - 1, l):
|
|
|
379 |
self.__next_states(self.num_states - 1, l).append(self.num_states - 1)
|
|
|
380 |
|
|
|
381 |
# Create each state in between
|
|
|
382 |
for s in range(len(pattern)):
|
|
|
383 |
self.__next_states(s, pattern[s]).append(s+1)
|
|
|
384 |
|
|
|
385 |
# Make sure that q0 is the initial state
|
|
|
386 |
self.initial_state = 0
|
|
|
387 |
|
|
|
388 |
# Generate and return the table
|
|
|
389 |
self.__generate_dfa_table()
|
|
|
390 |
return self.dfa_table
|
|
|
391 |
|
|
|
392 |
class StringChecker:
|
|
|
393 |
"""An object that will check a string against a simple pattern,
|
|
|
394 |
and will find the first occurrence of the pattern."""
|
|
|
395 |
|
|
|
396 |
def __init__(self):
|
|
|
397 |
"""Constructor"""
|
|
|
398 |
|
|
|
399 |
self.__clear()
|
|
|
400 |
|
|
|
401 |
def __clear(self):
|
|
|
402 |
"""Clear all of the private variables"""
|
|
|
403 |
|
|
|
404 |
self.DFA = DFA()
|
|
|
405 |
self.NFA = NFA()
|
|
|
406 |
self.pattern = ''
|
|
|
407 |
self.test_str = ''
|
|
|
408 |
self.possible_letters = ''
|
|
|
409 |
|
|
|
410 |
def __input_last_letter(self):
|
|
|
411 |
"""Get the last possible letter from the user"""
|
|
|
412 |
|
|
|
413 |
done = False
|
|
|
414 |
|
|
|
415 |
while not done:
|
|
|
416 |
print 'Please input the last possible'
|
|
|
417 |
s = raw_input('letter that will be in your input or pattern: ')
|
|
|
418 |
|
|
|
419 |
if len(s) != 1 and s not in string.letters:
|
|
|
420 |
print 'Incorrect input, try again'
|
|
|
421 |
print
|
|
|
422 |
continue
|
|
|
423 |
|
|
|
424 |
done = True
|
|
|
425 |
for i in range(len(string.letters)):
|
|
|
426 |
if string.letters[i] == s:
|
|
|
427 |
self.possible_letters = string.letters[:i+1]
|
|
|
428 |
break
|
|
|
429 |
|
|
|
430 |
def __input_pattern(self):
|
|
|
431 |
"""Get the pattern from the user"""
|
|
|
432 |
|
|
|
433 |
done = False
|
|
|
434 |
|
|
|
435 |
while not done:
|
|
|
436 |
s = raw_input('Please input a pattern to find: ')
|
|
|
437 |
|
|
|
438 |
if not self.__valid_string(s):
|
|
|
439 |
continue
|
|
|
440 |
|
|
|
441 |
done = True
|
|
|
442 |
self.pattern = s
|
|
|
443 |
|
|
|
444 |
def __input_test_str(self):
|
|
|
445 |
"""Get the string in which to find the pattern, from the user"""
|
|
|
446 |
|
|
|
447 |
done = False
|
|
|
448 |
|
|
|
449 |
while not done:
|
|
|
450 |
s = raw_input('Please input the string to test against: ')
|
|
|
451 |
|
|
|
452 |
if not self.__valid_string(s):
|
|
|
453 |
continue
|
|
|
454 |
|
|
|
455 |
done = True
|
|
|
456 |
self.test_str = s
|
|
|
457 |
|
|
|
458 |
def __valid_string(self, str):
|
|
|
459 |
"""Make sure all letters in str are within the valid range"""
|
|
|
460 |
|
|
|
461 |
for c in str:
|
|
|
462 |
if c not in self.possible_letters:
|
|
|
463 |
print 'Bad letter (%s) in the string, try again' % (c, )
|
|
|
464 |
print
|
|
|
465 |
return False
|
|
|
466 |
|
|
|
467 |
return True
|
|
|
468 |
|
|
|
469 |
def __print_occurrence(self):
|
|
|
470 |
"""Find and print the (non)occurrence of the pattern in the test string"""
|
|
|
471 |
|
|
|
472 |
end_occurrence = self.DFA.find_end_of_occurrence(self.test_str)
|
|
|
473 |
|
|
|
474 |
if end_occurrence == -1:
|
|
|
475 |
print 'There was no occurrence of "%s" in the string "%s"' % (
|
|
|
476 |
self.pattern, self.test_str)
|
|
|
477 |
else:
|
|
|
478 |
print 'There was an occurrence of "%s" in the string "%s",' % (
|
|
|
479 |
self.pattern, self.test_str)
|
|
|
480 |
print 'which starts at position %s' % (
|
|
|
481 |
end_occurrence - len(self.pattern) + 1, )
|
|
|
482 |
|
|
|
483 |
def main_menu(self):
|
|
|
484 |
"""Run the StringChecker, giving the user possible things to do"""
|
|
|
485 |
|
|
|
486 |
done = False
|
|
|
487 |
pattern_entered = False
|
|
|
488 |
|
|
|
489 |
while not done:
|
|
|
490 |
print 'Menu:'
|
|
|
491 |
print '========================================'
|
|
|
492 |
print '1. Enter a pattern'
|
|
|
493 |
print '2. Test string'
|
|
|
494 |
print '3. Quit'
|
|
|
495 |
print
|
|
|
496 |
s = raw_input('Choice >>> ')
|
|
|
497 |
print
|
|
|
498 |
|
|
|
499 |
if s == '1':
|
|
|
500 |
self.__clear()
|
|
|
501 |
self.__input_last_letter()
|
|
|
502 |
self.__input_pattern()
|
|
|
503 |
dfa_table = self.NFA.from_pattern_to_dfa(self.possible_letters, self.pattern)
|
|
|
504 |
self.DFA.read_dfa_table(dfa_table)
|
|
|
505 |
pattern_entered = True
|
|
|
506 |
print
|
|
|
507 |
elif s == '2':
|
|
|
508 |
if pattern_entered:
|
|
|
509 |
self.__input_test_str()
|
|
|
510 |
self.__print_occurrence()
|
|
|
511 |
else:
|
|
|
512 |
print 'Enter a pattern first'
|
|
|
513 |
print
|
|
|
514 |
elif s == '3':
|
|
|
515 |
done = True
|
|
|
516 |
else:
|
|
|
517 |
print 'Bad Selection'
|
|
|
518 |
print
|
|
|
519 |
|
|
|
520 |
|
|
|
521 |
if __name__ == '__main__':
|
|
|
522 |
checker = StringChecker()
|
|
|
523 |
checker.main_menu()
|
|
|
524 |
|