Blame | Last modification | View Log | RSS feed
#!/usr/bin/env python
# Copyright: Ira W. Snyder
# Start Date: 2005-11-05
# End Date: 2005-11-05
# License: Public Domain
#
# Changelog Follows:
# - 2005-11-05
# - Ported over the necessary parts of the automaton class from Project #1.
# This is now called DFA.
# - Added the functions read_dfa_table() and find_end_of_occurrence() to DFA.
# - Ported over the necessary parts of the nfa class from Project #2.
# This is now called NFA.
# - Added the function from_pattern_to_dfa() to NFA.
# - Created the StringChecker class.
#
# Check for <Python-2.3 compatibility (boolean values)
try:
True, False
except NameError:
(True, False) = (1, 0)
import string, sys
class DFA:
"""Implements a Deterministic Finite-State Automaton"""
def __init__(self):
"""Constructor"""
self.clear()
self.verbose = False
def clear(self):
"""Clear all of the important private variables"""
self.automaton_entered = False
self.num_states = 0
self.num_letters = 0
self.final_states = []
self.trans_function = []
def check_state(self, s):
"""Checks the string representing a state to see if it's a valid state.
Returns True if it is valid, False otherwise"""
retVal = True
try:
state = int(s)
except (TypeError, ValueError):
retVal = False
state = 99999999
if state >= self.num_states:
retVal = False
return retVal
def get_letter_num(self, s):
"""Get the number value representing the character s"""
s = s.lower()
for i in range(26):
if s == string.letters[i]:
return i
return -1
def next_state(self, cur_state, letter):
"""Return the next state (as an integer) given the current state, and
the next letter of input"""
letter_num = self.get_letter_num(letter)
# There is a bad letter in the input
if letter_num == -1 or letter_num >= self.num_letters:
raise ValueError
next_state = self.trans_function[cur_state][letter_num]
if self.verbose:
print 'state %d letter %s -> state %d' % (cur_state, letter, next_state)
return next_state
def find_end_of_occurrence(self, input_string):
"""Find and return the position of the end of the first occurrence of the
pattern in input_string. Return -1 if there is no occurrence"""
cur_state = 0 # initial state is always q0
try:
for i in range(len(input_string)):
c = input_string[i]
cur_state = self.next_state(cur_state, c)
if self.is_final_state(cur_state):
return i
except ValueError:
print 'There was a bad letter in the input, stopping here!'
return -1 # not found
def is_final_state(self, s):
"""Returns True if s is a final state, False otherwise"""
retVal = True
try:
self.final_states.index(s)
except ValueError:
retVal = False
return retVal
def read_dfa_table(self, dfa_table):
"""Take a dfa table, as it is represented by the NFA class,
and set ourselves up as a DFA using that information."""
self.clear()
for entry in dfa_table:
# Find the number of letters in the table
self.num_letters = len(entry) - 3
# Find the number of states in the table
self.num_states += 1
# Find all of the final states in the table
if entry[0]:
self.final_states.append(entry[1])
# The transition function (as we need it) is stored from
# position 3 to the end of every entry in the table
self.trans_function.append(entry[3:])
# We've fully entered the automaton now
self.automaton_entered = True
class NFA:
"""Implements a Non-Deterministic Finite-State Automaton"""
def __init__(self):
"""Constructor"""
self.__clear()
self.verbose = False
def __clear(self):
"""Clear all instance variables of the NFA class"""
self.trans_func = {}
self.num_states = 0
self.final_states = []
self.initial_state = 0
self.possible_letters = []
self.dfa_table = []
def __state_in_range(self, state):
"""Check if a given state is in the acceptable range"""
# Check to make sure this can be converted to an int.
# Return False if the conversion fails.
try:
temp = int(state)
except (TypeError, ValueError):
return False
# Make sure this is a state in the range
if temp >= 0 and temp < self.num_states:
return True
# We weren't in the correct range, so return False
return False
def __next_states(self, state, letter):
"""Return the next states for the key (state, letter)"""
try:
next = self.trans_func[(state, letter)]
except KeyError:
# Create a new empty list for this state if it doesn't exist yet
next = self.trans_func[(state, letter)] = []
# Make sure that we can go to ourselves by the empty letter
if letter == '^' and state not in next:
next.append(state)
return next
def __E(self, state, letter='^', storage=None, visited=None):
"""Calculate E(state) and return it. This handles circular E()
calculations."""
# Work around weird mutable default argument stuff
if storage is None:
storage = []
# Work around weird mutable default argument stuff
if visited is None:
visited = []
# Find all of the direct next states that we can get to by
# the empty string, and append anything that is not already there
for i in self.__next_states(state, letter):
if i not in storage:
storage.append(i)
# Visit everything in storage that has not been visited already
for i in storage:
if i not in visited:
visited.append(i)
temp = self.__E(i, letter, storage, visited)
# Avoid duplicating things in storage
for j in temp:
if j not in storage:
storage.append(j)
return storage
def __unique_sort(self, li):
"""Make sure everything in a list is unique, and then sort it"""
newlist = []
for i in li:
if i not in newlist:
newlist.append(i)
newlist.sort()
return newlist
def __is_final_state(self, state):
"""Check if a state is a final state."""
if state in self.final_states:
return True
return False
def __is_list_final(self, states):
"""Check if at least one state in the list "states" is final"""
for s in states:
if self.__is_final_state(s):
return True
return False
def __get_temp_record(self, num_letters):
"""Create a record (for the DFA table) that has the proper number
of slots"""
blank = None
return [blank for i in range(num_letters + 3)]
def __get_possible_letters(self):
"""Create a list of all the possible letters for the NFA,
and store it in possible_letters"""
# Get the list of all letters
possible_letters = []
for (state, letter) in self.trans_func.keys():
if letter not in possible_letters and letter != '^':
possible_letters.append(letter)
possible_letters = self.__unique_sort(possible_letters)
self.possible_letters = possible_letters
def __generate_dfa_table(self):
"""Generate a table for the DFA representation of the NFA entered earlier"""
# Get all the possible letters
self.__get_possible_letters()
# Prime the dfa table with the first state
self.make_basic_record(self.__E(0))
# Loop until we resolve every letter in the table
while self.find_unresolved_letter() != (-1, -1):
(record, letter_pos) = self.find_unresolved_letter()
self.resolve_letter(record, letter_pos)
def resolve_letter(self, record, letter_pos):
"""Resolve a letter in the table, either adding a new entry if the
required state doesn't already exist, or putting a link to
an existing state"""
# Get the NFA equivalent multi-state
fromstates = self.dfa_table[record][2]
tostates = []
# Find all the states we can go to from our multi-state.
# It is _EXTREMELY_ important that you call E(s) for every state that
# you can get to, otherwise you miss certain situations.
for from_s in fromstates:
for next_s in self.__next_states(from_s, self.possible_letters[letter_pos-3]):
tostates.extend(self.__E(next_s))
tostates = self.__unique_sort(tostates)
# Make the letter we are trying to resolve point to a record in the table.
# make_basic_record() will return the correct record, even if it needs to
# create a new one.
self.dfa_table[record][letter_pos] = self.make_basic_record(tostates)
def find_unresolved_letter(self):
"""Returns an index pair of an unresolved letter that exists in the dfa_table.
If there are no more letters, return (-1, -1)"""
for i in range(len(self.dfa_table)):
for j in range(len(self.possible_letters)):
if self.dfa_table[i][j+3] == None:
return (i, j+3)
return (-1, -1)
def make_basic_record(self, from_states):
"""Create a record, if necessary, for the states "from_states."
If a corresponding state already exists, return it's index. A new state
will have everything but the letters filled in."""
if self.dfa_state_exists(from_states) == -1: #doesn't exist
temp_record = self.__get_temp_record(len(self.possible_letters))
recordnum = len(self.dfa_table)
temp_record[1] = recordnum
temp_record[2] = self.__unique_sort(from_states)
temp_record[0] = self.__is_list_final(from_states)
self.dfa_table.append(temp_record)
# always return a re-call of the function. This will not fail, since a new
# record is added above if a record does not already exist.
return self.dfa_state_exists(from_states)
def dfa_state_exists(self, from_states):
"""Find out if this state already exists in the dfa table.
If it does exist, return it's dfa state name.
If it does not exist, return -1"""
sorted_states = self.__unique_sort(from_states)
for i in range(len(self.dfa_table)):
if self.dfa_table[i][2] == sorted_states:
return self.dfa_table[i][1]
return -1
def from_pattern_to_dfa(self, possible_letters, pattern):
"""Convert a pattern to a DFA. This is done using an intermediate
NFA representation of the form (all_letters)* pattern (all_letters)*"""
self.__clear()
# Find the number of states we'll need
self.num_states = len(pattern) + 1
# The last state is the only final state, always
self.final_states = [self.num_states - 1]
# Get all of the possible letters
self.possible_letters = possible_letters
# Create the initial state
for l in self.possible_letters:
if l not in self.__next_states(0, l):
self.__next_states(0, l).append(0)
# Create the final state
for l in self.possible_letters:
if l not in self.__next_states(self.num_states - 1, l):
self.__next_states(self.num_states - 1, l).append(self.num_states - 1)
# Create each state in between
for s in range(len(pattern)):
self.__next_states(s, pattern[s]).append(s+1)
# Make sure that q0 is the initial state
self.initial_state = 0
# Generate and return the table
self.__generate_dfa_table()
return self.dfa_table
class StringChecker:
"""An object that will check a string against a simple pattern,
and will find the first occurrence of the pattern."""
def __init__(self):
"""Constructor"""
self.__clear()
def __clear(self):
"""Clear all of the private variables"""
self.DFA = DFA()
self.NFA = NFA()
self.pattern = ''
self.test_str = ''
self.possible_letters = ''
def __input_last_letter(self):
"""Get the last possible letter from the user"""
done = False
while not done:
print 'Please input the last possible'
s = raw_input('letter that will be in your input or pattern: ')
if len(s) != 1 and s not in string.letters:
print 'Incorrect input, try again'
continue
done = True
for i in range(len(string.letters)):
if string.letters[i] == s:
self.possible_letters = string.letters[:i+1]
break
def __input_pattern(self):
"""Get the pattern from the user"""
done = False
while not done:
s = raw_input('Please input a pattern to find: ')
if not self.__valid_string(s):
continue
done = True
self.pattern = s
def __input_test_str(self):
"""Get the string in which to find the pattern, from the user"""
done = False
while not done:
s = raw_input('Please input the string to test against: ')
if not self.__valid_string(s):
continue
done = True
self.test_str = s
def __valid_string(self, str):
"""Make sure all letters in str are within the valid range"""
for c in str:
if c not in self.possible_letters:
print 'Bad letter (%s) in the string, try again' % (c, )
return False
return True
def __print_occurrence(self):
"""Find and print the (non)occurrence of the pattern in the test string"""
end_occurrence = self.DFA.find_end_of_occurrence(self.test_str)
if end_occurrence == -1:
print 'There was no occurrence of "%s" in the string "%s"' % (
self.pattern, self.test_str)
else:
print 'There was an occurrence of "%s" in the string "%s",' % (
self.pattern, self.test_str)
print 'which starts at position %s' % (
end_occurrence - len(self.pattern) + 1, )
def main_menu(self):
"""Run the StringChecker, giving the user possible things to do"""
done = False
pattern_entered = False
while not done:
print 'Menu:'
print '========================================'
print '1. Enter a pattern'
print '2. Test string'
print '3. Quit'
s = raw_input('Choice >>> ')
if s == '1':
self.__clear()
self.__input_last_letter()
self.__input_pattern()
dfa_table = self.NFA.from_pattern_to_dfa(self.possible_letters, self.pattern)
self.DFA.read_dfa_table(dfa_table)
pattern_entered = True
elif s == '2':
if pattern_entered:
self.__input_test_str()
self.__print_occurrence()
else:
print 'Enter a pattern first'
elif s == '3':
done = True
else:
print 'Bad Selection'
if __name__ == '__main__':
checker = StringChecker()
checker.main_menu()