Subversion Repositories programming

Rev

Blame | Last modification | View Log | RSS feed

#!/usr/bin/env python
# Copyright: Ira W. Snyder
# Start Date: 2005-11-05
# End Date: 2005-11-05
# License: Public Domain
#
# Changelog Follows:
# - 2005-11-05
# - Ported over the necessary parts of the automaton class from Project #1.
#   This is now called DFA.
# - Added the functions read_dfa_table() and find_end_of_occurrence() to DFA.
# - Ported over the necessary parts of the nfa class from Project #2.
#   This is now called NFA.
# - Added the function from_pattern_to_dfa() to NFA.
# - Created the StringChecker class.
#

# Check for <Python-2.3 compatibility (boolean values)
try:
  True, False
except NameError:
  (True, False) = (1, 0)

import string, sys

class DFA:
    """Implements a Deterministic Finite-State Automaton"""

    def __init__(self):
        """Constructor"""

        self.clear()
        self.verbose = False

    def clear(self):
        """Clear all of the important private variables"""

        self.automaton_entered = False
        self.num_states = 0
        self.num_letters = 0
        self.final_states = []
        self.trans_function = []

    def check_state(self, s):
        """Checks the string representing a state to see if it's a valid state.
        Returns True if it is valid, False otherwise"""

        retVal = True

        try:
            state = int(s)
        except (TypeError, ValueError):
            retVal = False
            state = 99999999

        if state >= self.num_states:
            retVal = False

        return retVal

    def get_letter_num(self, s):
        """Get the number value representing the character s"""

        s = s.lower()

        for i in range(26):
            if s == string.letters[i]:
                return i

        return -1

    def next_state(self, cur_state, letter):
        """Return the next state (as an integer) given the current state, and
        the next letter of input"""

        letter_num = self.get_letter_num(letter)

        # There is a bad letter in the input
        if letter_num == -1 or letter_num >= self.num_letters:
            raise ValueError

        next_state = self.trans_function[cur_state][letter_num]

        if self.verbose:
            print 'state %d letter %s -> state %d' % (cur_state, letter, next_state)

        return next_state

    def find_end_of_occurrence(self, input_string):
        """Find and return the position of the end of the first occurrence of the
        pattern in input_string. Return -1 if there is no occurrence"""

        cur_state = 0 # initial state is always q0

        try:
            for i in range(len(input_string)):
                c = input_string[i]
                cur_state = self.next_state(cur_state, c)

                if self.is_final_state(cur_state):
                    return i
        except ValueError:
            print 'There was a bad letter in the input, stopping here!'

        return -1 # not found

    def is_final_state(self, s):
        """Returns True if s is a final state, False otherwise"""

        retVal = True

        try:
            self.final_states.index(s)
        except ValueError:
            retVal = False

        return retVal

    def read_dfa_table(self, dfa_table):
        """Take a dfa table, as it is represented by the NFA class,
        and set ourselves up as a DFA using that information."""

        self.clear()

        for entry in dfa_table:
            # Find the number of letters in the table
            self.num_letters = len(entry) - 3

            # Find the number of states in the table
            self.num_states += 1

            # Find all of the final states in the table
            if entry[0]:
                self.final_states.append(entry[1])

            # The transition function (as we need it) is stored from
            # position 3 to the end of every entry in the table
            self.trans_function.append(entry[3:])

        # We've fully entered the automaton now
        self.automaton_entered = True

class NFA:
    """Implements a Non-Deterministic Finite-State Automaton"""

    def __init__(self):
        """Constructor"""
        self.__clear()
        self.verbose = False

    def __clear(self):
        """Clear all instance variables of the NFA class"""

        self.trans_func = {}
        self.num_states = 0
        self.final_states = []
        self.initial_state = 0
        self.possible_letters = []
        self.dfa_table = []

    def __state_in_range(self, state):
        """Check if a given state is in the acceptable range"""

        # Check to make sure this can be converted to an int.
        # Return False if the conversion fails.
        try:
            temp = int(state)
        except (TypeError, ValueError):
            return False

        # Make sure this is a state in the range
        if temp >= 0 and temp < self.num_states:
            return True

        # We weren't in the correct range, so return False
        return False

    def __next_states(self, state, letter):
        """Return the next states for the key (state, letter)"""

        try:
            next = self.trans_func[(state, letter)]
        except KeyError:
            # Create a new empty list for this state if it doesn't exist yet
            next = self.trans_func[(state, letter)] = []

        # Make sure that we can go to ourselves by the empty letter
        if letter == '^' and state not in next:
            next.append(state)

        return next

    def __E(self, state, letter='^', storage=None, visited=None):
        """Calculate E(state) and return it. This handles circular E()
        calculations."""

        # Work around weird mutable default argument stuff
        if storage is None:
            storage = []

        # Work around weird mutable default argument stuff
        if visited is None:
            visited = []

        # Find all of the direct next states that we can get to by
        # the empty string, and append anything that is not already there
        for i in self.__next_states(state, letter):
            if i not in storage:
                storage.append(i)

        # Visit everything in storage that has not been visited already
        for i in storage:
            if i not in visited:
                visited.append(i)
                temp = self.__E(i, letter, storage, visited)

                # Avoid duplicating things in storage
                for j in temp:
                    if j not in storage:
                        storage.append(j)

        return storage

    def __unique_sort(self, li):
        """Make sure everything in a list is unique, and then sort it"""

        newlist = []

        for i in li:
            if i not in newlist:
                newlist.append(i)

        newlist.sort()
        return newlist

    def __is_final_state(self, state):
        """Check if a state is a final state."""

        if state in self.final_states:
            return True

        return False


    def __is_list_final(self, states):
        """Check if at least one state in the list "states" is final"""

        for s in states:
            if self.__is_final_state(s):
                return True

        return False

    def __get_temp_record(self, num_letters):
        """Create a record (for the DFA table) that has the proper number
        of slots"""

        blank = None
        return [blank for i in range(num_letters + 3)]

    def __get_possible_letters(self):
        """Create a list of all the possible letters for the NFA,
        and store it in possible_letters"""

        # Get the list of all letters
        possible_letters = []

        for (state, letter) in self.trans_func.keys():
            if letter not in possible_letters and letter != '^':
                possible_letters.append(letter)

        possible_letters = self.__unique_sort(possible_letters)
        self.possible_letters = possible_letters

    def __generate_dfa_table(self):
        """Generate a table for the DFA representation of the NFA entered earlier"""

        # Get all the possible letters
        self.__get_possible_letters()

        # Prime the dfa table with the first state
        self.make_basic_record(self.__E(0))

        # Loop until we resolve every letter in the table
        while self.find_unresolved_letter() != (-1, -1):
            (record, letter_pos) = self.find_unresolved_letter()
            self.resolve_letter(record, letter_pos)

    def resolve_letter(self, record, letter_pos):
        """Resolve a letter in the table, either adding a new entry if the
        required state doesn't already exist, or putting a link to
        an existing state"""

        # Get the NFA equivalent multi-state
        fromstates = self.dfa_table[record][2]
        tostates = []

        # Find all the states we can go to from our multi-state.
        # It is _EXTREMELY_ important that you call E(s) for every state that
        # you can get to, otherwise you miss certain situations.
        for from_s in fromstates:
            for next_s in self.__next_states(from_s, self.possible_letters[letter_pos-3]):
                tostates.extend(self.__E(next_s))

        tostates = self.__unique_sort(tostates)

        # Make the letter we are trying to resolve point to a record in the table.
        # make_basic_record() will return the correct record, even if it needs to
        # create a new one.
        self.dfa_table[record][letter_pos] = self.make_basic_record(tostates)

    def find_unresolved_letter(self):
        """Returns an index pair of an unresolved letter that exists in the dfa_table.
        If there are no more letters, return (-1, -1)"""

        for i in range(len(self.dfa_table)):
            for j in range(len(self.possible_letters)):
                if self.dfa_table[i][j+3] == None:
                    return (i, j+3)

        return (-1, -1)

    def make_basic_record(self, from_states):
        """Create a record, if necessary, for the states "from_states."
        If a corresponding state already exists, return it's index. A new state
        will have everything but the letters filled in."""

        if self.dfa_state_exists(from_states) == -1: #doesn't exist
            temp_record = self.__get_temp_record(len(self.possible_letters))
            recordnum = len(self.dfa_table)

            temp_record[1] = recordnum
            temp_record[2] = self.__unique_sort(from_states)
            temp_record[0] = self.__is_list_final(from_states)

            self.dfa_table.append(temp_record)

        # always return a re-call of the function. This will not fail, since a new
        # record is added above if a record does not already exist.
        return self.dfa_state_exists(from_states)


    def dfa_state_exists(self, from_states):
        """Find out if this state already exists in the dfa table.
        If it does exist, return it's dfa state name.
        If it does not exist, return -1"""

        sorted_states = self.__unique_sort(from_states)

        for i in range(len(self.dfa_table)):
            if self.dfa_table[i][2] == sorted_states:
                return self.dfa_table[i][1]

        return -1

    def from_pattern_to_dfa(self, possible_letters, pattern):
        """Convert a pattern to a DFA. This is done using an intermediate
        NFA representation of the form (all_letters)* pattern (all_letters)*"""

        self.__clear()

        # Find the number of states we'll need
        self.num_states = len(pattern) + 1

        # The last state is the only final state, always
        self.final_states = [self.num_states - 1]

        # Get all of the possible letters
        self.possible_letters = possible_letters

        # Create the initial state
        for l in self.possible_letters:
            if l not in self.__next_states(0, l):
                self.__next_states(0, l).append(0)

        # Create the final state
        for l in self.possible_letters:
            if l not in self.__next_states(self.num_states - 1, l):
                self.__next_states(self.num_states - 1, l).append(self.num_states - 1)

        # Create each state in between
        for s in range(len(pattern)):
            self.__next_states(s, pattern[s]).append(s+1)

        # Make sure that q0 is the initial state
        self.initial_state = 0

        # Generate and return the table
        self.__generate_dfa_table()
        return self.dfa_table

class StringChecker:
    """An object that will check a string against a simple pattern,
    and will find the first occurrence of the pattern."""

    def __init__(self):
        """Constructor"""

        self.__clear()

    def __clear(self):
        """Clear all of the private variables"""

        self.DFA = DFA()
        self.NFA = NFA()
        self.pattern = ''
        self.test_str = ''
        self.possible_letters = ''

    def __input_last_letter(self):
        """Get the last possible letter from the user"""

        done = False

        while not done:
            print 'Please input the last possible'
            s = raw_input('letter that will be in your input or pattern: ')

            if len(s) != 1 and s not in string.letters:
                print 'Incorrect input, try again'
                print
                continue

            done = True
            for i in range(len(string.letters)):
                if string.letters[i] == s:
                    self.possible_letters = string.letters[:i+1]
                    break

    def __input_pattern(self):
        """Get the pattern from the user"""

        done = False

        while not done:
            s = raw_input('Please input a pattern to find: ')

            if not self.__valid_string(s):
                continue

            done = True
            self.pattern = s

    def __input_test_str(self):
        """Get the string in which to find the pattern, from the user"""

        done = False

        while not done:
            s = raw_input('Please input the string to test against: ')

            if not self.__valid_string(s):
                continue

            done = True
            self.test_str = s

    def __valid_string(self, str):
        """Make sure all letters in str are within the valid range"""

        for c in str:
            if c not in self.possible_letters:
                print 'Bad letter (%s) in the string, try again' % (c, )
                print
                return False

        return True

    def __print_occurrence(self):
        """Find and print the (non)occurrence of the pattern in the test string"""

        end_occurrence = self.DFA.find_end_of_occurrence(self.test_str)

        if end_occurrence == -1:
            print 'There was no occurrence of "%s" in the string "%s"' % (
                    self.pattern, self.test_str)
        else:
            print 'There was an occurrence of "%s" in the string "%s",' % (
                    self.pattern, self.test_str)
            print 'which starts at position %s' % (
                    end_occurrence - len(self.pattern) + 1, )

    def main_menu(self):
        """Run the StringChecker, giving the user possible things to do"""

        done = False
        pattern_entered = False

        while not done:
            print 'Menu:'
            print '========================================'
            print '1. Enter a pattern'
            print '2. Test string'
            print '3. Quit'
            print
            s = raw_input('Choice >>> ')
            print

            if s == '1':
                self.__clear()
                self.__input_last_letter()
                self.__input_pattern()
                dfa_table = self.NFA.from_pattern_to_dfa(self.possible_letters, self.pattern)
                self.DFA.read_dfa_table(dfa_table)
                pattern_entered = True
                print
            elif s == '2':
                if pattern_entered:
                    self.__input_test_str()
                    self.__print_occurrence()
                else:
                    print 'Enter a pattern first'
                print
            elif s == '3':
                done = True
            else:
                print 'Bad Selection'
                print


if __name__ == '__main__':
    checker = StringChecker()
    checker.main_menu()