# WebSVN – programming – /projects/rarslave/Par2Classifier.py

################################################################################
# The PAR2 Classifier Module
#
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
# License: GNU General Public License v2 (or, at your option, any later version)
#
# This class makes a best guess as to what type of set we're dealing
# with here. Of course, this is just a _GUESS_, but I hope it's pretty good.
#
# Future Improvements
# 1. Read a config file to get a few extra customizable types, which
#    will let you run an arbitrary command on a set of files that contains
#    a certain type of file
################################################################################

import os, re, sys

class Par2Classifier (object):
    """Make a best guess at what kind of file set a PAR2 set describes.

    The guess is computed once, at construction time, from the file name
    lists exposed by the parser object handed to the constructor.

    Attributes set by the constructor:
      UNKNOWN_TYPE ... M_RAR_NEW -- integer type codes (0 through 12)
      regexes    -- dict mapping each type code to the pattern string that
                    recognises the "head" file of that kind of set
                    (None for UNKNOWN_TYPE)
      Par2Parser -- the parser object that was passed in
      type       -- the guessed type code
      heads      -- list of "head" file names matching the guessed type
    """

    def __init__ (self, Par2Parser):
        """Classify the set described by Par2Parser.

        Par2Parser must provide the attributes `protected_files` and
        `possible_files`; each is iterated, measured with len(), and its
        elements are matched against regexes, so they must be sized
        collections of strings (presumably file names -- confirm against
        the parser module). The parser should already have been run on
        the set of files being examined.
        """

        # An enum of types
        ( self.UNKNOWN_TYPE,     # Represents a type that we don't know about yet

          self.S_SPLIT_000,      # Single file protected,    LXSplit, starting with .000
          self.M_SPLIT_000,      # Multiple files protected, LXSplit, starting with .000
          self.S_SPLIT_001,      # Single file protected,    LXSplit, starting with .001
          self.M_SPLIT_001,      # Multiple files protected, LXSplit, starting with .001

          self.S_PROT_SPLIT_000, # All .DDD files protected, LXSplit, starting with .000 (single set)
          self.M_PROT_SPLIT_000, # All .DDD files protected, LXSplit, starting with .000 (multiple sets)
          self.S_PROT_SPLIT_001, # All .DDD files protected, LXSplit, starting with .001 (single set)
          self.M_PROT_SPLIT_001, # All .DDD files protected, LXSplit, starting with .001 (multiple sets)

          self.S_RAR_OLD,        # Single set of rars,   Old style,   starting with .rDD
          self.M_RAR_OLD,        # Multiple set of rars, Old style,   starting with .rDD
          self.S_RAR_NEW,        # Single set of rars,   New style,   starting with .partDD.rar
          self.M_RAR_NEW,        # Multiple set of rars, New style,   starting with .partDD.rar

        ) = range (13)      # WARNING: If adding types, increment this number appropriately

        # Head-file pattern for every type. Raw strings keep the backslash
        # in front of the dot without relying on Python passing unknown
        # escape sequences through unchanged. The RAR patterns are always
        # applied case-insensitively by the methods below.
        self.regexes = {
            self.UNKNOWN_TYPE : None,

            self.S_SPLIT_000 : r'^.*\.000$',
            self.M_SPLIT_000 : r'^.*\.000$',
            self.S_SPLIT_001 : r'^.*\.001$',
            self.M_SPLIT_001 : r'^.*\.001$',

            self.S_PROT_SPLIT_000 : r'^.*\.000$',
            self.M_PROT_SPLIT_000 : r'^.*\.000$',
            self.S_PROT_SPLIT_001 : r'^.*\.001$',
            self.M_PROT_SPLIT_001 : r'^.*\.001$',

            self.S_RAR_OLD : r'^.*\.r00$',
            self.M_RAR_OLD : r'^.*\.r00$',
            self.S_RAR_NEW : r'^.*\.part0*1\.rar$',
            self.M_RAR_NEW : r'^.*\.part0*1\.rar$',
        }

        self.Par2Parser = Par2Parser
        self.type = self.__get_type ()
        self.heads = self.__get_extractable_files ()

    def __get_type (self):
        """Return the type code that best describes this set.

        The checks run in a fixed order and the first hit wins:
          1. protected files ending in .000, then .001 (PROT_SPLIT types)
          2. possible files ending in .000, then .001 (SPLIT types)
          3. protected files that look like new-style, then old-style,
             rar heads (RAR types, matched case-insensitively)
        More than one head selects the M_ (multiple) variant, exactly one
        selects the S_ (single) variant. UNKNOWN_TYPE is returned when
        nothing fits.
        """

        protected = self.Par2Parser.protected_files
        possible = self.Par2Parser.possible_files

        # 1. Split sets whose pieces are themselves protected
        for single, multi in ((self.S_PROT_SPLIT_000, self.M_PROT_SPLIT_000),
                              (self.S_PROT_SPLIT_001, self.M_PROT_SPLIT_001)):
            count = len (self.__apply_regex (self.regexes[single], protected))

            if count > 1:
                return multi

            if count == 1:
                return single

        # 2. Split sets whose pieces only show up in possible_files. The
        #    number of protected files has to agree with the number of
        #    heads found (several/several or one/one); any other mix is
        #    not treated as a split set and falls through.
        for single, multi in ((self.S_SPLIT_000, self.M_SPLIT_000),
                              (self.S_SPLIT_001, self.M_SPLIT_001)):
            count = len (self.__apply_regex (self.regexes[single], possible))

            if count > 1 and len (protected) > 1:
                return multi

            if count == 1 and len (protected) == 1:
                return single

        # 3. Rar sets: new-style naming is tried before old-style
        for single, multi in ((self.S_RAR_NEW, self.M_RAR_NEW),
                              (self.S_RAR_OLD, self.M_RAR_OLD)):
            count = len (self.__apply_regex (self.regexes[single], protected,
                                             ignorecase=True))

            if count > 1:
                return multi

            if count == 1:
                return single

        return self.UNKNOWN_TYPE

    def __apply_regex (self, regex_str, li, ignorecase=False, search=False):
        """Return the items of li that are accepted by regex_str.

        regex_str  -- the pattern, as a string
        li         -- iterable of strings to test
        ignorecase -- if true, compile the pattern with re.IGNORECASE
        search     -- if true, test with search() rather than the
                      default match()

        The result is a new list that keeps the order of li.
        """

        # Decide whether or not to use re.IGNORECASE
        flags = 0
        if ignorecase:
            flags = re.IGNORECASE

        regex = re.compile (regex_str, flags)

        # Decide whether to use re.search() or re.match()
        if search:
            accepts = regex.search
        else:
            accepts = regex.match

        return [item for item in li if accepts (item)]

    def __get_extractable_files (self):
        """Return the list of "head" extractable files for self.type.

        NOTE: an extractable file is a .000, .001, .r00, .rar
              or whatever else makes sense for this type of set

        Plain SPLIT types take their heads from possible_files; PROT_SPLIT
        and RAR types take them from protected_files, with the RAR
        patterns matched case-insensitively. UNKNOWN_TYPE yields [].
        """

        pattern = self.regexes.get (self.type)

        # Return nothing by default (this covers UNKNOWN_TYPE)
        if pattern is None:
            return []

        if self.type in (self.S_SPLIT_000, self.M_SPLIT_000,
                         self.S_SPLIT_001, self.M_SPLIT_001):
            return self.__apply_regex (pattern, self.Par2Parser.possible_files)

        is_rar = self.type in (self.S_RAR_OLD, self.M_RAR_OLD,
                               self.S_RAR_NEW, self.M_RAR_NEW)

        return self.__apply_regex (pattern, self.Par2Parser.protected_files,
                                   ignorecase=is_rar)
        
# Nothing to do when this module is run directly; it is only meant
# to be imported.
if "__main__" == __name__:
    pass
# Subversion Repositories programming

# (root)/projects/rarslave/Par2Classifier.py – Rev 359