Rev 358 | Blame | Compare with Previous | Last modification | View Log | RSS feed
################################################################################
# The PAR2 Classifier Module
#
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
# License: GNU General Public License v2 (or, at your option, any later version)
#
# This class makes a best guess as to what type of set we're dealing
# with here. Of course, this is just a _GUESS_, but I hope it's pretty good.
#
# Future Improvements
# 1. Read a config file to get a few extra customizable types, which
#    will let you run an arbitrary command on a set of files that contains
#    a certain type of file
################################################################################

import os
import re
import sys

# Patterns identifying the "head" (first) file of each kind of set.
# They are raw strings so that the backslash in '\.' reaches the regex engine
# untouched; a non-raw '\.' is an invalid string escape that newer Python
# versions warn about.
_SPLIT_000_REGEX = r'^.*\.000$'
_SPLIT_001_REGEX = r'^.*\.001$'
_RAR_OLD_REGEX = r'^.*\.r00$'
_RAR_NEW_REGEX = r'^.*\.part0*1\.rar$'


class Par2Classifier(object):
    """Guess what kind of file set a parsed PAR2 set describes.

    The constructor takes a parser object that has already been run on the
    set of files.  Only two attributes of it are read, both iterated as lists
    of file-name strings:

    * ``protected_files``
    * ``possible_files``

    After construction the following attributes are available:

    * ``type``    -- one of the type constants defined below
    * ``heads``   -- list of the "head" extractable files for that type
                     (empty for ``UNKNOWN_TYPE``)
    * ``regexes`` -- dict mapping every type constant to the regex string
                     that matches its head file (``None`` for UNKNOWN_TYPE)

    The result is only a best guess.
    """

    # An enum of types.  These are class attributes so they can be referenced
    # either through an instance or through the class itself.
    (
        UNKNOWN_TYPE,      # Represents a type that we don't know about yet
        S_SPLIT_000,       # Single file protected, LXSplit, starting with .000
        M_SPLIT_000,       # Multiple files protected, LXSplit, starting with .000
        S_SPLIT_001,       # Single file protected, LXSplit, starting with .001
        M_SPLIT_001,       # Multiple files protected, LXSplit, starting with .001
        S_PROT_SPLIT_000,  # All .DDD files protected, LXSplit, starting with .000 (single set)
        M_PROT_SPLIT_000,  # All .DDD files protected, LXSplit, starting with .000 (multiple sets)
        S_PROT_SPLIT_001,  # All .DDD files protected, LXSplit, starting with .001 (single set)
        M_PROT_SPLIT_001,  # All .DDD files protected, LXSplit, starting with .001 (multiple sets)
        S_RAR_OLD,         # Single set of rars, Old style, starting with .rDD
        M_RAR_OLD,         # Multiple set of rars, Old style, starting with .rDD
        S_RAR_NEW,         # Single set of rars, New style, starting with .partDD.rar
        M_RAR_NEW,         # Multiple set of rars, New style, starting with .partDD.rar
    ) = range(13)  # WARNING: If adding types, increment this number appropriately

    def __init__(self, Par2Parser):
        """Classify the set described by ``Par2Parser``.

        Give this a valid parser that has already been run on your set of
        files.  It is used to figure out the type of the set and the list
        of head files that could be extracted.
        """
        # Storage for regexes: a fresh dict per instance, keyed by type.
        self.regexes = {
            self.UNKNOWN_TYPE: None,
            self.S_SPLIT_000: _SPLIT_000_REGEX,
            self.M_SPLIT_000: _SPLIT_000_REGEX,
            self.S_SPLIT_001: _SPLIT_001_REGEX,
            self.M_SPLIT_001: _SPLIT_001_REGEX,
            self.S_PROT_SPLIT_000: _SPLIT_000_REGEX,
            self.M_PROT_SPLIT_000: _SPLIT_000_REGEX,
            self.S_PROT_SPLIT_001: _SPLIT_001_REGEX,
            self.M_PROT_SPLIT_001: _SPLIT_001_REGEX,
            self.S_RAR_OLD: _RAR_OLD_REGEX,
            self.M_RAR_OLD: _RAR_OLD_REGEX,
            self.S_RAR_NEW: _RAR_NEW_REGEX,
            self.M_RAR_NEW: _RAR_NEW_REGEX,
        }

        self.Par2Parser = Par2Parser
        self.type = self.__get_type()
        self.heads = self.__get_extractable_files()

    def __get_type(self):
        """Return the type constant that best describes this set.

        The checks run in a fixed order and the first hit wins:

        1. split heads (.000, then .001) found among the protected files
        2. split heads (.000, then .001) found among the possible files,
           accepted only when the number of protected files agrees
           (exactly one for a single set, more than one for multiple sets)
        3. new-style rar heads, then old-style rar heads, among the
           protected files (case-insensitive)

        Anything else is ``UNKNOWN_TYPE``.  This information is used later
        to decide how to repair and possibly extract this set of files.
        """
        parser = self.Par2Parser

        # 1. The split pieces themselves are protected.
        protected_split_checks = (
            (_SPLIT_000_REGEX, self.S_PROT_SPLIT_000, self.M_PROT_SPLIT_000),
            (_SPLIT_001_REGEX, self.S_PROT_SPLIT_001, self.M_PROT_SPLIT_001),
        )
        for regex_str, single, multiple in protected_split_checks:
            count = len(self.__apply_regex(regex_str, parser.protected_files))
            if count > 1:
                return multiple
            if count == 1:
                return single

        # 2. The joined file(s) are protected and the pieces are only
        #    "possible" files.  possible_files is deliberately not touched
        #    before this point.
        unprotected_split_checks = (
            (_SPLIT_000_REGEX, self.S_SPLIT_000, self.M_SPLIT_000),
            (_SPLIT_001_REGEX, self.S_SPLIT_001, self.M_SPLIT_001),
        )
        for regex_str, single, multiple in unprotected_split_checks:
            count = len(self.__apply_regex(regex_str, parser.possible_files))
            if count > 1 and len(parser.protected_files) > 1:
                return multiple
            if count == 1 and len(parser.protected_files) == 1:
                return single

        # 3. Rar sets; new-style names are tested before old-style ones.
        rar_checks = (
            (_RAR_NEW_REGEX, self.S_RAR_NEW, self.M_RAR_NEW),
            (_RAR_OLD_REGEX, self.S_RAR_OLD, self.M_RAR_OLD),
        )
        for regex_str, single, multiple in rar_checks:
            count = len(self.__apply_regex(regex_str, parser.protected_files,
                                           ignorecase=True))
            if count > 1:
                return multiple
            if count == 1:
                return single

        return self.UNKNOWN_TYPE

    def __apply_regex(self, regex_str, li, ignorecase=False, search=False):
        """Return the items of ``li`` that the regex ``regex_str`` accepts.

        regex_str  -- the pattern, as a string
        li         -- iterable of strings to test
        ignorecase -- compile the pattern with re.IGNORECASE when true
        search     -- use search() instead of match() (the default) when true

        The result is always a new list, in the order of ``li``.
        """
        flags = re.IGNORECASE if ignorecase else 0
        regex = re.compile(regex_str, flags)

        # Decide whether to use re.search() or re.match()
        accepts = regex.search if search else regex.match
        return [item for item in li if accepts(item)]

    def __get_extractable_files(self):
        """Return a list of all of the "head" extractable files.

        NOTE: an extractable file is a .000, .001, .r00, .rar or whatever
        else makes sense for this type of set.  An empty list is returned
        for UNKNOWN_TYPE (or any type without a regex).
        """
        regex_str = self.regexes.get(self.type)
        if regex_str is None:
            return []  # Return nothing by default (this covers UNKNOWN_TYPE)

        # Unprotected split sets list their pieces under possible_files;
        # every other type has its head file among the protected files.
        unprotected_split_types = (
            self.S_SPLIT_000, self.M_SPLIT_000,
            self.S_SPLIT_001, self.M_SPLIT_001,
        )
        # Only rar names are matched case-insensitively.
        rar_types = (
            self.S_RAR_OLD, self.M_RAR_OLD,
            self.S_RAR_NEW, self.M_RAR_NEW,
        )

        if self.type in unprotected_split_types:
            candidates = self.Par2Parser.possible_files
        else:
            candidates = self.Par2Parser.protected_files

        return self.__apply_regex(regex_str, candidates,
                                  ignorecase=self.type in rar_types)


if __name__ == '__main__':
    pass