# Rev 358
################################################################################
# The PAR2 Classifier Module
#
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
# License: GNU General Public License v2 (or, at your option, any later version)
#
# This class makes a best guess as to what type of set we're dealing
# with here. Of course, this is just a _GUESS_, but I hope it's pretty good.
#
# Future Improvements
# 1. Read a config file to get a few extra customizable types, which
# will let you run an arbitrary command on a set of files that contains
# a certain type of file
################################################################################
import os, re, sys
class Par2Classifier(object):
    """Make a best guess at the kind of file set a PAR2 set describes.

    The classifier is handed a parser object that has already been run on
    the set of files.  Only two attributes of that object are read:
    ``protected_files`` and ``possible_files``, both lists of file names.

    After construction the following attributes are available:

    * ``type``       -- one of the enum values defined on this class
    * ``heads``      -- list of the "head" extractable file names
    * ``regexes``    -- dict mapping every enum value to the pattern that
                        identifies its head file (``None`` for UNKNOWN_TYPE)
    * ``Par2Parser`` -- the parser object that was passed in

    Remember that the result is only a _GUESS_.
    """

    # An enum of types.  These live on the class so they can be used without
    # an instance (Par2Classifier.S_RAR_NEW); access through an instance
    # (classifier.S_RAR_NEW) keeps working as before.
    (
        UNKNOWN_TYPE,      # Represents a type that we don't know about yet
        S_SPLIT_000,       # Single file protected, LXSplit, starting with .000
        M_SPLIT_000,       # Multiple files protected, LXSplit, starting with .000
        S_SPLIT_001,       # Single file protected, LXSplit, starting with .001
        M_SPLIT_001,       # Multiple files protected, LXSplit, starting with .001
        S_PROT_SPLIT_000,  # All .DDD files protected, LXSplit, starting with .000 (single set)
        M_PROT_SPLIT_000,  # All .DDD files protected, LXSplit, starting with .000 (multiple sets)
        S_PROT_SPLIT_001,  # All .DDD files protected, LXSplit, starting with .001 (single set)
        M_PROT_SPLIT_001,  # All .DDD files protected, LXSplit, starting with .001 (multiple sets)
        S_RAR_OLD,         # Single set of rars, Old style, starting with .rDD
        M_RAR_OLD,         # Multiple set of rars, Old style, starting with .rDD
        S_RAR_NEW,         # Single set of rars, New style, starting with .partDD.rar
        M_RAR_NEW,         # Multiple set of rars, New style, starting with .partDD.rar
    ) = range(13)  # WARNING: If adding types, increment this number appropriately

    # Head-file patterns.  Raw strings, so that the backslash in '\.' reaches
    # the regex engine untouched instead of being an invalid string escape.
    _SPLIT_000_RE = r'^.*\.000$'
    _SPLIT_001_RE = r'^.*\.001$'
    _RAR_OLD_RE = r'^.*\.r00$'
    _RAR_NEW_RE = r'^.*\.part0*1\.rar$'

    def __init__(self, Par2Parser):
        """Classify the set described by Par2Parser.

        Give this function a valid Par2Parser that has already been run on
        your set of files.  It will be used to figure out the types that are
        in this set.
        """
        # Storage for regexes: the single source of truth for the patterns
        # used by the classification and head-finding code below.
        self.regexes = {
            self.UNKNOWN_TYPE: None,
            self.S_SPLIT_000: self._SPLIT_000_RE,
            self.M_SPLIT_000: self._SPLIT_000_RE,
            self.S_SPLIT_001: self._SPLIT_001_RE,
            self.M_SPLIT_001: self._SPLIT_001_RE,
            self.S_PROT_SPLIT_000: self._SPLIT_000_RE,
            self.M_PROT_SPLIT_000: self._SPLIT_000_RE,
            self.S_PROT_SPLIT_001: self._SPLIT_001_RE,
            self.M_PROT_SPLIT_001: self._SPLIT_001_RE,
            self.S_RAR_OLD: self._RAR_OLD_RE,
            self.M_RAR_OLD: self._RAR_OLD_RE,
            self.S_RAR_NEW: self._RAR_NEW_RE,
            self.M_RAR_NEW: self._RAR_NEW_RE,
        }

        self.Par2Parser = Par2Parser
        # Order matters: the head list depends on the guessed type.
        self.type = self.__get_type()
        self.heads = self.__get_extractable_files()

    def __get_type(self):
        """Return the enum value that is the best guess for this set.

        This information will be used later to decide how to repair and
        possibly extract this set of files.  The checks run in a fixed
        priority order and the first one that succeeds wins:

        1. split files that are themselves protected (.000, then .001)
        2. split files that are merely present (.000, then .001)
        3. new-style rars, then old-style rars (both case-insensitive)

        UNKNOWN_TYPE is returned when nothing matches.
        """
        protected = self.Par2Parser.protected_files

        # 1. The split pieces themselves are listed as protected files.
        #    More than one head means multiple sets, exactly one a single set.
        for single, multiple in (
                (self.S_PROT_SPLIT_000, self.M_PROT_SPLIT_000),
                (self.S_PROT_SPLIT_001, self.M_PROT_SPLIT_001)):
            count = len(self.__apply_regex(self.regexes[single], protected))
            if count > 1:
                return multiple
            if count == 1:
                return single

        # 2. The split pieces are only "possible" files; the protected files
        #    are the joined originals.  The number of heads has to agree with
        #    the number of protected files (many/many or one/one); any other
        #    combination falls through to the next check.
        for single, multiple in (
                (self.S_SPLIT_000, self.M_SPLIT_000),
                (self.S_SPLIT_001, self.M_SPLIT_001)):
            count = len(self.__apply_regex(self.regexes[single],
                                           self.Par2Parser.possible_files))
            if count > 1 and len(protected) > 1:
                return multiple
            if count == 1 and len(protected) == 1:
                return single

        # 3. Rar sets.  New style is checked before old style, and both are
        #    matched without regard to case.
        for single, multiple in (
                (self.S_RAR_NEW, self.M_RAR_NEW),
                (self.S_RAR_OLD, self.M_RAR_OLD)):
            count = len(self.__apply_regex(self.regexes[single], protected,
                                           ignorecase=True))
            if count > 1:
                return multiple
            if count == 1:
                return single

        return self.UNKNOWN_TYPE

    def __apply_regex(self, regex_str, li, ignorecase=False, search=False):
        """Run the given regex over the list and return the matching items.

        regex_str  -- the pattern, as a string
        li         -- the list of strings to test
        ignorecase -- compile the pattern with re.IGNORECASE when true
        search     -- use search() instead of match() (the default) when true

        Returns a new list holding the items of li that matched, in their
        original order.
        """
        # Decide whether or not to use re.IGNORECASE
        if ignorecase:
            regex = re.compile(regex_str, re.IGNORECASE)
        else:
            regex = re.compile(regex_str)

        # Decide whether to use re.search() or re.match()
        if search:
            test = regex.search
        else:
            test = regex.match

        return [i for i in li if test(i)]

    def __get_extractable_files(self):
        """Return a list of all of the "head" extractable files.

        NOTE: an extractable file is a .000, .001, .r00, .rar or whatever
        else makes sense for this type of set.  An empty list is returned
        for UNKNOWN_TYPE (or any value without a pattern).
        """
        pattern = self.regexes.get(self.type)
        if pattern is None:
            return []  # Return nothing by default (this covers UNKNOWN_TYPE)

        # Plain split sets: the heads are found among the possible files,
        # because the pieces themselves are not protected.
        if self.type in (self.S_SPLIT_000, self.M_SPLIT_000,
                         self.S_SPLIT_001, self.M_SPLIT_001):
            return self.__apply_regex(pattern, self.Par2Parser.possible_files)

        # Everything else is found among the protected files.  Only the rar
        # types are matched case-insensitively, mirroring __get_type().
        is_rar = self.type in (self.S_RAR_OLD, self.M_RAR_OLD,
                               self.S_RAR_NEW, self.M_RAR_NEW)
        return self.__apply_regex(pattern, self.Par2Parser.protected_files,
                                  ignorecase=is_rar)
if __name__ == '__main__':
    # Running this file directly performs no action.
    pass