# [repository web-viewer header, not part of the program] Rev 276 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
#!/usr/bin/env python
# Copyright (c) 2005,2006 Ira W. Snyder (devel@irasnyder.com)
# License: GNU General Public License v2 (or at your option, any later version)
#
# Changelog Follows:
# - 2005-10-13
#   - Added get_par2_filenames() to parse par2 files
#   - Added the parset object to represent each parset.
#
# - 2005-10-14
#   - Finished the parset object. It will now verify and extract parsets.
#   - Small changes to the parset object. This makes the parjoin part
#     much more reliable.
#   - Added the OptionParser to make this nice to run at the command line.
#   - Made recursiveness an option.
#   - Made start directory an option.
#   - Check for appropriate programs before starting.
#
# - 2005-10-17
#   - Use a regular expression to handle the deletable types.
#
# - 2005-10-18
#   - Use regular expressions to handle all finding of files, instead of
#     using the glob module.
#   - Add a config class to handle all the default config stuff sanely.
#     This makes it easier to change some of the main parts of the program to
#     your specific configuration.
#   - Move the docrcchecks variable inside the get_par2_filenames() function,
#     which is where it belongs anyway.
#   - Added command-line option to check for required programs at start.
#
# - 2005-10-20
#   - Added a config option to extract with full path.
#
# - 2005-10-22
#   - Re-wrote the config class so that there is a config file, which
#     resides at ~/.config/rarslave/rarslave.conf by default.
#   - Added the command-line option -c to write out an updated version
#     of the config file (to fill in any missing options with the defaults)
#   - Added the command-line option -f to write out a new default config file,
#     which overwrites any user changes.
#   - Made all regexes case insensitive.
#   - Made all command-line options override the config file.
#
# - 2005-10-30
#   - Added the '-o' option, to output debugging info. Hopefully next time
#     someone finds a bug, they can output this and send it to me with a
#     description of the bug they're seeing.
#
# - 2005-11-05
#   - Added an output system to rarslave. This makes a nice status report
#     possible at the end of the program run.
#
# - 2005-11-06
#   - Fixed the rar command so that it can extract files whose names begin
#     with a hyphen.
#
# - 2006-03-08
#   - Make an interactive mode which asks the user before deleting files.

################################################################################
# REQUIREMENTS:
#
# This code requires the programs cfv, par2repair, lxsplit, and rar to be able
# to function properly. I will attempt to check that these are in your path.
################################################################################

# NOTE: this file is written in the common subset of Python 2.6+ and Python 3:
# print() is always called with exactly one string argument, so no
# "from __future__" import is required.

import errno
import glob
import hashlib
import os
import re
import struct
import subprocess
import sys
from optparse import OptionParser

try:
    import configparser                      # Python 3
except ImportError:                          # Python 2
    import ConfigParser as configparser

try:
    _read_line = raw_input                   # Python 2
except NameError:                            # Python 3
    _read_line = input


class rarslave_config:
    """Hold the configuration for the whole program.

    Built-in defaults live in a dictionary keyed by (section, option).
    Values read from the user's config file override them. get_value()
    is the single lookup entry point.
    """

    def __read_config(self, filename='~/.config/rarslave/rarslave.conf'):
        """Read the rarslave config file and return its contents.

        Returns a dictionary mapping (section, option) to the string value
        found in the file. If the file does not exist, an attempt is made to
        create it with the default settings. Failing to create it is not
        fatal here: the program simply runs on the built-in defaults.
        """
        # Make sure the filename is absolute and has "~" expanded
        filename = os.path.abspath(os.path.expanduser(filename))

        user_config = {}

        # Write the default config if it doesn't exist (best effort, and to
        # the file we were asked to read rather than a hard-coded location)
        if not os.path.isfile(filename):
            self.__write_config_file(filename, True)

        # RawConfigParser: the values are literal paths and regular
        # expressions, so "%" must not be treated as interpolation syntax.
        config = configparser.RawConfigParser()
        config.read(filename)

        for section in config.sections():
            for option in config.options(section):
                user_config[(section, option)] = config.get(section, option)

        return user_config

    def __write_config_file(self, filename, default):
        """Write the config to the (already normalized) path `filename`.

        If `default` is true, every user override is dropped first so that
        the built-in defaults are what gets written. Returns True on success;
        on failure a message is printed and False is returned.
        """
        config = configparser.RawConfigParser()

        # Reset all config to make sure we write the default one, if necessary
        if default:
            self.__user_config = {}
            print('Writing default config to %s' % (filename, ))

        # One pass per section; options are sorted so the file is stable
        for section in ('directories', 'options', 'regular expressions'):
            config.add_section(section)
            for (s, k) in sorted(self.__defaults):
                if s == section:
                    # The Python 3 parser only accepts string values
                    config.set(s, k, str(self.get_value(s, k)))

        # Try to make the ~/.config/rarslave/ directory
        directory = os.path.split(filename)[0]
        if not os.path.isdir(directory):
            try:
                os.makedirs(directory)
            except OSError:
                print('Could not make directory: %s' % (directory, ))
                return False

        # Try to write the config file to disk
        try:
            with open(filename, 'w') as fsock:
                config.write(fsock)
        except (IOError, OSError):
            print('Could not open: %s for writing' % (filename, ))
            return False

        return True

    def write_config(self, filename='~/.config/rarslave/rarslave.conf',
                     default=False):
        """Write out the current config to the config file.

        If you set default=True, then the default config file will be
        written (and any user overrides held in memory are discarded).
        Exits the program with status 1 if the file cannot be written.
        """
        filename = os.path.abspath(os.path.expanduser(filename))

        if not self.__write_config_file(filename, default):
            sys.exit(1)

    def __get_default_val(self, section, key):
        """Return the built-in default; raises KeyError for unknown keys."""
        return self.__defaults[(section, key)]

    def get_value(self, section, key):
        """Get a config value.

        Attempts to get the value from the user's config first, and then
        uses the default. The strings 'True' and 'False' (as stored in the
        config file) are converted to real booleans.
        """
        try:
            value = self.__user_config[(section, key)]
        except KeyError:
            # This should work, unless you write something stupid
            # into the code, so DON'T DO IT
            value = self.__get_default_val(section, key)

        # Convert config options to booleans for easier use
        if value == 'True':
            value = True
        elif value == 'False':
            value = False

        return value

    def __init__(self):
        self.__defaults = {
            ('directories', 'working_directory'): '~/downloads/usenet',
            ('options', 'recursive'): True,
            ('options', 'check_required_programs'): False,
            ('options', 'extract_with_full_path'): False,
            ('options', 'interactive'): False,
            ('regular expressions', 'par2_regex'): r'.*\.par2$',
            ('regular expressions', 'video_file_regex'):
                r'.*\.(avi|ogm|mkv|mp4)$',
            ('regular expressions', 'temp_repair_regex'): r'.*\.1$',
            ('regular expressions', 'remove_regex'): r'^.*\.(rar|r\d\d)$',
        }

        # Must exist before __read_config(), which may call get_value()
        # while writing out the default config file.
        self.__user_config = {}
        self.__user_config = self.__read_config()


# This is the global config variable.
config = rarslave_config()

# This is the global options variable. (to be set later)
options = None


################################################################################
# The rarslave_output class
#
# This class handles the nice output summary which is printed at the end
# of a run
################################################################################

class rarslave_output:
    """Collect per-parset results and print a summary table.

    Data structure: list of lists
    [ [status, filename], ... ]

    Where status is one of:
    0: Verified and Extracted Perfectly
    1: Failed to Verify (and therefore Extract)
    2: Verified correctly, but failed to Extract
    3: The par2 file itself was corrupt
    """

    def __init__(self):
        self.output_list = []
        self.good_files = 0
        self.unverified = 0
        self.unextractable = 0
        self.corrupt_par2 = 0

    def print_equal_line(self, size=80):
        """Print a line of `size` equal signs (80 by default)"""
        print('=' * size)

    def print_results_table(self):
        """Print a nice table of the results from this run"""
        sections = (
            (0, self.good_files, 'Files that were extracted perfectly'),
            (1, self.unverified, 'Files that failed to verify (and extract)'),
            (2, self.unextractable,
             'Files that were verified, but failed to extract'),
            (3, self.corrupt_par2, 'Files that had corrupt par2 files'),
        )

        # Each section is only printed if it has at least one entry
        for status, count, title in sections:
            if count > 0:
                self.print_equal_line()
                print(title)
                self.print_equal_line()
                for entry in self.output_list:
                    if entry[0] == status:
                        print('%s' % (entry[1], ))

        # Print a blank line at the end
        print('')

    def add_file(self, status, filename):
        """Record `filename` under `status` (0-3, see the class docstring).

        Raises ValueError, without recording anything, for any other status.
        """
        if status == 0:
            self.good_files += 1
        elif status == 1:
            self.unverified += 1
        elif status == 2:
            self.unextractable += 1
        elif status == 3:
            self.corrupt_par2 += 1
        else:
            # We have a bad value, so raise a ValueError
            raise ValueError('unknown status: %r' % (status, ))

        self.output_list.append([status, filename])


# This is the global output variable
output = rarslave_output()


################################################################################
# The PAR2 Parser
#
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
################################################################################

def chompnulls(line):
    """Return `line` cut off just before its first NUL character.

    Works on both byte strings and text strings; a value without any NUL
    is returned unchanged.
    """
    if isinstance(line, bytes):
        nul = b'\0'
    else:
        nul = u'\0'
    return line.split(nul, 1)[0]


def _native_filename(raw):
    """Convert a file name read from a par2 packet to the native str type."""
    if isinstance(raw, str):
        return raw                 # Python 2: bytes already are str
    return os.fsdecode(raw)        # Python 3: decode like the os module does


def get_par2_filenames(filename):
    """Get all of the filenames that are protected by the par2
    file given as the filename.

    Returns the list of protected file names, in the order their file
    description packets first appear. If the par2 file cannot be opened, a
    message is printed and an empty list is returned.

    Raises EnvironmentError (errno.EINVAL) if the file is truncated, has a
    packet whose MD5 hash does not match, has no main packet, or lacks the
    description of a file listed in the main packet.
    """
    try:
        par2 = open(filename, 'rb')
    except (IOError, OSError):
        print('Could not open %s' % (filename, ))
        return []

    pkt_header_fmt = '< 8s Q 16s 16s 16s'
    pkt_header_size = struct.calcsize(pkt_header_fmt)
    file_pkt_fmt = '< 16s 16s 16s Q'
    file_pkt_size = struct.calcsize(file_pkt_fmt)
    main_pkt_fmt = '< Q I'
    main_pkt_size = struct.calcsize(main_pkt_fmt)

    seen_file_ids = set()
    expected_file_ids = None
    filenames = []

    try:
        while True:
            header = par2.read(pkt_header_size)
            if not header:
                break
            if len(header) < pkt_header_size:
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt par2 file - truncated packet header")

            magic, pkt_len, pkt_md5, set_id, pkt_type = \
                struct.unpack(pkt_header_fmt, header)

            if pkt_len < pkt_header_size:
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt par2 file - bad packet length")

            # We always want to do crc checks. The hash covers everything
            # after the hash field itself: the rest of the header (from
            # offset 0x20) followed by the packet body.
            body = par2.read(pkt_len - pkt_header_size)
            control_md5 = hashlib.md5()
            control_md5.update(header[0x20:])
            control_md5.update(body)

            if control_md5.digest() != pkt_md5:
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt par2 file - bad packet hash")

            if pkt_type == b'PAR 2.0\0FileDesc':
                file_id, file_md5, file_md5_16k, file_size = \
                    struct.unpack(file_pkt_fmt, body[:file_pkt_size])

                # The same description is usually repeated in every volume
                if file_id not in seen_file_ids:
                    seen_file_ids.add(file_id)
                    name = chompnulls(body[file_pkt_size:])
                    filenames.append(_native_filename(name))

            elif pkt_type == b'PAR 2.0\0Main\0\0\0\0':
                # Only the first main packet is used
                if expected_file_ids is None:
                    expected_file_ids = []
                    slice_size, num_files = \
                        struct.unpack(main_pkt_fmt, body[:main_pkt_size])
                    num_nonrecovery = \
                        (len(body) - main_pkt_size) // 16 - num_files

                    id_end = main_pkt_size + (num_files + num_nonrecovery) * 16
                    for i in range(main_pkt_size, id_end, 16):
                        expected_file_ids.append(body[i:i + 16])

            # Any other packet type has been read and verified; ignore it.

    except struct.error:
        # A packet was too short to hold its fixed-size fields
        raise EnvironmentError(
            errno.EINVAL, "corrupt par2 file - malformed packet")
    finally:
        par2.close()

    if expected_file_ids is None:
        raise EnvironmentError(
            errno.EINVAL,
            "corrupt or unsupported par2 file - no main packet found")

    for file_id in expected_file_ids:
        if file_id not in seen_file_ids:
            raise EnvironmentError(
                errno.EINVAL,
                "corrupt or unsupported par2 file - "
                "expected file description packet not found")

    return filenames


################################################################################
# The parset object
#
# This is an object based representation of a parset, and will verify itself
# and extract itself, if possible.
################################################################################

def _run_command(argv):
    """Run an external program without a shell; True means exit status 0.

    The arguments are passed as a list, so file names (which come from
    downloaded par2 files) can never be interpreted by a shell.
    """
    try:
        return subprocess.call(argv) == 0
    except OSError:
        # The program is missing or is not executable
        print('Could not run program: %s' % (argv[0], ))
        return False


class parset:
    """One par2 set: the main par2 file, its extra volumes, and the files
    it protects. It can verify (repairing if needed), extract, and then
    clean up after itself.
    """

    def __init__(self, par_filename):
        self.parfile = par_filename     # the par2 file used for verification
        self.extra_pars = []            # other par2 files of the same set
        self.files = []                 # names of the protected files
        self.used_parjoin = False       # True once lxsplit joined the set
        self.verified = False
        self.extracted = False

    def print_debug_info(self):
        """Special function for debugging"""
        print('========== DEBUG INFO STARTS HERE ==========')

        print('=== parfile ===')
        print('%s' % (self.parfile, ))

        print('=== extra_pars ===')
        for f in self.extra_pars:
            print('%s' % (f, ))

        print('=== files ===')
        for f in self.files:
            print('%s' % (f, ))

        print('=========== DEBUG INFO ENDS HERE ===========')

    def get_filenames(self):
        """Return the names protected by this set's main par2 file."""
        return get_par2_filenames(self.parfile)

    def all_there(self):
        """Check if all the files for the parset are present.
        This will help us decide which par2 checker to use first"""
        for f in self.files:
            if not os.path.isfile(f):
                return False

        # The files were all there
        return True

    def verify(self):
        """This will verify the parset by the most efficient method first,
        and then move to a slower method if that one fails.

        Returns True if the set verified (possibly after repair)."""
        # if all the files are there, try verifying fast
        if self.all_there():
            retval = self.__fast_verify()

            if not retval:
                # Failed to verify fast, so try it slow, maybe it needs repair
                retval = self.__slow_verify()

        # If we've got a video file, maybe we should try to parjoin it
        elif self.__has_video_file():
            retval = self.__parjoin()

        else:  # not all there, maybe we can slow-repair
            retval = self.__slow_verify()

        self.verified = retval
        return self.verified

    def __fast_verify(self):
        """Check the set with cfv; True on success."""
        return _run_command(['cfv', '-v', '-f', self.parfile])

    def __slow_verify(self):
        """Check (and repair if needed) with par2repair; True on success."""
        return _run_command(['par2repair', self.parfile])

    def __parjoin(self):
        """Join split pieces with lxsplit, then verify the joined file."""
        # The result of lxsplit itself is not checked; verification decides
        _run_command(['lxsplit', '-j', '%s.001' % (self.files[0], )])

        retval = self.__fast_verify()

        if not retval:
            # Failed to verify fast, so try it slow, maybe it needs repair
            retval = self.__slow_verify()

        if not retval:
            # failed to verify, so remove the lxsplit created file
            try:
                os.remove(self.files[0])
            except OSError:
                print('Failed to remove file: %s' % (self.files[0], ))

        self.used_parjoin = retval
        self.verified = retval
        return self.verified

    def __has_video_file(self):
        """True if any protected file matches the video file regex."""
        regex = re.compile(
            config.get_value('regular expressions', 'video_file_regex'),
            re.IGNORECASE)

        for f in self.files:
            if regex.match(f):
                return True

        return False

    def __remove_currentset(self):
        """Remove all of the files that are extractable, as well as the pars.
        Leave everything else alone"""
        if not self.extracted:
            print('Did not extract yet, not removing currentset')
            return

        # remove the main par, then all of the extra pars
        files_to_remove = [self.parfile]
        files_to_remove.extend(self.extra_pars)

        # remove any rars that are associated (leave EVERYTHING else)
        # This regex matches both old and new style rar(s) by default.
        regex = re.compile(
            config.get_value('regular expressions', 'remove_regex'),
            re.IGNORECASE)
        files_to_remove.extend(f for f in self.files if regex.match(f))

        # remove any .{001,002,...} files (from parjoin)
        if self.used_parjoin:
            joined = self.files[0]
            files_to_remove.extend(
                f for f in os.listdir(os.getcwd())
                if f != joined and joined in f)

        # remove any temp repair files
        regex = re.compile(
            config.get_value('regular expressions', 'temp_repair_regex'),
            re.IGNORECASE)
        files_to_remove.extend(
            f for f in os.listdir(os.getcwd()) if regex.match(f))

        # "options" is only set by main(); fall back to the config value
        # when this class is used without the command-line front end
        if options is not None:
            interactive = options.interactive
        else:
            interactive = config.get_value('options', 'interactive')

        if not interactive:
            self.__remove_list_of_files(files_to_remove)
            return

        # interactively remove files
        print('')  # blank line
        for f in files_to_remove:
            print('%s' % (f, ))
        print('========================================')

        while True:
            try:
                answer = _read_line("Delete files [y,n]: ")
            except EOFError:
                # No more input: the safe choice is to delete nothing
                answer = 'n'

            answer = answer.strip().lower()

            if answer in ('y', 'yes'):
                self.__remove_list_of_files(files_to_remove)
                break
            elif answer in ('n', 'no'):
                print('Not removing files')
                break
            else:
                print('Bad selection, try again...')

    def __remove_list_of_files(self, files_to_remove):
        """Remove all files in the list"""
        # remove duplicates from the list, keeping the original order
        unique = []
        for f in files_to_remove:
            if f not in unique:
                unique.append(f)

        # remove the files
        for f in unique:
            try:
                os.remove(f)
            except OSError:
                print('WW: Problem deleting: %s' % f)

    def __get_extract_file(self):
        """Find the first extractable file (None if there is none)"""
        for f in self.files:
            # Case-insensitive, to agree with the case-insensitive regex
            # used to delete the archives after extraction
            if os.path.splitext(f)[1].lower() == '.rar':
                return f

        return None

    def extract(self):
        """Attempt to extract all of the files related to this parset.

        The outcome is recorded in the global output summary. On success the
        par2 files and archives of this set are removed. Returns True if the
        set is considered extracted."""
        if not self.verified:
            self.extracted = False
            output.add_file(1, self.parfile)
            return False  # failed to extract

        extract_file = self.__get_extract_file()

        if extract_file is not None:
            # "x" keeps the paths stored in the archive, "e" flattens them
            if config.get_value('options', 'extract_with_full_path'):
                mode = 'x'
            else:
                mode = 'e'

            # "--" lets rar handle file names that begin with a hyphen
            if not _run_command(['rar', mode, '-o+', '--', extract_file]):
                output.add_file(2, self.parfile)
                self.extracted = False
                return self.extracted

        # we extracted ok, so remove the currentset
        self.extracted = True
        self.__remove_currentset()

        output.add_file(0, self.parfile)

        return self.extracted


################################################################################
# The rarslave program itself
################################################################################

def _find_in_path(program):
    """Return the full path of `program` found via $PATH, or None."""
    for directory in os.environ.get('PATH', os.defpath).split(os.pathsep):
        candidate = os.path.join(directory, program)
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate
    return None


def check_required_progs():
    """Check if the required programs are installed.

    Prints every missing program and exits with status 1 if any is missing.
    """
    needed = [prog for prog in ('cfv', 'par2repair', 'lxsplit', 'rar')
              if _find_in_path(prog) is None]

    if needed:
        for n in needed:
            print('Needed program "%s" not found in $PATH' % (n, ))

        sys.exit(1)


def get_parsets():
    """Get a representation of each parset in the current directory, and
    return them as a list of parset instances.

    par2 files that cannot be parsed are recorded as corrupt (status 3) in
    the global output summary and skipped."""
    regex = re.compile(
        config.get_value('regular expressions', 'par2_regex'),
        re.IGNORECASE)

    # Sorted so that the result does not depend on directory order
    par2files = sorted(f for f in os.listdir(os.getcwd()) if regex.match(f))

    parsets = []

    for par in par2files:
        try:
            filenames = get_par2_filenames(par)
        except EnvironmentError:
            output.add_file(3, par)
            continue

        # if we already have an instance for this set, append
        # this par file to the extra_pars field
        for known in parsets:
            if known.files == filenames:
                known.extra_pars.append(par)
                break
        else:
            # we haven't seen this set yet, so we'll create it now
            cur = parset(par)
            cur.files = filenames
            parsets.append(cur)

    return parsets


def directory_worker(dir):
    """Attempts to find, verify, and extract every parset in the directory
    given as a parameter.

    The current working directory is changed to `dir` while working and is
    always restored afterwards."""
    cwd = os.getcwd()
    os.chdir(dir)

    try:
        parsets = get_parsets()

        # Print debug info if we're supposed to
        if options is not None and options.debug_info:
            for p in parsets:
                p.print_debug_info()

        # No debug info
        else:
            # Verify each parset
            for p in parsets:
                p.verify()

            # Attempt to extract each parset
            for p in parsets:
                p.extract()
    finally:
        os.chdir(cwd)


def main():
    """Parse the command line, process the working directory (recursively
    unless told otherwise), and print the results table."""
    # Build the OptionParser; config file values are the defaults, so the
    # command line overrides the config file
    parser = OptionParser()
    parser.add_option(
        '-n', '--not-recursive',
        action='store_false', dest='recursive',
        default=config.get_value('options', 'recursive'),
        help="Don't run recursively")

    parser.add_option(
        '-d', '--work-dir',
        dest='work_dir',
        default=config.get_value('directories', 'working_directory'),
        help="Start running at DIR", metavar='DIR')

    parser.add_option(
        '-p', '--check-required-programs',
        action='store_true', dest='check_progs',
        default=config.get_value('options', 'check_required_programs'),
        help="Check for required programs")

    parser.add_option(
        '-f', '--write-default-config',
        action='store_true', dest='write_def_config',
        default=False, help="Write out a new default config")

    parser.add_option(
        '-c', '--write-new-config',
        action='store_true', dest='write_config',
        default=False, help="Write out the current config")

    parser.add_option(
        '-o', '--output-debug-info',
        action='store_true', dest='debug_info',
        default=False,
        help="Output debug info for every parset, then exit")

    parser.add_option(
        '-i', '--interactive', dest='interactive', action='store_true',
        default=config.get_value('options', 'interactive'),
        help="Confirm before removing files")

    # Parse the given options
    global options
    (options, args) = parser.parse_args()

    # Fix up the working directory
    options.work_dir = os.path.abspath(os.path.expanduser(options.work_dir))

    # Check that we have the required programs installed
    if options.check_progs:
        check_required_progs()

    # Write out a new default config, if we need it
    if options.write_def_config:
        config.write_config(default=True)

    # Write out the current config (adds new options to an existing config)
    if options.write_config:
        config.write_config()

    # Run rarslave!
    if options.recursive:
        for root, dirs, files in os.walk(options.work_dir):
            directory_worker(root)
    else:
        directory_worker(options.work_dir)

    # Print the results
    output.print_results_table()


if __name__ == '__main__':
    main()