X-Git-Url: https://www.irasnyder.com/gitweb/?p=rarslave2.git;a=blobdiff_plain;f=rarslave.py;h=20c028840d3f38f43fb57475301a3e3b9aaf760e;hp=789452e7b133b82381e2d15fca6074556712edfb;hb=HEAD;hpb=193658c01f9fa9adb1aca7b0cfe94ae40f64b11b diff --git a/rarslave.py b/rarslave.py old mode 100644 new mode 100755 index 789452e..20c0288 --- a/rarslave.py +++ b/rarslave.py @@ -1,361 +1,370 @@ #!/usr/bin/env python -# vim: set ts=4 sts=4 sw=4 textwidth=112 : +# vim: set ts=4 sts=4 sw=4 textwidth=80: + +""" +The main program of the rarslave project + +This handles all of the commandline and configuration file work, then tries to +repair, extract, and delete any PAR2Sets that it finds. +""" + +__author__ = "Ira W. Snyder (devel@irasnyder.com)" +__copyright__ = "Copyright (c) 2006-2008 Ira W. Snyder (devel@irasnyder.com)" +__license__ = "GNU GPL v2 (or, at your option, any later version)" + +# rarslave.py -- a usenet autorepair and autoextract utility +# +# Copyright (C) 2006-2008 Ira W. Snyder (devel@irasnyder.com) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -import re, os, sys -import par2parser +VERSION = "2.1.0" +PROGRAM = "rarslave" -# Global Variables -(TYPE_OLDRAR, TYPE_NEWRAR, TYPE_ZIP, TYPE_NOEXTRACT) = range (4) +import os, sys, optparse, logging, ConfigParser +from subprocess import CalledProcessError +import PAR2Set -class RarslaveExtractor (object): +################################################################################ - def __init__ (self, type): - self.type = type - self.heads = [] +# A simple-ish configuration class +class RarslaveConfig(object): - def addHead (self, dir, head): - assert os.path.isdir (dir) - # REQUIRES that the dir is valid, but not that the file is valid, so that - # we can move a file that doesn't exist yet. - # FIXME: probably CAN add this back, since we should be running this AFTER repair. - #assert os.path.isfile (os.path.join (dir, head)) + DEFAULT_CONFIG_FILE = PAR2Set.utils.absolutePath( + os.path.join('~', '.config', 'rarslave', 'rarslave.conf')) - self.heads.append (os.path.join (dir, head)) + def __init__(self, fileName=DEFAULT_CONFIG_FILE): - def extract (self, todir): - # Extract all heads of this set + # Make sure that the fileName is in absolute form + self.fileName = os.path.abspath(os.path.expanduser(fileName)) - # Create the directory $todir if it doesn't exist - if not os.path.isdir (todir): - # TODO: LOGGER - try: - os.makedirs (todir) - except OSError: - # TODO: LOGGER - # Failed mkdir -p, clean up time ... - pass # FIXME: temporary for syntax + # Open it with ConfigParser + self.config = ConfigParser.SafeConfigParser() + self.config.read(fileName) - # Extract all heads - extraction_func = \ - { TYPE_OLDRAR : self.__extract_rar, - TYPE_NEWRAR : self.__extract_rar, - TYPE_ZIP : self.__extract_zip, - TYPE_NOEXTRACT : self.__extract_noextract }[self.type] + # Setup the default dictionary + self.defaults = dict() - # Call the extraction function on each head - for h in self.heads: - extraction_func (h, todir) + # Add all of the defaults + self.add_default('directories', 'start', + os.path.join('~', 'downloads'), + PAR2Set.utils.absolutePath) + self.add_default('options', 'recursive', True, self.toBool) + self.add_default('options', 'interactive', False, self.toBool) + self.add_default('options', 'verbosity', 0, self.toInt) + self.add_default('options', 'delete', True, self.toBool) - def __extract_rar (self, file, todir): - assert os.path.isfile (file) - assert os.path.isdir (todir) + # Add a new default value + def add_default(self, section, key, value, typeConverter): - RAR_CMD = 'unrar x -o+ -- ' + self.defaults[(section, key)] = (value, typeConverter) - #file = full_abspath (file) - #todir = full_abspath (todir) + # Get the default value + def get_default(self, section, key): - cmd = '%s \"%s\"' % (RAR_CMD, file) - ret = run_command (cmd, todir) + (value, typeConverter) = self.defaults[(section, key)] + return value - def __extract_zip (self, file, todir): - ZIP_CMD = 'unzip \"%s\" -d \"%s\"' + # Coerce the value from a string into the correct type + def coerceValue(self, section, key, value): - cmd = ZIP_CMD % (file, todir) - ret = run_command (cmd) + (defaultValue, typeConverter) = self.defaults[(section, key)] - def __extract_noextract (self, file, todir): - # Just move this file to the $todir, since no extraction is needed - # FIXME: NOTE: mv will fail by itself if you're moving to the same dir! - cmd = 'mv \"%s\" \"%s\"' % (file, todir) - ret = run_command (cmd) + # Try the coercion, error and exit if there is a problem + try: + return typeConverter(value) + except: + sys.stderr.write('Unable to parse configuration file\n') + sys.stderr.write('-> at section: %s\n' % section) + sys.stderr.write('-> at key: %s\n' % key) + sys.exit(2) + # Return the value + def get(self, section, key): + try: + # Get the user-provided value + value = self.config.get(section, key) + except: + # Oops, they didn't provide it, use the default + # NOTE: if you get an exception here, check your code ;) + value = self.defaults[(section, key)] -class RarslaveRepairer (object): - # Verify (and repair) the set - # Make sure it worked, otherwise clean up and return failure + # Try to evaluate some safe things, for convenience + return self.coerceValue(section, key, value) - def __init__ (self, dir, file, join=False): - self.dir = dir # the directory containing the par2 file - self.file = file # the par2 file - self.join = join # True if the par2 set is 001 002 ... + # Convert a string to an int (any base) + def toInt(s): + return int(s, 0) - assert os.path.isdir (dir) - assert os.path.isfile (os.path.join (dir, file)) + # Mark it static + toInt = staticmethod(toInt) - def checkAndRepair (self): - # Form the command: - # par2repair -- PAR2 PAR2_EXTRA [JOIN_FILES] - PAR2_CMD = 'par2repair -- ' + # Convert a string to a bool + def toBool(s): + if s in ['t', 'T', 'True', 'true', 'yes', '1']: + return True - # Get set up - basename = get_basename (self.file) - all_files = find_likely_files (basename, self.dir) - all_files.sort () - par2_files = find_par2_files (all_files) + if s in ['f', 'F', 'False', 'false', 'no', '0']: + return False - # assemble the command - command = "%s \"%s\" " % (PAR2_CMD, self.file) + raise ValueError - for f in par2_files: - if f != self.file: - command += "\"%s\" " % get_filename(f) + # Mark it static + toBool = staticmethod(toBool) - if self.join: - for f in all_files: - if f not in par2_files: - command += "\"%s\" " % get_filename(f) +################################################################################ - # run the command - ret = run_command (command, self.dir) +# Global configuration, read from default configuration file +config = RarslaveConfig() -def run_command (cmd, indir=None): - # Runs the specified command-line in the directory given (or, in the current directory - # if none is given). It returns the status code given by the application. +################################################################################ - pwd = os.getcwd () +# A tiny class to hold logging output until we're finished +class DelayedLogger (object): - if indir != None: - assert os.path.isdir (indir) # MUST be a directory! - os.chdir (pwd) + """A small class to hold logging output until the program is finished running. + It emulates sys.stdout in the needed ways for the logging module.""" - # FIXME: re-enable this after testing - print 'RUNNING (%s): %s' % (indir, cmd) - # return os.system (cmd) + def __init__ (self, output=sys.stdout.write): + self.__messages = [] + self.__output = output + def write (self, msg): + self.__messages.append (msg) -def full_abspath (p): - return os.path.abspath (os.path.expanduser (p)) + def flush (self): + pass -def get_filename (f): - # TODO: I don't think that we should enforce this... - # TODO: ... because I think we should be able to get the filename, regardless - # TODO: of whether this is a legit filename RIGHT NOW or not. - # assert os.path.isfile (f) - return os.path.split (f)[1] + def size (self): + """Returns the number of messages queued for printing""" + return len (self.__messages) -def get_basename (name): - """Strips most kinds of endings from a filename""" + def close (self): + """Print all messages, clear the queue""" + map(self.__output, self.__messages) + self.__messages = [] - regex = '^(.+)\.(par2|vol\d+\+\d+|\d\d\d|part\d+|rar|zip|avi|mp4|mkv|ogm)$' - r = re.compile (regex, re.IGNORECASE) - done = False +################################################################################ - while not done: - done = True +# Convert from the verbose command line option to the logging level that +# will be used by the logging class to print messages +def findLogLevel(options): - if r.match (name): - g = r.match (name).groups() - name = g[0] - done = False + level = options.verbose - options.quiet - return name + if level < -3: + level = -3 -def find_likely_files (name, dir): - """Finds files which are likely to be part of the set corresponding - to $name in the directory $dir""" + if level > 1: + level = 1 - if not os.path.isdir (os.path.abspath (dir)): - raise ValueError # bad directory given + LEVELS = { + 1 : logging.DEBUG, + 0 : logging.INFO, + -1 : logging.WARNING, + -2 : logging.ERROR, + -3 : logging.CRITICAL + } - dir = os.path.abspath (dir) - ename = re.escape (name) - regex = re.compile ('^%s.*$' % (ename, )) + return LEVELS[level] - return [f for f in os.listdir (dir) if regex.match (f)] +################################################################################ -def find_par2_files (files): - """Find all par2 files in the list $files""" +def parseCommandLineOptions(): - regex = re.compile ('^.*\.par2$', re.IGNORECASE) - return [f for f in files if regex.match (f)] + # Build the OptionParser + parser = optparse.OptionParser() + parser.add_option('-n', '--not-recursive', dest='recursive', action='store_false', + default=config.get('options', 'recursive'), + help="Don't run recursively") -def find_all_par2_files (dir): - """Finds all par2 files in a directory""" - # NOTE: does NOT return absolute paths + parser.add_option('-d', '--directory', dest='directory', type='string', + default=config.get('directories', 'start'), + help="Start working at DIR", metavar='DIR') - if not os.path.isdir (os.path.abspath (dir)): - raise ValueError # bad directory given - - dir = os.path.abspath (dir) - files = os.listdir (dir) + parser.add_option('-i', '--interactive', dest='interactive', action='store_true', + default=config.get('options', 'interactive'), + help="Confirm before removing files") - return find_par2_files (files) - -def has_extension (f, ext): - """Checks if f has the extension ext""" - - if ext[0] != '.': - ext = '.' + ext + parser.add_option('--no-delete', dest='delete', action='store_false', + default=config.get('options', 'delete'), + help="Do not delete files used to repair") - ext = re.escape (ext) - regex = re.compile ('^.*%s$' % (ext, ), re.IGNORECASE) - return regex.match (f) + parser.add_option('-q', '--quiet', dest='quiet', action='count', + default=0, help="Output fatal messages only") -def find_extraction_heads (dir, files): - """Takes a list of possible files and finds likely heads of - extraction.""" + parser.add_option('-v', '--verbose', dest='verbose', action='count', + default=config.get('options', 'verbosity'), + help="Output extra information") - # NOTE: perhaps this should happen AFTER repair is - # NOTE: successful. That way all files would already exist + parser.add_option('-V', '--version', dest='version', action='store_true', + default=False, help="Output version information") + + parser.version = VERSION - # According to various sources online: - # 1) pre rar-3.0: .rar .r00 .r01 ... - # 2) post rar-3.0: .part01.rar .part02.rar - # 3) zip all ver: .zip + # Parse the given options + (options, args) = parser.parse_args() - extractor = None - p2files = find_par2_files (files) + # Postprocess the options, basically sanitizing them + options.directory = PAR2Set.utils.absolutePath(options.directory) - # Old RAR type, find all files ending in .rar - if is_oldrar (files): - extractor = RarslaveExtractor (TYPE_OLDRAR) - regex = re.compile ('^.*\.rar$', re.IGNORECASE) - for f in files: - if regex.match (f): - extractor.addHead (dir, f) + # Make sure that the directory is valid + if not os.path.isdir (options.directory): + sys.stderr.write ('\"%s\" is not a valid directory. Use the \"-d\"\n' % options.directory) + sys.stderr.write ('option to override the working directory temporarily, or edit the\n') + sys.stderr.write ('configuration file to override the working directory permanently.\n') + sys.exit (1) + + if options.version: + print PROGRAM + ' - ' + VERSION + print + print 'Copyright (c) 2005-2008 Ira W. Snyder (devel@irasnyder.com)' + print + print 'This program comes with ABSOLUTELY NO WARRANTY.' + print 'This is free software, and you are welcome to redistribute it' + print 'under certain conditions. See the file COPYING for details.' + sys.exit (0) + + return (options, args) + +################################################################################ + +# Find each unique CompareSet in the given directory and set of files +def findUniqueSets(directory, files): + + regex = r'^.*\.par2' + s = [] + + for f in PAR2Set.utils.findMatches(regex, files): + + try: + c = PAR2Set.CompareSet(directory, f) + except: + # We just ignore any errors that happen, such as + # parsing the PAR file + pass + else: + # Ok, we got a valid set, add it to s + if c not in s: + s.append(c) + + return s + +################################################################################ + +# Run each PAR2Set type on a CompareSet +def runEachType(cs, options): + + types = ( + PAR2Set.JoinProtected, + PAR2Set.Join, + PAR2Set.ZIP, + PAR2Set.OldRAR, + PAR2Set.NewRAR, + PAR2Set.ExtractFirstOldRAR, + PAR2Set.ExtractFirstNewRAR, + PAR2Set.NoExtract, + ) + + detected = False + + # Try to detect each type in turn + for t in types: + try: + instance = t(cs, options) + except TypeError: + logging.debug('%s not detected for %s' % (t.__name__, cs.parityFile)) + continue + else: + detected = True + logging.debug('%s detected for %s' % (t.__name__, cs.parityFile)) + + # We detected something, try to run it + try: + instance.run() + except (OSError, CalledProcessError): + logging.critical('Failure: %s' % instance) + else: + # Leave early, we're done + logging.info('Success: %s' % instance) + return + + # Check that at least one detection worked + if not detected: + logging.critical('Detection failed: %s' % cs.parityFile) + logging.debug('The following information will help to create a detector') + logging.debug('===== BEGIN CompareSet RAW INFO =====') + logging.debug(str(cs)) + logging.debug('===== END CompareSet RAW INFO =====') + + # If we got here, either the detection didn't work or the run itself didn't + # work, so print out the message telling the user that we were unsuccessful + logging.critical('Unsuccessful: %s' % cs.parityFile) + +################################################################################ + +def runDirectory(directory, files, options): + + logging.debug('Running in directory: %s' % directory) + sets = findUniqueSets(directory, files) + + for cs in sets: + try: + runEachType(cs, options) + except Exception, e: + import traceback + logging.error('Unknown Exception: %s' % cs.parityFile) + logging.error('===== BEGIN Bactrace =====') + [logging.error(l) for l in traceback.format_exc(e).split('\n')] + logging.error('===== END Bactrace =====') + +################################################################################ - if is_newrar (files): - extractor = RarslaveExtractor (TYPE_NEWRAR) - regex = re.compile ('^.*\.part01.rar$', re.IGNORECASE) - for f in files: - if regex.match (f): - extractor.addHead (dir, f) +def main (): - if is_zip (files): - extractor = RarslaveExtractor (TYPE_ZIP) - regex = re.compile ('^.*\.zip$', re.IGNORECASE) - for f in files: - if regex.match (f): - extractor.addHead (dir, f) - - if is_noextract (files): - # Use the Par2 Parser (from cfv) here to find out what files are protected. - # Since these are not being extracted, they will be mv'd to another directory - # later. - extractor = RarslaveExtractor (TYPE_NOEXTRACT) - - for f in p2files: - done = False - try: - prot_files = par2parser.get_protected_files (dir, f) - done = True - except: #FIXME: add the actual exceptions - print 'ERROR PARSING P2FILE ...', f - continue - - if done: - break - - if done: - for f in prot_files: - extractor.addHead (dir, f) - else: - print 'BADNESS' - - # Make sure we found the type - assert extractor != None - - return extractor - -def is_oldrar (files): - for f in files: - if has_extension (f, '.r00'): - return True - -def is_newrar (files): - for f in files: - if has_extension (f, '.part01.rar'): - return True - -def is_zip (files): - for f in files: - if has_extension (f, '.zip'): - return True - -def is_noextract (files): - # Type that needs no extraction. - # TODO: Add others ??? - for f in files: - if has_extension (f, '.001'): - return True - -def find_deleteable_files (files): - # Deleteable types regex should come from the config - dfiles = [] - dregex = re.compile ('^.*\.(par2|\d|\d\d\d|rar|r\d\d|zip)$', re.IGNORECASE) - - return [f for f in files if dregex.match (f)] - -def printlist (li): - for f in li: - print f + # Parse all of the command line options + (options, args) = parseCommandLineOptions() -def main (): - # Setup stage - print '\nSETUP STAGE' - DIR = os.path.abspath ('test_material/01/') - p2files = find_all_par2_files (DIR) - p2file = p2files[0] - - # Repair stage - print '\nREPAIR STAGE' - repairer = RarslaveRepairer (DIR, p2file) - repairer.checkAndRepair () - - # Extraction stage - print '\nEXTRACTION STAGE' - files = find_likely_files (get_basename (p2file), DIR) - extractor = find_extraction_heads (DIR, files) - extractor.extract('extract_dir') - - # Deletion stage - print '\nDELETION STAGE' - printlist ( find_deleteable_files (files) ) - - print '\n\n' - - # Setup stage - print '\nSETUP STAGE' - DIR = os.path.abspath ('test_material/13/') - p2files = find_all_par2_files (DIR) - p2file = p2files[0] - - # Repair stage - print '\nREPAIR STAGE' - RarslaveRepairer (DIR, p2file, join=True).checkAndRepair () - - # Extraction stage - print '\nEXTRACTION STAGE' - files = find_likely_files (get_basename (p2file), DIR) - find_extraction_heads (DIR, files).extract ('extract_dir') - - # Deletion stage - print '\nDELETION STAGE' - printlist ( find_deleteable_files (files) ) - - print '\n\n' - - # Setup stage - print '\nSETUP STAGE' - DIR = os.path.abspath ('test_material/14/') - p2files = find_all_par2_files (DIR) - p2file = p2files[0] - - # Repair stage - print '\nREPAIR STAGE' - RarslaveRepairer (DIR, p2file, join=True).checkAndRepair () - - # Extraction stage - print '\nEXTRACTION STAGE' - files = find_likely_files (get_basename (p2file), DIR) - find_extraction_heads (DIR, files).extract ('extract_dir') - - # Deletion stage - print '\nDELETEION STAGE' - printlist ( find_deleteable_files (files) ) + # Set up the logger + logger = DelayedLogger() + logging.basicConfig(stream=logger, level=logging.WARNING, \ + format='%(levelname)-8s %(message)s') + logging.getLogger().setLevel (findLogLevel(options)) + + # Run recursively + if options.recursive: + for (directory, subDirectories, files) in os.walk(options.directory): + runDirectory(directory, files, options) + # Non-recursive + else: + directory = options.directory + files = os.listdir(directory) + runDirectory(directory, files, options) + + # Print out all of the messages that have been accumulating + # in the DelayedLogger() + if logger.size() > 0: + print + print 'Log' + print '=' * 80 + logger.close() + +# Check if we were called directly if __name__ == '__main__': - main () + main () +