X-Git-Url: https://www.irasnyder.com/gitweb/?p=rarslave2.git;a=blobdiff_plain;f=rarslave.py;h=20c028840d3f38f43fb57475301a3e3b9aaf760e;hp=3f251f3b4f27733f33b09d5901f1a693ac80cdc2;hb=HEAD;hpb=e13c9c644cd762dcc0266dfa6b75fc003454ed61 diff --git a/rarslave.py b/rarslave.py old mode 100644 new mode 100755 index 3f251f3..20c0288 --- a/rarslave.py +++ b/rarslave.py @@ -1,426 +1,370 @@ #!/usr/bin/env python -# vim: set ts=4 sts=4 sw=4 textwidth=112 : +# vim: set ts=4 sts=4 sw=4 textwidth=80: + +""" +The main program of the rarslave project + +This handles all of the commandline and configuration file work, then tries to +repair, extract, and delete any PAR2Sets that it finds. +""" + +__author__ = "Ira W. Snyder (devel@irasnyder.com)" +__copyright__ = "Copyright (c) 2006-2008 Ira W. Snyder (devel@irasnyder.com)" +__license__ = "GNU GPL v2 (or, at your option, any later version)" + +# rarslave.py -- a usenet autorepair and autoextract utility +# +# Copyright (C) 2006-2008 Ira W. Snyder (devel@irasnyder.com) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -import re, os, sys -import par2parser -import RarslaveConfig +VERSION = "2.1.0" +PROGRAM = "rarslave" -# Global Variables -(TYPE_OLDRAR, TYPE_NEWRAR, TYPE_ZIP, TYPE_NOEXTRACT) = range (4) -(ECHECK, EEXTRACT, EDELETE) = range(1,4) -config = RarslaveConfig.RarslaveConfig() +import os, sys, optparse, logging, ConfigParser +from subprocess import CalledProcessError +import PAR2Set -class RarslaveExtractor (object): +################################################################################ - def __init__ (self, type): - self.type = type - self.heads = [] +# A simple-ish configuration class +class RarslaveConfig(object): - def addHead (self, dir, head): - assert os.path.isdir (dir) - # REQUIRES that the dir is valid, but not that the file is valid, so that - # we can move a file that doesn't exist yet. - # FIXME: probably CAN add this back, since we should be running this AFTER repair. - #assert os.path.isfile (os.path.join (dir, head)) - - self.heads.append (os.path.join (dir, head)) + DEFAULT_CONFIG_FILE = PAR2Set.utils.absolutePath( + os.path.join('~', '.config', 'rarslave', 'rarslave.conf')) - def extract (self, todir=None): - # Extract all heads of this set + def __init__(self, fileName=DEFAULT_CONFIG_FILE): - # Create the directory $todir if it doesn't exist - if todir != None and not os.path.isdir (todir): - # TODO: LOGGER - try: - os.makedirs (todir) - except OSError: - # TODO: LOGGER - return -EEXTRACT + # Make sure that the fileName is in absolute form + self.fileName = os.path.abspath(os.path.expanduser(fileName)) - # Extract all heads - extraction_func = \ - { TYPE_OLDRAR : self.__extract_rar, - TYPE_NEWRAR : self.__extract_rar, - TYPE_ZIP : self.__extract_zip, - TYPE_NOEXTRACT : self.__extract_noextract }[self.type] + # Open it with ConfigParser + self.config = ConfigParser.SafeConfigParser() + self.config.read(fileName) - # Call the extraction function on each head - for h in self.heads: - if todir == None: - # Run in the head's directory - extraction_func (h, os.path.dirname (h)) - else: - extraction_func (h, todir) + # Setup the default dictionary + self.defaults = dict() - def __extract_rar (self, file, todir): - assert os.path.isfile (file) - assert os.path.isdir (todir) + # Add all of the defaults + self.add_default('directories', 'start', + os.path.join('~', 'downloads'), + PAR2Set.utils.absolutePath) + self.add_default('options', 'recursive', True, self.toBool) + self.add_default('options', 'interactive', False, self.toBool) + self.add_default('options', 'verbosity', 0, self.toInt) + self.add_default('options', 'delete', True, self.toBool) - RAR_CMD = config.get_value ('commands', 'unrar') + # Add a new default value + def add_default(self, section, key, value, typeConverter): - cmd = '%s \"%s\"' % (RAR_CMD, file) - ret = run_command (cmd, todir) + self.defaults[(section, key)] = (value, typeConverter) - # Check error code - if ret != 0: - return -EEXTRACT + # Get the default value + def get_default(self, section, key): - def __extract_zip (self, file, todir): - ZIP_CMD = config.get_value ('commands', 'unzip') + (value, typeConverter) = self.defaults[(section, key)] + return value - cmd = ZIP_CMD % (file, todir) - ret = run_command (cmd) + # Coerce the value from a string into the correct type + def coerceValue(self, section, key, value): - # Check error code - if ret != 0: - return -EEXTRACT + (defaultValue, typeConverter) = self.defaults[(section, key)] - def __extract_noextract (self, file, todir): - # Just move this file to the $todir, since no extraction is needed - # FIXME: NOTE: mv will fail by itself if you're moving to the same dir! - NOEXTRACT_CMD = config.get_value ('commands', 'noextract') + # Try the coercion, error and exit if there is a problem + try: + return typeConverter(value) + except: + sys.stderr.write('Unable to parse configuration file\n') + sys.stderr.write('-> at section: %s\n' % section) + sys.stderr.write('-> at key: %s\n' % key) + sys.exit(2) - cmd = NOEXTRACT_CMD % (file, todir) - ret = run_command (cmd) + # Return the value + def get(self, section, key): - # Check error code - if ret != 0: - return -EEXTRACT + try: + # Get the user-provided value + value = self.config.get(section, key) + except: + # Oops, they didn't provide it, use the default + # NOTE: if you get an exception here, check your code ;) + value = self.defaults[(section, key)] + # Try to evaluate some safe things, for convenience + return self.coerceValue(section, key, value) + # Convert a string to an int (any base) + def toInt(s): + return int(s, 0) -class RarslaveRepairer (object): - # Verify (and repair) the set - # Make sure it worked, otherwise clean up and return failure + # Mark it static + toInt = staticmethod(toInt) - def __init__ (self, dir, file, join=False): - self.dir = dir # the directory containing the par2 file - self.file = file # the par2 file - self.join = join # True if the par2 set is 001 002 ... + # Convert a string to a bool + def toBool(s): + if s in ['t', 'T', 'True', 'true', 'yes', '1']: + return True - assert os.path.isdir (dir) - assert os.path.isfile (os.path.join (dir, file)) + if s in ['f', 'F', 'False', 'false', 'no', '0']: + return False - def checkAndRepair (self): - # Form the command: - # par2repair -- PAR2 PAR2_EXTRA [JOIN_FILES] - PAR2_CMD = config.get_value ('commands', 'par2repair') + raise ValueError - # Get set up - basename = get_basename (self.file) - all_files = find_likely_files (basename, self.dir) - all_files.sort () - par2_files = find_par2_files (all_files) + # Mark it static + toBool = staticmethod(toBool) - # assemble the command - command = "%s \"%s\" " % (PAR2_CMD, self.file) +################################################################################ - for f in par2_files: - if f != self.file: - command += "\"%s\" " % get_filename(f) +# Global configuration, read from default configuration file +config = RarslaveConfig() - if self.join: - for f in all_files: - if f not in par2_files: - command += "\"%s\" " % get_filename(f) +################################################################################ - # run the command - ret = run_command (command, self.dir) +# A tiny class to hold logging output until we're finished +class DelayedLogger (object): - # check the result - if ret != 0: - # TODO: logger - print 'error during checkAndRepair()' - return -ECHECK + """A small class to hold logging output until the program is finished running. + It emulates sys.stdout in the needed ways for the logging module.""" -def run_command (cmd, indir=None): - # Runs the specified command-line in the directory given (or, in the current directory - # if none is given). It returns the status code given by the application. + def __init__ (self, output=sys.stdout.write): + self.__messages = [] + self.__output = output - pwd = os.getcwd () + def write (self, msg): + self.__messages.append (msg) - if indir != None: - assert os.path.isdir (indir) # MUST be a directory! - os.chdir (indir) + def flush (self): + pass - # FIXME: re-enable this after testing - print 'RUNNING (%s): %s' % (indir, cmd) - return 0 + def size (self): + """Returns the number of messages queued for printing""" + return len (self.__messages) - # return os.system (cmd) + def close (self): + """Print all messages, clear the queue""" + map(self.__output, self.__messages) + self.__messages = [] +################################################################################ -def full_abspath (p): - return os.path.abspath (os.path.expanduser (p)) +# Convert from the verbose command line option to the logging level that +# will be used by the logging class to print messages +def findLogLevel(options): -def get_filename (f): - # TODO: I don't think that we should enforce this... - # TODO: ... because I think we should be able to get the filename, regardless - # TODO: of whether this is a legit filename RIGHT NOW or not. - # assert os.path.isfile (f) - return os.path.split (f)[1] + level = options.verbose - options.quiet -def get_basename (name): - """Strips most kinds of endings from a filename""" + if level < -3: + level = -3 - regex = config.get_value ('regular expressions', 'basename_regex') - r = re.compile (regex, re.IGNORECASE) - done = False + if level > 1: + level = 1 - while not done: - done = True + LEVELS = { + 1 : logging.DEBUG, + 0 : logging.INFO, + -1 : logging.WARNING, + -2 : logging.ERROR, + -3 : logging.CRITICAL + } - if r.match (name): - g = r.match (name).groups() - name = g[0] - done = False + return LEVELS[level] - return name +################################################################################ -def find_likely_files (name, dir): - """Finds files which are likely to be part of the set corresponding - to $name in the directory $dir""" +def parseCommandLineOptions(): - if not os.path.isdir (os.path.abspath (dir)): - raise ValueError # bad directory given + # Build the OptionParser + parser = optparse.OptionParser() + parser.add_option('-n', '--not-recursive', dest='recursive', action='store_false', + default=config.get('options', 'recursive'), + help="Don't run recursively") - dir = os.path.abspath (dir) - ename = re.escape (name) - regex = re.compile ('^%s.*$' % (ename, )) + parser.add_option('-d', '--directory', dest='directory', type='string', + default=config.get('directories', 'start'), + help="Start working at DIR", metavar='DIR') - return [f for f in os.listdir (dir) if regex.match (f)] + parser.add_option('-i', '--interactive', dest='interactive', action='store_true', + default=config.get('options', 'interactive'), + help="Confirm before removing files") -def find_par2_files (files): - """Find all par2 files in the list $files""" + parser.add_option('--no-delete', dest='delete', action='store_false', + default=config.get('options', 'delete'), + help="Do not delete files used to repair") - PAR2_REGEX = config.get_value ('regular expressions', 'par2_regex') - regex = re.compile (PAR2_REGEX, re.IGNORECASE) - return [f for f in files if regex.match (f)] + parser.add_option('-q', '--quiet', dest='quiet', action='count', + default=0, help="Output fatal messages only") -def find_all_par2_files (dir): - """Finds all par2 files in a directory""" - # NOTE: does NOT return absolute paths + parser.add_option('-v', '--verbose', dest='verbose', action='count', + default=config.get('options', 'verbosity'), + help="Output extra information") - if not os.path.isdir (os.path.abspath (dir)): - raise ValueError # bad directory given - - dir = os.path.abspath (dir) - files = os.listdir (dir) - - return find_par2_files (files) - -def has_extension (f, ext): - """Checks if f has the extension ext""" - - if ext[0] != '.': - ext = '.' + ext + parser.add_option('-V', '--version', dest='version', action='store_true', + default=False, help="Output version information") + + parser.version = VERSION - ext = re.escape (ext) - regex = re.compile ('^.*%s$' % (ext, ), re.IGNORECASE) - return regex.match (f) + # Parse the given options + (options, args) = parser.parse_args() -def find_extraction_heads (dir, files): - """Takes a list of possible files and finds likely heads of - extraction.""" + # Postprocess the options, basically sanitizing them + options.directory = PAR2Set.utils.absolutePath(options.directory) - # NOTE: perhaps this should happen AFTER repair is - # NOTE: successful. That way all files would already exist - - # According to various sources online: - # 1) pre rar-3.0: .rar .r00 .r01 ... - # 2) post rar-3.0: .part01.rar .part02.rar - # 3) zip all ver: .zip - - extractor = None - p2files = find_par2_files (files) - - # Old RAR type, find all files ending in .rar - if is_oldrar (files): - extractor = RarslaveExtractor (TYPE_OLDRAR) - regex = re.compile ('^.*\.rar$', re.IGNORECASE) - for f in files: - if regex.match (f): - extractor.addHead (dir, f) - - if is_newrar (files): - extractor = RarslaveExtractor (TYPE_NEWRAR) - regex = re.compile ('^.*\.part01.rar$', re.IGNORECASE) - for f in files: - if regex.match (f): - extractor.addHead (dir, f) - - if is_zip (files): - extractor = RarslaveExtractor (TYPE_ZIP) - regex = re.compile ('^.*\.zip$', re.IGNORECASE) - for f in files: - if regex.match (f): - extractor.addHead (dir, f) - - if is_noextract (files): - # Use the Par2 Parser (from cfv) here to find out what files are protected. - # Since these are not being extracted, they will be mv'd to another directory - # later. - extractor = RarslaveExtractor (TYPE_NOEXTRACT) - - for f in p2files: - done = False - try: - prot_files = par2parser.get_protected_files (dir, f) - done = True - except: #FIXME: add the actual exceptions - print 'ERROR PARSING P2FILE ...', f - continue - - if done: - break - - if done: - for f in prot_files: - extractor.addHead (dir, f) - else: - print 'BADNESS' - - # Make sure we found the type - assert extractor != None - - return extractor - -def is_oldrar (files): - for f in files: - if has_extension (f, '.r00'): - return True - -def is_newrar (files): - for f in files: - if has_extension (f, '.part01.rar'): - return True - -def is_zip (files): - for f in files: - if has_extension (f, '.zip'): - return True - -def is_noextract (files): - # Type that needs no extraction. - # TODO: Add others ??? - for f in files: - if has_extension (f, '.001'): - return True - -def find_deleteable_files (files): - # Deleteable types regex should come from the config - dfiles = [] - DELETE_REGEX = config.get_value ('regular expressions', 'delete_regex') - dregex = re.compile (DELETE_REGEX, re.IGNORECASE) - - return [f for f in files if dregex.match (f)] - -def printlist (li): - for f in li: - print f - -class PAR2Set (object): - - dir = None - file = None - likely_files = [] - - def __init__ (self, dir, file): - assert os.path.isdir (dir) - assert os.path.isfile (os.path.join (dir, file)) - - self.dir = dir - self.file = file - - basename = get_basename (file) - self.likely_files = find_likely_files (basename, dir) - - def __list_eq (self, l1, l2): - - if len(l1) != len(l2): - return False - - for e in l1: - if e not in l2: - return False - - return True - - def __eq__ (self, rhs): - return self.__list_eq (self.likely_files, rhs.likely_files) - - def run_all (self): - par2files = find_par2_files (self.likely_files) - par2head = par2files[0] - - join = is_noextract (self.likely_files) - - # Repair Stage - repairer = RarslaveRepairer (self.dir, par2head, join) - ret = repairer.checkAndRepair () # FIXME: Check return value - - if ret: # FAILURE - return -ECHECK - - # Extraction Stage - EXTRACT_DIR = config.get_value ('directories', 'extract_directory') - extractor = find_extraction_heads (self.dir, self.likely_files) - ret = extractor.extract (EXTRACT_DIR) - - if ret: # FAILURE - return -EEXTRACT - - # Deletion Stage - DELETE_INTERACTIVE = config.get_value ('options', 'interactive') - deleteable_files = find_deleteable_files (self.likely_files) - ret = delete_list (deleteable_files, DELETE_INTERACTIVE) - - if ret: # FAILURE - return -EDELETE - - return 0 - -def delete_list (files, interactive=False): - # Delete a list of files - # TODO: Add the ability to confirm deletion, like in the original rarslave - - if interactive: - # TODO: prompt here - # prompt -> OK_TO_DELETE -> do nothing, fall through - # prompt -> NOT_OK -> return immediately - pass - - for f in files: - # FIXME: re-enable this in production - # os.remove (f) - print 'rm', f - - return 0 + # Make sure that the directory is valid + if not os.path.isdir (options.directory): + sys.stderr.write ('\"%s\" is not a valid directory. Use the \"-d\"\n' % options.directory) + sys.stderr.write ('option to override the working directory temporarily, or edit the\n') + sys.stderr.write ('configuration file to override the working directory permanently.\n') + sys.exit (1) + + if options.version: + print PROGRAM + ' - ' + VERSION + print + print 'Copyright (c) 2005-2008 Ira W. Snyder (devel@irasnyder.com)' + print + print 'This program comes with ABSOLUTELY NO WARRANTY.' + print 'This is free software, and you are welcome to redistribute it' + print 'under certain conditions. See the file COPYING for details.' + sys.exit (0) + + return (options, args) + +################################################################################ + +# Find each unique CompareSet in the given directory and set of files +def findUniqueSets(directory, files): + + regex = r'^.*\.par2' + s = [] + + for f in PAR2Set.utils.findMatches(regex, files): + + try: + c = PAR2Set.CompareSet(directory, f) + except: + # We just ignore any errors that happen, such as + # parsing the PAR file + pass + else: + # Ok, we got a valid set, add it to s + if c not in s: + s.append(c) + + return s + +################################################################################ + +# Run each PAR2Set type on a CompareSet +def runEachType(cs, options): + + types = ( + PAR2Set.JoinProtected, + PAR2Set.Join, + PAR2Set.ZIP, + PAR2Set.OldRAR, + PAR2Set.NewRAR, + PAR2Set.ExtractFirstOldRAR, + PAR2Set.ExtractFirstNewRAR, + PAR2Set.NoExtract, + ) + + detected = False + + # Try to detect each type in turn + for t in types: + try: + instance = t(cs, options) + except TypeError: + logging.debug('%s not detected for %s' % (t.__name__, cs.parityFile)) + continue + else: + detected = True + logging.debug('%s detected for %s' % (t.__name__, cs.parityFile)) + + # We detected something, try to run it + try: + instance.run() + except (OSError, CalledProcessError): + logging.critical('Failure: %s' % instance) + else: + # Leave early, we're done + logging.info('Success: %s' % instance) + return + + # Check that at least one detection worked + if not detected: + logging.critical('Detection failed: %s' % cs.parityFile) + logging.debug('The following information will help to create a detector') + logging.debug('===== BEGIN CompareSet RAW INFO =====') + logging.debug(str(cs)) + logging.debug('===== END CompareSet RAW INFO =====') + + # If we got here, either the detection didn't work or the run itself didn't + # work, so print out the message telling the user that we were unsuccessful + logging.critical('Unsuccessful: %s' % cs.parityFile) + +################################################################################ + +def runDirectory(directory, files, options): + + logging.debug('Running in directory: %s' % directory) + sets = findUniqueSets(directory, files) + + for cs in sets: + try: + runEachType(cs, options) + except Exception, e: + import traceback + logging.error('Unknown Exception: %s' % cs.parityFile) + logging.error('===== BEGIN Bactrace =====') + [logging.error(l) for l in traceback.format_exc(e).split('\n')] + logging.error('===== END Bactrace =====') + +################################################################################ +def main (): -def generate_all_parsets (dir): - # Generate all parsets in the given directory. + # Parse all of the command line options + (options, args) = parseCommandLineOptions() - assert os.path.isdir (dir) # Directory MUST be valid + # Set up the logger + logger = DelayedLogger() + logging.basicConfig(stream=logger, level=logging.WARNING, \ + format='%(levelname)-8s %(message)s') + logging.getLogger().setLevel (findLogLevel(options)) - parsets = [] - p2files = find_all_par2_files (dir) + # Run recursively + if options.recursive: + for (directory, subDirectories, files) in os.walk(options.directory): + runDirectory(directory, files, options) - for f in p2files: - p = PAR2Set (dir, f) - if p not in parsets: - parsets.append (p) + # Non-recursive + else: + directory = options.directory + files = os.listdir(directory) - return parsets + runDirectory(directory, files, options) -def main (): - TOPDIR = os.path.abspath ('test_material') - - for (dir, subdirs, files) in os.walk (TOPDIR): - print 'DEBUG: IN DIRECTORY:', dir - parsets = generate_all_parsets (dir) - for p in parsets: - p.run_all () + # Print out all of the messages that have been accumulating + # in the DelayedLogger() + if logger.size() > 0: + print + print 'Log' + print '=' * 80 + logger.close() +# Check if we were called directly if __name__ == '__main__': - main () + main () +