Subversion Repositories programming

Rev

Rev 220 | Rev 276 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

#!/usr/bin/env python

# Copyright: Ira W. Snyder (devel@irasnyder.com)
# Start Date: 2005-10-13
# End Date:
# License: GNU General Public License v2 (or at your option, any later version)
#
# Changelog Follows:
# - 2005-10-13
# - Added get_par2_filenames() to parse par2 files
# - Added the parset object to represent each parset.
#
# - 2005-10-14
# - Finished the parset object. It will now verify and extract parsets.
# - Small changes to the parset object. This makes the parjoin part
#   much more reliable.
# - Added the OptionParser to make this nice to run at the command line.
# - Made recursiveness an option.
# - Made start directory an option.
# - Check for appropriate programs before starting.
#
# - 2005-10-17
# - Use a regular expression to handle the deletable types.
#
# - 2005-10-18
# - Use regular expressions to handle all finding of files, instead of
#   using the glob module.
# - Add a config class to handle all the default config stuff sanely.
#   This makes it easier to change some of the main parts of the program to
#   your specific configuration.
# - Move the docrcchecks variable inside the get_par2_filenames() function,
#   which is where it belongs anyway.
# - Added command-line option to check for required programs at start.
#
# - 2005-10-20
# - Added a config option to extract with full path.
#
# - 2005-10-22
# - Re-wrote the config class so that there is a config file, which
#   resides at ~/.config/rarslave/rarslave.conf by default.
# - Added the command-line option -c to write out an updated version
#   of the config file (to fill in any missing options with the defaults)
# - Added the command-line option -f to write out a new default config file,
#   which overwrites any user changes.
# - Made all regexes case insensitive.
# - Made all command-line options override the config file.
#
# - 2005-10-30
# - Added the '-o' option, to output debugging info. Hopefully next time
#   someone finds a bug, they can output this and send it to me with a
#   description of the bug they're seeing.
#
# - 2005-11-05
# - Added an output system to rarslave. This makes a nice status report
#   possible at the end of the program run.
#
# - 2005-11-06
# - Fixed the rar command so that it can extract files whose names begin
#   with a hyphen.
#
# - 2006-03-08
# - Make an interactive mode which asks the user before deleting files.
#

################################################################################
# REQUIREMENTS:
#
# This code requires the programs cfv, par2repair, lxsplit, and rar to be able
# to function properly. I will attempt to check that these are in your path.
################################################################################

import ConfigParser, os

class rarslave_config:
    """Hold the program configuration: built-in defaults plus user overrides.

    Values are looked up by a (section, key) pair. Overrides are read from
    the config file (~/.config/rarslave/rarslave.conf by default); anything
    not overridden falls back to the defaults defined in __init__().
    """

    # Sections of the config file, in the order they are written to disk.
    __sections = ('directories', 'options', 'regular expressions')

    def __init__(self):
        self.__defaults = {
            ('directories', 'working_directory') : '~/downloads/usenet',
            ('options', 'recursive') : True,
            ('options', 'check_required_programs') : False,
            ('options', 'extract_with_full_path') : False,
            ('options', 'interactive') : False,
            ('regular expressions', 'par2_regex') : r'.*\.par2$',
            ('regular expressions', 'video_file_regex') : r'.*\.(avi|ogm|mkv|mp4)$',
            ('regular expressions', 'temp_repair_regex') : r'.*\.1$',
            ('regular expressions', 'remove_regex') : r'^.*\.(rar|r\d\d)$' }

        # Start with no overrides so get_value() already works while the
        # config file is being created by __read_config().
        self.__user_config = {}
        self.__user_config = self.__read_config()

    def __read_config(self, filename='~/.config/rarslave/rarslave.conf'):
        """Read the config file and return its contents as a dictionary
        mapping (section, option) to the raw string value.

        If the file does not exist yet, a default config is first written
        to that same location."""

        # Make sure the filename is corrected
        filename = os.path.abspath(os.path.expanduser(filename))

        # Write the default config if it doesn't exist. The filename is
        # passed along so the file is created where we are about to read it.
        if not os.path.isfile(filename):
            self.write_config(filename, default=True)

        parser = ConfigParser.ConfigParser()
        parser.read(filename)

        user_config = {}
        for section in parser.sections():
            for option in parser.options(section):
                user_config[(section, option)] = parser.get(section, option)

        return user_config

    def write_config(self, filename='~/.config/rarslave/rarslave.conf', default=False):
        """Write out the current config to the config file. If you set default=True, then
        all user overrides are dropped and the default config is written.

        The containing directory is created if necessary. If the directory
        or the file cannot be written, a message is printed and the program
        exits with status 1."""

        # Imported here because this class is instantiated before the
        # module-level import of sys further down the file has run.
        import sys

        parser = ConfigParser.ConfigParser()

        # Correct filename
        filename = os.path.abspath(os.path.expanduser(filename))

        # Reset all config to make sure we write the default one, if necessary
        if default:
            self.__user_config = {}
            print('Writing default config to %s' % (filename, ))

        # One section at a time, options in sorted order so the file
        # content is deterministic. Values are written as strings, which is
        # what ends up in the file anyway.
        for section in self.__sections:
            parser.add_section(section)
            for (s, k) in sorted(self.__defaults):
                if s == section:
                    parser.set(s, k, str(self.get_value(s, k)))

        # Try to make the directory that holds the config file
        directory = os.path.dirname(filename)
        if not os.path.isdir(directory):
            try:
                os.makedirs(directory)
            except OSError:
                print('Could not make directory: %s' % (directory, ))
                sys.exit(1)

        # Try to write the config file to disk
        try:
            fsock = open(filename, 'w')
            try:
                parser.write(fsock)
            finally:
                fsock.close()
        except (IOError, OSError):
            print('Could not open: %s for writing' % (filename, ))
            sys.exit(1)

    def __get_default_val(self, section, key):
        """Return the built-in default for (section, key).
        Raises KeyError if there is no such option."""
        return self.__defaults[(section, key)]

    def get_value(self, section, key):
        """Get a config value. Attempts to get the value from the user's
        config first, and then uses the default.

        The strings 'True' and 'False' are returned as real booleans.
        Raises KeyError if the option is unknown to both the user config
        and the defaults."""

        try:
            value = self.__user_config[(section, key)]
        except KeyError:
            value = self.__get_default_val(section, key)

        # Convert config options to booleans for easier use
        if value == 'True':
            value = True

        if value == 'False':
            value = False

        return value

# This is the global config variable. Creating it reads the user's config
# file, and writes a default one first if the file does not exist yet.
config = rarslave_config()

# This is the global options variable. It holds the parsed command-line
# options and is assigned by main(); until then it stays None.
options = None

################################################################################
# The rarslave_output class
#
# This class handles the nice output summary which is printed at the end
# of a run
################################################################################

class rarslave_output:
    """Collect the result of every parset and print a summary at the end.

    Data structure: list of lists
    [ [status, filename], ... ]

    Where status is one of:
    0: Verified and Extracted Perfectly
    1: Failed to Verify (and therefore Extract)
    2: Verified correctly, but failed to Extract
    3: The par2 file itself was corrupt
    """

    # (status, name of the counter attribute, table heading), in print order
    __tables = (
        (0, 'good_files', 'Files that were extracted perfectly'),
        (1, 'unverified', 'Files that failed to verify (and extract)'),
        (2, 'unextractable', 'Files that were verified, but failed to extract'),
        (3, 'corrupt_par2', 'Files that had corrupt par2 files'),
    )

    def __init__(self):
        self.output_list    = []
        self.good_files     = 0
        self.unverified     = 0
        self.unextractable  = 0
        self.corrupt_par2   = 0

    def print_equal_line(self, size=80):
        """Print a line of equal signs, `size` characters wide (80 by default)"""

        print('=' * size)

    def print_results_table(self):
        """Print a nice table of the results from this run.

        One table is printed per status, and only for statuses that have
        at least one file. A blank line is always printed at the end."""

        for status, counter, heading in self.__tables:
            if getattr(self, counter) > 0:
                print('')
                self.print_equal_line()
                print(heading)
                self.print_equal_line()

                for entry in self.output_list:
                    if entry[0] == status:
                        print('%s' % (entry[1], ))

        # Print a blank line at the end
        print('')

    def add_file(self, status, filename):
        """Record `filename` with the given status code (0-3, see the class
        docstring) and bump the matching counter.

        Raises ValueError for any other status; nothing is recorded then."""

        for known, counter, _heading in self.__tables:
            if status == known:
                setattr(self, counter, getattr(self, counter) + 1)
                break
        else:
            # We have a bad value, so raise a ValueError
            raise ValueError('unknown status code: %r' % (status, ))

        self.output_list.append([status, filename])

# This is the global output variable. Results are added to it with
# add_file() during the run, and main() prints its summary table at the end.
output = rarslave_output()

################################################################################
# The PAR2 Parser
#
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
################################################################################

import struct, errno

def chompnulls(line):
    """Return `line` cut off just before its first NUL character.
    A string that contains no NUL comes back unchanged."""
    head = line.split('\0', 1)[0]
    return head

def get_par2_filenames(filename):
    """Get all of the filenames that are protected by the par2
    file given as the filename.

    Returns the names in the order their file description packets appear,
    each one only once, as the raw byte strings stored in the par2 file.
    If the par2 file cannot be opened, a message is printed and an empty
    list is returned.

    Raises EnvironmentError (errno EINVAL) if the file is corrupt: a
    truncated or impossible packet, a packet whose MD5 does not match, no
    main packet, or a file listed in the main packet without a matching
    file description packet."""

    # Local import: hashlib replaces the deprecated md5 module.
    import hashlib

    try:
        par2 = open(filename, 'rb')
    except (IOError, OSError):
        print('Could not open %s' % (filename, ))
        return []

    pkt_header_fmt = '< 8s Q 16s 16s 16s'
    pkt_header_size = struct.calcsize(pkt_header_fmt)
    file_pkt_size = struct.calcsize('< 16s 16s 16s Q')
    main_pkt_size = struct.calcsize('< Q I')

    seen_file_ids = set()
    expected_file_ids = None
    filenames = []

    try:
        # Total size, so a bogus packet length can be rejected before we
        # try to read that many bytes.
        par2.seek(0, 2)
        total_size = par2.tell()
        par2.seek(0)

        while True:
            pkt_start = par2.tell()
            header = par2.read(pkt_header_size)
            if not header:
                break

            if len(header) < pkt_header_size:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - truncated packet header")

            _magic, pkt_len, pkt_md5, _set_id, pkt_type = \
                struct.unpack(pkt_header_fmt, header)

            # The length counts the header too, and the packet has to fit
            # inside what is left of the file.
            if pkt_len < pkt_header_size or pkt_len > total_size - pkt_start:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - bad packet length")

            body = par2.read(pkt_len - pkt_header_size)

            # We always want to do crc checks. The hash covers everything
            # after the hash field itself: the rest of the header + the body.
            control_md5 = hashlib.md5()
            control_md5.update(header[0x20:])
            control_md5.update(body)

            if control_md5.digest() != pkt_md5:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - bad packet hash")

            if pkt_type == b'PAR 2.0\0FileDesc':
                if len(body) < file_pkt_size:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - short file description packet")

                # The packet starts with the 16 byte file id; the name
                # follows the fixed-size part and is padded with NULs.
                file_id = body[:16]

                if file_id not in seen_file_ids:
                    seen_file_ids.add(file_id)
                    filenames.append(body[file_pkt_size:].split(b'\0', 1)[0])

            elif pkt_type == b'PAR 2.0\0Main\0\0\0\0':
                # Only the first main packet is used
                if expected_file_ids is None:
                    if len(body) < main_pkt_size:
                        raise EnvironmentError(errno.EINVAL,
                            "corrupt par2 file - short main packet")

                    # Every complete 16 byte id after the fixed-size part
                    # is expected to have a file description packet.
                    id_count = (len(body) - main_pkt_size) // 16
                    expected_file_ids = [
                        body[i:i+16] for i in
                        range(main_pkt_size, main_pkt_size + id_count * 16, 16)]

            # Any other packet type: nothing to do, the body is already read
    finally:
        par2.close()

    if expected_file_ids is None:
        raise EnvironmentError(errno.EINVAL,
            "corrupt or unsupported par2 file - no main packet found")

    for expected in expected_file_ids:
        if expected not in seen_file_ids:
            raise EnvironmentError(errno.EINVAL,
                "corrupt or unsupported par2 file - "
                "expected file description packet not found")

    return filenames

################################################################################
# The parset object
#
# This is an object based representation of a parset, and will verify itself
# and extract itself, if possible.
################################################################################

import os, glob, re

class parset:
    """One par2 set: the main par2 file, any extra par2 files that protect
    the same files, and the list of protected files.

    A parset can verify (and, if needed, repair) itself and then extract
    the rar archive it protects, removing the archive and par2 files
    afterwards."""

    def __init__(self, par_filename):
        self.parfile = par_filename
        self.extra_pars = []
        # Filled in by whoever creates the parset; an empty list rather
        # than False so the methods below can always iterate over it.
        self.files = []
        self.used_parjoin = False
        self.verified = False
        self.extracted = False

    def __run(self, argv):
        """Run an external program and return True if it exited with
        status 0.

        The program is started directly from the argument list, without a
        shell, so file names are never interpreted as shell syntax."""
        import subprocess

        try:
            return subprocess.call(argv) == 0
        except OSError:
            print('Could not run program: %s' % (argv[0], ))
            return False

    def print_debug_info(self):
        """Special function for debugging"""
        print('========== DEBUG INFO STARTS HERE ==========')
        print('=== parfile ===')
        print(self.parfile)
        print('')

        print('=== extra_pars ===')
        for f in self.extra_pars:
            print(f)

        print('')

        print('=== files ===')
        for f in self.files:
            print(f)

        print('=========== DEBUG INFO ENDS HERE ===========')

    def get_filenames(self):
        """Return the filenames protected by the main par2 file"""
        return get_par2_filenames(self.parfile)

    def all_there(self):
        """Check if all the files for the parset are present.
        This will help us decide which par2 checker to use first"""
        for f in self.files:
            if not os.path.isfile(f):
                return False

        # The files were all there
        return True

    def verify(self):
        """This will verify the parset by the most efficient method first,
        and then move to a slower method if that one fails.

        Returns True if the set verified (possibly after repair); the
        result is also stored in self.verified."""

        if self.all_there():
            # Try it fast; if that fails try it slow, maybe it needs repair
            retval = self.__fast_verify() or self.__slow_verify()

        # If we've got a video file, maybe we should try to parjoin it
        elif self.__has_video_file():
            retval = self.__parjoin()

        else: #not all there, maybe we can slow-repair
            retval = self.__slow_verify()

        self.verified = retval
        return self.verified

    def __fast_verify(self):
        """Verify with cfv. True on success."""
        return self.__run(['cfv', '-v', '-f', self.parfile])

    def __slow_verify(self):
        """Verify and repair with par2repair. True on success."""
        return self.__run(['par2repair', self.parfile])

    def __parjoin(self):
        """Join the split pieces of the first file with lxsplit, then
        verify. The joined file is removed again if verification fails."""

        # The result of the join itself is not checked: the verification
        # below decides whether it worked.
        self.__run(['lxsplit', '-j', '%s.001' % (self.files[0], )])

        # Try it fast; if that fails try it slow, maybe it needs repair
        retval = self.__fast_verify() or self.__slow_verify()

        if not retval: # failed to verify, so remove the lxsplit created file
            try:
                os.remove(self.files[0])
            except OSError:
                print('Failed to remove file: %s' % (self.files[0], ))

        self.used_parjoin = retval
        self.verified = retval
        return self.verified

    def __has_video_file(self):
        """True if any protected file matches the video file regex"""
        regex = re.compile(
                config.get_value('regular expressions', 'video_file_regex'),
                re.IGNORECASE)

        for f in self.files:
            if regex.match(f):
                return True

        return False

    def __remove_currentset(self):
        """Remove all of the files that are extractable, as well as the pars.
        Leave everything else alone.

        In interactive mode the list of files is shown first and nothing
        is removed unless the user confirms."""

        if not self.extracted:
            print('Did not extract yet, not removing currentset')
            return

        # remove the main par and all of the extra pars
        files_to_remove = [self.parfile]
        files_to_remove.extend(self.extra_pars)

        # remove any rars that are associated (leave EVERYTHING else)
        # This regex matches both old and new style rar(s) by default.
        regex = re.compile(
                config.get_value('regular expressions', 'remove_regex'),
                re.IGNORECASE)
        files_to_remove.extend([f for f in self.files if regex.match(f)])

        cwd_entries = os.listdir(os.getcwd())

        # remove any .{001,002,...} files (from parjoin). Only names of the
        # form "<joined file>.<digits>" qualify, so an unrelated file that
        # merely contains the joined file's name is left alone.
        if self.used_parjoin:
            piece_regex = re.compile(re.escape(self.files[0]) + r'\.\d+$')
            files_to_remove.extend(
                    [f for f in cwd_entries if piece_regex.match(f)])

        # remove any temp repair files
        regex = re.compile(
                config.get_value('regular expressions', 'temp_repair_regex'),
                re.IGNORECASE)
        files_to_remove.extend([f for f in cwd_entries if regex.match(f)])

        if not options.interactive:
            self.__remove_list_of_files(files_to_remove)
            return

        # interactively remove files
        print('') # blank line
        for f in files_to_remove:
            print(f)

        print('========================================')

        # raw_input() was renamed to input() in Python 3
        try:
            ask = raw_input
        except NameError:
            ask = input

        while True:
            try:
                answer = ask("Delete files [y,n]: ").strip().lower()
            except EOFError:
                # No more input to read: the safe answer is "no"
                answer = 'n'

            if answer in ('y', 'yes'):
                self.__remove_list_of_files(files_to_remove)
                break
            elif answer in ('n', 'no'):
                print('Not removing files')
                break
            else:
                print('Bad selection, try again...')

    def __remove_list_of_files(self, files_to_remove):
        """Remove all files in the list.

        A name that is listed more than once is removed only once, and a
        file that cannot be removed is reported and skipped, so the rest
        of the list is still processed."""

        already_handled = set()

        for f in files_to_remove:
            if f in already_handled:
                continue
            already_handled.add(f)

            try:
                os.remove(f)
            except OSError:
                print('Failed to remove file: %s' % (f, ))

    def __get_extract_file(self):
        """Find the first extractable file (.rar, in any letter case).
        Returns None if there is none."""
        for i in self.files:
            if os.path.splitext(i)[1].lower() == '.rar':
                return i

        return None

    def extract(self):
        """Attempt to extract all of the files related to this parset.

        The outcome is recorded in the global output object. Returns True
        if the set was verified and either extracted fine or had nothing
        to extract; in that case the set's files are removed afterwards."""
        if not self.verified:
            self.extracted = False
            output.add_file(1, self.parfile)
            return False #failed to extract

        extract_file = self.__get_extract_file()

        if extract_file is not None:
            # 'x' keeps the paths stored in the archive, 'e' flattens them
            if config.get_value('options', 'extract_with_full_path'):
                mode = 'x'
            else:
                mode = 'e'

            # '--' ends the options, so names starting with a hyphen work
            if not self.__run(['rar', mode, '-o+', '--', extract_file]):
                output.add_file(2, self.parfile)
                self.extracted = False
                return self.extracted

        # we extracted ok, so remove the currentset
        self.extracted = True
        self.__remove_currentset()

        output.add_file(0, self.parfile)

        return self.extracted


################################################################################
# The rarslave program itself
################################################################################

import os, sys
from optparse import OptionParser

def check_required_progs(programs=('cfv', 'par2repair', 'lxsplit', 'rar')):
    """Check if the required programs are installed.

    Each name in `programs` is looked up as an executable file in the
    directories listed in $PATH. If any are missing, one line is printed
    for each of them and the program exits with status 1. Returns None
    if everything was found."""

    search_dirs = [d for d in os.environ.get('PATH', '').split(os.pathsep) if d]
    needed = []

    for prog in programs:
        for directory in search_dirs:
            candidate = os.path.join(directory, prog)
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                break
        else:
            # Not found in any of the directories
            needed.append(prog)

    if needed:
        for n in needed:
            print('Needed program "%s" not found in $PATH' % (n, ))

        sys.exit(1)

def get_parsets():
    """Build one parset instance for every distinct set of protected files
    found among the par2 files of the current directory, and return them
    as a list.

    A par2 file that protects the same files as an already known parset
    is added to that parset's extra_pars instead of getting its own
    instance. A par2 file that cannot be parsed is recorded in the global
    output with status 3 and skipped."""

    pattern = re.compile(
            config.get_value('regular expressions', 'par2_regex'),
            re.IGNORECASE)

    found = []

    for name in [entry for entry in os.listdir(os.getcwd()) if pattern.match(entry)]:
        try:
            protected = get_par2_filenames(name)
        except EnvironmentError:
            output.add_file(3, name)
            continue

        # Attach this par2 file to every known set protecting the same files
        matched = False
        for known in found:
            if known.files == protected:
                known.extra_pars.append(name)
                matched = True

        if matched:
            continue

        # First time we see this set of files, so it gets its own parset
        fresh = parset(name)
        fresh.files = protected
        found.append(fresh)

    return found

def directory_worker(dir):
    """Attempts to find, verify, and extract every parset in the directory
    given as a parameter.

    The process changes into `dir` while it works and always changes back
    to the previous working directory afterwards, even if processing a
    parset raises. When the debug_info option is set, the debug info of
    every parset is printed and nothing is verified or extracted."""

    previous_dir = os.getcwd()
    os.chdir(dir)

    try:
        parsets = get_parsets()

        # Print debug info if we're supposed to, and do nothing else
        if options.debug_info:
            for p in parsets:
                p.print_debug_info()
            return

        # Verify each parset
        for p in parsets:
            p.verify()

        # Attempt to extract each parset
        for p in parsets:
            p.extract()
    finally:
        # The caller (and the next directory) relies on the original
        # working directory being restored.
        os.chdir(previous_dir)

def main():
    """Parse the command line, run the requested maintenance actions and
    then process the working directory.

    The parsed options are stored in the global `options`. Defaults for
    the options that have a config entry are taken from the global config,
    so the command line overrides the config file. The results table is
    printed at the end."""

    global options

    # (flags, keyword arguments) for every command-line option, in the
    # order they are registered with the parser
    option_table = [
        (('-n', '--not-recursive'),
         dict(action='store_false', dest='recursive',
              default=config.get_value('options', 'recursive'),
              help="Don't run recursively")),

        (('-d', '--work-dir'),
         dict(dest='work_dir',
              default=config.get_value('directories', 'working_directory'),
              help="Start running at DIR", metavar='DIR')),

        (('-p', '--check-required-programs'),
         dict(action='store_true', dest='check_progs',
              default=config.get_value('options', 'check_required_programs'),
              help="Check for required programs")),

        (('-f', '--write-default-config'),
         dict(action='store_true', dest='write_def_config',
              default=False, help="Write out a new default config")),

        (('-c', '--write-new-config'),
         dict(action='store_true', dest='write_config',
              default=False, help="Write out the current config")),

        (('-o', '--output-debug-info'),
         dict(action='store_true', dest='debug_info',
              default=False,
              help="Output debug info for every parset, then exit")),

        (('-i', '--interactive'),
         dict(dest='interactive', action='store_true',
              default=config.get_value('options', 'interactive'),
              help="Confirm before removing files")),
    ]

    parser = OptionParser()
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    options, _leftover_args = parser.parse_args()

    # Turn the working directory into an absolute path, expanding "~"
    options.work_dir = os.path.abspath(os.path.expanduser(options.work_dir))

    # Make sure the external programs are available, if asked to
    if options.check_progs:
        check_required_progs()

    # Replace the config file with the defaults, if asked to
    if options.write_def_config:
        config.write_config(default=True)

    # Rewrite the current config (adds new options to an existing config)
    if options.write_config:
        config.write_config()

    # Run rarslave, either on the one directory or on the whole tree
    if not options.recursive:
        directory_worker(options.work_dir)
    else:
        for walk_entry in os.walk(options.work_dir):
            directory_worker(walk_entry[0])

    # Print the results
    output.print_results_table()

# Only run the program when this file is executed as a script
if __name__ == '__main__':
    main()