WebSVN – programming – Path Comparison – / – /projects/ Rev 178 and /projects/ Rev 179

Ignore whitespace Rev 178 → Rev 179

 /projects/rarslave/rarslave.py
 ,0 → 1,720
+#!/usr/bin/env python
+# Copyright: Ira W. Snyder (devel@irasnyder.com)
+# Start Date: 2005-10-13
+# End Date:
+# License: GNU General Public License v2 (or at your option, any later version)
+#
+# Changelog Follows:
+# - 2005-10-13
+# - Added get_par2_filenames() to parse par2 files
+# - Added the parset object to represent each parset.
+#
+# - 2005-10-14
+# - Finished the parset object. It will now verify and extract parsets.
+# - Small changes to the parset object. This makes the parjoin part
+#   much more reliable.
+# - Added the OptionParser to make this nice to run at the command line.
+# - Made recursiveness an option.
+# - Made start directory an option.
+# - Check for appropriate programs before starting.
+#
+# - 2005-10-17
+# - Use a regular expression to handle the deletable types.
+#
+# - 2005-10-18
+# - Use regular expressions to handle all finding of files, instead of
+#   using the glob module.
+# - Add a config class to handle all the default config stuff sanely.
+#   This makes it easier to change some of the main parts of the program to
+#   your specific configuration.
+# - Move the docrcchecks variable inside the get_par2_filenames() function,
+#   which is where it belongs anyway.
+# - Added command-line option to check for required programs at start.
+#
+# - 2005-10-20
+# - Added a config option to extract with full path.
+#
+# - 2005-10-22
+# - Re-wrote the config class so that there is a config file, which
+#   resides at ~/.config/rarslave/rarslave.conf by default.
+# - Added the command-line option -c to write out an updated version
+#   of the config file (to fill in any missing options with the defaults)
+# - Added the command-line option -f to write out a new default config file,
+#   which overwrites any user changes.
+# - Made all regexes case insensitive.
+# - Made all command-line options override the config file.
+#
+# - 2005-10-30
+# - Added the '-o' option, to output debugging info. Hopefully next time
+#   someone finds a bug, they can output this and send it to me with a
+#   description of the bug they're seeing.
+#
+# - 2005-11-05
+# - Added an output system to rarslave. This makes a nice status report
+#   possible at the end of the program run.
+#
+# - 2005-11-06
+# - Fixed the rar command so that it can extract files whose names begin
+#   with a hyphen.
+#
+################################################################################
+# REQUIREMENTS:
+#
+# This code requires the programs cfv, par2repair, lxsplit, and rar to be able
+# to function properly. I will attempt to check that these are in your path.
+################################################################################
+import ConfigParser, os
+class rarslave_config:
+    """A simple class to hold the default configs for the whole program"""
+    def __read_config(self, filename='~/.config/rarslave/rarslave.conf'):
+        """Attempt to open and read the rarslave config file"""
+        # Make sure the filename is corrected
+        filename = os.path.abspath(os.path.expanduser(filename))
+        user_config = {}
+        # Write the default config if it doesn't exist
+        if not os.path.isfile(filename):
+            self.write_config(default=True)
+        config = ConfigParser.ConfigParser()
+        config.read(filename)
+        for section in config.sections():
+            for option in config.options(section):
+                user_config[(section, option)] = config.get(section, option)
+        return user_config
+    def write_config(self, filename='~/.config/rarslave/rarslave.conf', default=False):
+        """Write out the current config to the config file. If you set default=True, then
+        the default config file will be written."""
+        config = ConfigParser.ConfigParser()
+        # Correct filename
+        filename = os.path.abspath(os.path.expanduser(filename))
+        # Reset all config to make sure we write the default one, if necessary
+        if default:
+            self.__user_config = {}
+            print 'Writing default config to %s' % (filename, )
+        # [directories] section
+        config.add_section('directories')
+        for (s, k) in self.__defaults.keys():
+            if s == 'directories':
+                config.set(s, k, self.get_value(s, k))
+        # [options] section
+        config.add_section('options')
+        for (s, k) in self.__defaults.keys():
+            if s == 'options':
+                config.set(s, k, self.get_value(s, k))
+        # [regular_expressions] section
+        config.add_section('regular expressions')
+        for (s, k) in self.__defaults.keys():
+            if s == 'regular expressions':
+                config.set(s, k, self.get_value(s, k))
+        # Try to make the ~/.config/rarslave/ directory
+        if not os.path.isdir(os.path.split(filename)[0]):
+            try:
+                os.makedirs(os.path.split(filename)[0])
+            except:
+                print 'Could not make directory: %s' % (os.path.split(filename)[0], )
+                sys.exit()
+        # Try to write the config file to disk
+        try:
+            fsock = open(filename, 'w')
+            try:
+                config.write(fsock)
+            finally:
+                fsock.close()
+        except:
+            print 'Could not open: %s for writing' % (filename, )
+            sys.exit()
+    def __get_default_val(self, section, key):
+        return self.__defaults[(section, key)]
+    def get_value(self, section, key):
+        """Get a config value. Attempts to get the value from the user's
+        config first, and then uses the default."""
+        try:
+            value = self.__user_config[(section, key)]
+        except:
+            # This should work, unless you write something stupid
+            # into the code, so DON'T DO IT
+            value = self.__get_default_val(section, key)
+        # Convert config options to booleans for easier use
+        if value == 'True':
+            value = True
+        if value == 'False':
+            value = False
+        return value
+    def __init__(self):
+        self.__defaults = {
+            ('directories', 'working_directory') : '~/downloads/usenet',
+            ('options', 'recursive') : True,
+            ('options', 'check_required_programs') : False,
+            ('options', 'extract_with_full_path') : False,
+            ('regular expressions', 'par2_regex') : '.*\.par2$',
+            ('regular expressions', 'video_file_regex') : '.*\.(avi|ogm|mkv|mp4)$',
+            ('regular expressions', 'temp_repair_regex') : '.*\.1$',
+            ('regular expressions', 'remove_regex') : '^.*\.(rar|r\d\d)$' }
+        self.__user_config = self.__read_config()
+# This is the global config variable. Creating it reads the config file
+# (~/.config/rarslave/rarslave.conf by default), and writes a default config
+# file first if there is none yet.
+config = rarslave_config()
+################################################################################
+# The rarslave_output class
+#
+# This class handles the nice output summary which is printed at the end
+# of a run
+################################################################################
+class rarslave_output:
+    # Data structure: list of lists
+    # [ [status, filename], ... ]
+    #
+    # Where status is one of:
+    # 0: Verified and Extracted Perfectly
+    # 1: Failed to Verify (and therefore Extract)
+    # 2: Verified correctly, but failed to Extract
+    #
+    def __init__(self):
+        self.output_list    = []
+        self.good_files     = 0
+        self.unverified     = 0
+        self.unextractable  = 0
+        self.corrupt_par2   = 0
+    def print_equal_line(self, size=80):
+        """Print an 80 character line of equal signs"""
+        str = ''
+        for i in range(size):
+            str += '='
+        print str
+    def print_results_table(self):
+        """Print a nice table of the results from this run"""
+        # Print the table of good files (if we have any)
+        if self.good_files > 0:
+            print
+            self.print_equal_line()
+            print 'Files that were extracted perfectly'
+            self.print_equal_line()
+            for entry in self.output_list:
+                if entry[0] == 0:
+                    print '%s' % (entry[1], )
+        # Print the table of unverified files (if we have any)
+        if self.unverified > 0:
+            print
+            self.print_equal_line()
+            print 'Files that failed to verify (and extract)'
+            self.print_equal_line()
+            for entry in self.output_list:
+                if entry[0] == 1:
+                    print '%s' % (entry[1], )
+        # Print the table of unextracted files (if we have any)
+        if self.unextractable > 0:
+            print
+            self.print_equal_line()
+            print 'Files that were verified, but failed to extract'
+            self.print_equal_line()
+            for entry in self.output_list:
+                if entry[0] == 2:
+                    print '%s' % (entry[1], )
+        # Print the table of corrupt PAR2 files (if we have any)
+        if self.corrupt_par2 > 0:
+            print
+            self.print_equal_line()
+            print 'Files that had corrupt par2 files'
+            self.print_equal_line()
+            for entry in self.output_list:
+                if entry[0] == 3:
+                    print '%s' % (entry[1], )
+        # Print a blank line at the end
+        print
+    def add_file(self, status, filename):
+        if status == 0:
+            self.good_files += 1
+        elif status == 1:
+            self.unverified += 1
+        elif status == 2:
+            self.unextractable += 1
+        elif status == 3:
+            self.corrupt_par2 += 1
+        else:
+            # We have a bad value, so raise a ValueError
+            raise ValueError
+        self.output_list.append([status, filename])
+# This is the global output variable. The result of every parset is recorded
+# in it through add_file(), and main() prints the results table from it
+# at the end of the run.
+output = rarslave_output()
+################################################################################
+# The PAR2 Parser
+#
+# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
+################################################################################
+import struct, errno
+def chompnulls(line):
+    p = line.find('\0')
+    if p < 0: return line
+    else:     return line[:p]
+def get_par2_filenames(filename):
+    """Get all of the filenames that are protected by the par2
+    file given as the filename"""
+    try:
+        file = open(filename, 'rb')
+    except:
+        print 'Could not open %s' % (filename, )
+        return []
+    # We always want to do crc checks
+    docrcchecks = True
+    pkt_header_fmt = '< 8s Q 16s 16s 16s'
+    pkt_header_size = struct.calcsize(pkt_header_fmt)
+    file_pkt_fmt = '< 16s 16s 16s Q'
+    file_pkt_size = struct.calcsize(file_pkt_fmt)
+    main_pkt_fmt = '< Q I'
+    main_pkt_size = struct.calcsize(main_pkt_fmt)
+    seen_file_ids = {}
+    expected_file_ids = None
+    filenames = []
+    while 1:
+        d = file.read(pkt_header_size)
+        if not d:
+            break
+        magic, pkt_len, pkt_md5, set_id, pkt_type = struct.unpack(pkt_header_fmt, d)
+        if docrcchecks:
+            import md5
+            control_md5 = md5.new()
+            control_md5.update(d[0x20:])
+            d = file.read(pkt_len - pkt_header_size)
+            control_md5.update(d)
+            if control_md5.digest() != pkt_md5:
+                raise EnvironmentError, (errno.EINVAL, \
+                    "corrupt par2 file - bad packet hash")
+        if pkt_type == 'PAR 2.0\0FileDesc':
+            if not docrcchecks:
+                d = file.read(pkt_len - pkt_header_size)
+            file_id, file_md5, file_md5_16k, file_size = \
+                struct.unpack(file_pkt_fmt, d[:file_pkt_size])
+            if seen_file_ids.get(file_id) is None:
+                seen_file_ids[file_id] = 1
+                filename = chompnulls(d[file_pkt_size:])
+                filenames.append(filename)
+        elif pkt_type == "PAR 2.0\0Main\0\0\0\0":
+            if not docrcchecks:
+                d = file.read(pkt_len - pkt_header_size)
+            if expected_file_ids is None:
+                expected_file_ids = []
+                slice_size, num_files = struct.unpack(main_pkt_fmt, d[:main_pkt_size])
+                num_nonrecovery = (len(d)-main_pkt_size)/16 - num_files
+                for i in range(main_pkt_size,main_pkt_size+(num_files+num_nonrecovery)*16,16):
+                    expected_file_ids.append(d[i:i+16])
+        else:
+            if not docrcchecks:
+                file.seek(pkt_len - pkt_header_size, 1)
+    if expected_file_ids is None:
+        raise EnvironmentError, (errno.EINVAL, \
+            "corrupt or unsupported par2 file - no main packet found")
+    for id in expected_file_ids:
+        if not seen_file_ids.has_key(id):
+            raise EnvironmentError, (errno.EINVAL, \
+                "corrupt or unsupported par2 file - " \
+                "expected file description packet not found")
+    return filenames
+################################################################################
+# The parset object
+#
+# This is an object based representation of a parset, and will verify itself
+# and extract itself, if possible.
+################################################################################
+import os, glob, re
+class parset:
+    def __init__(self, par_filename):
+        self.parfile = par_filename
+        self.extra_pars = []
+        self.files = False
+        self.used_parjoin = False
+        self.verified = False
+        self.extracted = False
+    def print_debug_info(self):
+        """Special function for debugging"""
+        print '========== DEBUG INFO STARTS HERE =========='
+        print 'parfile: %s' % (self.parfile, )
+        print 'extra_pars: %s' % (self.extra_pars, )
+        print 'files: %s' % (self.files, )
+        print '========== DEBUG INFO ENDS HERE =========='
+    def get_filenames(self):
+        return get_par2_filenames(self.parfile)
+    def all_there(self):
+        """Check if all the files for the parset are present.
+        This will help us decide which par2 checker to use first"""
+        for f in self.files:
+            if not os.path.isfile(f):
+                return False
+        # The files were all there
+        return True
+    def verify(self):
+        """This will verify the parset by the most efficient method first,
+        and then move to a slower method if that one fails"""
+        retval = False #not verified yet
+        # if all the files are there, try verifying fast
+        if self.all_there():
+            retval = self.__fast_verify()
+            if retval == False:
+                # Failed to verify fast, so try it slow, maybe it needs repair
+                retval = self.__slow_verify()
+        # If we've got a video file, maybe we should try to parjoin it
+        elif self.__has_video_file():
+            retval = self.__parjoin()
+        else: #not all there, maybe we can slow-repair
+            retval = self.__slow_verify()
+        self.verified = retval
+        return self.verified
+    def __fast_verify(self):
+        retval = os.system('cfv -v -f "%s"' % (self.parfile, ))
+        if retval == 0:
+            return True #success
+        return False #failure
+    def __slow_verify(self):
+        retval = os.system('par2repair "%s"' % (self.parfile, ))
+        if retval == 0:
+            return True #success
+        return False #failure
+    def __parjoin(self):
+        retval = os.system('lxsplit -j "%s.001"' % (self.files[0], ))
+        retval = self.__fast_verify()
+        if retval == False:
+            # Failed to verify fast, so try it slow, maybe it needs repair
+            retval = self.__slow_verify()
+        if retval == False: # failed to verify, so remove the lxsplit created file
+            try:
+                os.remove(self.files[0])
+            except OSError:
+                print 'Failed to remove file: %s' % (self.files[0], )
+        self.used_parjoin = retval
+        self.verified = retval
+        return self.verified
+    def __has_video_file(self):
+        regex = re.compile(
+                config.get_value('regular expressions', 'video_file_regex'),
+                re.IGNORECASE)
+        for f in self.files:
+            if regex.match(f):
+                return True
+        return False
+    def __remove_currentset(self):
+        """Remove all of the files that are extractable, as well as the pars.
+        Leave everything else alone"""
+        if not self.extracted:
+            print 'Did not extract yet, not removing currentset'
+            return
+        # remove the main par
+        os.remove(self.parfile)
+        # remove all of the extra pars
+        for i in self.extra_pars:
+            os.remove(i)
+        # remove any rars that are associated (leave EVERYTHING else)
+        # This regex matches both old and new style rar(s) by default.
+        regex = re.compile(
+                config.get_value('regular expressions', 'remove_regex'),
+                re.IGNORECASE)
+        for i in self.files:
+            if regex.match(i):
+                os.remove(i)
+        # remove any .{001,002,...} files (from parjoin)
+        if self.used_parjoin:
+            for i in os.listdir(os.getcwd()):
+                if i != self.files[0] and self.files[0] in i:
+                    os.remove(i)
+        # remove any temp repair files
+        regex = re.compile(
+                config.get_value('regular expressions', 'temp_repair_regex'),
+                re.IGNORECASE)
+        [os.remove(f) for f in os.listdir(os.getcwd()) if regex.match(f)]
+    def __get_extract_file(self):
+        """Find the first extractable file"""
+        for i in self.files:
+            if os.path.splitext(i)[1] == '.rar':
+                return i
+        return None
+    def extract(self):
+        """Attempt to extract all of the files related to this parset"""
+        if not self.verified:
+            self.extracted = False
+            output.add_file(1, self.parfile)
+            return False #failed to extract
+        extract_file = self.__get_extract_file()
+        if extract_file != None:
+            if config.get_value('options', 'extract_with_full_path'):
+                retval = os.system('rar x -o+ -- "%s"' % (extract_file, ))
+            else:
+                retval = os.system('rar e -o+ -- "%s"' % (extract_file, ))
+            if retval != 0:
+                output.add_file(2, self.parfile)
+                self.extracted = False
+                return self.extracted
+        # we extracted ok, so remove the currentset
+        self.extracted = True
+        self.__remove_currentset()
+        output.add_file(0, self.parfile)
+        return self.extracted
+################################################################################
+# The rarslave program itself
+################################################################################
+import os, sys
+from optparse import OptionParser
+def check_required_progs():
+    """Check if the required programs are installed"""
+    shell_not_found = 32512
+    needed = []
+    if os.system('cfv --help > /dev/null 2>&1') == shell_not_found:
+        needed.append('cfv')
+    if os.system('par2repair --help > /dev/null 2>&1') == shell_not_found:
+        needed.append('par2repair')
+    if os.system('lxsplit --help > /dev/null 2>&1') == shell_not_found:
+        needed.append('lxpsplit')
+    if os.system('rar --help > /dev/null 2>&1') == shell_not_found:
+        needed.append('rar')
+    if needed:
+        for n in needed:
+            print 'Needed program "%s" not found in $PATH' % (n, )
+        sys.exit(1)
+def get_parsets():
+    """Get a representation of each parset in the current directory, and
+    return them as a list of parset instances"""
+    regex = re.compile(
+            config.get_value('regular expressions', 'par2_regex'),
+            re.IGNORECASE)
+    par2files = [f for f in os.listdir(os.getcwd()) if regex.match(f)]
+    parsets = []
+    for i in par2files:
+        try:
+            filenames = get_par2_filenames(i)
+            create_new = True
+        except EnvironmentError:
+            output.add_file(3, i)
+            continue
+        # if we already have an instance for this set, append
+        # this par file to the extra_pars field
+        for j in parsets:
+            if j.files == filenames:
+                j.extra_pars.append(i)
+                create_new = False
+        # we haven't seen this set yet, so we'll create it now
+        if create_new == True:
+            cur = parset(i)
+            cur.files = filenames
+            parsets.append(cur)
+    return parsets
+def directory_worker(dir, options):
+    """Attempts to find, verify, and extract every parset in the directory
+    given as a parameter"""
+    cwd = os.getcwd()
+    os.chdir(dir)
+    parsets = get_parsets()
+    # Print debug info if we're supposed to
+    if options.debug_info:
+        for p in parsets:
+            p.debug()
+    # No debug info
+    else:
+        # Verify each parset
+        for p in parsets:
+            p.verify()
+        # Attempt to extract each parset
+        for p in parsets:
+            p.extract()
+    os.chdir(cwd)
+def main():
+    # Build the OptionParser
+    parser = OptionParser()
+    parser.add_option('-n', '--not-recursive',
+                      action='store_false', dest='recursive',
+                      default=config.get_value('options', 'recursive'),
+                      help="Don't run recursively")
+    parser.add_option('-d', '--work-dir',
+                      dest='work_dir',
+                      default=config.get_value('directories', 'working_directory'),
+                      help="Start running at DIR", metavar='DIR')
+    parser.add_option('-p', '--check-required-programs',
+                       action='store_true', dest='check_progs',
+                       default=config.get_value('options', 'check_required_programs'),
+                       help="Check for required programs")
+    parser.add_option('-f', '--write-default-config',
+                      action='store_true', dest='write_def_config',
+                      default=False, help="Write out a new default config")
+    parser.add_option('-c', '--write-new-config',
+                      action='store_true', dest='write_config',
+                      default=False, help="Write out the current config")
+    parser.add_option('-o', '--output-debug-info',
+                       action='store_true', dest='debug_info',
+                       default=False,
+                       help="Output debug info for every parset, then exit")
+    # Parse the given options
+    (options, args) = parser.parse_args()
+    # Fix up the working directory
+    options.work_dir = os.path.abspath(os.path.expanduser(options.work_dir))
+    # Check that we have the required programs installed
+    if options.check_progs:
+        check_required_progs()
+    # Write out a new default config, if we need it
+    if options.write_def_config:
+        config.write_config(default=True)
+    # Write out the current config (adds new options to an existing config)
+    if options.write_config:
+        config.write_config()
+    # Run rarslave!
+    if options.recursive:
+        for root, dirs, files in os.walk(options.work_dir):
+            directory_worker(root, options)
+    else:
+        directory_worker(options.work_dir, options)
+    # Print the results
+    output.print_results_table()
+# Only run rarslave when this file is executed as a script, not when it
+# is imported as a module
+if __name__ == '__main__':
+    main()
 Property changes:
 Added: svn:executable
 +*
 \ No newline at end of property

Subversion Repositories programming

Compare Revisions

Ignore whitespace Rev 178 → Rev 179