Subversion Repositories programming

Compare Revisions

Ignore whitespace Rev 179 → Rev 178

/shell_programs/rarslave.py
0,0 → 1,720
#!/usr/bin/env python
 
# Copyright: Ira W. Snyder (devel@irasnyder.com)
# Start Date: 2005-10-13
# End Date:
# License: GNU General Public License v2 (or at your option, any later version)
#
# Changelog Follows:
# - 2005-10-13
# - Added get_par2_filenames() to parse par2 files
# - Added the parset object to represent each parset.
#
# - 2005-10-14
# - Finished the parset object. It will now verify and extract parsets.
# - Small changes to the parset object. This makes the parjoin part
# much more reliable.
# - Added the OptionParser to make this nice to run at the command line.
# - Made recursiveness an option.
# - Made start directory an option.
# - Check for appropriate programs before starting.
#
# - 2005-10-17
# - Use a regular expression to handle the deletable types.
#
# - 2005-10-18
# - Use regular expressions to handle all finding of files, instead of
# using the glob module.
# - Add a config class to handle all the default config stuff sanely.
# This makes it easier to change some of the main parts of the program to
# your specific configuration.
# - Move the docrcchecks variable inside the get_par2_filenames() function,
# which is where it belongs anyway.
# - Added command-line option to check for required programs at start.
#
# - 2005-10-20
# - Added a config option to extract with full path.
#
# - 2005-10-22
# - Re-wrote the config class so that there is a config file, which
# resides at ~/.config/rarslave/rarslave.conf by default.
# - Added the command-line option -c to write out an updated version
# of the config file (to fill in any missing options with the defaults)
# - Added the command-line option -f to write out a new default config file,
# which overwrites any user changes.
# - Made all regexes case insensitive.
# - Made all command-line options override the config file.
#
# - 2005-10-30
# - Added the '-o' option, to output debugging info. Hopefully next time
# someone finds a bug, they can output this and send it to me with a
# description of the bug they're seeing.
#
# - 2005-11-05
# - Added an output system to rarslave. This makes a nice status report
# possible at the end of the program run.
#
# - 2005-11-06
# - Fixed the rar command so that it can extract files whose names begin
# with a hyphen.
#
 
################################################################################
# REQUIREMENTS:
#
# This code requires the programs cfv, par2repair, lxsplit, and rar to be able
# to function properly. I will attempt to check that these are in your path.
################################################################################
 
import ConfigParser, os
 
class rarslave_config:
    """A simple class to hold the configuration for the whole program.

    Values are looked up in the user's config file first and fall back to
    the built-in defaults.  Both are keyed by (section, option) tuples.
    """

    def __read_config(self, filename='~/.config/rarslave/rarslave.conf'):
        """Read the config file and return its contents.

        Returns a dict mapping (section, option) tuples to the string
        values found in the file.  If the file does not exist yet, a
        default one is written to that same location first.
        """

        # Make sure the filename is corrected
        filename = os.path.abspath(os.path.expanduser(filename))

        # Write the default config if it doesn't exist.  The filename is
        # passed along so the file is created where we are about to read.
        if not os.path.isfile(filename):
            self.write_config(filename, default=True)

        parser = ConfigParser.ConfigParser()
        parser.read(filename)

        user_config = {}
        for section in parser.sections():
            for option in parser.options(section):
                user_config[(section, option)] = parser.get(section, option)

        return user_config

    def write_config(self, filename='~/.config/rarslave/rarslave.conf', default=False):
        """Write out the current config to the config file.

        If default is True, all user settings are dropped first, so the
        default config is what gets written.

        Prints a message and exits the program with status 1 if the config
        directory cannot be created or the file cannot be written.
        """

        # The file-level import of sys only comes after the global config
        # object has been created, so bring it into scope here.
        import sys

        # Correct filename
        filename = os.path.abspath(os.path.expanduser(filename))

        # Reset all config to make sure we write the default one, if necessary
        if default:
            self.__user_config = {}
            print('Writing default config to %s' % (filename, ))

        parser = ConfigParser.ConfigParser()

        # One section at a time: [directories], [options], [regular expressions]
        for section in ('directories', 'options', 'regular expressions'):
            parser.add_section(section)
            for (s, k) in self.__defaults.keys():
                if s == section:
                    # Stored as text; this is also what ends up in the file
                    parser.set(s, k, str(self.get_value(s, k)))

        # Try to make the ~/.config/rarslave/ directory
        conf_dir = os.path.dirname(filename)
        if not os.path.isdir(conf_dir):
            try:
                os.makedirs(conf_dir)
            except OSError:
                print('Could not make directory: %s' % (conf_dir, ))
                sys.exit(1)

        # Try to write the config file to disk
        try:
            fsock = open(filename, 'w')
            try:
                parser.write(fsock)
            finally:
                fsock.close()
        except (IOError, OSError):
            print('Could not open: %s for writing' % (filename, ))
            sys.exit(1)

    def __get_default_val(self, section, key):
        """Return the built-in default for (section, key).

        Raises KeyError for an option that has no default.
        """
        return self.__defaults[(section, key)]

    def get_value(self, section, key):
        """Get a config value.

        Attempts to get the value from the user's config first, and then
        uses the default.  The strings 'True' and 'False' are returned as
        the corresponding booleans.
        """

        try:
            value = self.__user_config[(section, key)]
        except KeyError:
            # This should work, unless you write something stupid
            # into the code, so DON'T DO IT
            value = self.__get_default_val(section, key)

        # Convert config options to booleans for easier use
        if value == 'True':
            value = True

        if value == 'False':
            value = False

        return value

    def __init__(self):
        """Set up the defaults, then load the user's config file."""
        self.__defaults = {
            ('directories', 'working_directory'): '~/downloads/usenet',
            ('options', 'recursive'): True,
            ('options', 'check_required_programs'): False,
            ('options', 'extract_with_full_path'): False,
            ('regular expressions', 'par2_regex'): r'.*\.par2$',
            ('regular expressions', 'video_file_regex'): r'.*\.(avi|ogm|mkv|mp4)$',
            ('regular expressions', 'temp_repair_regex'): r'.*\.1$',
            ('regular expressions', 'remove_regex'): r'^.*\.(rar|r\d\d)$',
        }

        # Start out empty so get_value() works at any point during loading
        self.__user_config = {}
        self.__user_config = self.__read_config()
 
# This is the global config variable: the one rarslave_config instance that
# the rest of the file queries through config.get_value().  Creating it
# reads the config file right away.
config = rarslave_config()
 
################################################################################
# The rarslave_output class
#
# This class handles the nice output summary which is printed at the end
# of a run
################################################################################
 
class rarslave_output:
    """Collects the result of every parset and prints the final summary.

    Data structure: list of lists
    [ [status, filename], ... ]

    Where status is one of:
    0: Verified and Extracted Perfectly
    1: Failed to Verify (and therefore Extract)
    2: Verified correctly, but failed to Extract
    3: The par2 file itself was corrupt
    """

    def __init__(self):
        self.output_list = []
        self.good_files = 0
        self.unverified = 0
        self.unextractable = 0
        self.corrupt_par2 = 0

    def print_equal_line(self, size=80):
        """Print a line of equal signs, size characters long (80 by default)"""

        print('=' * size)

    def print_results_table(self):
        """Print a nice table of the results from this run.

        There is one table per status, and a table is left out completely
        when no file ended up with that status.
        """

        tables = (
            (0, self.good_files, 'Files that were extracted perfectly'),
            (1, self.unverified, 'Files that failed to verify (and extract)'),
            (2, self.unextractable, 'Files that were verified, but failed to extract'),
            (3, self.corrupt_par2, 'Files that had corrupt par2 files'),
        )

        for status, count, title in tables:
            if count <= 0:
                continue

            print('')
            self.print_equal_line()
            print(title)
            self.print_equal_line()

            for entry in self.output_list:
                if entry[0] == status:
                    print('%s' % (entry[1], ))

        # Print a blank line at the end
        print('')

    def add_file(self, status, filename):
        """Record filename under the given status (0-3, see the class docs).

        Raises ValueError for any other status; nothing is recorded then.
        """

        counters = {
            0: 'good_files',
            1: 'unverified',
            2: 'unextractable',
            3: 'corrupt_par2',
        }

        try:
            counter = counters[status]
        except (KeyError, TypeError):
            # We have a bad value, so raise a ValueError
            raise ValueError('unknown status: %r' % (status, ))

        setattr(self, counter, getattr(self, counter) + 1)
        self.output_list.append([status, filename])
 
# This is the global output variable: results are recorded on it with
# output.add_file() during the run, and main() prints its summary table
# at the very end.
output = rarslave_output()
 
################################################################################
# The PAR2 Parser
#
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
################################################################################
 
import struct, errno
 
def chompnulls(line):
    """Return line cut off just before its first NUL character.

    A line without any NUL character comes back unchanged.
    """
    # Everything in front of the first NUL is the first piece of the split
    return line.split('\0', 1)[0]
 
def get_par2_filenames(filename):
    """Get all of the filenames that are protected by the par2
    file given as the filename.

    Every packet's MD5 hash is checked while reading.

    Returns the list of protected names, in the order their file
    description packets appear.  Returns an empty list (after printing a
    message) if the par2 file cannot be opened.

    Raises EnvironmentError (errno EINVAL) if the file is truncated or
    corrupt, has no main packet, or lacks a file description packet that
    the main packet lists.

    Written to run on Python 2.6 and later.
    """

    # hashlib takes the place of the old md5 module, which newer Pythons
    # no longer provide
    import hashlib

    pkt_magic = b'PAR2\0PKT'
    file_desc_type = b'PAR 2.0\0FileDesc'
    main_type = b'PAR 2.0\0Main\0\0\0\0'

    pkt_header_fmt = '< 8s Q 16s 16s 16s'
    pkt_header_size = struct.calcsize(pkt_header_fmt)
    file_pkt_fmt = '< 16s 16s 16s Q'
    file_pkt_size = struct.calcsize(file_pkt_fmt)
    main_pkt_fmt = '< Q I'
    main_pkt_size = struct.calcsize(main_pkt_fmt)

    try:
        par2 = open(filename, 'rb')
    except (IOError, OSError):
        print('Could not open %s' % (filename, ))
        return []

    seen_file_ids = set()
    expected_file_ids = None
    filenames = []

    try:
        # No honest packet can be longer than the file that holds it
        total_size = os.fstat(par2.fileno()).st_size

        while True:
            header = par2.read(pkt_header_size)
            if not header:
                break

            if len(header) < pkt_header_size:
                raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - truncated packet header")

            magic, pkt_len, pkt_md5, set_id, pkt_type = \
                    struct.unpack(pkt_header_fmt, header)

            # Reject nonsense lengths before using them as a read size
            if magic != pkt_magic or not pkt_header_size <= pkt_len <= total_size:
                raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - bad packet header")

            body = par2.read(pkt_len - pkt_header_size)

            # The hash covers everything in the packet after the hash field
            # itself, which ends at offset 0x20 of the header
            control_md5 = hashlib.md5(header[0x20:])
            control_md5.update(body)

            if control_md5.digest() != pkt_md5:
                raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - bad packet hash")

            if pkt_type == file_desc_type:
                if len(body) < file_pkt_size:
                    raise EnvironmentError(errno.EINVAL,
                            "corrupt par2 file - short file description packet")

                file_id = struct.unpack(file_pkt_fmt, body[:file_pkt_size])[0]

                # The same description is usually repeated; keep the first
                if file_id not in seen_file_ids:
                    seen_file_ids.add(file_id)
                    # The name follows the fixed fields, padded with NULs
                    protected_name = body[file_pkt_size:].split(b'\0', 1)[0]
                    filenames.append(protected_name)

            elif pkt_type == main_type:
                # Only the first main packet is used
                if expected_file_ids is None:
                    if len(body) < main_pkt_size:
                        raise EnvironmentError(errno.EINVAL,
                                "corrupt par2 file - short main packet")

                    # After the fixed fields come 16-byte file ids
                    id_count = (len(body) - main_pkt_size) // 16
                    expected_file_ids = []
                    for i in range(main_pkt_size, main_pkt_size + id_count * 16, 16):
                        expected_file_ids.append(body[i:i + 16])

            # Every other packet type is of no interest here
    finally:
        par2.close()

    if expected_file_ids is None:
        raise EnvironmentError(errno.EINVAL,
                "corrupt or unsupported par2 file - no main packet found")

    for expected_id in expected_file_ids:
        if expected_id not in seen_file_ids:
            raise EnvironmentError(errno.EINVAL,
                    "corrupt or unsupported par2 file - "
                    "expected file description packet not found")

    return filenames
 
################################################################################
# The parset object
#
# This is an object based representation of a parset, and will verify itself
# and extract itself, if possible.
################################################################################
 
import os, glob, re
 
class parset:
    """One par2 set: a main par2 file, any extra par2 files that protect
    the same files, and the list of protected files.

    A parset can verify (and if necessary repair) itself, and then extract
    itself and clean up afterwards.
    """

    def __init__(self, par_filename):
        self.parfile = par_filename
        self.extra_pars = []
        # Filled in by whoever creates the parset; an empty list keeps
        # every loop over the files safe until then
        self.files = []
        self.used_parjoin = False
        self.verified = False
        self.extracted = False

    def __run(self, argv):
        """Run an external program and return True if it exited with 0.

        argv is the program name followed by its arguments.  It is passed
        on as a list, without going through a shell, so file names that
        contain quotes, '$' or backticks reach the program untouched.
        A program that cannot be started counts as a failure.
        """
        import subprocess

        try:
            return subprocess.call(argv) == 0
        except OSError:
            print('Could not run program: %s' % (argv[0], ))
            return False

    def __remove_file(self, path):
        """Remove path, reporting (but surviving) a failure to do so"""
        try:
            os.remove(path)
        except OSError:
            print('Failed to remove file: %s' % (path, ))

    def print_debug_info(self):
        """Special function for debugging"""
        print('========== DEBUG INFO STARTS HERE ==========')
        print('parfile: %s' % (self.parfile, ))
        print('extra_pars: %s' % (self.extra_pars, ))
        print('files: %s' % (self.files, ))
        print('========== DEBUG INFO ENDS HERE ==========')

    def get_filenames(self):
        """Return the names of the files protected by the main par2 file"""
        return get_par2_filenames(self.parfile)

    def all_there(self):
        """Check if all the files for the parset are present.
        This will help us decide which par2 checker to use first"""
        for f in self.files:
            if not os.path.isfile(f):
                return False

        # The files were all there
        return True

    def verify(self):
        """This will verify the parset by the most efficient method first,
        and then move to a slower method if that one fails.

        Returns True if the set verified (possibly after a repair), and
        remembers the result in self.verified."""

        # if all the files are there, try verifying fast
        if self.all_there():
            retval = self.__fast_verify()

            if not retval:
                # Failed to verify fast, so try it slow, maybe it needs repair
                retval = self.__slow_verify()

        # If we've got a video file, maybe we should try to parjoin it
        elif self.__has_video_file():
            retval = self.__parjoin()

        else: #not all there, maybe we can slow-repair
            retval = self.__slow_verify()

        self.verified = retval
        return self.verified

    def __fast_verify(self):
        """Check the set with cfv; True on success"""
        return self.__run(['cfv', '-v', '-f', self.parfile])

    def __slow_verify(self):
        """Check, and repair if needed, the set with par2repair; True on success"""
        return self.__run(['par2repair', self.parfile])

    def __parjoin(self):
        """Join the <first file>.001, .002, ... pieces with lxsplit, then
        verify the result.  True if the joined file verified."""

        # The exit status of the join itself is not looked at; the
        # verification below decides whether the joined file is good
        self.__run(['lxsplit', '-j', '%s.001' % (self.files[0], )])

        retval = self.__fast_verify()

        if not retval:
            # Failed to verify fast, so try it slow, maybe it needs repair
            retval = self.__slow_verify()

        if not retval: # failed to verify, so remove the lxsplit created file
            self.__remove_file(self.files[0])

        self.used_parjoin = retval
        self.verified = retval
        return self.verified

    def __has_video_file(self):
        """Check if any protected file matches the video file regex"""
        regex = re.compile(
                config.get_value('regular expressions', 'video_file_regex'),
                re.IGNORECASE)

        for f in self.files:
            if regex.match(f):
                return True

        return False

    def __remove_currentset(self):
        """Remove all of the files that are extractable, as well as the pars.
        Leave everything else alone.

        A file that cannot be removed is reported and skipped, so one
        stubborn file does not abort the whole run."""

        if not self.extracted:
            print('Did not extract yet, not removing currentset')
            return

        # remove the main par
        self.__remove_file(self.parfile)

        # remove all of the extra pars
        for i in self.extra_pars:
            self.__remove_file(i)

        # remove any rars that are associated (leave EVERYTHING else)
        # This regex matches both old and new style rar(s) by default.
        regex = re.compile(
                config.get_value('regular expressions', 'remove_regex'),
                re.IGNORECASE)

        for i in self.files:
            if regex.match(i):
                self.__remove_file(i)

        # remove any .{001,002,...} files (from parjoin)
        if self.used_parjoin:
            for i in os.listdir(os.getcwd()):
                if i != self.files[0] and self.files[0] in i:
                    self.__remove_file(i)

        # remove any temp repair files
        regex = re.compile(
                config.get_value('regular expressions', 'temp_repair_regex'),
                re.IGNORECASE)

        for f in os.listdir(os.getcwd()):
            if regex.match(f):
                self.__remove_file(f)

    def __get_extract_file(self):
        """Find the first extractable file, or None if there is none.

        The extension is compared without regard to case, the same way
        the removal regex is applied; otherwise a set of .RAR files would
        be cleaned up without ever having been extracted."""
        for i in self.files:
            if os.path.splitext(i)[1].lower() == '.rar':
                return i

        return None

    def extract(self):
        """Attempt to extract all of the files related to this parset.

        The outcome is recorded in the global output object.  Returns True
        if the set counts as extracted; a verified set that contains no rar
        file counts as extracted too."""
        if not self.verified:
            self.extracted = False
            output.add_file(1, self.parfile)
            return False #failed to extract

        extract_file = self.__get_extract_file()

        if extract_file is not None:
            # 'x' keeps the paths stored in the archive, 'e' flattens them
            if config.get_value('options', 'extract_with_full_path'):
                mode = 'x'
            else:
                mode = 'e'

            # The '--' lets rar cope with names that begin with a hyphen
            if not self.__run(['rar', mode, '-o+', '--', extract_file]):
                output.add_file(2, self.parfile)
                self.extracted = False
                return self.extracted

        # we extracted ok, so remove the currentset
        self.extracted = True
        self.__remove_currentset()

        output.add_file(0, self.parfile)

        return self.extracted
 
 
################################################################################
# The rarslave program itself
################################################################################
 
import os, sys
from optparse import OptionParser
 
def check_required_progs(programs=('cfv', 'par2repair', 'lxsplit', 'rar')):
    """Check if the required programs are installed.

    Each program is started through the shell with --help and its output
    thrown away.  If the shell cannot find one or more of them, their
    names are printed and the program exits with status 1.  Otherwise the
    function simply returns.

    programs lists the program names to look for; by default these are
    the four programs rarslave depends on.
    """

    # os.system() hands back the raw wait status.  The shell exits with
    # 127 when it cannot find a command, which shows up here as 127 << 8.
    shell_not_found = 127 << 8

    needed = []
    for prog in programs:
        if os.system('%s --help > /dev/null 2>&1' % (prog, )) == shell_not_found:
            needed.append(prog)

    if needed:
        for n in needed:
            print('Needed program "%s" not found in $PATH' % (n, ))

        sys.exit(1)
 
def get_parsets():
    """Get a representation of each parset in the current directory, and
    return them as a list of parset instances.

    Par2 files that protect exactly the same list of files are folded
    into one parset: the first one becomes its main par2 file, the others
    go into its extra_pars.  A par2 file that cannot be parsed is recorded
    in the global output object as corrupt and otherwise skipped.
    """

    regex = re.compile(
            config.get_value('regular expressions', 'par2_regex'),
            re.IGNORECASE)

    # Sorted, so that the choice of main par2 file does not depend on the
    # arbitrary order in which the directory happens to be listed
    par2files = sorted([f for f in os.listdir(os.getcwd()) if regex.match(f)])

    parsets = []

    for par2file in par2files:
        try:
            filenames = get_par2_filenames(par2file)
        except (EnvironmentError, struct.error):
            # struct.error is what unpacking a cut-off packet raises
            output.add_file(3, par2file)
            continue

        # if we already have an instance for this set, append
        # this par file to the extra_pars field
        for known in parsets:
            if known.files == filenames:
                known.extra_pars.append(par2file)
                break
        else:
            # we haven't seen this set yet, so we'll create it now
            cur = parset(par2file)
            cur.files = filenames
            parsets.append(cur)

    return parsets
 
def directory_worker(dir, options):
    """Attempts to find, verify, and extract every parset in the directory
    given as a parameter.

    If options.debug_info is set, the debug info of every parset is
    printed instead and nothing is verified or extracted.

    The work is done from inside dir; the previous working directory is
    restored afterwards, even if something goes wrong on the way.
    """

    cwd = os.getcwd()
    os.chdir(dir)

    try:
        parsets = get_parsets()

        # Print debug info if we're supposed to
        if options.debug_info:
            for p in parsets:
                p.print_debug_info()

        # No debug info
        else:

            # Verify each parset
            for p in parsets:
                p.verify()

            # Attempt to extract each parset
            for p in parsets:
                p.extract()
    finally:
        os.chdir(cwd)
 
def main():
    """Parse the command line, act on it, and print the results table.

    Every option takes its default from the global config, so anything
    given on the command line overrides the config file.
    """

    # Each entry is (flags, keyword arguments) for one command-line option
    option_specs = (
        (('-n', '--not-recursive'),
         dict(action='store_false', dest='recursive',
              default=config.get_value('options', 'recursive'),
              help="Don't run recursively")),
        (('-d', '--work-dir'),
         dict(dest='work_dir',
              default=config.get_value('directories', 'working_directory'),
              help="Start running at DIR", metavar='DIR')),
        (('-p', '--check-required-programs'),
         dict(action='store_true', dest='check_progs',
              default=config.get_value('options', 'check_required_programs'),
              help="Check for required programs")),
        (('-f', '--write-default-config'),
         dict(action='store_true', dest='write_def_config',
              default=False, help="Write out a new default config")),
        (('-c', '--write-new-config'),
         dict(action='store_true', dest='write_config',
              default=False, help="Write out the current config")),
        (('-o', '--output-debug-info'),
         dict(action='store_true', dest='debug_info',
              default=False,
              help="Output debug info for every parset, then exit")),
    )

    opt_parser = OptionParser()
    for flags, settings in option_specs:
        opt_parser.add_option(*flags, **settings)

    options, _leftover_args = opt_parser.parse_args()

    # Turn the working directory into an absolute path, '~' expanded
    options.work_dir = os.path.abspath(os.path.expanduser(options.work_dir))

    # Optional extras, in this order: program check, default config,
    # current config (which adds new options to an existing config)
    if options.check_progs:
        check_required_progs()

    if options.write_def_config:
        config.write_config(default=True)

    if options.write_config:
        config.write_config()

    # Run rarslave: only the working directory itself, or every
    # directory found below it as well
    if not options.recursive:
        directory_worker(options.work_dir, options)
    else:
        for walk_entry in os.walk(options.work_dir):
            directory_worker(walk_entry[0], options)

    # Print the results
    output.print_results_table()


if __name__ == '__main__':
    main()
 
Property changes:
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property