0,0 → 1,720 |
#!/usr/bin/env python |
|
# Copyright: Ira W. Snyder (devel@irasnyder.com) |
# Start Date: 2005-10-13 |
# End Date: |
# License: GNU General Public License v2 (or at your option, any later version) |
# |
# Changelog Follows: |
# - 2005-10-13 |
# - Added get_par2_filenames() to parse par2 files |
# - Added the parset object to represent each parset. |
# |
# - 2005-10-14 |
# - Finished the parset object. It will now verify and extract parsets. |
# - Small changes to the parset object. This makes the parjoin part |
# much more reliable. |
# - Added the OptionParser to make this nice to run at the command line. |
# - Made recursiveness an option. |
# - Made start directory an option. |
# - Check for appropriate programs before starting. |
# |
# - 2005-10-17 |
# - Use a regular expression to handle the deletable types. |
# |
# - 2005-10-18 |
# - Use regular expressions to handle all finding of files, instead of |
# using the glob module. |
# - Add a config class to handle all the default config stuff sanely. |
# This makes it easier to change some of the main parts of the program to |
# your specific configuration. |
# - Move the docrcchecks variable inside the get_par2_filenames() function, |
# which is where it belongs anyway. |
# - Added command-line option to check for required programs at start. |
# |
# - 2005-10-20 |
# - Added a config option to extract with full path. |
# |
# - 2005-10-22 |
# - Re-wrote the config class so that there is a config file, which |
# resides at ~/.config/rarslave/rarslave.conf by default. |
# - Added the command-line option -c to write out an updated version |
# of the config file (to fill in any missing options with the defaults) |
# - Added the command-line option -f to write out a new default config file, |
# which overwrites any user changes. |
# - Made all regexes case insensitive. |
# - Made all command-line options override the config file. |
# |
# - 2005-10-30 |
# - Added the '-o' option, to output debugging info. Hopefully next time |
# someone finds a bug, they can output this and send it to me with a |
# description of the bug they're seeing. |
# |
# - 2005-11-05 |
# - Added an output system to rarslave. This makes a nice status report |
# possible at the end of the program run. |
# |
# - 2005-11-06 |
# - Fixed the rar command so that it can extract files whose names begin |
# with a hyphen. |
# |
|
################################################################################ |
# REQUIREMENTS: |
# |
# This code requires the programs cfv, par2repair, lxsplit, and rar to be able |
# to function properly. I will attempt to check that these are in your path. |
################################################################################ |
|
import ConfigParser
import os
import subprocess
import sys
|
class rarslave_config:
    """Program configuration: built-in defaults overlaid by the user's config file.

    Every setting is addressed by a (section, key) pair.  Values found in the
    config file take precedence over the defaults defined in __init__().
    """

    def __read_config(self, filename='~/.config/rarslave/rarslave.conf'):
        """Read the config file and return its contents.

        If the file does not exist yet, a default one is written to the same
        path first.

        Returns a dict mapping (section, option) to the option's string value.
        """

        # Make sure the filename is corrected
        filename = os.path.abspath(os.path.expanduser(filename))

        # Write the default config if it doesn't exist.  The requested path is
        # passed along so the file is created where we are about to read it.
        if not os.path.isfile(filename):
            self.write_config(filename, default=True)

        parser = ConfigParser.ConfigParser()
        parser.read(filename)

        user_config = {}
        for section in parser.sections():
            for option in parser.options(section):
                user_config[(section, option)] = parser.get(section, option)

        return user_config

    def write_config(self, filename='~/.config/rarslave/rarslave.conf', default=False):
        """Write the current config to the config file.

        With default=True the user's settings are discarded first, so the
        built-in defaults are what gets written.

        The parent directory is created when missing.  If the directory or the
        file cannot be created, a message is printed and the program exits
        with status 1.
        """

        # Correct filename
        filename = os.path.abspath(os.path.expanduser(filename))

        # Reset all config to make sure we write the default one, if necessary
        if default:
            self.__user_config = {}
            print('Writing default config to %s' % (filename, ))

        parser = ConfigParser.ConfigParser()

        # One pass per section; keys are sorted so the file layout is stable
        for section in ('directories', 'options', 'regular expressions'):
            parser.add_section(section)
            for (s, k) in sorted(self.__defaults):
                if s == section:
                    # Values are stored as text; get_value() converts
                    # 'True'/'False' back to booleans on the way out.
                    parser.set(s, k, str(self.get_value(s, k)))

        # Try to make the ~/.config/rarslave/ directory
        directory = os.path.split(filename)[0]
        if not os.path.isdir(directory):
            try:
                os.makedirs(directory)
            except OSError:
                print('Could not make directory: %s' % (directory, ))
                sys.exit(1)

        # Try to write the config file to disk
        try:
            fsock = open(filename, 'w')
            try:
                parser.write(fsock)
            finally:
                fsock.close()
        except EnvironmentError:
            print('Could not open: %s for writing' % (filename, ))
            sys.exit(1)

    def __get_default_val(self, section, key):
        """Return the built-in default for (section, key); KeyError if unknown."""
        return self.__defaults[(section, key)]

    def get_value(self, section, key):
        """Get a config value.

        The user's config is consulted first, then the built-in defaults.
        The strings 'True' and 'False' are returned as booleans.

        Raises KeyError when the setting exists in neither place.
        """

        try:
            value = self.__user_config[(section, key)]
        except KeyError:
            # Not set by the user: every supported setting has a default
            value = self.__get_default_val(section, key)

        # Convert config options to booleans for easier use
        if value == 'True':
            value = True

        if value == 'False':
            value = False

        return value

    def __init__(self):
        """Set up the defaults, then load the user's config file on top."""
        self.__defaults = {
            ('directories', 'working_directory') : '~/downloads/usenet',
            ('options', 'recursive') : True,
            ('options', 'check_required_programs') : False,
            ('options', 'extract_with_full_path') : False,
            ('regular expressions', 'par2_regex') : r'.*\.par2$',
            ('regular expressions', 'video_file_regex') : r'.*\.(avi|ogm|mkv|mp4)$',
            ('regular expressions', 'temp_repair_regex') : r'.*\.1$',
            ('regular expressions', 'remove_regex') : r'^.*\.(rar|r\d\d)$' }

        self.__user_config = self.__read_config()
|
# The single, module-wide configuration object.  Creating it loads the
# user's config file; the rest of the program reads settings through it.
config = rarslave_config()
|
################################################################################ |
# The rarslave_output class |
# |
# This class handles the nice output summary which is printed at the end |
# of a run |
################################################################################ |
|
class rarslave_output:
    """Collect the result of every parset and print a summary at the end.

    Results are kept in output_list as [status, filename] pairs, where
    status is one of:
        0: Verified and Extracted Perfectly
        1: Failed to Verify (and therefore Extract)
        2: Verified correctly, but failed to Extract
        3: The par2 file itself was corrupt
    """

    # (status, name of the counter attribute, heading of the summary table)
    _CATEGORIES = (
        (0, 'good_files', 'Files that were extracted perfectly'),
        (1, 'unverified', 'Files that failed to verify (and extract)'),
        (2, 'unextractable', 'Files that were verified, but failed to extract'),
        (3, 'corrupt_par2', 'Files that had corrupt par2 files'),
    )

    def __init__(self):
        self.output_list = []
        self.good_files = 0
        self.unverified = 0
        self.unextractable = 0
        self.corrupt_par2 = 0

    def print_equal_line(self, size=80):
        """Print a line of `size` equal signs (80 by default)"""
        print('=' * size)

    def print_results_table(self):
        """Print a nice table of the results from this run.

        One table is printed per status that has at least one entry,
        followed by a single blank line.
        """

        for status, counter, heading in self._CATEGORIES:
            # Skip the table entirely when nothing ended up in this category
            if getattr(self, counter) > 0:
                print('')
                self.print_equal_line()
                print(heading)
                self.print_equal_line()

                for entry in self.output_list:
                    if entry[0] == status:
                        print('%s' % (entry[1], ))

        # Print a blank line at the end
        print('')

    def add_file(self, status, filename):
        """Record `filename` under `status` (0-3, see the class docstring).

        Raises ValueError for any other status; nothing is recorded then.
        """

        for known, counter, _heading in self._CATEGORIES:
            if status == known:
                setattr(self, counter, getattr(self, counter) + 1)
                break
        else:
            # We have a bad value, so raise a ValueError
            raise ValueError('unknown status code: %r' % (status, ))

        self.output_list.append([status, filename])
|
# The single, module-wide result collector.  Parsets report their outcome
# to it and main() prints its summary table at the end of the run.
output = rarslave_output()
|
################################################################################ |
# The PAR2 Parser |
# |
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy) |
################################################################################ |
|
import struct, errno |
|
def chompnulls(line):
    """Return `line` cut off just before its first NUL character.

    A value without any NUL is returned unchanged.  Both text strings and
    byte strings (as read from a file opened in 'rb' mode) are accepted; the
    result has the same type as the argument.
    """
    # The separator has to be of the same type as the value being split
    nul = b'\0' if isinstance(line, bytes) else '\0'
    return line.split(nul, 1)[0]
|
def get_par2_filenames(filename):
    """Get all of the filenames that are protected by the par2
    file given as the filename.

    Returns the list of protected filenames, in the order their file
    description packets first appear.  If the par2 file cannot be opened, a
    message is printed and an empty list is returned.

    Raises EnvironmentError (errno EINVAL) when the file is corrupt: a
    truncated or malformed packet, a packet hash mismatch, no main packet,
    or a file listed in the main packet that has no description packet.
    """

    try:
        par2 = open(filename, 'rb')
    except EnvironmentError:
        print('Could not open %s' % (filename, ))
        return []

    try:
        return _read_par2_packets(par2)
    except struct.error:
        # A packet passed its hash check but is too short for its own type
        raise EnvironmentError(errno.EINVAL,
                               "corrupt par2 file - malformed packet")
    finally:
        # Always release the handle, whether parsing succeeded or not
        par2.close()

def _read_par2_packets(stream):
    """Parse all packets of an open, seekable par2 stream (binary mode).

    Returns the protected filenames; raises EnvironmentError on corruption.
    Every packet's md5 checksum is verified.
    """
    import hashlib

    pkt_magic = b'PAR2\0PKT'
    filedesc_type = b'PAR 2.0\0FileDesc'
    main_type = b'PAR 2.0\0Main\0\0\0\0'

    pkt_header_fmt = '< 8s Q 16s 16s 16s'
    pkt_header_size = struct.calcsize(pkt_header_fmt)
    file_pkt_fmt = '< 16s 16s 16s Q'
    file_pkt_size = struct.calcsize(file_pkt_fmt)
    main_pkt_fmt = '< Q I'
    main_pkt_size = struct.calcsize(main_pkt_fmt)

    # Find the total size, so a bogus packet length can be rejected before
    # we try to read (and allocate) that many bytes.
    stream.seek(0, 2)
    total_size = stream.tell()
    stream.seek(0)

    seen_file_ids = set()
    expected_file_ids = None
    filenames = []

    while True:
        header = stream.read(pkt_header_size)
        if not header:
            break

        if len(header) < pkt_header_size:
            raise EnvironmentError(errno.EINVAL,
                                   "corrupt par2 file - truncated packet header")

        magic, pkt_len, pkt_md5, _set_id, pkt_type = \
            struct.unpack(pkt_header_fmt, header)

        # pkt_len counts the header too; the body is whatever follows it
        body_len = pkt_len - pkt_header_size
        if magic != pkt_magic or body_len < 0 \
                or body_len > total_size - stream.tell():
            raise EnvironmentError(errno.EINVAL,
                                   "corrupt par2 file - bad packet header")

        body = stream.read(body_len)

        # The packet hash covers the header from offset 0x20 on, plus the body
        control_md5 = hashlib.md5()
        control_md5.update(header[0x20:])
        control_md5.update(body)

        if control_md5.digest() != pkt_md5:
            raise EnvironmentError(errno.EINVAL,
                                   "corrupt par2 file - bad packet hash")

        if pkt_type == filedesc_type:
            file_id = struct.unpack(file_pkt_fmt, body[:file_pkt_size])[0]

            # The same description may be repeated; record each file once
            if file_id not in seen_file_ids:
                seen_file_ids.add(file_id)
                filenames.append(chompnulls(body[file_pkt_size:]))

        elif pkt_type == main_type:
            # Only the first main packet is used
            if expected_file_ids is None:
                _slice_size, num_files = \
                    struct.unpack(main_pkt_fmt, body[:main_pkt_size])
                num_nonrecovery = (len(body) - main_pkt_size) // 16 - num_files

                ids_end = main_pkt_size + (num_files + num_nonrecovery) * 16
                expected_file_ids = [body[i:i + 16]
                                     for i in range(main_pkt_size, ids_end, 16)]

        # Any other packet type: its body has been read and checked, skip it

    if expected_file_ids is None:
        raise EnvironmentError(errno.EINVAL,
                               "corrupt or unsupported par2 file - no main packet found")

    for file_id in expected_file_ids:
        if file_id not in seen_file_ids:
            raise EnvironmentError(errno.EINVAL,
                                   "corrupt or unsupported par2 file - "
                                   "expected file description packet not found")

    return filenames
|
################################################################################ |
# The parset object |
# |
# This is an object based representation of a parset, and will verify itself |
# and extract itself, if possible. |
################################################################################ |
|
import os, glob, re |
|
class parset:
    """One par2 set: a par2 file, the other par2 files describing the same
    files, and the files they protect.

    A parset can verify (and, if needed, repair) itself and then extract the
    rar archive it contains.  The outcome of extract() is reported to the
    global `output` object.

    Attributes:
        parfile      -- the par2 file this set was created from
        extra_pars   -- further par2 files that protect the same file list
        files        -- names of the protected files (filled in by the caller)
        used_parjoin -- True when the set was verified after joining split parts
        verified     -- result of the last verify()
        extracted    -- result of the last extract()
    """

    def __init__(self, par_filename):
        self.parfile = par_filename
        self.extra_pars = []
        # An empty list (rather than False) keeps all_there() and friends
        # usable before the caller has filled in the file names.
        self.files = []
        self.used_parjoin = False
        self.verified = False
        self.extracted = False

    def print_debug_info(self):
        """Special function for debugging"""
        print('========== DEBUG INFO STARTS HERE ==========')
        print('parfile: %s' % (self.parfile, ))
        print('extra_pars: %s' % (self.extra_pars, ))
        print('files: %s' % (self.files, ))
        print('========== DEBUG INFO ENDS HERE ==========')

    # Short alias for print_debug_info()
    debug = print_debug_info

    def get_filenames(self):
        """Return the file names listed in this set's par2 file"""
        return get_par2_filenames(self.parfile)

    def all_there(self):
        """Check if all the files for the parset are present.
        This will help us decide which par2 checker to use first"""
        for f in self.files:
            if not os.path.isfile(f):
                return False

        # The files were all there
        return True

    def verify(self):
        """This will verify the parset by the most efficient method first,
        and then move to a slower method if that one fails.

        Returns True when the set verified (possibly after repair); the
        result is also stored in self.verified."""

        if self.all_there():
            # Everything is present: quick check first, and only if that
            # fails the slow one, which may repair the files
            verified = self.__fast_verify() or self.__slow_verify()

        # If we've got a video file, maybe we should try to parjoin it
        elif self.__has_video_file():
            verified = self.__parjoin()

        else: #not all there, maybe we can slow-repair
            verified = self.__slow_verify()

        self.verified = verified
        return self.verified

    def __run(self, argv):
        """Run an external program and return True if it exited with status 0.

        The arguments are passed as a list, without going through a shell,
        because the file names come from downloaded par2 files and may
        contain quotes or other shell metacharacters.  A program that cannot
        be started counts as a failure.
        """
        try:
            return subprocess.call(argv) == 0
        except OSError:
            print('Could not run program: %s' % (argv[0], ))
            return False

    def __remove_file(self, path):
        """Remove one file; report, but do not propagate, a failure"""
        try:
            os.remove(path)
        except OSError:
            print('Failed to remove file: %s' % (path, ))

    def __fast_verify(self):
        """Verify the set with cfv"""
        return self.__run(['cfv', '-v', '-f', self.parfile])

    def __slow_verify(self):
        """Verify the set with par2repair"""
        return self.__run(['par2repair', self.parfile])

    def __parjoin(self):
        """Join the split parts (<name>.001, ...) of the first file with
        lxsplit, then verify the set.  The joined file is removed again if
        the set does not verify."""

        # The result of the join itself is ignored; verification decides
        self.__run(['lxsplit', '-j', '%s.001' % (self.files[0], )])

        verified = self.__fast_verify() or self.__slow_verify()

        if not verified: # failed to verify, so remove the lxsplit created file
            self.__remove_file(self.files[0])

        self.used_parjoin = verified
        self.verified = verified
        return self.verified

    def __has_video_file(self):
        """Return True if any protected file matches the video_file_regex"""
        regex = re.compile(
            config.get_value('regular expressions', 'video_file_regex'),
            re.IGNORECASE)

        for f in self.files:
            if regex.match(f):
                return True

        return False

    def __remove_currentset(self):
        """Remove all of the files that are extractable, as well as the pars.
        Leave everything else alone.

        Does nothing unless the set has been extracted.  A file that cannot
        be removed is reported and skipped, so the rest is still cleaned up."""

        if not self.extracted:
            print('Did not extract yet, not removing currentset')
            return

        # remove the main par
        self.__remove_file(self.parfile)

        # remove all of the extra pars
        for par in self.extra_pars:
            self.__remove_file(par)

        # remove any rars that are associated (leave EVERYTHING else)
        # This regex matches both old and new style rar(s) by default.
        regex = re.compile(
            config.get_value('regular expressions', 'remove_regex'),
            re.IGNORECASE)

        for name in self.files:
            if regex.match(name):
                self.__remove_file(name)

        # remove any .{001,002,...} files (from parjoin)
        if self.used_parjoin:
            # Only "<joined file>.<digits>" counts as a part; other files
            # that merely contain the joined file's name are left alone.
            parts = re.compile('^' + re.escape(self.files[0]) + r'\.\d{3,}$')
            for name in os.listdir(os.getcwd()):
                if parts.match(name):
                    self.__remove_file(name)

        # remove any temp repair files
        regex = re.compile(
            config.get_value('regular expressions', 'temp_repair_regex'),
            re.IGNORECASE)

        for name in os.listdir(os.getcwd()):
            if regex.match(name):
                self.__remove_file(name)

    def __get_extract_file(self):
        """Find the first extractable file.

        Returns the first protected file with a .rar extension (in any
        letter case, like the removal regex), or None if there is none."""
        for name in self.files:
            if os.path.splitext(name)[1].lower() == '.rar':
                return name

        return None

    def extract(self):
        """Attempt to extract all of the files related to this parset.

        An unverified set is not extracted.  After a successful extraction
        (or when the set contains no rar at all) the set's files are removed.
        The result is reported to `output` and returned as True/False."""
        if not self.verified:
            self.extracted = False
            output.add_file(1, self.parfile)
            return False #failed to extract

        extract_file = self.__get_extract_file()

        if extract_file is not None:
            if config.get_value('options', 'extract_with_full_path'):
                command = 'x'
            else:
                command = 'e'

            # "--" ends the options, so names starting with a hyphen work
            if not self.__run(['rar', command, '-o+', '--', extract_file]):
                output.add_file(2, self.parfile)
                self.extracted = False
                return self.extracted

        # we extracted ok, so remove the currentset
        self.extracted = True
        self.__remove_currentset()

        output.add_file(0, self.parfile)

        return self.extracted
|
|
################################################################################ |
# The rarslave program itself |
################################################################################ |
|
import os, sys |
from optparse import OptionParser |
|
def check_required_progs(programs=('cfv', 'par2repair', 'lxsplit', 'rar')):
    """Check if the required programs are installed.

    Each program in `programs` is started through the shell with --help.
    For every program the shell could not find, a message is printed; if
    any is missing the program exits with status 1.
    """

    # The shell exits with 127 for "command not found"; os.system() returns
    # that exit code in the high byte of its result: 127 << 8 == 32512.
    shell_not_found = 127 << 8

    needed = []
    for prog in programs:
        if os.system('%s --help > /dev/null 2>&1' % (prog, )) == shell_not_found:
            needed.append(prog)

    if needed:
        for n in needed:
            print('Needed program "%s" not found in $PATH' % (n, ))

        sys.exit(1)
|
def get_parsets():
    """Get a representation of each parset in the current directory, and
    return them as a list of parset instances.

    Par2 files that protect the same list of files are grouped into one
    parset: the first one (in sorted name order) becomes its main par2 file,
    the others are added to extra_pars.  Par2 files that cannot be parsed
    are reported to `output` as corrupt and skipped.
    """

    regex = re.compile(
        config.get_value('regular expressions', 'par2_regex'),
        re.IGNORECASE)

    # Sorted, so the grouping does not depend on directory listing order
    par2files = sorted(f for f in os.listdir(os.getcwd()) if regex.match(f))

    parsets = []

    for par2file in par2files:
        try:
            filenames = get_par2_filenames(par2file)
        except (EnvironmentError, struct.error):
            # struct.error: the file is too short to hold a whole packet
            output.add_file(3, par2file)
            continue

        # if we already have an instance for this set, append
        # this par file to the extra_pars field
        for known in parsets:
            if known.files == filenames:
                known.extra_pars.append(par2file)
                break

        # we haven't seen this set yet, so we'll create it now
        else:
            cur = parset(par2file)
            cur.files = filenames
            parsets.append(cur)

    return parsets
|
def directory_worker(dir, options):
    """Attempts to find, verify, and extract every parset in the directory
    given as a parameter.

    If options.debug_info is set, the debug info of every parset is printed
    instead and nothing is verified or extracted.

    The process changes into `dir` while working; the previous working
    directory is restored afterwards, even if an error occurs.
    """

    cwd = os.getcwd()
    os.chdir(dir)

    try:
        parsets = get_parsets()

        # Print debug info if we're supposed to
        if options.debug_info:
            for p in parsets:
                p.print_debug_info()

        # No debug info
        else:

            # Verify each parset
            for p in parsets:
                p.verify()

            # Attempt to extract each parset
            for p in parsets:
                p.extract()
    finally:
        os.chdir(cwd)
|
def main():
    """Run rarslave from the command line.

    Parses the options (their defaults come from the config file), performs
    the optional program check and config writing, processes the working
    directory (recursively unless told otherwise) and prints the results.
    """

    # Build the OptionParser
    parser = OptionParser()
    parser.add_option('-n', '--not-recursive',
                      action='store_false', dest='recursive',
                      default=config.get_value('options', 'recursive'),
                      help="Don't run recursively")

    parser.add_option('-d', '--work-dir',
                      dest='work_dir',
                      default=config.get_value('directories', 'working_directory'),
                      help="Start running at DIR", metavar='DIR')

    parser.add_option('-p', '--check-required-programs',
                      action='store_true', dest='check_progs',
                      default=config.get_value('options', 'check_required_programs'),
                      help="Check for required programs")

    parser.add_option('-f', '--write-default-config',
                      action='store_true', dest='write_def_config',
                      default=False, help="Write out a new default config")

    parser.add_option('-c', '--write-new-config',
                      action='store_true', dest='write_config',
                      default=False, help="Write out the current config")

    parser.add_option('-o', '--output-debug-info',
                      action='store_true', dest='debug_info',
                      default=False,
                      help="Output debug info for every parset, then exit")

    # Parse the given options
    (options, args) = parser.parse_args()

    # Fix up the working directory
    options.work_dir = os.path.abspath(os.path.expanduser(options.work_dir))

    # Check that we have the required programs installed
    if options.check_progs:
        check_required_progs()

    # Write out a new default config, if we need it
    if options.write_def_config:
        config.write_config(default=True)

    # Write out the current config (adds new options to an existing config)
    if options.write_config:
        config.write_config()

    # Refuse a missing working directory with a clear message, instead of
    # failing inside os.chdir() or silently doing nothing
    if not os.path.isdir(options.work_dir):
        parser.error('working directory does not exist: %s' % (options.work_dir, ))

    # Run rarslave!
    if options.recursive:
        for root, _dirs, _files in os.walk(options.work_dir):
            directory_worker(root, options)
    else:
        directory_worker(options.work_dir, options)

    # Print the results
    output.print_results_table()

if __name__ == '__main__':
    main()
|
Property changes: |
Added: svn:executable |
+* |
\ No newline at end of property |