# Rev 358
"""Simple Par2 parsing class
This program is part of rarslave, an open-source program for
automatically checking, repairing, and extracting files, primarily
those which are downloaded from usenet.
Visit https://svn.irasnyder.com/svn/programming/rarslave for the
latest version.
"""
# Module metadata.
__author__ = "Ira W. Snyder (devel@irasnyder.com)"
__copyright__ = "Copyright (c) 2005,2006 Ira W. Snyder (devel@irasnyder.com)"
# The closing parenthesis was missing from the license string.
__license__ = "GNU GPL v2 (or, at your option, any later version)"
################################################################################
# The PAR2 Parser Module
#
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
# License: GNU General Public License v2 (or, at your option, any later version)
#
# This was mostly "stolen" (read: borrowed, the GPL rocks) from cfv.
# See http://cfv.sourceforge.net/ for a copy.
################################################################################
import struct, errno
import os, re
class Par2Parser(object):
    """Find and parse the files belonging to one PAR2 recovery set.

    All work happens in the constructor: the set's base name is derived
    from the given PAR2 filename, the current working directory is
    scanned for files whose names start with that base name, every
    ``.par2`` file among them is parsed and classified as good or
    corrupt, and the list of protected filenames is taken from the
    first good PAR2 file (in sorted order).

    The code runs on Python 2.6+ and on Python 3.
    """

    # Byte-string constants found in every PAR2 packet header.
    _PACKET_MAGIC = b'PAR2\0PKT'
    _FILE_DESC_TYPE = b'PAR 2.0\0FileDesc'
    _MAIN_TYPE = b'PAR 2.0\0Main\0\0\0\0'

    def __init__(self, par2_filename):
        """Constructor for the Par2Parser class.

        This will automatically perform all of the parsing and finding
        values for the entire class. This makes this class slow, so try
        not to re-construct it too many times.

        par2_filename -- name of a PAR2 file of the set. Candidate files
            are looked up in the current working directory, so the name
            should be relative to it.
        """
        self.__par2_filename = par2_filename
        self.__good_par2_files = []
        self.__corrupt_par2_files = []
        self.__protected_files = []
        self.__possible_files = []
        self.__set_basename = ''
        self.__escaped_basename = ''
        self.__main_logic()

    def get_par2_filename(self):
        """Get the main Par2 filename.

        If at least one good PAR2 file was found, this is the first good
        file in sorted order rather than the name given to the
        constructor."""
        return self.__par2_filename

    def get_good_par2_files(self):
        """Get a list of good Par2 files in this set."""
        return self.__good_par2_files

    def get_corrupt_par2_files(self):
        """Get a list of corrupt Par2 files in this set."""
        return self.__corrupt_par2_files

    def get_protected_files(self):
        """Get a list of filenames protected by the Par2 set."""
        return self.__protected_files

    def get_possible_files(self):
        """Get a list of files that have a name similar to the main
        Par2 filename (i.e. that start with the set's base name)."""
        return self.__possible_files

    def get_set_basename(self):
        """Get the base filename of the Par2 set.

        The base filename is the Par2 filename given, minus the
        first filename extension (and minus the ".volNN+NN" part for
        volume files).

        Example: For "test.par2", the base filename would be "test"."""
        return self.__set_basename

    def get_escaped_basename(self):
        """Get the escaped base filename of the Par2 set.

        This name is suitable for use in regular expressions where
        you just want to match using the literal base name."""
        return self.__escaped_basename

    def __main_logic(self):
        """Fills in all of the private variables in the class.

        This should be called before you use any of the getter
        methods in the class."""

        # 1. Derive the base name. "name.vol0+1.par2" carries two
        #    extensions that must be removed, "name.par2" only one.
        base = os.path.splitext(self.__par2_filename)[0]
        if re.search(r'\.vol\d+\+\d+\.par2$', self.__par2_filename,
                     re.IGNORECASE):
            base = os.path.splitext(base)[0]
        self.__set_basename = base

        # 2. Escaped form, safe to embed in regular expressions.
        self.__escaped_basename = re.escape(base)

        # 3. Every file in the current directory that starts with the
        #    base name is a candidate (.000, .001, .r00, .rar, .par2 ...).
        prefix = re.compile('^%s' % (self.__escaped_basename, ))
        self.__possible_files = [name for name in os.listdir(os.getcwd())
                                 if prefix.match(name)]

        # 4. Parse every par2 candidate once, remembering the result so
        #    the chosen file does not need to be parsed a second time.
        par2_suffix = re.compile(r'\.par2$', re.IGNORECASE)
        parsed = {}
        for name in self.__possible_files:
            if not par2_suffix.search(name):
                continue
            try:
                parsed[name] = self.__parse_par2_file(name)
            except Exception:
                # Any parse failure marks the file as corrupt, but
                # KeyboardInterrupt / SystemExit still propagate.
                self.__corrupt_par2_files.append(name)
            else:
                self.__good_par2_files.append(name)

        # 5. The first good file (sorted) becomes the main par2 file and
        #    supplies the protected filenames. With no good file there
        #    is nothing more to fill in.
        if self.__good_par2_files:
            self.__good_par2_files.sort()
            first = self.__good_par2_files[0]
            self.__par2_filename = first
            self.__protected_files = parsed[first]

    def __chompnulls(self, line):
        """Returns the line up to the first null character.

        Works for both byte strings and text strings."""
        nul = '\0' if isinstance(line, str) else b'\0'
        end = line.find(nul)
        if end < 0:
            return line
        return line[:end]

    def __parse_par2_file(self, filename):
        """Get all of the filenames that are protected by the par2
        file given as the filename.

        Returns a list of filenames. If the file cannot be opened a
        message is printed and an empty list is returned.

        Raises EnvironmentError (errno.EINVAL) if the file is corrupt
        or is not a supported par2 file."""
        try:
            handle = open(filename, 'rb')
        except (IOError, OSError):
            print('Could not open %s' % (filename, ))
            return []

        # Make sure the handle is released even when parsing fails.
        try:
            return self.__read_protected_names(handle)
        finally:
            handle.close()

    def __read_protected_names(self, handle):
        """Walk all packets of an open par2 file and collect the names
        from its file description packets.

        Every packet's MD5 is verified. Raises EnvironmentError
        (errno.EINVAL) on any structural problem."""
        import hashlib

        header_fmt = '< 8s Q 16s 16s 16s'
        header_size = struct.calcsize(header_fmt)
        file_pkt_fmt = '< 16s 16s 16s Q'
        file_pkt_size = struct.calcsize(file_pkt_fmt)
        main_pkt_fmt = '< Q I'
        main_pkt_size = struct.calcsize(main_pkt_fmt)

        total_size = os.fstat(handle.fileno()).st_size
        seen_file_ids = set()
        expected_file_ids = None
        filenames = []

        while True:
            header = handle.read(header_size)
            if not header:
                break
            if len(header) < header_size:
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt par2 file - truncated packet header")

            magic, pkt_len, pkt_md5, set_id, pkt_type = \
                struct.unpack(header_fmt, header)
            if magic != self._PACKET_MAGIC:
                raise EnvironmentError(
                    errno.EINVAL, "corrupt par2 file - bad packet magic")

            # Validate the length before reading: a negative count would
            # read the whole file and a huge one would try to allocate
            # that much memory.
            body_len = pkt_len - header_size
            if body_len < 0 or body_len > total_size - handle.tell():
                raise EnvironmentError(
                    errno.EINVAL, "corrupt par2 file - bad packet length")
            body = handle.read(body_len)

            # The packet hash covers everything after the hash field
            # itself: set id, packet type (header offset 0x20 on) + body.
            control_md5 = hashlib.md5()
            control_md5.update(header[0x20:])
            control_md5.update(body)
            if control_md5.digest() != pkt_md5:
                raise EnvironmentError(
                    errno.EINVAL, "corrupt par2 file - bad packet hash")

            if pkt_type == self._FILE_DESC_TYPE:
                file_id = struct.unpack(file_pkt_fmt,
                                        body[:file_pkt_size])[0]
                # The same description may be repeated; keep it once.
                if file_id not in seen_file_ids:
                    seen_file_ids.add(file_id)
                    name = self.__chompnulls(body[file_pkt_size:])
                    if not isinstance(name, str):
                        # Python 3: turn the raw bytes into a native
                        # filename string, like os.listdir() returns.
                        name = os.fsdecode(name)
                    filenames.append(name)
            elif pkt_type == self._MAIN_TYPE:
                # Only the first main packet is used.
                if expected_file_ids is None:
                    if len(body) < main_pkt_size:
                        raise EnvironmentError(
                            errno.EINVAL,
                            "corrupt par2 file - main packet too short")
                    # After the fixed fields come 16-byte file ids
                    # (recovery set followed by non-recovery set).
                    id_count = (len(body) - main_pkt_size) // 16
                    id_end = main_pkt_size + id_count * 16
                    expected_file_ids = [
                        body[pos:pos + 16]
                        for pos in range(main_pkt_size, id_end, 16)]
            # Any other packet type is verified above and then ignored.

        if expected_file_ids is None:
            raise EnvironmentError(
                errno.EINVAL,
                "corrupt or unsupported par2 file - no main packet found")
        for file_id in expected_file_ids:
            if file_id not in seen_file_ids:
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt or unsupported par2 file - "
                    "expected file description packet not found")
        return filenames
# Interactive self-test: ask for a PAR2 file, parse its set and dump
# everything the parser found.
if __name__ == '__main__':
    # raw_input() only exists on Python 2; input() is its Python 3 name.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Keep asking until the user names an existing file.
    while True:
        fname = read_line("Enter PAR2 Filename to test: ")
        fname = os.path.abspath(os.path.expanduser(fname))
        if os.path.isfile(fname):
            break
        print("not a file, try again!")

    # Par2Parser scans the current working directory, so move next to
    # the file and refer to it by its bare name.
    os.chdir(os.path.dirname(fname))
    fname = os.path.basename(fname)
    p = Par2Parser(fname)

    # Single-argument print() calls produce the same output on Python 2
    # and Python 3.
    print("par2_filename: %s" % (p.get_par2_filename(), ))
    print("good_par2_files: %s" % (p.get_good_par2_files(), ))
    print("corrupt_par2_files: %s" % (p.get_corrupt_par2_files(), ))
    print("protected_files: %s" % (p.get_protected_files(), ))
    print("possible_files: %s" % (p.get_possible_files(), ))
    print("set_basename: %s" % (p.get_set_basename(), ))
    print("escaped_basename: %s" % (p.get_escaped_basename(), ))