# Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
################################################################################
# The PAR2 Parser Module
#
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
# License: GNU General Public License v2 (or, at your option, any later version)
#
# This was mostly "stolen" (read: borrowed, the GPL rocks) from cfv.
# See http://cfv.sourceforge.net/ for a copy.
################################################################################
import struct, errno
import os, re
class Par2Parser(object):
    """Locate a PAR2 recovery set and list the files it protects.

    Given the name of any one PAR2 file of a set (the index file or a
    ``.volNN+MM.par2`` recovery volume), the constructor scans the current
    working directory for every entry whose name starts with the set's base
    name, verifies each PAR2 file among them, and takes the list of protected
    file names from the first PAR2 file that verified.

    Attributes (all filled in by the constructor):
        par2_filename: the name passed to the constructor.
        set_basename: ``par2_filename`` without its final extension and,
            for volume files, without the ``.volNN+MM`` part as well.
        escaped_basename: ``set_basename`` passed through ``re.escape``.
        possible_files: directory entries whose name starts with
            ``set_basename`` (in ``os.listdir`` order).
        good_par2_files: entries of ``possible_files`` ending in ``.par2``
            (any case) that parsed and verified.
        corrupt_par2_files: entries ending in ``.par2`` that could not be
            opened or failed verification.
        protected_files: file names recorded in ``good_par2_files[0]``;
            empty when no PAR2 file verified.
    """

    # Packet type identifiers, compared against the 16-byte type field of
    # each packet header.  Byte literals so the comparison also works where
    # packet data is ``bytes`` rather than ``str``.
    _FILE_DESC_TYPE = b'PAR 2.0\0FileDesc'
    _MAIN_TYPE = b'PAR 2.0\0Main\0\0\0\0'

    # Little-endian layouts.  Header: magic, packet length, packet md5,
    # set id, packet type.  FileDesc: file id, file md5, md5 of the first
    # 16k, file size.  Main: slice size, number of files.
    _PKT_HEADER = struct.Struct('<8sQ16s16s16s')
    _FILE_DESC = struct.Struct('<16s16s16sQ')
    _MAIN = struct.Struct('<QI')

    def __init__(self, par2_filename):
        """Constructor for the Par2Parser class.

        Scans the current working directory immediately; every attribute
        described on the class is populated before this returns.
        """
        self.par2_filename = par2_filename
        self.good_par2_files = []
        self.corrupt_par2_files = []
        self.protected_files = []
        self.possible_files = []
        self.set_basename = ''
        self.escaped_basename = ''
        self.__main_logic()

    def __main_logic(self):
        """Populate every public attribute from ``self.par2_filename``."""
        # 1. Base name of the set: drop the final extension, and for a
        #    "vol" file drop the ".volNN+MM" component too.
        self.set_basename = os.path.splitext(self.par2_filename)[0]
        if re.search(r'\.vol\d+\+\d+\.par2$', self.par2_filename,
                     re.IGNORECASE):
            self.set_basename = os.path.splitext(self.set_basename)[0]

        # 2. Escape it so it can be embedded in a regular expression.
        self.escaped_basename = re.escape(self.set_basename)

        # 3. Every directory entry starting with the base name belongs to
        #    the set (.000, .001, .r00, .rar and all par2 files).
        in_set = re.compile('^%s' % (self.escaped_basename,))
        self.possible_files = [f for f in os.listdir(os.getcwd())
                               if in_set.match(f)]

        # 4. Verify each par2 file and sort it into good / corrupt.  The
        #    names from the first good file are kept so that file does not
        #    have to be parsed a second time.
        is_par2 = re.compile(r'\.par2$', re.IGNORECASE)
        first_good_names = None
        for f in self.possible_files:
            if not is_par2.search(f):
                continue
            try:
                names = self.__parse_par2_file(f)
            except (EnvironmentError, struct.error):
                # Unreadable, truncated, or failed verification.
                self.corrupt_par2_files.append(f)
                continue
            self.good_par2_files.append(f)
            if first_good_names is None:
                first_good_names = names

        # 5. Without at least one good par2 file nothing more is known, so
        #    protected_files stays empty.
        if first_good_names is not None:
            self.protected_files = first_good_names

    def __chompnulls(self, line):
        """Returns the line up to the first null character"""
        # Search with a terminator of the same string type as the input.
        nul = b'\0' if isinstance(line, bytes) else u'\0'
        p = line.find(nul)
        if p < 0:
            return line
        return line[:p]

    def __parse_par2_file(self, filename):
        """Get all of the filenames that are protected by the par2
        file given as the filename.

        Reads the file packet by packet, checks every packet's MD5, collects
        the name from each distinct FileDesc packet, and finally checks that
        every file id listed in the first Main packet had a FileDesc packet.

        Returns:
            The protected file names as native strings, in the order their
            FileDesc packets first appear.

        Raises:
            EnvironmentError: the file cannot be opened (a message is
                printed first), a packet header is truncated, a packet
                length is impossible, a packet hash does not match, no Main
                packet exists, or an expected FileDesc packet is missing.
            struct.error: a FileDesc or Main packet body is shorter than
                its fixed-size fields.
        """
        import hashlib

        try:
            par2 = open(filename, 'rb')
        except EnvironmentError:
            print('Could not open %s' % (filename, ))
            # Re-raise so the caller does not mistake an unreadable file
            # for a verified one.
            raise

        header = self._PKT_HEADER
        seen_file_ids = set()
        expected_file_ids = None
        filenames = []

        with par2:
            # Upper bound for any packet length, used to reject absurd
            # values before attempting to read that many bytes.
            file_size = os.fstat(par2.fileno()).st_size

            while True:
                raw_header = par2.read(header.size)
                if not raw_header:
                    break
                if len(raw_header) < header.size:
                    raise EnvironmentError(
                        errno.EINVAL,
                        "corrupt par2 file - truncated packet header")

                _magic, pkt_len, pkt_md5, _set_id, pkt_type = \
                    header.unpack(raw_header)

                body_len = pkt_len - header.size
                if body_len < 0 or body_len > file_size:
                    raise EnvironmentError(
                        errno.EINVAL,
                        "corrupt par2 file - bad packet length")
                body = par2.read(body_len)

                # The packet hash covers the header from offset 0x20 (set
                # id and packet type) followed by the packet body.
                control_md5 = hashlib.md5(raw_header[0x20:])
                control_md5.update(body)
                if control_md5.digest() != pkt_md5:
                    raise EnvironmentError(
                        errno.EINVAL,
                        "corrupt par2 file - bad packet hash")

                if pkt_type == self._FILE_DESC_TYPE:
                    file_id = self._FILE_DESC.unpack_from(body)[0]
                    # The same packet may occur more than once; record each
                    # file only the first time its id is seen.
                    if file_id not in seen_file_ids:
                        seen_file_ids.add(file_id)
                        name = self.__chompnulls(body[self._FILE_DESC.size:])
                        if not isinstance(name, str):
                            # Packet data is bytes here; hand back text so
                            # the names compare with os.listdir() results.
                            # NOTE(review): UTF-8 is an assumption about the
                            # name encoding - confirm against the PAR2 spec.
                            name = name.decode('utf-8', 'replace')
                        filenames.append(name)
                elif pkt_type == self._MAIN_TYPE:
                    # Only the first Main packet defines the expected ids.
                    if expected_file_ids is None:
                        # Fails with struct.error if the fixed fields are
                        # missing; the values themselves are not needed.
                        self._MAIN.unpack_from(body)
                        first = self._MAIN.size
                        # Everything after the fixed fields is a run of
                        # 16-byte file ids (recovery and non-recovery).
                        id_count = (len(body) - first) // 16
                        expected_file_ids = [
                            body[i:i + 16]
                            for i in range(first, first + id_count * 16, 16)
                        ]
                # Any other packet type only needed its hash verified.

        if expected_file_ids is None:
            raise EnvironmentError(
                errno.EINVAL,
                "corrupt or unsupported par2 file - no main packet found")

        for file_id in expected_file_ids:
            if file_id not in seen_file_ids:
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt or unsupported par2 file - "
                    "expected file description packet not found")

        return filenames
if __name__ == '__main__':
    # Ad-hoc demonstration: parse one hard-coded set from the current
    # directory and dump every attribute the parser collected.
    fname = 'Gunslinger_Girl_06.DVD(AAC.H264)[KAA][D8028AB7].vol26+22.PAR2'
    p = Par2Parser(fname)
    report = (
        ("par2_filename:", p.par2_filename),
        ("good_par2_files:", p.good_par2_files),
        ("corrupt_par2_files:", p.corrupt_par2_files),
        ("protected_files:", p.protected_files),
        ("possible_files:", p.possible_files),
        ("set_basename:", p.set_basename),
        ("escaped_basename:", p.escaped_basename),
    )
    for label, value in report:
        # One pre-formatted string per line: label, a space, then the value.
        print("%s %s" % (label, value))