#!/usr/bin/env python
# vim: set ts=4 sts=4 sw=4 textwidth=92:

################################################################################
# The PAR2 Parser
#
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
################################################################################
#
# Provenance:
#   From: Ira W. Snyder
#   Date: Mon, 25 Dec 2006 01:45:56 +0000 (-0800)
#   Subject: [RARSLAVE] Add PAR2 Parser
#   X-Git-Tag: v2.0.0~60
#   X-Git-Url: https://www.irasnyder.com/gitweb/?p=rarslave2.git;a=commitdiff_plain;h=7ec7c3baa71901a5ec2244deee0a0008416d7d07
#
#   Add the class par2parser which parses PAR2 files to extract the list of
#   files which are "protected" by them.
#
#   Signed-off-by: Ira W. Snyder
################################################################################

"""Extract the list of file names described by a PAR2 file."""

import errno
import hashlib
import os
import struct

# Every PAR2 packet starts with this 8-byte magic sequence.
PAR2_MAGIC = b'PAR2\0PKT'

# 16-byte packet type identifiers handled by this parser.
PKT_TYPE_FILE_DESC = b'PAR 2.0\0FileDesc'
PKT_TYPE_MAIN = b'PAR 2.0\0Main\0\0\0\0'

# Packet header: magic, packet length, packet MD5, recovery set id, packet type.
_PKT_HEADER = struct.Struct('<8sQ16s16s16s')
# Fixed part of a FileDesc body: file id, file MD5, MD5 of first 16k, file size.
_FILE_DESC = struct.Struct('<16s16s16sQ')
# Fixed part of a Main body: slice size, number of files in the recovery set.
_MAIN = struct.Struct('<QI')

# The packet MD5 covers everything from this header offset to the packet end.
_HASHED_FROM = 0x20
# Width in bytes of one file id in the Main packet's id list.
_FILE_ID_SIZE = 16


def get_full_filename(dir, file):
    """Return the absolute, user-expanded path of `file` inside `dir`."""
    return os.path.abspath(os.path.expanduser(os.path.join(dir, file)))


def chompnulls(line):
    """Return `line` cut off at its first NUL character.

    Works on both byte strings and text strings; a value that contains no NUL
    is returned unchanged.
    """
    nul = b'\0' if isinstance(line, bytes) else u'\0'
    cut = line.find(nul)
    if cut < 0:
        return line
    return line[:cut]


def _corrupt(message):
    """Build the EINVAL EnvironmentError raised for every malformed-file case."""
    return EnvironmentError(errno.EINVAL, message)


def _decode_name(raw):
    """Return a FileDesc name as the interpreter's native `str` type.

    On Python 2 the byte string read from disk already is a `str` and is
    returned untouched.  On Python 3 the bytes are decoded as UTF-8, with
    undecodable bytes replaced, so the result can be used as a path component.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode('utf-8', 'replace')


def _iter_packets(stream):
    """Yield `(packet_type, body)` for each packet read from `stream`.

    `stream` must be a real file opened in binary mode (its size is taken from
    `os.fstat`).  Each packet's MD5 is verified before it is yielded.

    Raises EnvironmentError (errno EINVAL) on a truncated or malformed header,
    a length that runs past the end of the file, or a hash mismatch.
    """
    total_size = os.fstat(stream.fileno()).st_size

    while True:
        header = stream.read(_PKT_HEADER.size)
        if not header:
            break
        if len(header) < _PKT_HEADER.size:
            raise _corrupt("corrupt par2 file - truncated packet header")

        magic, pkt_len, pkt_md5, _set_id, pkt_type = _PKT_HEADER.unpack(header)

        # A length below the header size would turn into a negative read()
        # size, which silently slurps the rest of the file.
        if magic != PAR2_MAGIC or pkt_len < _PKT_HEADER.size:
            raise _corrupt("corrupt par2 file - bad packet header")

        # Refuse lengths that point past EOF before asking read() for them, so
        # a bogus 64-bit length cannot trigger a huge allocation.
        body_len = pkt_len - _PKT_HEADER.size
        if body_len > total_size - stream.tell():
            raise _corrupt("corrupt par2 file - truncated packet")

        body = stream.read(body_len)

        control_md5 = hashlib.md5(header[_HASHED_FROM:])
        control_md5.update(body)
        if control_md5.digest() != pkt_md5:
            raise _corrupt("corrupt par2 file - bad packet hash")

        yield pkt_type, body


def get_protected_files(dir, filename):
    """Get all of the filenames that are protected by the par2
    file given as the filename.

    Parameters:
        dir      -- directory containing the PAR2 file
        filename -- name of the PAR2 file inside `dir`

    Returns the names found in the file's FileDesc packets, in the order the
    packets first appear and without duplicates.  If the PAR2 file cannot be
    opened, a message is printed and an empty list is returned.

    Raises EnvironmentError (errno EINVAL) when a packet is malformed or fails
    its MD5 check, when no Main packet is present, or when the Main packet
    lists a file id for which no FileDesc packet was found.
    """
    full_filename = get_full_filename(dir, filename)

    try:
        par2 = open(full_filename, 'rb')
    except (IOError, OSError):
        print('Could not open %s' % (full_filename, ))
        return []

    seen_file_ids = set()
    expected_file_ids = None
    filenames = []

    # The context manager guarantees the handle is closed even when a corrupt
    # packet raises part-way through the file.
    with par2:
        for pkt_type, body in _iter_packets(par2):
            if pkt_type == PKT_TYPE_FILE_DESC:
                if len(body) < _FILE_DESC.size:
                    raise _corrupt(
                        "corrupt par2 file - short file description packet")

                file_id = _FILE_DESC.unpack_from(body)[0]

                # Only the first description seen for a given id is recorded.
                if file_id not in seen_file_ids:
                    seen_file_ids.add(file_id)
                    raw_name = chompnulls(body[_FILE_DESC.size:])
                    filenames.append(_decode_name(raw_name))

            elif pkt_type == PKT_TYPE_MAIN and expected_file_ids is None:
                # Only the first Main packet is consulted.
                if len(body) < _MAIN.size:
                    raise _corrupt("corrupt par2 file - short main packet")

                # Everything after the fixed fields is a list of 16-byte file
                # ids; all of them are expected to have a FileDesc packet.
                # Any trailing partial id is ignored.
                ids_end = len(body) - (len(body) - _MAIN.size) % _FILE_ID_SIZE
                expected_file_ids = [
                    body[pos:pos + _FILE_ID_SIZE]
                    for pos in range(_MAIN.size, ids_end, _FILE_ID_SIZE)
                ]

            # Any other packet type has been hash-checked and is ignored.

    if expected_file_ids is None:
        raise _corrupt(
            "corrupt or unsupported par2 file - no main packet found")

    for file_id in expected_file_ids:
        if file_id not in seen_file_ids:
            raise _corrupt(
                "corrupt or unsupported par2 file - "
                "expected file description packet not found")

    return filenames


def main():
    """Entry point when run as a script; currently does nothing."""
    pass


if __name__ == '__main__':
    main()