WebSVN – programming – /projects/rarslave/Par2Parser.py

################################################################################
# The PAR2 Parser Module
#
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
# License: GNU General Public License v2 (or, at your option, any later version)
#
# This was mostly "stolen" (read: borrowed, the GPL rocks) from cfv.
# See http://cfv.sourceforge.net/ for a copy.
################################################################################

import struct, errno
import os, re
import hashlib

class Par2Parser(object):
    """Locate and inspect the PAR2 recovery set that a par2 file belongs to.

    Creating an instance immediately scans the current working directory for
    every file whose name starts with the set's base name, validates each
    ``.par2`` file found and records the results in the attributes below.
    Only the current working directory is searched, so ``par2_filename`` is
    expected to be a bare file name located there.

    The code runs on both Python 2.6+ and Python 3.

    Attributes:
        par2_filename: The name passed to the constructor.
        set_basename: ``par2_filename`` without its extension and, for volume
            files (``name.volNN+MM.par2``), without the ``.volNN+MM`` part.
        escaped_basename: ``re.escape(set_basename)``.
        possible_files: Entries of the current working directory whose names
            start with ``set_basename``.
        good_par2_files: Members of ``possible_files`` ending in ``.par2``
            that passed validation.
        corrupt_par2_files: Members of ``possible_files`` ending in ``.par2``
            that could not be opened or failed validation.
        protected_files: File names listed in the first good par2 file, or
            ``[]`` when no good par2 file was found.
    """

    # On-disk markers. Byte literals so comparisons against data read in
    # binary mode also work on Python 3.
    _PKT_MAGIC = b'PAR2\0PKT'
    _PKT_TYPE_FILEDESC = b'PAR 2.0\0FileDesc'
    _PKT_TYPE_MAIN = b'PAR 2.0\0Main\0\0\0\0'

    # Little-endian, unpadded packet layouts.
    _PKT_HEADER_FMT = '< 8s Q 16s 16s 16s'   # magic, length, md5, set id, type
    _FILE_PKT_FMT = '< 16s 16s 16s Q'        # file id, md5, md5 of 16k, size
    _MAIN_PKT_FMT = '< Q I'                  # slice size, number of files

    _VOL_SUFFIX_RE = re.compile(r'\.vol\d+\+\d+\.par2$', re.IGNORECASE)
    _PAR2_SUFFIX_RE = re.compile(r'\.par2$', re.IGNORECASE)

    def __init__(self, par2_filename):
        """Constructor for the Par2Parser class.

        Args:
            par2_filename: Name of any par2 file (index or volume file) that
                belongs to the set to inspect.
        """

        self.par2_filename = par2_filename
        self.good_par2_files = []
        self.corrupt_par2_files = []
        self.protected_files = []
        self.possible_files = []
        self.set_basename = ''
        self.escaped_basename = ''

        self.__main_logic()

    def __main_logic(self):
        """Fill in every result attribute of the instance."""

        ####################################################
        # 1. Find out if we're parsing a "vol" file or not
        ####################################################

        # Strip ".par2"; volume files carry an extra ".volNN+MM" to strip.
        self.set_basename = os.path.splitext(self.par2_filename)[0]
        if self._VOL_SUFFIX_RE.search(self.par2_filename):
            self.set_basename = os.path.splitext(self.set_basename)[0]

        ####################################################
        # 2. re.escape the filename
        ####################################################
        self.escaped_basename = re.escape(self.set_basename)

        ####################################################
        # 3. use the escaped filename to find all other files in the current set
        #    a. should be good for .000, .001, .r00, .rar
        #    b. should also find all par2 files
        ####################################################
        regex = re.compile('^%s' % (self.escaped_basename, ))
        self.possible_files = [f for f in os.listdir(os.getcwd())
                               if regex.match(f)]

        ####################################################
        # 4. Parse all par2 files
        #    a. add to the good_par2_files list if it is good
        #    b. add to the corrupt_par2_files list if it is corrupt
        # 5. The names from the first good par2 file become protected_files.
        #    If no file is good, protected_files stays empty.
        ####################################################
        for f in self.possible_files:
            if not self._PAR2_SUFFIX_RE.search(f):
                continue

            # Only I/O and format errors mean "corrupt"; anything else
            # (KeyboardInterrupt, programming errors) must propagate.
            try:
                filenames = self.__parse_par2_file(f)
            except (EnvironmentError, struct.error):
                self.corrupt_par2_files.append(f)
            else:
                if not self.good_par2_files:
                    # Reuse this result instead of parsing the file again.
                    self.protected_files = filenames
                self.good_par2_files.append(f)

    def __chompnulls(self, line):
        """Returns the line up to the first null character"""
        nul = b'\0' if isinstance(line, bytes) else '\0'
        return line.split(nul, 1)[0]

    def __decode_name(self, raw):
        """Return a packet file name as the native ``str`` type.

        On Python 2 the byte string is returned untouched.  On Python 3 the
        bytes are decoded so the result can be compared with ``os.listdir``
        output; UTF-8 is assumed (undecodable bytes are replaced).
        """
        if isinstance(raw, str):
            return raw
        return raw.decode('utf-8', 'replace')

    def __corrupt(self, message):
        """Build the exception used to report a damaged par2 file."""
        return EnvironmentError(errno.EINVAL, message)

    def __parse_par2_file(self, filename):
        """Get all of the filenames that are protected by the par2
        file given as the filename.

        Every packet's MD5 is verified.  The file is always closed before
        returning or raising.

        Args:
            filename: Path of the par2 file to read.

        Returns:
            List of protected file names, in the order their file description
            packets first appear.

        Raises:
            EnvironmentError: The file cannot be opened (a message is printed
                first), a packet is truncated or fails its hash check, no main
                packet exists, or a file listed in the main packet has no
                file description packet.
        """

        try:
            par2 = open(filename, 'rb')
        except EnvironmentError:
            print('Could not open %s' % (filename, ))
            # Re-raise so an unreadable file is never reported as good.
            raise

        header_size = struct.calcsize(self._PKT_HEADER_FMT)
        file_pkt_size = struct.calcsize(self._FILE_PKT_FMT)
        main_pkt_size = struct.calcsize(self._MAIN_PKT_FMT)

        seen_file_ids = set()
        expected_file_ids = None
        filenames = []

        with par2:
            total_size = os.fstat(par2.fileno()).st_size

            while True:
                header = par2.read(header_size)
                if not header:
                    break
                if len(header) < header_size:
                    raise self.__corrupt(
                        "corrupt par2 file - truncated packet header")

                magic, pkt_len, pkt_md5, _set_id, pkt_type = \
                    struct.unpack(self._PKT_HEADER_FMT, header)

                # Reject impossible lengths before reading: a negative count
                # would slurp the rest of the file and a huge one would try
                # to allocate that much memory.
                body_len = pkt_len - header_size
                if (magic != self._PKT_MAGIC or body_len < 0
                        or body_len > total_size - par2.tell()):
                    raise self.__corrupt(
                        "corrupt par2 file - bad packet header")

                body = par2.read(body_len)

                # The hash covers everything after the md5 field itself:
                # set id, packet type and the packet body.
                control_md5 = hashlib.md5(header[0x20:])
                control_md5.update(body)
                if control_md5.digest() != pkt_md5:
                    raise self.__corrupt(
                        "corrupt par2 file - bad packet hash")

                if pkt_type == self._PKT_TYPE_FILEDESC:
                    if len(body) < file_pkt_size:
                        raise self.__corrupt(
                            "corrupt par2 file - short file description packet")

                    file_id = struct.unpack(self._FILE_PKT_FMT,
                                            body[:file_pkt_size])[0]

                    # The same description may be repeated; keep the first.
                    if file_id not in seen_file_ids:
                        seen_file_ids.add(file_id)
                        raw_name = self.__chompnulls(body[file_pkt_size:])
                        filenames.append(self.__decode_name(raw_name))

                elif pkt_type == self._PKT_TYPE_MAIN:
                    # Only the first main packet is used.
                    if expected_file_ids is None:
                        if len(body) < main_pkt_size:
                            raise self.__corrupt(
                                "corrupt par2 file - short main packet")

                        # Every complete 16-byte id after the fixed fields
                        # must have a matching file description packet.
                        id_count = (len(body) - main_pkt_size) // 16
                        id_end = main_pkt_size + id_count * 16
                        expected_file_ids = [
                            body[i:i + 16]
                            for i in range(main_pkt_size, id_end, 16)]

        if expected_file_ids is None:
            raise self.__corrupt(
                "corrupt or unsupported par2 file - no main packet found")

        for file_id in expected_file_ids:
            if file_id not in seen_file_ids:
                raise self.__corrupt(
                    "corrupt or unsupported par2 file - "
                    "expected file description packet not found")

        return filenames

if __name__ == '__main__':
    # Manual smoke test: parse one par2 set in the current working directory
    # and dump every result attribute.  The file name may be given as the
    # first command-line argument; without one the built-in sample is used.
    import sys

    fname = 'Gunslinger_Girl_06.DVD(AAC.H264)[KAA][D8028AB7].vol26+22.PAR2'
    if len(sys.argv) > 1:
        fname = sys.argv[1]

    p = Par2Parser(fname)

    # %-formatting a single string keeps the output identical on Python 2
    # and Python 3 ("label: value").
    for attr in ('par2_filename', 'good_par2_files', 'corrupt_par2_files',
                 'protected_files', 'possible_files', 'set_basename',
                 'escaped_basename'):
        print('%s: %s' % (attr, getattr(p, attr)))
Subversion Repositories programming

(root)/projects/rarslave/Par2Parser.py – Rev 358