Subversion Repositories programming

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
358 ira 1
################################################################################
2
# The PAR2 Parser Module
3
#
4
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
5
# License: GNU General Public License v2 (or, at your option, any later version)
6
#
7
# This was mostly "stolen" (read: borrowed, the GPL rocks) from cfv.
8
# See http://cfv.sourceforge.net/ for a copy.
9
################################################################################
10
 
11
import struct, errno
12
import os, re
13
 
14
class Par2Parser (object):
    """Find the files belonging to a PAR2 set and list the files it protects.

    Given the name of one par2 file, the constructor derives the base name of
    the set, collects every entry of the current working directory whose name
    starts with that base name, and tries to parse each of those entries whose
    name ends in ".par2" (case-insensitively).

    Attributes:
        par2_filename: the filename that was passed to the constructor.
        set_basename: par2_filename without its last extension (without its
            last two extensions for a "*.volN+M.par2" name).
        escaped_basename: re.escape() of set_basename.
        possible_files: entries of the current working directory whose names
            start with set_basename.
        good_par2_files: the par2 files among possible_files that parsed
            without error.
        corrupt_par2_files: the par2 files among possible_files that could
            not be opened or parsed.
        protected_files: the filenames recorded in the first entry of
            good_par2_files, as the byte strings stored in that file (str on
            Python 2, bytes on Python 3).  Empty when no par2 file parsed.
    """

    def __init__ (self, par2_filename):
        """Constructor for the Par2Parser class.

        All of the work (directory scan and par2 parsing) happens here, so
        every attribute is populated once the constructor returns.
        """

        self.par2_filename = par2_filename
        self.good_par2_files = []
        self.corrupt_par2_files = []
        self.protected_files = []
        self.possible_files = []
        self.set_basename = ''
        self.escaped_basename = ''

        self.__main_logic ()

    def __main_logic (self):
        """Fill in every attribute of the instance from self.par2_filename."""

        ####################################################
        # 1. Get the base filename of self.par2_filename.  A "vol" file
        #    (name.volN+M.par2) carries two extensions, so strip both.
        ####################################################
        basename = os.path.splitext (self.par2_filename)[0]
        if re.search (r'\.vol\d+\+\d+\.par2$', self.par2_filename, re.IGNORECASE):
            basename = os.path.splitext (basename)[0]
        self.set_basename = basename

        ####################################################
        # 2. re.escape the filename (kept as a public attribute)
        ####################################################
        self.escaped_basename = re.escape (self.set_basename)

        ####################################################
        # 3. Find all other files in the current set: every entry of the
        #    current working directory that starts with the base name.
        #    a. should be good for .000, .001, .r00, .rar
        #    b. should also find all par2 files
        #    A prefix test is exactly what matching '^' + escaped name does.
        ####################################################
        self.possible_files = [f for f in os.listdir (os.getcwd ())
                               if f.startswith (self.set_basename)]

        ####################################################
        # 4. Parse all par2 files
        #    a. add to the good_par2_files list if it is good
        #    b. add to the corrupt_par2_files list if it is corrupt
        # 5. Keep the filenames of the first good par2 file as
        #    protected_files, so that file does not need parsing twice.
        ####################################################
        par2_regex = re.compile (r'\.par2$', re.IGNORECASE)
        for f in self.possible_files:
            if not par2_regex.search (f):
                continue

            try:
                filenames = self.__parse_par2_file (f)
            except Exception:
                # Unreadable or malformed.  "Exception" rather than a bare
                # except, so that Ctrl-C and sys.exit() are not swallowed.
                self.corrupt_par2_files.append (f)
                continue

            if not self.good_par2_files:
                self.protected_files = filenames
            self.good_par2_files.append (f)

    def __chompnulls (self, line):
        """Return line up to, but not including, its first null character.

        Accepts byte strings (what the parser passes in) as well as text
        strings.  A line without a null character is returned unchanged.
        """
        nul = b'\0' if isinstance (line, bytes) else '\0'
        return line.partition (nul)[0]

    def __parse_par2_file (self, filename):
        """Get all of the filenames that are protected by the par2
        file given as the filename.

        The file is read packet by packet and the MD5 hash of every packet
        is verified.  Names come from the file description packets, in the
        order they are first seen; repeated descriptions of the same file id
        are ignored.

        Returns:
            A list of filenames as byte strings.

        Raises:
            EnvironmentError: with errno.EINVAL when the file is corrupt
                (bad or truncated packet, bad packet hash, no main packet, or
                a file id listed in the main packet without a matching file
                description packet).  Errors from open() propagate as well,
                so an unreadable file is never reported as an empty but
                valid one.
        """
        # Function-scope import, as the original did for the md5 module.
        import hashlib

        pkt_magic = b'PAR2\0PKT'
        file_desc_type = b'PAR 2.0\0FileDesc'
        main_type = b'PAR 2.0\0Main\0\0\0\0'

        # Header: magic, packet length, packet md5, set id, packet type
        pkt_header_fmt = '< 8s Q 16s 16s 16s'
        pkt_header_size = struct.calcsize (pkt_header_fmt)
        # File description body: file id, md5, md5 of first 16k, size; the
        # filename follows these fixed fields.
        file_pkt_size = struct.calcsize ('< 16s 16s 16s Q')
        # Main body: slice size, file count; 16-byte file ids follow.
        main_pkt_size = struct.calcsize ('< Q I')

        seen_file_ids = set ()
        expected_file_ids = None
        filenames = []

        # "with" guarantees the handle is closed on every exit path.
        with open (filename, 'rb') as par2_file:
            while True:
                header = par2_file.read (pkt_header_size)
                if not header:
                    break
                if len (header) < pkt_header_size:
                    raise EnvironmentError (errno.EINVAL,
                        "corrupt par2 file - truncated packet header")

                magic, pkt_len, pkt_md5, set_id, pkt_type = \
                    struct.unpack (pkt_header_fmt, header)

                # Reject garbage before trusting pkt_len as a read size.
                if magic != pkt_magic or pkt_len < pkt_header_size:
                    raise EnvironmentError (errno.EINVAL,
                        "corrupt par2 file - bad packet header")

                body = par2_file.read (pkt_len - pkt_header_size)

                # The packet hash covers everything after the md5 field:
                # set id and type (header bytes 0x20 onwards) plus the body.
                control_md5 = hashlib.md5 (header[0x20:])
                control_md5.update (body)
                if control_md5.digest () != pkt_md5:
                    raise EnvironmentError (errno.EINVAL,
                        "corrupt par2 file - bad packet hash")

                if pkt_type == file_desc_type:
                    if len (body) < file_pkt_size:
                        raise EnvironmentError (errno.EINVAL,
                            "corrupt par2 file - short file description packet")

                    file_id = body[:16]
                    if file_id not in seen_file_ids:
                        seen_file_ids.add (file_id)
                        filenames.append (self.__chompnulls (body[file_pkt_size:]))

                elif pkt_type == main_type and expected_file_ids is None:
                    # Only the first main packet is used.
                    if len (body) < main_pkt_size:
                        raise EnvironmentError (errno.EINVAL,
                            "corrupt par2 file - short main packet")

                    # Every complete 16-byte id after the fixed fields is
                    # expected, whether it is a recovery or non-recovery file.
                    num_ids = (len (body) - main_pkt_size) // 16
                    id_end = main_pkt_size + num_ids * 16
                    expected_file_ids = [body[i:i + 16]
                        for i in range (main_pkt_size, id_end, 16)]

                # Any other packet type has been hash-checked and is skipped.

        if expected_file_ids is None:
            raise EnvironmentError (errno.EINVAL,
                "corrupt or unsupported par2 file - no main packet found")

        for expected_id in expected_file_ids:
            if expected_id not in seen_file_ids:
                raise EnvironmentError (errno.EINVAL,
                    "corrupt or unsupported par2 file - "
                    "expected file description packet not found")

        return filenames
169
 
170
if __name__ == '__main__':

    # Small manual demo: parse one hard-coded par2 file from the current
    # directory and dump every attribute the parser filled in.
    fname = 'Gunslinger_Girl_06.DVD(AAC.H264)[KAA][D8028AB7].vol26+22.PAR2'
    p = Par2Parser (fname)

    # A single pre-formatted string per print keeps the output identical
    # whether print is a statement or a function.
    for attr in ('par2_filename',
                 'good_par2_files',
                 'corrupt_par2_files',
                 'protected_files',
                 'possible_files',
                 'set_basename',
                 'escaped_basename'):
        print ('%s: %s' % (attr, getattr (p, attr)))
182