358 |
ira |
1 |
################################################################################
|
|
|
2 |
# The PAR2 Parser Module
|
|
|
3 |
#
|
|
|
4 |
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
|
|
|
5 |
# License: GNU General Public License v2 (or, at your option, any later version)
|
|
|
6 |
#
|
|
|
7 |
# This was mostly "stolen" (read: borrowed, the GPL rocks) from cfv.
|
|
|
8 |
# See http://cfv.sourceforge.net/ for a copy.
|
|
|
9 |
################################################################################
|
|
|
10 |
|
|
|
11 |
import errno
import hashlib
import os
import re
import struct
|
|
|
13 |
|
|
|
14 |
class Par2Parser(object):
    """Locate and parse the PAR2 set that a given par2 file belongs to.

    Constructing an instance immediately scans the current working
    directory and fills in these attributes:

    par2_filename      -- the filename passed to the constructor
    set_basename       -- par2_filename without its ".par2" extension and,
                          for volume files, without the ".volNN+MM" part
    escaped_basename   -- re.escape(set_basename)
    possible_files     -- sorted names in the current directory that start
                          with set_basename
    good_par2_files    -- "*.par2" entries of possible_files that parsed
                          without error
    corrupt_par2_files -- "*.par2" entries of possible_files that failed
                          to parse
    protected_files    -- filenames listed by the first good par2 file
                          (empty if there is none)
    """

    # 16-byte packet type identifiers as they appear in the packet header.
    _PKT_TYPE_FILEDESC = b'PAR 2.0\0FileDesc'
    _PKT_TYPE_MAIN = b'PAR 2.0\0Main\0\0\0\0'

    # Packet header: magic, packet length, packet md5, set id, packet type.
    _PKT_HEADER_FMT = '< 8s Q 16s 16s 16s'
    # Fixed part of a FileDesc body: file id, md5, md5 of first 16k, size.
    _FILE_PKT_FMT = '< 16s 16s 16s Q'
    # Fixed part of a Main body: slice size, number of files.
    _MAIN_PKT_FMT = '< Q I'

    # The packet md5 covers everything after the md5 field itself, i.e.
    # the header from this offset onwards plus the whole body.
    _HASHED_HEADER_OFFSET = 0x20

    def __init__(self, par2_filename):
        """Record par2_filename and scan the current directory for its set.

        par2_filename is matched against os.listdir(os.getcwd()), so it is
        expected to be a bare filename relative to the working directory.
        """
        self.par2_filename = par2_filename
        self.good_par2_files = []
        self.corrupt_par2_files = []
        self.protected_files = []
        self.possible_files = []
        self.set_basename = ''
        self.escaped_basename = ''

        self.__main_logic()

    def __main_logic(self):
        """Populate every result attribute (see the class docstring)."""
        ####################################################
        # 1. Find out if we're parsing a "vol" file or not
        ####################################################
        stem = os.path.splitext(self.par2_filename)[0]
        if re.search(r'\.vol\d+\+\d+\.par2$', self.par2_filename,
                     re.IGNORECASE):
            # "name.vol26+22.par2": also drop the ".vol26+22" component.
            stem = os.path.splitext(stem)[0]
        self.set_basename = stem

        ####################################################
        # 2. re.escape the filename
        ####################################################
        self.escaped_basename = re.escape(self.set_basename)

        ####################################################
        # 3. Find all other files in the current set: everything in the
        #    working directory whose name starts with the basename
        #    (.000, .001, .r00, .rar, and all par2 files).
        ####################################################
        # os.listdir() order is unspecified; sort so that the choice of
        # "first good par2 file" below is deterministic.
        self.possible_files = sorted(
            entry for entry in os.listdir(os.getcwd())
            if entry.startswith(self.set_basename))

        ####################################################
        # 4. Parse all par2 files, sorting them into good and corrupt
        # 5. Keep the file list of the first good one
        ####################################################
        par2_suffix = re.compile(r'\.par2$', re.IGNORECASE)
        for candidate in self.possible_files:
            if not par2_suffix.search(candidate):
                continue

            try:
                listed = self.__parse_par2_file(candidate)
            except (EnvironmentError, struct.error):
                # Covers the parser's own "corrupt par2 file" errors, read
                # errors, and packets too short to unpack.
                self.corrupt_par2_files.append(candidate)
                continue

            if not self.good_par2_files:
                # Reuse this result instead of parsing the file again.
                self.protected_files = listed
            self.good_par2_files.append(candidate)

    def __chompnulls(self, line):
        """Return line up to, but not including, its first null character.

        The whole of line is returned when it contains no null.
        """
        nul = b'\0' if isinstance(line, bytes) else '\0'
        return line.partition(nul)[0]

    def __parse_par2_file(self, filename):
        """Return the filenames listed in the par2 file `filename`.

        Every packet's md5 is verified.  The names come from the FileDesc
        packets, in file order, with duplicate file ids ignored.

        If the file cannot be opened, a message is printed and an empty
        list is returned (the caller therefore does not treat this as
        corruption).

        Raises EnvironmentError(errno.EINVAL, ...) when a packet is
        truncated, has an impossible length or a bad hash, when no Main
        packet is present, or when a file id listed by the Main packet has
        no FileDesc packet.  May raise struct.error when a FileDesc or
        Main body is shorter than its fixed-size fields.
        """
        try:
            par2 = open(filename, 'rb')
        except EnvironmentError:
            print('Could not open %s' % (filename, ))
            return []

        pkt_header_size = struct.calcsize(self._PKT_HEADER_FMT)
        file_pkt_size = struct.calcsize(self._FILE_PKT_FMT)
        main_pkt_size = struct.calcsize(self._MAIN_PKT_FMT)

        seen_file_ids = set()
        expected_file_ids = None
        filenames = []

        with par2:
            total_size = os.fstat(par2.fileno()).st_size
            offset = 0

            while True:
                header = par2.read(pkt_header_size)
                if not header:
                    break
                if len(header) < pkt_header_size:
                    raise EnvironmentError(
                        errno.EINVAL,
                        "corrupt par2 file - truncated packet header")

                _magic, pkt_len, pkt_md5, _set_id, pkt_type = \
                    struct.unpack(self._PKT_HEADER_FMT, header)

                # pkt_len counts the header too.  Reject lengths that would
                # produce a negative read size or run past the end of the
                # file before asking for that many bytes.
                if pkt_len < pkt_header_size or offset + pkt_len > total_size:
                    raise EnvironmentError(
                        errno.EINVAL,
                        "corrupt par2 file - bad packet length")

                body = par2.read(pkt_len - pkt_header_size)
                offset += pkt_len

                control_md5 = hashlib.md5(header[self._HASHED_HEADER_OFFSET:])
                control_md5.update(body)
                if control_md5.digest() != pkt_md5:
                    raise EnvironmentError(
                        errno.EINVAL,
                        "corrupt par2 file - bad packet hash")

                if pkt_type == self._PKT_TYPE_FILEDESC:
                    file_id = struct.unpack(self._FILE_PKT_FMT,
                                            body[:file_pkt_size])[0]
                    if file_id in seen_file_ids:
                        continue
                    seen_file_ids.add(file_id)

                    # The name follows the fixed fields, null padded.
                    name = self.__chompnulls(body[file_pkt_size:])
                    if not isinstance(name, str):
                        # Python 3 hands back bytes; convert to the native
                        # str type so names compare with os.listdir()
                        # entries.  On Python 2 the name is already str.
                        name = name.decode('utf-8', 'replace')
                    filenames.append(name)

                elif pkt_type == self._PKT_TYPE_MAIN:
                    if expected_file_ids is None:
                        # Unpacked only to insist that the fixed fields
                        # are present; the ids that follow are what matter.
                        struct.unpack(self._MAIN_PKT_FMT,
                                      body[:main_pkt_size])
                        # Every complete 16-byte id after the fixed fields
                        # is expected to have a FileDesc packet.
                        id_count = (len(body) - main_pkt_size) // 16
                        expected_file_ids = [
                            body[start:start + 16]
                            for start in range(main_pkt_size,
                                               main_pkt_size + id_count * 16,
                                               16)]

                # Any other packet type only needed its hash verified.

        if expected_file_ids is None:
            raise EnvironmentError(
                errno.EINVAL,
                "corrupt or unsupported par2 file - no main packet found")

        for file_id in expected_file_ids:
            if file_id not in seen_file_ids:
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt or unsupported par2 file - "
                    "expected file description packet not found")

        return filenames
|
|
|
169 |
|
|
|
170 |
if __name__ == '__main__':
    # Small manual demo: parse one par2 set in the current directory and
    # dump everything the parser found.
    import sys

    # The par2 file may be named on the command line; without an argument
    # the original sample filename is used.
    default_fname = 'Gunslinger_Girl_06.DVD(AAC.H264)[KAA][D8028AB7].vol26+22.PAR2'
    fname = sys.argv[1] if len(sys.argv) > 1 else default_fname
    p = Par2Parser(fname)

    # A single pre-formatted argument keeps the output identical whether
    # print is a statement (Python 2) or a function (Python 3).
    for attr in ('par2_filename',
                 'good_par2_files',
                 'corrupt_par2_files',
                 'protected_files',
                 'possible_files',
                 'set_basename',
                 'escaped_basename'):
        print('%s: %s' % (attr, getattr(p, attr)))
|
|
|
182 |
|