364 |
ira |
1 |
"""Simple Par2 parsing class
|
|
|
2 |
|
|
|
3 |
This program is part of rarslave, an open-source program for
|
|
|
4 |
automatically checking, repairing, and extracting files, primarily
|
|
|
5 |
those which are downloaded from usenet.
|
|
|
6 |
|
|
|
7 |
Visit https://svn.irasnyder.com/svn/programming/rarslave for the
|
|
|
8 |
latest version.
|
|
|
9 |
"""
|
|
|
10 |
|
|
|
11 |
# Module metadata.
__author__ = "Ira W. Snyder (devel@irasnyder.com)"
__copyright__ = "Copyright (c) 2005,2006 Ira W. Snyder (devel@irasnyder.com)"
# The parenthesised clause is closed here, matching the wording used in the
# "License:" line of the comment banner further down in this file.
__license__ = "GNU GPL v2 (or, at your option, any later version)"
|
|
|
14 |
|
358 |
ira |
15 |
################################################################################
|
|
|
16 |
# The PAR2 Parser Module
|
|
|
17 |
#
|
|
|
18 |
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
|
|
|
19 |
# License: GNU General Public License v2 (or, at your option, any later version)
|
|
|
20 |
#
|
|
|
21 |
# This was mostly "stolen" (read: borrowed, the GPL rocks) from cfv.
|
|
|
22 |
# See http://cfv.sourceforge.net/ for a copy.
|
|
|
23 |
################################################################################
|
|
|
24 |
|
|
|
25 |
import struct, errno
|
|
|
26 |
import os, re
|
|
|
27 |
|
|
|
28 |
class Par2Parser(object):
    """Locate and parse the PAR2 files that belong to one recovery set.

    Given the name of any ``.par2`` file of a set, the constructor derives
    the set's base name, lists every file in the *current working
    directory* whose name starts with that base name, checks each ``.par2``
    file among them, and extracts the protected filenames from the first
    (alphabetically) good one.  Results are exposed through the ``get_*``
    methods.

    The code is written against the common subset of Python 2.6+ and
    Python 3.
    """

    def __init__(self, par2_filename):
        """Constructor for the Par2Parser class.

        This will automatically perform all of the parsing and finding
        values for the entire class. This makes this class slow, so try
        not to re-construct it too many times.

        par2_filename -- name of a ``.par2`` file.  Related files are looked
        up in the current working directory, so this is expected to be a
        bare filename relative to it.
        """
        self.__par2_filename = par2_filename
        self.__good_par2_files = []
        self.__corrupt_par2_files = []
        self.__protected_files = []
        self.__possible_files = []
        self.__set_basename = ''
        self.__escaped_basename = ''

        self.__main_logic()

    def get_par2_filename(self):
        """Get the main Par2 filename.

        This is the name passed to the constructor, unless at least one good
        Par2 file was found, in which case it is the alphabetically first
        good Par2 file of the set."""
        return self.__par2_filename

    def get_good_par2_files(self):
        """Get a list of good Par2 files in this set."""
        return self.__good_par2_files

    def get_corrupt_par2_files(self):
        """Get a list of corrupt Par2 files in this set."""
        return self.__corrupt_par2_files

    def get_protected_files(self):
        """Get a list of filenames protected by the Par2 set."""
        return self.__protected_files

    def get_possible_files(self):
        """Get a list of files that have a name similar to the main
        Par2 filename (i.e. that start with the set's base name)."""
        return self.__possible_files

    def get_set_basename(self):
        """Get the base filename of the Par2 set.

        The base filename is the Par2 filename given, minus the
        ".par2" extension and, for volume files, minus the
        ".volN+M" part as well.

        Example: For "test.par2" or "test.vol0+1.par2", the base
        filename would be "test"."""
        return self.__set_basename

    def get_escaped_basename(self):
        """Get the escaped base filename of the Par2 set.

        This name is suitable for use in regular expressions where
        you just want to match using the literal base name."""
        return self.__escaped_basename

    def __main_logic(self):
        """Fills in all of the private variables in the class.

        This should be called before you use any of the getter
        methods in the class."""

        # 1. Derive the base name: always drop the last extension, and drop
        #    a second one when the name looks like "<base>.volN+M.par2".
        base = os.path.splitext(self.__par2_filename)[0]
        if re.search(r'\.vol\d+\+\d+\.par2$', self.__par2_filename,
                     re.IGNORECASE):
            base = os.path.splitext(base)[0]
        self.__set_basename = base

        # 2. Keep a regex-safe copy of the base name.
        self.__escaped_basename = re.escape(base)

        # 3. Every file in the current directory whose name starts with the
        #    base name may belong to the set (.000, .001, .r00, .rar, .par2).
        prefix = re.compile('^%s' % (self.__escaped_basename, ))
        self.__possible_files = [name for name in os.listdir(os.getcwd())
                                 if prefix.match(name)]

        # 4. Parse every .par2 candidate once, sorting it into the good or
        #    the corrupt list and remembering what each good file protects.
        par2_suffix = re.compile(r'\.par2$', re.IGNORECASE)
        parsed = {}
        for name in self.__possible_files:
            if not par2_suffix.search(name):
                continue
            try:
                parsed[name] = self.__parse_par2_file(name)
            except Exception:
                # Any parse failure (bad hash, truncation, missing packets,
                # struct errors) marks the file as corrupt.  Unlike a bare
                # "except:", this lets KeyboardInterrupt/SystemExit through.
                self.__corrupt_par2_files.append(name)
            else:
                self.__good_par2_files.append(name)

        # 5. The alphabetically first good file becomes the main Par2 file
        #    and supplies the protected-file list.  With no good file there
        #    is nothing more to learn, so the defaults are kept.
        if self.__good_par2_files:
            self.__good_par2_files.sort()
            first = self.__good_par2_files[0]
            self.__par2_filename = first
            self.__protected_files = parsed[first]

    def __chompnulls(self, line):
        """Returns the line up to the first null character.

        Works on byte strings and on text strings; a line without a null
        character is returned unchanged."""
        nul = b'\0' if isinstance(line, bytes) else u'\0'
        return line.partition(nul)[0]

    def __parse_par2_file(self, filename):
        """Get all of the filenames that are protected by the par2
        file given as the filename.

        Every packet's MD5 is verified.  Returns the list of names found in
        the file's "FileDesc" packets, in file order, one entry per file id.

        If the file cannot be opened, a message is printed and an empty list
        is returned (so the caller still counts the file as parseable).

        Raises EnvironmentError (errno EINVAL) when a packet is truncated,
        has an impossible length or a wrong hash, when no "Main" packet is
        present, or when a file id listed in the "Main" packet has no
        "FileDesc" packet.
        """
        # Function-scope import, as the original did with the old "md5"
        # module; hashlib is its replacement.
        import hashlib

        # Little-endian packet layouts.
        # header: magic, packet length, packet md5, set id, packet type
        pkt_header_fmt = '< 8s Q 16s 16s 16s'
        pkt_header_size = struct.calcsize(pkt_header_fmt)
        # FileDesc body prefix: file id, md5, md5 of first 16k, file size
        file_pkt_fmt = '< 16s 16s 16s Q'
        file_pkt_size = struct.calcsize(file_pkt_fmt)
        # Main body prefix: slice size, number of recovery-set files
        main_pkt_fmt = '< Q I'
        main_pkt_size = struct.calcsize(main_pkt_fmt)

        try:
            par2 = open(filename, 'rb')
        except EnvironmentError:
            print('Could not open %s' % (filename, ))
            return []

        seen_file_ids = set()
        expected_file_ids = None
        filenames = []

        # "with" guarantees the handle is closed on every exit path.
        with par2:
            total_size = os.fstat(par2.fileno()).st_size

            while True:
                header = par2.read(pkt_header_size)
                if not header:
                    break
                if len(header) < pkt_header_size:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - truncated packet header")

                # The magic and set id are unpacked but not validated.
                magic, pkt_len, pkt_md5, set_id, pkt_type = \
                    struct.unpack(pkt_header_fmt, header)

                # pkt_len counts the header too.  Reject lengths that would
                # make read() slurp the rest of the file (negative size) or
                # attempt an absurdly large allocation.
                body_len = pkt_len - pkt_header_size
                if body_len < 0 or body_len > total_size:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - bad packet length")

                body = par2.read(body_len)
                if len(body) != body_len:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - truncated packet")

                # The packet hash covers everything after the hash field
                # itself: set id, packet type and body (header offset 0x20).
                control_md5 = hashlib.md5(header[0x20:])
                control_md5.update(body)
                if control_md5.digest() != pkt_md5:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - bad packet hash")

                if pkt_type == b'PAR 2.0\0FileDesc':
                    if len(body) < file_pkt_size:
                        raise EnvironmentError(errno.EINVAL,
                            "corrupt par2 file - file description packet too short")
                    file_id = struct.unpack(file_pkt_fmt,
                                            body[:file_pkt_size])[0]

                    # The same description may be repeated; keep the first.
                    if file_id not in seen_file_ids:
                        seen_file_ids.add(file_id)
                        name = self.__chompnulls(body[file_pkt_size:])
                        if not isinstance(name, str):
                            # Python 3 only: turn the raw bytes into text.
                            # NOTE(review): UTF-8 is assumed here (it is a
                            # superset of ASCII) - confirm for sets whose
                            # names contain other 8-bit encodings.
                            name = name.decode('utf-8', 'replace')
                        filenames.append(name)

                elif pkt_type == b'PAR 2.0\0Main\0\0\0\0':
                    # Only the first Main packet is used.
                    if expected_file_ids is None:
                        if len(body) < main_pkt_size:
                            raise EnvironmentError(errno.EINVAL,
                                "corrupt par2 file - main packet too short")
                        # After the fixed prefix the body is a run of
                        # 16-byte file ids; all of them are expected to
                        # have a FileDesc packet.
                        id_count = (len(body) - main_pkt_size) // 16
                        id_end = main_pkt_size + id_count * 16
                        expected_file_ids = [
                            body[pos:pos + 16]
                            for pos in range(main_pkt_size, id_end, 16)]

                # Any other packet type has been hash-checked and is
                # otherwise ignored.

        if expected_file_ids is None:
            raise EnvironmentError(errno.EINVAL,
                "corrupt or unsupported par2 file - no main packet found")

        for file_id in expected_file_ids:
            if file_id not in seen_file_ids:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt or unsupported par2 file - "
                    "expected file description packet not found")

        return filenames
|
|
|
229 |
|
358 |
ira |
230 |
if __name__ == '__main__':
    # Interactive smoke test: ask for a PAR2 file, parse its set and dump
    # everything the parser found.

    # Python 2 spells the line reader raw_input(); Python 3 calls it input().
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Keep asking until the answer names an existing file.
    while True:
        fname = read_line("Enter PAR2 Filename to test: ")
        fname = os.path.abspath(os.path.expanduser(fname))
        if os.path.isfile(fname):
            break
        print("not a file, try again!")

    # Par2Parser searches the current working directory, so move next to
    # the file and hand over the bare filename.
    os.chdir(os.path.dirname(fname))
    fname = os.path.basename(fname)

    p = Par2Parser(fname)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("par2_filename: %s" % (p.get_par2_filename(), ))
    print("good_par2_files: %s" % (p.get_good_par2_files(), ))
    print("corrupt_par2_files: %s" % (p.get_corrupt_par2_files(), ))
    print("protected_files: %s" % (p.get_protected_files(), ))
    print("possible_files: %s" % (p.get_possible_files(), ))
    print("set_basename: %s" % (p.get_set_basename(), ))
    print("escaped_basename: %s" % (p.get_escaped_basename(), ))
|
|
|
253 |
|