Subversion Repositories programming

Rev

Rev 358 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
364 ira 1
"""Simple Par2 parsing class

This program is part of rarslave, an open-source program for
automatically checking, repairing, and extracting files, primarily
those which are downloaded from usenet.

Visit https://svn.irasnyder.com/svn/programming/rarslave for the
latest version.
"""
10
 
11
# Module metadata. The license string previously lacked its closing
# parenthesis.
__author__ = "Ira W. Snyder (devel@irasnyder.com)"
__copyright__ = "Copyright (c) 2005,2006 Ira W. Snyder (devel@irasnyder.com)"
__license__ = "GNU GPL v2 (or, at your option, any later version)"
14
 
358 ira 15
################################################################################
# The PAR2 Parser Module
#
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
# License: GNU General Public License v2 (or, at your option, any later version)
#
# This was mostly "stolen" (read: borrowed, the GPL rocks) from cfv.
# See http://cfv.sourceforge.net/ for a copy.
################################################################################
24
 
25
import struct, errno
26
import os, re
27
 
28
class Par2Parser(object):
    """Inspect a PAR2 recovery set found in the current working directory.

    Given the name of one par2 file, the constructor derives the set's base
    name, collects every directory entry that starts with that base name,
    checks each ``*.par2`` entry packet by packet, and records the filenames
    protected by the (alphabetically) first good par2 file.

    File names are resolved against ``os.getcwd()``, so callers are expected
    to ``chdir`` into the directory holding the set before constructing.

    The code runs on Python 2.6+ and Python 3.
    """

    def __init__(self, par2_filename):
        """Constructor for the Par2Parser class.

        This will automatically perform all of the parsing and finding
        values for the entire class. This makes this class slow, so try
        not to re-construct it too many times.

        par2_filename -- name of any par2 file of the set, relative to the
                         current working directory.
        """
        self.__par2_filename = par2_filename
        self.__good_par2_files = []
        self.__corrupt_par2_files = []
        self.__protected_files = []
        self.__possible_files = []
        self.__set_basename = ''
        self.__escaped_basename = ''

        self.__main_logic()

    def get_par2_filename(self):
        """Get the main Par2 filename.

        This is the first good par2 file (in sorted order) when one was
        found, otherwise the name passed to the constructor."""
        return self.__par2_filename

    def get_good_par2_files(self):
        """Get a list of good Par2 files in this set."""
        return self.__good_par2_files

    def get_corrupt_par2_files(self):
        """Get a list of corrupt (or unreadable) Par2 files in this set."""
        return self.__corrupt_par2_files

    def get_protected_files(self):
        """Get a list of filenames protected by the Par2 set."""
        return self.__protected_files

    def get_possible_files(self):
        """Get a list of files that have a name similar to the main
        Par2 filename (i.e. that start with the set's base name)."""
        return self.__possible_files

    def get_set_basename(self):
        """Get the base filename of the Par2 set.

        The base filename is the Par2 filename given, minus the
        first filename extension (and minus the ".volNN+NN" part for
        recovery volumes).

        Example: For "test.par2", the base filename would be "test"."""
        return self.__set_basename

    def get_escaped_basename(self):
        """Get the escaped base filename of the Par2 set.

        This name is suitable for use in regular expressions where
        you just want to match using the literal base name."""
        return self.__escaped_basename

    def __main_logic(self):
        """Fills in all of the private variables in the class.

        Called once from the constructor, before any getter is used."""
        # 1. Strip ".par2", and for recovery volumes ("name.vol0+1.par2")
        #    also the ".volNN+NN" part, to obtain the set's base name.
        base = os.path.splitext(self.__par2_filename)[0]
        if re.search(r'\.vol\d+\+\d+\.par2$', self.__par2_filename,
                     re.IGNORECASE):
            base = os.path.splitext(base)[0]
        self.__set_basename = base

        # 2. Keep a regex-safe copy of the base name for callers.
        self.__escaped_basename = re.escape(base)

        # 3. Every directory entry starting with the base name may belong to
        #    the set (.000, .001, .r00, .rar, and the par2 files themselves).
        #    A literal prefix test is equivalent to matching '^' + escaped
        #    base name.
        self.__possible_files = [entry for entry in os.listdir(os.getcwd())
                                 if entry.startswith(base)]

        # 4. Check every par2 file, sorting it into the good or corrupt list.
        #    The parse result is remembered so the chosen file does not have
        #    to be read a second time in step 5.
        parsed = {}
        par2_ext = re.compile(r'\.par2$', re.IGNORECASE)
        for entry in self.__possible_files:
            if not par2_ext.search(entry):
                continue
            try:
                parsed[entry] = self.__parse_par2_file(entry)
            except Exception:
                # Best effort: anything that prevents a clean parse marks the
                # file as unusable, but KeyboardInterrupt/SystemExit still
                # propagate.
                self.__corrupt_par2_files.append(entry)
            else:
                self.__good_par2_files.append(entry)

        # 5. Use the first good par2 file (sorted) as the main file; if there
        #    is none, keep what we have.
        if self.__good_par2_files:
            self.__good_par2_files.sort()
            main_file = self.__good_par2_files[0]
            self.__par2_filename = main_file
            self.__protected_files = parsed[main_file]

    def __chompnulls(self, line):
        """Returns the line up to the first null character"""
        # Use a separator of the same type as the input so this works for
        # both byte strings and text strings.
        nul = b'\0' if isinstance(line, bytes) else '\0'
        return line.partition(nul)[0]

    def __parse_par2_file(self, filename):
        """Get all of the filenames that are protected by the par2
        file given as the filename.

        Every packet's MD5 hash is verified while reading.

        Returns the protected filenames in the order their file description
        packets first appear.

        Raises IOError/OSError if the file cannot be opened (after printing
        a message), and EnvironmentError(EINVAL) if the file is truncated,
        a packet length or hash is wrong, no main packet is present, or a
        file listed in the main packet has no file description packet.
        """
        # Function-scope import, as the original did for the md5 module.
        import hashlib

        # Packet header: magic, packet length, packet md5, set id, type.
        pkt_header_fmt = '< 8s Q 16s 16s 16s'
        pkt_header_size = struct.calcsize(pkt_header_fmt)
        # File description body: file id, md5, md5 of first 16k, size;
        # the filename follows.
        file_pkt_fmt = '< 16s 16s 16s Q'
        file_pkt_size = struct.calcsize(file_pkt_fmt)
        # Main packet body: slice size, file count; 16-byte file ids follow.
        main_pkt_size = struct.calcsize('< Q I')
        id_size = 16

        try:
            par2 = open(filename, 'rb')
        except (IOError, OSError):
            print('Could not open %s' % (filename, ))
            # Re-raise so the caller does not mistake an unreadable file for
            # a good, empty par2 file.
            raise

        seen_file_ids = set()
        expected_file_ids = None
        filenames = []

        with par2:
            total_size = os.fstat(par2.fileno()).st_size

            while True:
                header = par2.read(pkt_header_size)
                if not header:
                    break
                if len(header) != pkt_header_size:
                    raise EnvironmentError(
                        errno.EINVAL,
                        "corrupt par2 file - truncated packet header")

                _magic, pkt_len, pkt_md5, _set_id, pkt_type = \
                    struct.unpack(pkt_header_fmt, header)

                # Reject lengths that would make read() misbehave: a negative
                # count reads the whole rest of the file, a huge one asks for
                # that many bytes.
                body_len = pkt_len - pkt_header_size
                if body_len < 0 or body_len > total_size - par2.tell():
                    raise EnvironmentError(
                        errno.EINVAL,
                        "corrupt par2 file - bad packet length")

                body = par2.read(body_len)

                # The packet hash covers everything after the md5 field:
                # set id, packet type and the body.
                control_md5 = hashlib.md5(header[0x20:])
                control_md5.update(body)
                if control_md5.digest() != pkt_md5:
                    raise EnvironmentError(
                        errno.EINVAL,
                        "corrupt par2 file - bad packet hash")

                if pkt_type == b'PAR 2.0\0FileDesc':
                    file_id = struct.unpack(
                        file_pkt_fmt, body[:file_pkt_size])[0]

                    # The same description may be repeated; record each file
                    # only once.
                    if file_id not in seen_file_ids:
                        seen_file_ids.add(file_id)
                        name = self.__chompnulls(body[file_pkt_size:])
                        if not isinstance(name, str):
                            # Python 3: turn the raw bytes into a native
                            # filename string. On Python 2 bytes is str, so
                            # the name is returned unchanged.
                            name = os.fsdecode(name)
                        filenames.append(name)

                elif pkt_type == b'PAR 2.0\0Main\0\0\0\0':
                    # Only the first main packet is used.
                    if expected_file_ids is None:
                        if len(body) < main_pkt_size:
                            raise EnvironmentError(
                                errno.EINVAL,
                                "corrupt par2 file - truncated main packet")
                        id_count = (len(body) - main_pkt_size) // id_size
                        ids_end = main_pkt_size + id_count * id_size
                        expected_file_ids = [
                            body[pos:pos + id_size]
                            for pos in range(main_pkt_size, ids_end, id_size)]

                # Any other packet type has been hash-checked and is skipped.

        if expected_file_ids is None:
            raise EnvironmentError(
                errno.EINVAL,
                "corrupt or unsupported par2 file - no main packet found")

        for file_id in expected_file_ids:
            if file_id not in seen_file_ids:
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt or unsupported par2 file - "
                    "expected file description packet not found")

        return filenames
229
 
358 ira 230
if __name__ == '__main__':
    # Interactive smoke test: ask for a par2 file, parse its set, and print
    # everything the parser found.

    # raw_input only exists on Python 2; Python 3 calls it input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Keep asking until the answer names an existing file.
    while True:
        fname = read_line("Enter PAR2 Filename to test: ")
        fname = os.path.abspath(os.path.expanduser(fname))
        if os.path.isfile(fname):
            break
        print("not a file, try again!")

    # Par2Parser looks for the set in the current working directory, so move
    # there and pass only the bare file name.
    os.chdir(os.path.dirname(fname))
    fname = os.path.basename(fname)

    p = Par2Parser(fname)

    report = (
        ("par2_filename", p.get_par2_filename),
        ("good_par2_files", p.get_good_par2_files),
        ("corrupt_par2_files", p.get_corrupt_par2_files),
        ("protected_files", p.get_protected_files),
        ("possible_files", p.get_possible_files),
        ("set_basename", p.get_set_basename),
        ("escaped_basename", p.get_escaped_basename),
    )
    for label, getter in report:
        print("%s: %s" % (label, getter()))
253