Subversion Repositories programming

Rev

Rev 358 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 358 Rev 364
Line -... Line 1...
-
 
1
"""Simple Par2 parsing class
-
 
2
 
-
 
3
This program is part of rarslave, an open-source program for
-
 
4
automatically checking, repairing, and extracting files, primarily
-
 
5
those which are downloaded from usenet.
-
 
6
 
-
 
7
Visit https://svn.irasnyder.com/svn/programming/rarslave for the
-
 
8
latest version.
-
 
9
"""
-
 
10
 
-
 
11
__author__ = "Ira W. Snyder (devel@irasnyder.com)"
-
 
12
__copyright__ = "Copyright (c) 2005,2006 Ira W. Snyder (devel@irasnyder.com)"
-
 
13
__license__ = "GNU GPL v2 (or, at your option, any later version)"
-
 
14
 
1
################################################################################
15
################################################################################
2
# The PAR2 Parser Module
16
# The PAR2 Parser Module
3
#
17
#
4
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
18
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
5
# License: GNU General Public License v2 (or, at your option, any later version)
19
# License: GNU General Public License v2 (or, at your option, any later version)
Line 10... Line 24...
10
 
24
 
11
import struct, errno
25
import struct, errno
12
import os, re
26
import os, re
13
 
27
 
14
class Par2Parser (object):
	"""Collect information about the PAR2 set that a given file belongs to.

	Constructing an instance scans the current working directory for
	files whose names start with the set's base name, parses every
	".par2" file among them and records which ones are intact. The
	results are then available through the get_* methods.

	Every candidate PAR2 file is read completely during construction,
	which makes this class slow, so try not to re-construct it too
	many times.
	"""

	def __init__(self, par2_filename):
		"""Constructor for the Par2Parser class.

		par2_filename -- name of any PAR2 file of the set (an index
		                 file or a ".volNN+MM.par2" volume file),
		                 looked up in the current working directory.

		This automatically performs all of the parsing and finds the
		values for the entire class.
		"""
		self.__par2_filename = par2_filename
		self.__good_par2_files = []
		self.__corrupt_par2_files = []
		self.__protected_files = []
		self.__possible_files = []
		self.__set_basename = ''
		self.__escaped_basename = ''

		self.__main_logic()

	def get_par2_filename(self):
		"""Get the main Par2 filename.

		This is the first good PAR2 file of the set (in sorted order),
		or the name given to the constructor if no good file was found.
		"""
		return self.__par2_filename

	def get_good_par2_files(self):
		"""Get a list of good Par2 files in this set."""
		return self.__good_par2_files

	def get_corrupt_par2_files(self):
		"""Get a list of corrupt Par2 files in this set."""
		return self.__corrupt_par2_files

	def get_protected_files(self):
		"""Get a list of filenames protected by the Par2 set."""
		return self.__protected_files

	def get_possible_files(self):
		"""Get a list of files that have a name similar to the main
		Par2 filename."""
		return self.__possible_files

	def get_set_basename(self):
		"""Get the base filename of the Par2 set.

		The base filename is the Par2 filename given, minus the
		first filename extension (and minus the ".volNN+MM" part
		for volume files).

		Example: For "test.par2", the base filename would be "test".
		"""
		return self.__set_basename

	def get_escaped_basename(self):
		"""Get the escaped base filename of the Par2 set.

		This name is suitable for use in regular expressions where
		you just want to match using the literal base name.
		"""
		return self.__escaped_basename

	def __main_logic(self):
		"""Fill in all of the private variables of the class.

		Called once from the constructor; the getter methods only
		report what this method found.
		"""
		# 1. Work out the base name of the set. Volume files carry two
		#    extensions (".volNN+MM.par2"), every other file only one.
		basename = os.path.splitext(self.__par2_filename)[0]
		if re.search(r'\.vol\d+\+\d+\.par2$', self.__par2_filename,
				re.IGNORECASE):
			basename = os.path.splitext(basename)[0]
		self.__set_basename = basename

		# 2. Keep an escaped copy for use inside regular expressions
		self.__escaped_basename = re.escape(basename)

		# 3. Find every file in the current directory whose name starts
		#    with the base name (.000, .001, .r00, .rar, .par2, ...)
		starts_with_base = re.compile('^%s' % (self.__escaped_basename, ))
		self.__possible_files = [f for f in os.listdir(os.getcwd())
				if starts_with_base.match(f)]

		# 4. Parse each par2 file exactly once, keeping the result so
		#    that step 5 does not have to read a file a second time.
		is_par2 = re.compile(r'\.par2$', re.IGNORECASE)
		parsed = {}
		for f in self.__possible_files:
			if not is_par2.search(f):
				continue

			try:
				parsed[f] = self.__parse_par2_file(f)
			except Exception:
				# Whatever went wrong (unreadable, truncated, bad
				# hash, ...), the file is of no use to the set.
				# Unlike a bare "except:", Ctrl-C still gets through.
				self.__corrupt_par2_files.append(f)
			else:
				self.__good_par2_files.append(f)

		# 5. The first good file (in sorted order) becomes the main
		#    file of the set and supplies the protected filenames.
		#    Without a good file there is nothing more we can find out.
		if self.__good_par2_files:
			self.__good_par2_files.sort()
			first = self.__good_par2_files[0]
			self.__par2_filename = first
			self.__protected_files = parsed[first]

	def __chompnulls(self, line):
		"""Return the line up to the first null character.

		Works for both byte strings and text strings. A line without
		a null character is returned unchanged.
		"""
		if isinstance(line, str):
			nul = '\0'
		else:
			nul = '\0'.encode('ascii')
		return line.split(nul, 1)[0]

	def __parse_par2_file(self, filename):
		"""Get all of the filenames that are protected by the par2
		file given as the filename.

		The MD5 hash of every packet is verified. The names are taken
		from the file description packets, in the order in which they
		are first seen.

		Raises EnvironmentError if the file cannot be opened (after
		printing a message), if a packet is truncated, malformed or
		fails its hash check, if there is no main packet, or if a file
		listed in the main packet has no file description packet.
		"""
		# hashlib superseded the md5 module in Python 2.5
		try:
			from hashlib import md5 as new_md5
		except ImportError:
			from md5 import new as new_md5

		# Packet data is binary. Encoding the constants yields byte
		# strings on every Python version, so they compare correctly
		# with what struct.unpack() returns.
		pkt_magic = 'PAR2\0PKT'.encode('ascii')
		filedesc_type = 'PAR 2.0\0FileDesc'.encode('ascii')
		main_type = 'PAR 2.0\0Main\0\0\0\0'.encode('ascii')

		# magic, packet length, packet md5, set id, packet type
		pkt_header_fmt = '<8sQ16s16s16s'
		pkt_header_size = struct.calcsize(pkt_header_fmt)
		# file id, file md5, md5 of the first 16k, file size
		file_pkt_size = struct.calcsize('<16s16s16sQ')
		# slice size, number of files in the recovery set
		main_pkt_size = struct.calcsize('<QI')

		id_size = 16
		# The packet hash covers everything after the hash field
		hashed_from = 0x20

		try:
			par2 = open(filename, 'rb')
		except EnvironmentError:
			# Re-raise so that the caller does not mistake an
			# unreadable file for a good one.
			print('Could not open %s' % (filename, ))
			raise

		seen_file_ids = set()
		expected_file_ids = None
		filenames = []

		try:
			while True:
				header = par2.read(pkt_header_size)
				if not header:
					break

				if len(header) < pkt_header_size:
					raise EnvironmentError(errno.EINVAL,
						"corrupt par2 file - truncated packet header")

				magic, pkt_len, pkt_md5, set_id, pkt_type = \
					struct.unpack(pkt_header_fmt, header)

				# Check before trusting pkt_len: a length smaller than
				# the header would turn the read() below into "read
				# the whole rest of the file".
				if magic != pkt_magic or pkt_len < pkt_header_size:
					raise EnvironmentError(errno.EINVAL,
						"corrupt par2 file - bad packet header")

				body = par2.read(pkt_len - pkt_header_size)

				# We always want to do the hash check. A body that
				# was cut short fails it as well.
				control_md5 = new_md5()
				control_md5.update(header[hashed_from:])
				control_md5.update(body)
				if control_md5.digest() != pkt_md5:
					raise EnvironmentError(errno.EINVAL,
						"corrupt par2 file - bad packet hash")

				if pkt_type == filedesc_type:
					if len(body) < file_pkt_size:
						raise EnvironmentError(errno.EINVAL,
							"corrupt par2 file - "
							"short file description packet")

					# The same description may be repeated; only the
					# first occurrence contributes a filename.
					file_id = body[:id_size]
					if file_id not in seen_file_ids:
						seen_file_ids.add(file_id)
						name = self.__chompnulls(body[file_pkt_size:])
						if not isinstance(name, str):
							# Python 3: hand out native strings that
							# can be compared with os.listdir() names
							name = os.fsdecode(name)
						filenames.append(name)

				elif pkt_type == main_type:
					# Only the first main packet is used
					if expected_file_ids is None:
						if len(body) < main_pkt_size:
							raise EnvironmentError(errno.EINVAL,
								"corrupt par2 file - short main packet")

						# After the fixed fields come the ids of the
						# recovery set, then those of the non-recovery
						# set; all of them need a description packet.
						ids = body[main_pkt_size:]
						expected_file_ids = [
							ids[k * id_size:(k + 1) * id_size]
							for k in range(len(ids) // id_size)]

				# All other packet types are of no interest here
		finally:
			par2.close()

		if expected_file_ids is None:
			raise EnvironmentError(errno.EINVAL,
				"corrupt or unsupported par2 file - no main packet found")

		for file_id in expected_file_ids:
			if file_id not in seen_file_ids:
				raise EnvironmentError(errno.EINVAL,
					"corrupt or unsupported par2 file - "
					"expected file description packet not found")

		return filenames
169
 
229
 
170
if __name__ == '__main__':

	# Small interactive self-test: ask for a PAR2 file, parse the set it
	# belongs to and print everything the parser found out.

	# raw_input() was renamed to input() in Python 3; use whichever exists
	try:
		read_line = raw_input
	except NameError:
		read_line = input

	# Keep asking until the answer names an existing file
	while True:
		fname = read_line("Enter PAR2 Filename to test: ")
		fname = os.path.abspath(os.path.expanduser(fname))
		if os.path.isfile(fname):
			break
		print("not a file, try again!")

	# Par2Parser searches the current working directory for the files
	# of the set, so change into the file's directory and pass only
	# the bare filename.
	os.chdir(os.path.dirname(fname))
	fname = os.path.basename(fname)

	p = Par2Parser(fname)

	# Single-argument print() calls produce the same output whether
	# print is a statement (Python 2) or a function (Python 3).
	print("par2_filename: %s" % (p.get_par2_filename(), ))
	print("good_par2_files: %s" % (p.get_good_par2_files(), ))
	print("corrupt_par2_files: %s" % (p.get_corrupt_par2_files(), ))
	print("protected_files: %s" % (p.get_protected_files(), ))
	print("possible_files: %s" % (p.get_possible_files(), ))
	print("set_basename: %s" % (p.get_set_basename(), ))
	print("escaped_basename: %s" % (p.get_escaped_basename(), ))
182
 
253