WebSVN – programming – Blame – /projects/rarslave/Par2Parser.py

Rev	Author	Line No.	Line
358	ira	1	`################################################################################`
		2	`# The PAR2 Parser Module`
		3	`#`
		4	`# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)`
		5	`# License: GNU General Public License v2 (or, at your option, any later version)`
		6	`#`
		7	`# This was mostly "stolen" (read: borrowed, the GPL rocks) from cfv.`
		8	`# See http://cfv.sourceforge.net/ for a copy.`
		9	`################################################################################`
		10
		11	`import struct, errno`
		12	`import os, re`
		13
		14	`class Par2Parser (object):`
		15
		16	`def __init__ (self, par2_filename):`
		17	`"""Constructor for the Par2Parser class"""`
		18
		19	`self.par2_filename = par2_filename`
		20	`self.good_par2_files = []`
		21	`self.corrupt_par2_files = []`
		22	`self.protected_files = []`
		23	`self.possible_files = []`
		24	`self.set_basename = ''`
		25	`self.escaped_basename = ''`
		26
		27	`self.__main_logic ()`
		28
		29	`def __main_logic (self):`
		30	`####################################################`
		31	`# 1. Find out if we're parsing a "vol" file or not`
		32	`####################################################`
		33
		34	`# Get the base filename of self.par2_filename`
		35	`if (re.search ('\.vol\d+\+\d+\.par2$', self.par2_filename, re.IGNORECASE)):`
		36	`self.set_basename = os.path.splitext (self.par2_filename)[0]`
		37	`self.set_basename = os.path.splitext (self.set_basename)[0]`
		38	`else:`
		39	`self.set_basename = os.path.splitext (self.par2_filename)[0]`
		40
		41	`####################################################`
		42	`# 2. re.escape the filename`
		43	`####################################################`
		44	`self.escaped_basename = re.escape (self.set_basename)`
		45
		46	`####################################################`
		47	`# 3. use the escaped filename to find all other files in the current set`
		48	`# a. should be good for .000, .001, .r00, .rar`
		49	`# b. should also find all par2 files`
		50	`####################################################`
		51	`regex = re.compile ('^%s' % (self.escaped_basename, ))`
		52	`self.possible_files = [f for f in os.listdir(os.getcwd()) if regex.match (f)]`
		53
		54	`####################################################`
		55	`# 4. Parse all par2 files`
		56	`# a. add to the good_par2_files list if it is good`
		57	`# b. add to the corrupt_par2_files list if it is corrupt`
		58	`####################################################`
		59	`regex = re.compile ('\.par2$', re.IGNORECASE)`
		60	`for f in self.possible_files:`
		61	`if regex.search (f):`
		62
		63	`# Try to parse the par2 file`
		64	`try:`
		65	`filenames = self.__parse_par2_file (f)`
		66	`self.good_par2_files.append (f)`
		67	`except:`
		68	`self.corrupt_par2_files.append (f)`
		69
		70	`####################################################`
		71	`# 5. Parse good_par2_files[0], if it exists`
		72	`# a. if it doesn't exist, we can't really parse any of them`
		73	`# so return what we've got`
		74	`####################################################`
		75	`if len(self.good_par2_files) > 0:`
		76	`f = self.good_par2_files[0]`
		77	`self.protected_files = self.__parse_par2_file (f)`
		78
		79	`def __chompnulls (self, line):`
		80	`"""Returns the line up to the first null character"""`
		81	`p = line.find('\0')`
		82
		83	`if p < 0:`
		84	`return line`
		85	`else:`
		86	`return line[:p]`
		87
		88	`def __parse_par2_file (self, filename):`
		89	`"""Get all of the filenames that are protected by the par2`
		90	`file given as the filename"""`
		91
		92	`try:`
		93	`file = open(filename, 'rb')`
		94	`except:`
		95	`print 'Could not open %s' % (filename, )`
		96	`return []`
		97
		98	`# We always want to do crc checks`
		99	`docrcchecks = True`
		100
		101	`pkt_header_fmt = '< 8s Q 16s 16s 16s'`
		102	`pkt_header_size = struct.calcsize(pkt_header_fmt)`
		103	`file_pkt_fmt = '< 16s 16s 16s Q'`
		104	`file_pkt_size = struct.calcsize(file_pkt_fmt)`
		105	`main_pkt_fmt = '< Q I'`
		106	`main_pkt_size = struct.calcsize(main_pkt_fmt)`
		107
		108	`seen_file_ids = {}`
		109	`expected_file_ids = None`
		110	`filenames = []`
		111
		112	`while 1:`
		113	`d = file.read(pkt_header_size)`
		114	`if not d:`
		115	`break`
		116
		117	`magic, pkt_len, pkt_md5, set_id, pkt_type = struct.unpack(pkt_header_fmt, d)`
		118
		119	`if docrcchecks:`
		120	`import md5`
		121	`control_md5 = md5.new()`
		122	`control_md5.update(d[0x20:])`
		123	`d = file.read(pkt_len - pkt_header_size)`
		124	`control_md5.update(d)`
		125
		126	`if control_md5.digest() != pkt_md5:`
		127	`raise EnvironmentError, (errno.EINVAL, \`
		128	`"corrupt par2 file - bad packet hash")`
		129
		130	`if pkt_type == 'PAR 2.0\0FileDesc':`
		131	`if not docrcchecks:`
		132	`d = file.read(pkt_len - pkt_header_size)`
		133
		134	`file_id, file_md5, file_md5_16k, file_size = \`
		135	`struct.unpack(file_pkt_fmt, d[:file_pkt_size])`
		136
		137	`if seen_file_ids.get(file_id) is None:`
		138	`seen_file_ids[file_id] = 1`
		139	`filename = self.__chompnulls(d[file_pkt_size:])`
		140	`filenames.append(filename)`
		141
		142	`elif pkt_type == "PAR 2.0\0Main\0\0\0\0":`
		143	`if not docrcchecks:`
		144	`d = file.read(pkt_len - pkt_header_size)`
		145
		146	`if expected_file_ids is None:`
		147	`expected_file_ids = []`
		148	`slice_size, num_files = struct.unpack(main_pkt_fmt, d[:main_pkt_size])`
		149	`num_nonrecovery = (len(d)-main_pkt_size)/16 - num_files`
		150
		151	`for i in range(main_pkt_size,main_pkt_size+(num_files+num_nonrecovery)*16,16):`
		152	`expected_file_ids.append(d[i:i+16])`
		153
		154	`else:`
		155	`if not docrcchecks:`
		156	`file.seek(pkt_len - pkt_header_size, 1)`
		157
		158	`if expected_file_ids is None:`
		159	`raise EnvironmentError, (errno.EINVAL, \`
		160	`"corrupt or unsupported par2 file - no main packet found")`
		161
		162	`for id in expected_file_ids:`
		163	`if not seen_file_ids.has_key(id):`
		164	`raise EnvironmentError, (errno.EINVAL, \`
		165	`"corrupt or unsupported par2 file - " \`
		166	`"expected file description packet not found")`
		167
		168	`return filenames`
		169
		170	`if __name__ == '__main__':`
		171
		172	`fname = 'Gunslinger_Girl_06.DVD(AAC.H264)[KAA][D8028AB7].vol26+22.PAR2'`
		173	`p = Par2Parser (fname)`
		174
		175	`print "par2_filename:", p.par2_filename`
		176	`print "good_par2_files:", p.good_par2_files`
		177	`print "corrupt_par2_files:", p.corrupt_par2_files`
		178	`print "protected_files:", p.protected_files`
		179	`print "possible_files:", p.possible_files`
		180	`print "set_basename:", p.set_basename`
		181	`print "escaped_basename:", p.escaped_basename`
		182

Subversion Repositories programming

(root)/projects/rarslave/Par2Parser.py – Rev 358