# Rev 358 -- header left over from the repository web viewer (Blame | Compare with Previous | Last modification | View Log | RSS feed)
"""Simple Par2 parsing class

This program is part of rarslave, an open-source program for
automatically checking, repairing, and extracting files, primarily
those which are downloaded from usenet.

Visit https://svn.irasnyder.com/svn/programming/rarslave for the
latest version.
"""

__author__ = "Ira W. Snyder (devel@irasnyder.com)"
__copyright__ = "Copyright (c) 2005,2006 Ira W. Snyder (devel@irasnyder.com)"
__license__ = "GNU GPL v2 (or, at your option, any later version"

################################################################################
# The PAR2 Parser Module
#
# Copyright 2006, Ira W. Snyder (devel@irasnyder.com)
# License: GNU General Public License v2 (or, at your option, any later version)
#
# This was mostly "stolen" (read: borrowed, the GPL rocks) from cfv.
# See http://cfv.sourceforge.net/ for a copy.
################################################################################

import errno
import hashlib
import os
import re
import struct

# Name of a recovery volume, e.g. "set.vol003+04.par2".
_VOL_PAR2_RE = re.compile(r'\.vol\d+\+\d+\.par2$', re.IGNORECASE)
# Any file carrying the .par2 extension.
_PAR2_RE = re.compile(r'\.par2$', re.IGNORECASE)

# Packet header: magic, packet length, packet md5, set id, packet type.
_PKT_HEADER = struct.Struct('<8sQ16s16s16s')
# Leading fields of a file description packet body:
# file id, file md5, md5 of the first 16k, file size.  The name follows.
_FILE_DESC = struct.Struct('<16s16s16sQ')
# Leading fields of a main packet body: slice size, number of files.
# A list of 16-byte file ids follows.
_MAIN_PKT = struct.Struct('<QI')

# Offset inside the packet header at which the hashed region starts
# (everything after the magic, length and md5 fields).
_HASHED_HEADER_OFFSET = 0x20
_FILE_ID_SIZE = 16

_TYPE_FILE_DESC = b'PAR 2.0\0FileDesc'
_TYPE_MAIN = b'PAR 2.0\0Main\0\0\0\0'


class Par2Parser(object):
    """Collect information about the Par2 set that a given file belongs to.

    All of the work happens in the constructor, which looks at the files
    in the current working directory.  The results are available through
    the get_*() methods afterwards.
    """

    def __init__(self, par2_filename):
        """Constructor for the Par2Parser class.

        This will automatically perform all of the parsing and finding
        values for the entire class.

        This makes this class slow, so try not to re-construct it too
        many times.
        """
        self.__par2_filename = par2_filename
        self.__good_par2_files = []
        self.__corrupt_par2_files = []
        self.__protected_files = []
        self.__possible_files = []
        self.__set_basename = ''
        self.__escaped_basename = ''

        self.__main_logic()

    def get_par2_filename(self):
        """Get the main Par2 filename."""
        return self.__par2_filename

    def get_good_par2_files(self):
        """Get a list of good Par2 files in this set."""
        return self.__good_par2_files

    def get_corrupt_par2_files(self):
        """Get a list of corrupt Par2 files in this set."""
        return self.__corrupt_par2_files

    def get_protected_files(self):
        """Get a list of filenames protected by the Par2 set."""
        return self.__protected_files

    def get_possible_files(self):
        """Get a list of files that have a name similar to the main
        Par2 filename."""
        return self.__possible_files

    def get_set_basename(self):
        """Get the base filename of the Par2 set.

        The base filename is the Par2 filename given, minus the
        first filename extension.

        Example: For "test.par2", the base filename would be "test".
        For a volume file such as "test.vol0+1.par2" the ".vol0+1" part
        is removed as well.
        """
        return self.__set_basename

    def get_escaped_basename(self):
        """Get the escaped base filename of the Par2 set.

        This name is suitable for use in regular expressions where
        you just want to match using the literal base name.
        """
        return self.__escaped_basename

    def __main_logic(self):
        """Fills in all of the private variables in the class.

        This is called by the constructor, so the getter methods can be
        used as soon as the object exists.
        """
        # 1. Find the base filename.  A "vol" file has two extensions to
        #    strip (".volNNN+MM" and ".par2"), a plain par2 file has one.
        basename = os.path.splitext(self.__par2_filename)[0]
        if _VOL_PAR2_RE.search(self.__par2_filename):
            basename = os.path.splitext(basename)[0]
        self.__set_basename = basename

        # 2. re.escape the filename.
        self.__escaped_basename = re.escape(basename)

        # 3. Find all other files of the set in the current directory:
        #    everything whose name starts with the literal base name.
        #    This picks up .000, .001, .r00, .rar and all par2 files.
        self.__possible_files = [f for f in os.listdir(os.getcwd())
                                 if f.startswith(basename)]

        # 4. Parse all par2 files and sort them into good and corrupt.
        #    The parse results are kept so that no file is read twice.
        parsed = {}
        for f in self.__possible_files:
            if not _PAR2_RE.search(f):
                continue
            try:
                parsed[f] = self.__parse_par2_file(f)
            except (EnvironmentError, struct.error):
                # Unreadable or malformed: that is what "corrupt" means
                # here.  Anything else (e.g. KeyboardInterrupt) propagates.
                self.__corrupt_par2_files.append(f)
            else:
                self.__good_par2_files.append(f)

        # 5. Use the first good par2 file (in sorted order) as the main
        #    one.  Without any good file there is nothing more to learn,
        #    so the remaining attributes keep their initial values.
        if self.__good_par2_files:
            self.__good_par2_files.sort()
            first = self.__good_par2_files[0]
            self.__par2_filename = first
            self.__protected_files = parsed[first]

    def __chompnulls(self, line):
        """Returns the line up to the first null character.

        Works for both byte strings and text strings; the result has
        the same type as the argument.
        """
        nul = b'\0' if isinstance(line, bytes) else u'\0'
        return line.partition(nul)[0]

    def __parse_par2_file(self, filename):
        """Get all of the filenames that are protected by the par2
        file given as the filename.

        Returns a list of filenames.  If the file cannot be opened a
        message is printed and an empty list is returned.

        Raises EnvironmentError (errno EINVAL) or struct.error if the
        file is corrupt or is not a supported par2 file.
        """
        try:
            par2 = open(filename, 'rb')
        except (IOError, OSError):
            print('Could not open %s' % (filename,))
            return []

        # The handle is closed on every path out of the parser.
        with par2:
            return self.__read_packets(par2)

    def __read_packets(self, par2):
        """Read every packet from the open binary file object par2 and
        return the list of protected filenames found in it."""
        total_size = os.fstat(par2.fileno()).st_size
        seen_file_ids = set()
        expected_file_ids = None
        filenames = []

        while True:
            header = par2.read(_PKT_HEADER.size)
            if not header:
                break
            if len(header) < _PKT_HEADER.size:
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt par2 file - truncated packet header")

            _magic, pkt_len, pkt_md5, _set_id, pkt_type = \
                _PKT_HEADER.unpack(header)

            # The length comes from the file and cannot be trusted: never
            # ask for more than the file still holds, or a negative amount.
            body_len = pkt_len - _PKT_HEADER.size
            if body_len < 0 or body_len > total_size - par2.tell():
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt par2 file - bad packet length")
            body = par2.read(body_len)

            # The packet hash covers the rest of the header plus the body.
            control_md5 = hashlib.md5(header[_HASHED_HEADER_OFFSET:])
            control_md5.update(body)
            if control_md5.digest() != pkt_md5:
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt par2 file - bad packet hash")

            if pkt_type == _TYPE_FILE_DESC:
                file_id = _FILE_DESC.unpack(body[:_FILE_DESC.size])[0]
                # The same description may be repeated; record it once.
                if file_id not in seen_file_ids:
                    seen_file_ids.add(file_id)
                    name = self.__chompnulls(body[_FILE_DESC.size:])
                    if not isinstance(name, str):
                        # Python 3: the packet holds bytes, but names on
                        # disk (os.listdir) are text.
                        name = name.decode('utf-8', 'replace')
                    filenames.append(name)

            elif pkt_type == _TYPE_MAIN and expected_file_ids is None:
                # Only the first main packet is used.
                if len(body) < _MAIN_PKT.size:
                    raise EnvironmentError(
                        errno.EINVAL,
                        "corrupt par2 file - truncated main packet")
                # Every 16-byte id after the fixed fields is expected,
                # whether or not it is counted in the num_files field.
                id_count = (len(body) - _MAIN_PKT.size) // _FILE_ID_SIZE
                id_end = _MAIN_PKT.size + id_count * _FILE_ID_SIZE
                expected_file_ids = [
                    body[i:i + _FILE_ID_SIZE]
                    for i in range(_MAIN_PKT.size, id_end, _FILE_ID_SIZE)]

            # Packets of any other type are verified but otherwise ignored.

        if expected_file_ids is None:
            raise EnvironmentError(
                errno.EINVAL,
                "corrupt or unsupported par2 file - no main packet found")

        for file_id in expected_file_ids:
            if file_id not in seen_file_ids:
                raise EnvironmentError(
                    errno.EINVAL,
                    "corrupt or unsupported par2 file - "
                    "expected file description packet not found")

        return filenames


if __name__ == '__main__':
    try:
        read_line = raw_input   # Python 2
    except NameError:
        read_line = input       # Python 3

    # Keep asking until the answer names an existing file.
    while True:
        fname = read_line("Enter PAR2 Filename to test: ")
        fname = os.path.abspath(os.path.expanduser(fname))
        if os.path.isfile(fname):
            break
        print("not a file, try again!")

    # The parser looks for the set in the current working directory.
    os.chdir(os.path.dirname(fname))
    fname = os.path.basename(fname)

    p = Par2Parser(fname)

    print("par2_filename: %s" % (p.get_par2_filename(),))
    print("good_par2_files: %s" % (p.get_good_par2_files(),))
    print("corrupt_par2_files: %s" % (p.get_corrupt_par2_files(),))
    print("protected_files: %s" % (p.get_protected_files(),))
    print("possible_files: %s" % (p.get_possible_files(),))
    print("set_basename: %s" % (p.get_set_basename(),))
    print("escaped_basename: %s" % (p.get_escaped_basename(),))