Package PAR2Set and derived classes
[rarslave2.git] / Par2Parser.py
index 804848f..6940101 100644 (file)
@@ -7,10 +7,7 @@
 # This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
 ################################################################################
 
-import struct, errno, os
-
-def get_full_filename (dir, file):
-       return os.path.abspath (os.path.expanduser (os.path.join (dir, file)))
+import struct, errno, os, md5
 
 def chompnulls(line):
        p = line.find('\0')
@@ -21,13 +18,19 @@ def get_protected_files (dir, filename):
        """Get all of the filenames that are protected by the par2
        file given as the filename"""
 
-       full_filename = get_full_filename (dir, filename)
+       assert os.path.isdir (dir) # MUST be a valid directory
+       assert os.path.isfile (os.path.join (dir, filename))
+
+       full_filename = os.path.join (dir, filename)
 
        try:
                file = open(full_filename, 'rb')
        except:
                print 'Could not open %s' % (full_filename, )
-               raise EnvironmentError, (errno.EINVAL, 'can not open par2 file')
+               return []
+
+       # We always want to verify packet hashes (the flag says "crc", but the check is MD5)
+       docrcchecks = True
 
        pkt_header_fmt = '< 8s Q 16s 16s 16s'
        pkt_header_size = struct.calcsize(pkt_header_fmt)
@@ -40,54 +43,67 @@ def get_protected_files (dir, filename):
        expected_file_ids = None
        filenames = []
 
-       while 1:
-               try:
+       # This try is here to ensure that we close the open file before
+       # returning. Since this code was (pretty much) borrowed verbatim
+       # from the cfv project, I didn't want to refactor it to make file
+       # closing more sane, so I just used a try / finally clause.
+       try:
+               while 1:
                        d = file.read(pkt_header_size)
-               except OverflowError:
-                       raise EnvironmentError, (errno.EINVAL, 'bad par2 file')
-               if not d:
-                       break
+                       if not d:
+                               break
 
-               magic, pkt_len, pkt_md5, set_id, pkt_type = struct.unpack(pkt_header_fmt, d)
+                       magic, pkt_len, pkt_md5, set_id, pkt_type = struct.unpack(pkt_header_fmt, d)
 
-               if pkt_type == 'PAR 2.0\0FileDesc':
-                       file_id, file_md5, file_md5_16k, file_size = \
-                               struct.unpack(file_pkt_fmt, d[:file_pkt_size])
+                       if docrcchecks:
+                               control_md5 = md5.new()
+                               control_md5.update(d[0x20:])
+                               d = file.read(pkt_len - pkt_header_size)
+                               control_md5.update(d)
 
-                       if seen_file_ids.get(file_id) is None:
-                               seen_file_ids[file_id] = 1
-                               filename = chompnulls(d[file_pkt_size:])
-                               filenames.append(filename)
+                               if control_md5.digest() != pkt_md5:
+                                       raise EnvironmentError, (errno.EINVAL, \
+                                               "corrupt par2 file - bad packet hash")
 
-               elif pkt_type == "PAR 2.0\0Main\0\0\0\0":
-                       try:
-                               d = file.read(pkt_len - pkt_header_size)
-                       except OverflowError:
-                               raise EnvironmentError, (errno.EINVAL, 'corrupt par2 file')
+                       if pkt_type == 'PAR 2.0\0FileDesc':
+                               if not docrcchecks:
+                                       d = file.read(pkt_len - pkt_header_size)
+
+                               file_id, file_md5, file_md5_16k, file_size = \
+                                       struct.unpack(file_pkt_fmt, d[:file_pkt_size])
+
+                               if seen_file_ids.get(file_id) is None:
+                                       seen_file_ids[file_id] = 1
+                                       filename = chompnulls(d[file_pkt_size:])
+                                       filenames.append(filename)
 
-                       if expected_file_ids is None:
-                               expected_file_ids = []
-                               slice_size, num_files = struct.unpack(main_pkt_fmt, d[:main_pkt_size])
-                               num_nonrecovery = (len(d)-main_pkt_size)/16 - num_files
+                       elif pkt_type == "PAR 2.0\0Main\0\0\0\0":
+                               if not docrcchecks:
+                                       d = file.read(pkt_len - pkt_header_size)
 
-                               for i in range(main_pkt_size,main_pkt_size+(num_files+num_nonrecovery)*16,16):
-                                       expected_file_ids.append(d[i:i+16])
+                               if expected_file_ids is None:
+                                       expected_file_ids = []
+                                       slice_size, num_files = struct.unpack(main_pkt_fmt, d[:main_pkt_size])
+                                       num_nonrecovery = (len(d)-main_pkt_size)/16 - num_files
 
-               else:
-                       try:
-                               file.seek(pkt_len - pkt_header_size, 1)
-                       except OverflowError, IOError:
-                               raise EnvironmentError, (errno.EINVAL, 'corrupt par2 file')
+                                       for i in range(main_pkt_size,main_pkt_size+(num_files+num_nonrecovery)*16,16):
+                                               expected_file_ids.append(d[i:i+16])
 
-       if expected_file_ids is None:
-               raise EnvironmentError, (errno.EINVAL, \
-                       "corrupt or unsupported par2 file - no main packet found")
+                       else:
+                               if not docrcchecks:
+                                       file.seek(pkt_len - pkt_header_size, 1)
 
-       for id in expected_file_ids:
-               if not seen_file_ids.has_key(id):
+               if expected_file_ids is None:
                        raise EnvironmentError, (errno.EINVAL, \
-                               "corrupt or unsupported par2 file - " \
-                               "expected file description packet not found")
+                               "corrupt or unsupported par2 file - no main packet found")
+
+               for id in expected_file_ids:
+                       if not seen_file_ids.has_key(id):
+                               raise EnvironmentError, (errno.EINVAL, \
+                                       "corrupt or unsupported par2 file - " \
+                                       "expected file description packet not found")
+       finally:
+               file.close ()
 
        return filenames