[RARSLAVE] Eliminate duplicates when finding deletable files
[rarslave2.git] / Par2Parser.py
index 804848f..c57d1e6 100644 (file)
@@ -7,10 +7,7 @@
 # This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
 ################################################################################
 
-import struct, errno, os
-
-def get_full_filename (dir, file):
-       return os.path.abspath (os.path.expanduser (os.path.join (dir, file)))
+import struct, errno, os, md5
 
 def chompnulls(line):
        p = line.find('\0')
@@ -21,13 +18,19 @@ def get_protected_files (dir, filename):
        """Get all of the filenames that are protected by the par2
        file given as the filename"""
 
-       full_filename = get_full_filename (dir, filename)
+       assert os.path.isdir (dir) # MUST be a valid directory
+       assert os.path.isfile (os.path.join (dir, filename))
+
+       full_filename = os.path.join (dir, filename)
 
        try:
                file = open(full_filename, 'rb')
        except:
                print 'Could not open %s' % (full_filename, )
-               raise EnvironmentError, (errno.EINVAL, 'can not open par2 file')
+               return []
+
+       # We always want to verify each packet's md5 checksum
+       docrcchecks = True
 
        pkt_header_fmt = '< 8s Q 16s 16s 16s'
        pkt_header_size = struct.calcsize(pkt_header_fmt)
@@ -41,16 +44,26 @@ def get_protected_files (dir, filename):
        filenames = []
 
        while 1:
-               try:
-                       d = file.read(pkt_header_size)
-               except OverflowError:
-                       raise EnvironmentError, (errno.EINVAL, 'bad par2 file')
+               d = file.read(pkt_header_size)
                if not d:
                        break
 
                magic, pkt_len, pkt_md5, set_id, pkt_type = struct.unpack(pkt_header_fmt, d)
 
+               if docrcchecks:
+                       control_md5 = md5.new()
+                       control_md5.update(d[0x20:])
+                       d = file.read(pkt_len - pkt_header_size)
+                       control_md5.update(d)
+
+                       if control_md5.digest() != pkt_md5:
+                               raise EnvironmentError, (errno.EINVAL, \
+                                       "corrupt par2 file - bad packet hash")
+
                if pkt_type == 'PAR 2.0\0FileDesc':
+                       if not docrcchecks:
+                               d = file.read(pkt_len - pkt_header_size)
+
                        file_id, file_md5, file_md5_16k, file_size = \
                                struct.unpack(file_pkt_fmt, d[:file_pkt_size])
 
@@ -60,10 +73,8 @@ def get_protected_files (dir, filename):
                                filenames.append(filename)
 
                elif pkt_type == "PAR 2.0\0Main\0\0\0\0":
-                       try:
+                       if not docrcchecks:
                                d = file.read(pkt_len - pkt_header_size)
-                       except OverflowError:
-                               raise EnvironmentError, (errno.EINVAL, 'corrupt par2 file')
 
                        if expected_file_ids is None:
                                expected_file_ids = []
@@ -74,10 +85,8 @@ def get_protected_files (dir, filename):
                                        expected_file_ids.append(d[i:i+16])
 
                else:
-                       try:
+                       if not docrcchecks:
                                file.seek(pkt_len - pkt_header_size, 1)
-                       except OverflowError, IOError:
-                               raise EnvironmentError, (errno.EINVAL, 'corrupt par2 file')
 
        if expected_file_ids is None:
                raise EnvironmentError, (errno.EINVAL, \