Move common functionality into rsutil package
[rarslave2.git] / rsutil / par2parser.py
diff --git a/rsutil/par2parser.py b/rsutil/par2parser.py
new file mode 100644 (file)
index 0000000..6940101
--- /dev/null
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# vim: set ts=4 sts=4 sw=4 textwidth=92:
+
+################################################################################
+# The PAR2 Parser
+#
+# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
+################################################################################
+
+import struct, errno, os, md5
+
+def chompnulls(line):
+       p = line.find('\0')
+       if p < 0: return line
+       else:     return line[:p]
+
+def get_protected_files (dir, filename):
+       """Get all of the filenames that are protected by the par2
+       file given as the filename"""
+
+       assert os.path.isdir (dir) # MUST be a valid directory
+       assert os.path.isfile (os.path.join (dir, filename))
+
+       full_filename = os.path.join (dir, filename)
+
+       try:
+               file = open(full_filename, 'rb')
+       except:
+               print 'Could not open %s' % (full_filename, )
+               return []
+
+       # We always want to do crc checks
+       docrcchecks = True
+
+       pkt_header_fmt = '< 8s Q 16s 16s 16s'
+       pkt_header_size = struct.calcsize(pkt_header_fmt)
+       file_pkt_fmt = '< 16s 16s 16s Q'
+       file_pkt_size = struct.calcsize(file_pkt_fmt)
+       main_pkt_fmt = '< Q I'
+       main_pkt_size = struct.calcsize(main_pkt_fmt)
+
+       seen_file_ids = {}
+       expected_file_ids = None
+       filenames = []
+
+       # This try is here to ensure that we close the open file before
+       # returning. Since this code was (pretty much) borrowed verbatim
+       # from the cfv project, I didn't want to refactor it to make file
+       # closing more sane, so I just used a try / finally clause.
+       try:
+               while 1:
+                       d = file.read(pkt_header_size)
+                       if not d:
+                               break
+
+                       magic, pkt_len, pkt_md5, set_id, pkt_type = struct.unpack(pkt_header_fmt, d)
+
+                       if docrcchecks:
+                               control_md5 = md5.new()
+                               control_md5.update(d[0x20:])
+                               d = file.read(pkt_len - pkt_header_size)
+                               control_md5.update(d)
+
+                               if control_md5.digest() != pkt_md5:
+                                       raise EnvironmentError, (errno.EINVAL, \
+                                               "corrupt par2 file - bad packet hash")
+
+                       if pkt_type == 'PAR 2.0\0FileDesc':
+                               if not docrcchecks:
+                                       d = file.read(pkt_len - pkt_header_size)
+
+                               file_id, file_md5, file_md5_16k, file_size = \
+                                       struct.unpack(file_pkt_fmt, d[:file_pkt_size])
+
+                               if seen_file_ids.get(file_id) is None:
+                                       seen_file_ids[file_id] = 1
+                                       filename = chompnulls(d[file_pkt_size:])
+                                       filenames.append(filename)
+
+                       elif pkt_type == "PAR 2.0\0Main\0\0\0\0":
+                               if not docrcchecks:
+                                       d = file.read(pkt_len - pkt_header_size)
+
+                               if expected_file_ids is None:
+                                       expected_file_ids = []
+                                       slice_size, num_files = struct.unpack(main_pkt_fmt, d[:main_pkt_size])
+                                       num_nonrecovery = (len(d)-main_pkt_size)/16 - num_files
+
+                                       for i in range(main_pkt_size,main_pkt_size+(num_files+num_nonrecovery)*16,16):
+                                               expected_file_ids.append(d[i:i+16])
+
+                       else:
+                               if not docrcchecks:
+                                       file.seek(pkt_len - pkt_header_size, 1)
+
+               if expected_file_ids is None:
+                       raise EnvironmentError, (errno.EINVAL, \
+                               "corrupt or unsupported par2 file - no main packet found")
+
+               for id in expected_file_ids:
+                       if not seen_file_ids.has_key(id):
+                               raise EnvironmentError, (errno.EINVAL, \
+                                       "corrupt or unsupported par2 file - " \
+                                       "expected file description packet not found")
+       finally:
+               file.close ()
+
+       return filenames
+
+def main ():
+       pass
+
+if __name__ == '__main__':
+       main ()
+