Move common functionality into rsutil package
[rarslave2.git] / rsutil / par2parser.py
1 #!/usr/bin/env python
2 # vim: set ts=4 sts=4 sw=4 textwidth=92:
3
4 ################################################################################
5 # The PAR2 Parser
6 #
7 # This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
8 ################################################################################
9
10 import struct, errno, os, md5
11
def chompnulls(line):
        """Return line cut off just before its first NUL character.

        A line that contains no NUL character is returned unchanged.
        """
        # Splitting once on NUL leaves the leading part in slot 0; without a
        # NUL the only element is the whole line.
        return line.split('\0', 1)[0]
16
def get_protected_files (dir, filename):
        """Return the names of the files described by a par2 file.

        The packets of the par2 file dir/filename are read one after another.
        The MD5 hash of every packet is verified, the name stored in each file
        description packet is collected (once per file id), and the file ids
        listed in the first main packet are checked against the file
        description packets that were seen.

        Arguments:
        dir      -- directory containing the par2 file (asserted to exist)
        filename -- name of the par2 file inside dir (asserted to exist)

        Returns the list of names, in the order their file description packets
        were first encountered. If the file cannot be opened, a message is
        printed and an empty list is returned.

        Raises EnvironmentError (errno.EINVAL) when a packet header is
        truncated, a packet has an impossible length, is too short for its
        type or has a bad hash, when no main packet is present, or when a file
        id listed in the main packet has no file description packet.
        """

        # The md5 module is deprecated in favour of hashlib (Python 2.5+); fall
        # back to it only on interpreters that do not ship hashlib.
        try:
                from hashlib import md5 as new_md5
        except ImportError:
                from md5 import new as new_md5

        assert os.path.isdir (dir) # MUST be a valid directory
        assert os.path.isfile (os.path.join (dir, filename))

        full_filename = os.path.join (dir, filename)

        # Only I/O failures mean "could not open"; anything else (for example
        # KeyboardInterrupt) must propagate to the caller.
        try:
                par2 = open(full_filename, 'rb')
        except (IOError, OSError):
                print('Could not open %s' % (full_filename, ))
                return []

        pkt_header_fmt = '< 8s Q 16s 16s 16s'
        pkt_header_size = struct.calcsize(pkt_header_fmt)
        file_pkt_fmt = '< 16s 16s 16s Q'
        file_pkt_size = struct.calcsize(file_pkt_fmt)
        main_pkt_fmt = '< Q I'
        main_pkt_size = struct.calcsize(main_pkt_fmt)
        file_id_size = 16

        seen_file_ids = {}
        expected_file_ids = None
        filenames = []

        # The try / finally guarantees that the file is closed on every way
        # out of the parsing loop, including the corruption errors.
        try:
                total_size = os.fstat(par2.fileno()).st_size

                while 1:
                        header = par2.read(pkt_header_size)
                        if not header:
                                break

                        # A partial header would make struct.unpack fail with
                        # struct.error; report it as a corrupt file instead.
                        if len(header) != pkt_header_size:
                                raise EnvironmentError(errno.EINVAL,
                                        "corrupt par2 file - truncated packet header")

                        fields = struct.unpack(pkt_header_fmt, header)
                        pkt_len, pkt_md5, pkt_type = fields[1], fields[2], fields[4]

                        # Reject lengths that cannot be right before reading:
                        # a negative size would read the rest of the file and
                        # an oversized one could request a huge buffer.
                        body_len = pkt_len - pkt_header_size
                        if body_len < 0 or body_len > total_size - par2.tell():
                                raise EnvironmentError(errno.EINVAL,
                                        "corrupt par2 file - bad packet length")

                        body = par2.read(body_len)

                        # The hash covers the header from offset 0x20 (just
                        # past the hash field) plus the whole packet body.
                        control_md5 = new_md5()
                        control_md5.update(header[0x20:])
                        control_md5.update(body)

                        if control_md5.digest() != pkt_md5:
                                raise EnvironmentError(errno.EINVAL,
                                        "corrupt par2 file - bad packet hash")

                        if pkt_type == 'PAR 2.0\0FileDesc':
                                if len(body) < file_pkt_size:
                                        raise EnvironmentError(errno.EINVAL,
                                                "corrupt par2 file - file description packet too short")

                                # The file id is the first 16 byte field; the
                                # name follows the fixed-size fields.
                                file_id = body[:file_id_size]

                                if file_id not in seen_file_ids:
                                        seen_file_ids[file_id] = 1
                                        filenames.append(chompnulls(body[file_pkt_size:]))

                        elif pkt_type == "PAR 2.0\0Main\0\0\0\0":
                                # Only the first main packet is evaluated
                                if expected_file_ids is None:
                                        if len(body) < main_pkt_size:
                                                raise EnvironmentError(errno.EINVAL,
                                                        "corrupt par2 file - main packet too short")

                                        # Every complete 16 byte id after the
                                        # fixed-size fields is expected.
                                        id_count = (len(body) - main_pkt_size) // file_id_size
                                        ids_end = main_pkt_size + id_count * file_id_size
                                        expected_file_ids = [body[i:i + file_id_size]
                                                for i in range(main_pkt_size, ids_end, file_id_size)]

                if expected_file_ids is None:
                        raise EnvironmentError(errno.EINVAL,
                                "corrupt or unsupported par2 file - no main packet found")

                for expected_id in expected_file_ids:
                        if expected_id not in seen_file_ids:
                                raise EnvironmentError(errno.EINVAL,
                                        "corrupt or unsupported par2 file - "
                                        "expected file description packet not found")
        finally:
                par2.close ()

        return filenames
109
def main ():
        """Entry point used when the module is run as a script; does nothing."""
        return None
112
# Call main() only when this file is executed directly, not when it is imported
if __name__ == '__main__':
        main ()
115