Subversion Repositories programming

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
129 ira 1
#!/usr/bin/env python
2
 
3
# Copyright: Ira W. Snyder (devel@irasnyder.com)
4
# Start Date: 2005-10-13
5
# End Date:
6
# License: GNU General Public License v2 (or at your option, any later version)
7
#
8
# Changelog Follows:
9
# - 2005-10-13
10
# - Added get_par2_filenames() to parse par2 files
11
# - Added the parset object to represent each parset.
12
#
13
# - 2005-10-14
14
# - Finished the parset object. It will now verify and extract parsets.
15
# - Small changes to the parset object. This makes the parjoin part
16
#   much more reliable.
17
# - Added the OptionParser to make this nice to run at the command line.
18
# - Made recursiveness an option.
19
# - Made start directory an option.
20
# - Check for appropriate programs before starting.
21
#
22
 
23
################################################################################
24
# REQUIREMENTS:
25
#
26
# This code requires the programs cfv, par2repair, lxsplit, and rar to be able
27
# to function properly. I will attempt to check that these are in your path.
28
################################################################################
29
 
30
################################################################################
31
# Global Variables
32
################################################################################
33
# Default start directory; main() uses it when -d/--start-dir is not given
# and expands the leading '~' with os.path.expanduser().
WORK_DIR = '~/downloads/usenet'
34
################################################################################
35
 
36
################################################################################
37
# The PAR2 Parser
38
#
39
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
40
################################################################################
41
 
42
import struct, errno
43
 
44
# We always want to do crc checks
45
# Read by get_par2_filenames(): when true, each packet's MD5 digest is
# compared with the digest stored in its header before the packet is used
# (despite the name, the check is an MD5 comparison, not a CRC).
docrcchecks = True
46
 
47
def chompnulls(line):
    """Return `line` cut off just before its first NUL character.

    A string without any NUL character is returned unchanged.
    """
    # Splitting once on NUL leaves everything before the first NUL in
    # element 0 (or the whole string when there is no NUL at all).
    head = line.split('\0', 1)[0]
    return head
51
 
52
def get_par2_filenames(filename):
    """Get all of the filenames that are protected by the par2
    file given as the filename.

    The file is read packet by packet.  File description packets supply the
    protected names (each file id is reported once, in the order it is first
    seen); the main packet supplies the file ids that must have been
    described by the end of the file.

    Parameters:
        filename -- path of the par2 file to parse.

    Returns:
        A list of file names.  If the par2 file cannot be opened a message
        is printed and an empty list is returned.

    Raises:
        EnvironmentError (errno.EINVAL) if a packet is truncated, has an
        impossible length or fails its MD5 check, if no main packet is
        found, or if a file id listed in the main packet was never
        described.
    """

    # hashlib replaced the md5 module in Python 2.5; fall back for older
    # interpreters.  Imported once here instead of once per packet.
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        from md5 import new as new_md5

    try:
        parfile = open(filename, 'rb')
    except (IOError, OSError):
        print('Could not open %s' % (filename, ))
        return []

    pkt_header_fmt = '< 8s Q 16s 16s 16s'
    pkt_header_size = struct.calcsize(pkt_header_fmt)
    file_pkt_fmt = '< 16s 16s 16s Q'
    file_pkt_size = struct.calcsize(file_pkt_fmt)
    main_pkt_fmt = '< Q I'
    main_pkt_size = struct.calcsize(main_pkt_fmt)

    seen_file_ids = {}
    expected_file_ids = None
    filenames = []

    # try/finally so the file is closed on every exit path, including the
    # corruption errors raised below.
    try:
        while 1:
            header = parfile.read(pkt_header_size)
            if not header:
                break

            # A partial header would make struct.unpack() raise struct.error,
            # which callers that expect EnvironmentError do not catch.
            if len(header) < pkt_header_size:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - truncated packet header")

            magic, pkt_len, pkt_md5, set_id, pkt_type = \
                struct.unpack(pkt_header_fmt, header)

            # pkt_len counts the header too.  A smaller value would turn into
            # a negative read (whole rest of the file) or a backwards seek
            # that re-reads the same header forever.
            if pkt_len < pkt_header_size:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - bad packet length")
            body_len = pkt_len - pkt_header_size

            is_file_desc = (pkt_type == 'PAR 2.0\0FileDesc')
            is_main = (pkt_type == "PAR 2.0\0Main\0\0\0\0")

            if not (docrcchecks or is_file_desc or is_main):
                # Uninteresting packet and no hash to check: skip its body.
                parfile.seek(body_len, 1)
                continue

            body = parfile.read(body_len)
            if len(body) < body_len:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - truncated packet")

            if docrcchecks:
                # The digest covers the header from offset 0x20 (everything
                # after the stored digest itself) plus the packet body.
                control_md5 = new_md5()
                control_md5.update(header[0x20:])
                control_md5.update(body)

                if control_md5.digest() != pkt_md5:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - bad packet hash")

            if is_file_desc:
                if len(body) < file_pkt_size:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - short file description packet")

                # Only the file id (first field) is needed here.
                file_id = struct.unpack(file_pkt_fmt, body[:file_pkt_size])[0]

                if file_id not in seen_file_ids:
                    seen_file_ids[file_id] = 1
                    # The name follows the fixed fields and is NUL padded.
                    filenames.append(chompnulls(body[file_pkt_size:]))

            elif is_main:
                # Only the first main packet is used.
                if expected_file_ids is None:
                    if len(body) < main_pkt_size:
                        raise EnvironmentError(errno.EINVAL,
                            "corrupt par2 file - short main packet")

                    # Everything after the fixed fields is a run of 16 byte
                    # file ids; all of them are expected to be described.
                    id_count = (len(body) - main_pkt_size) // 16
                    id_end = main_pkt_size + id_count * 16
                    expected_file_ids = [body[i:i+16]
                        for i in range(main_pkt_size, id_end, 16)]
    finally:
        parfile.close()

    if expected_file_ids is None:
        raise EnvironmentError(errno.EINVAL,
            "corrupt or unsupported par2 file - no main packet found")

    for expected_id in expected_file_ids:
        if expected_id not in seen_file_ids:
            raise EnvironmentError(errno.EINVAL,
                "corrupt or unsupported par2 file - "
                "expected file description packet not found")

    return filenames
130
 
131
################################################################################
132
# The parset object
133
#
134
# This is an object based representation of a parset, and will verify itself
135
# and extract itself, if possible.
136
################################################################################
137
 
132 ira 138
import os, glob, re
129 ira 139
 
140
class parset:
    """A par2 recovery set that can verify and extract itself.

    The caller is expected to fill in `files` with the names protected by
    `parfile` and `extra_pars` with any further par2 files that describe the
    same files.  External programs (cfv, par2repair, lxsplit, rar) are run
    in the current working directory.
    """

    def __init__(self, par_filename):
        """Create an unverified, unextracted set for the given par2 file."""
        self.parfile = par_filename  # main par2 file of the set
        self.extra_pars = []         # other par2 files covering the same files
        # An empty list (rather than False) keeps the methods that iterate
        # over self.files usable before the caller assigns the real names.
        self.files = []
        self.used_parjoin = False    # set when __parjoin() verified the set
        self.verified = False
        self.extracted = False

    def get_filenames(self):
        """Return the file names protected by this set's main par2 file."""
        return get_par2_filenames(self.parfile)

    def all_there(self):
        """Check if all the files for the parset are present.
        This will help us decide which par2 checker to use first"""
        for f in self.files:
            if not os.path.isfile(f):
                return False

        # The files were all there
        return True

    def verify(self):
        """This will verify the parset by the most efficient method first,
        and then move to a slower method if that one fails.

        Returns True if the set verified; the result is also stored in
        self.verified."""

        retval = False #not verified yet

        # if all the files are there, try verifying fast
        if self.all_there():
            retval = self.__fast_verify()

            if not retval:
                # Failed to verify fast, so try it slow, maybe it needs repair
                retval = self.__slow_verify()

        # If we've got a video file, maybe we should try to parjoin it
        elif self.__has_video_file():
            retval = self.__parjoin()

        else: #not all there, maybe we can slow-repair
            retval = self.__slow_verify()

        self.verified = retval
        return self.verified

    def __run(self, argv):
        """Run an external program and return True if it exited with 0.

        The arguments are passed as a list, without a shell, so file names
        taken from downloaded par2 files cannot be interpreted as shell
        syntax.  A program that cannot be started counts as a failure."""
        import subprocess

        try:
            return subprocess.call(argv) == 0
        except OSError:
            return False

    def __fast_verify(self):
        """Verify with cfv; True on success."""
        return self.__run(['cfv', '-v', '-f', self.parfile])

    def __slow_verify(self):
        """Verify (and repair if needed) with par2repair; True on success."""
        return self.__run(['par2repair', self.parfile])

    def __parjoin(self):
        """Join "<first file>.001" and its parts with lxsplit, then verify.

        The exit status of lxsplit is not consulted; the verification that
        follows decides whether the join worked.  Returns True if the set
        verified afterwards."""
        target = self.files[0]
        # Remember whether the joined file was already there, so that a
        # failed attempt only cleans up what lxsplit created.
        existed_before = os.path.exists(target)

        self.__run(['lxsplit', '-j', target + '.001'])

        retval = self.__fast_verify()

        if not retval:
            # Failed to verify fast, so try it slow, maybe it needs repair
            retval = self.__slow_verify()

        # failed to verify, so remove the lxsplit created file (if any)
        if not retval and not existed_before and os.path.exists(target):
            os.remove(target)

        self.used_parjoin = retval
        self.verified = retval
        return self.verified

    def __has_video_file(self):
        """Return True if any protected file has a video extension."""
        for f in self.files:
            if os.path.splitext(f)[1] in ('.avi', '.ogm', '.mkv'):
                return True

        return False

    def __remove_currentset(self):
        """Remove all of the files that are extractable, as well as the pars.
        Leave everything else alone"""

        if not self.extracted:
            print('Did not extract yet, not removing currentset')
            return

        # remove the main par
        os.remove(self.parfile)

        # remove all of the extra pars
        for i in self.extra_pars:
            os.remove(i)

        # remove any rars that are associated (leave EVERYTHING else)
        # This regex matches both old and new style rar(s).
        regex = re.compile(r'^.*\.(rar|r\d\d)$')
        for i in self.files:
            if regex.match(i):
                os.remove(i)

        # remove any .0?? files (from parjoin)
        # NOTE(review): this is a substring match, so it removes every other
        # directory entry whose name contains the first file's name, not
        # only ".0??" parts -- confirm that this is intended.
        if self.used_parjoin:
            joined = self.files[0]
            for i in os.listdir(os.getcwd()):
                if i != joined and joined in i:
                    os.remove(i)

        # remove any temp repair files
        for i in glob.glob('*.1'):
            os.remove(i)

    def __get_extract_file(self):
        """Find the first extractable file"""
        for i in self.files:
            if os.path.splitext(i)[1] == '.rar':
                return i

        return None

    def extract(self):
        """Attempt to extract all of the files related to this parset.

        Nothing is done unless the set has been verified.  After a
        successful extraction (or when the set has no .rar file) the par2
        files, rar files and temporary files of the set are removed.
        Returns True on success; the result is also stored in
        self.extracted."""
        if not self.verified:
            self.extracted = False
            print('Not (successfully) verified, not extracting')
            return False #failed to extract

        extract_file = self.__get_extract_file()

        if extract_file is not None:
            if not self.__run(['rar', 'e', '-o+', extract_file]):
                print('Failed to extract')
                self.extracted = False
                return self.extracted

        # we extracted ok, so remove the currentset
        self.extracted = True
        self.__remove_currentset()

        return self.extracted
287
 
288
 
289
################################################################################
290
# The rarslave program itself
291
################################################################################
292
 
293
import os, sys, glob
294
from optparse import OptionParser
295
 
296
def check_required_progs():
    """Check if the required programs are installed.

    Each program is probed by running "<program> --help" through the shell
    with its output discarded.  If the shell reports that any of them could
    not be found, one line per missing program is printed and the script
    exits with status 1.  Otherwise the function returns None."""

    # Value os.system() returns when the shell could not find the command:
    # exit status 127 in the high byte of the wait status (32512).
    shell_not_found = 127 << 8
    required = ('cfv', 'par2repair', 'lxsplit', 'rar')

    needed = []

    for prog in required:
        status = os.system('%s --help > /dev/null 2>&1' % (prog, ))
        if status == shell_not_found:
            needed.append(prog)

    if needed:
        for n in needed:
            print('Needed program "%s" not found in $PATH' % (n, ))

        sys.exit(1)
319
 
320
def get_parsets():
    """Get a representation of each parset in the current directory, and
    return them as a list of parset instances.

    Par2 files that protect the same list of file names are grouped: the
    first one becomes the parset's main par file, the others are appended
    to its extra_pars.  Par2 files that cannot be parsed are reported and
    skipped."""

    # Collect both spellings without duplicates: on a case-insensitive
    # filesystem the two patterns can return the same file, which would
    # otherwise end up as its own "extra" par.
    par2files = []
    for pattern in ('*.par2', '*.PAR2'):
        for name in glob.glob(pattern):
            if name not in par2files:
                par2files.append(name)

    # glob order is arbitrary; sort so the choice of main par file is stable
    par2files.sort()

    parsets = []

    for par2file in par2files:
        try:
            filenames = get_par2_filenames(par2file)
        except (EnvironmentError, struct.error):
            # struct.error covers a par2 file cut off in the middle of a packet
            print('CORRUPT PARFILE: %s' % (par2file, ))
            continue

        # if we already have an instance for this set, append
        # this par file to the extra_pars field
        for known in parsets:
            if known.files == filenames:
                known.extra_pars.append(par2file)
                break
        else:
            # we haven't seen this set yet, so we'll create it now
            cur = parset(par2file)
            cur.files = filenames
            parsets.append(cur)

    return parsets
351
 
352
def directory_worker(dir):
    """Attempts to find, verify, and extract every parset in the directory
    given as a parameter.

    The process changes into `dir` while it works and changes back to the
    previous working directory afterwards, even if verifying or extracting
    raises an exception."""

    cwd = os.getcwd()
    os.chdir(dir)

    try:
        parsets = get_parsets()

        # Verify each parset
        for p in parsets:
            p.verify()

        # Attempt to extract each parset
        for p in parsets:
            p.extract()
    finally:
        # always restore the caller's working directory
        os.chdir(cwd)
370
 
371
def main():
    """Parse the command line, check for the helper programs, then process
    the start directory (and, unless -n was given, everything below it)."""

    # Command line definition
    cli = OptionParser()
    cli.add_option('-n', '--not-recursive', action='store_false', dest='recursive',
                   default=True, help="don't run recursively")
    cli.add_option('-d', '--start-dir', dest='work_dir', default=WORK_DIR,
                   help='start running at DIR', metavar='DIR')

    opts, _positional = cli.parse_args()

    # Expand a leading '~' and make the start directory absolute
    start_dir = os.path.abspath(os.path.expanduser(opts.work_dir))

    # Exits the script if a needed program is missing
    check_required_progs()

    # Single directory only
    if not opts.recursive:
        directory_worker(start_dir)
        return

    # The start directory and every directory below it
    for current_dir, _subdirs, _names in os.walk(start_dir):
        directory_worker(current_dir)
395
 
396
# Run rarslave only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
398