Subversion Repositories programming

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
129 ira 1
#!/usr/bin/env python
2
 
3
# Copyright: Ira W. Snyder (devel@irasnyder.com)
4
# Start Date: 2005-10-13
5
# End Date:
6
# License: GNU General Public License v2 (or at your option, any later version)
7
#
8
# Changelog Follows:
9
# - 2005-10-13
10
# - Added get_par2_filenames() to parse par2 files
11
# - Added the parset object to represent each parset.
12
#
13
# - 2005-10-14
14
# - Finished the parset object. It will now verify and extract parsets.
15
# - Small changes to the parset object. This makes the parjoin part
16
#   much more reliable.
17
# - Added the OptionParser to make this nice to run at the command line.
18
# - Made recursiveness an option.
19
# - Made start directory an option.
20
# - Check for appropriate programs before starting.
21
#
134 ira 22
# - 2005-10-17
23
# - Use a regular expression to handle the deletable types.
24
#
25
# - 2005-10-18
26
# - Use regular expressions to handle all finding of files, instead of
27
#   using the glob module.
28
# - Add a config class to handle all the default config stuff sanely.
29
#   This makes it easier to change some of the main parts of the program to
30
#   your specific configuration.
31
# - Move the docrcchecks variable inside the get_par2_filenames() function,
32
#   which is where it belongs anyway.
33
# - Added command-line option to check for required programs at start.
34
#
129 ira 35
 
36
################################################################################
37
# REQUIREMENTS:
38
#
39
# This code requires the programs cfv, par2repair, lxsplit, and rar to be able
40
# to function properly. Run with -p to check that these are in your path.
41
################################################################################
42
 
134 ira 43
class rarslave_config:
    """Hold the default settings for the whole program.

    All values are plain class attributes, so they can be read either from
    the class or from the shared `config` instance created below.  The regex
    patterns are raw strings so that backslashes reach the `re` module
    untouched."""

    # Directory processing starts in when none is given on the command line.
    WORK_DIR               = '~/downloads/usenet'
    # Protected files matching this are deleted after a successful extract
    # (both new-style .rar and old-style .rNN volumes).
    DELETEABLE_TYPES_REGEX = r'^.*\.(rar|r\d\d)$'
    # Files in the working directory matching this are deleted after a
    # successful extract.
    TEMP_REPAIR_REGEX      = r'.*\.1$'
    # Files matching this are treated as par2 files.
    PAR2_REGEX             = r'.*\.(PAR|par)2$'
    # A set containing a file matching this may be joined with lxsplit.
    VIDEO_FILE_REGEX       = r'.*\.(AVI|avi|OGM|ogm|MKV|mkv)$'
    # Default for descending into sub-directories.
    RECURSIVE              = True
    # Default for checking that the external programs exist at start-up.
    CHECK_REQ_PROGS        = False

    def __init__(self):
        pass


config = rarslave_config()
58
 
129 ira 59
################################################################################
60
# The PAR2 Parser
61
#
62
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
63
################################################################################
64
 
65
import struct, errno
66
 
67
def chompnulls(line):
    """Return `line` cut off at its first NUL character.

    Works for both text and byte strings; a value that contains no NUL is
    returned unchanged."""
    # Search with a separator of the same type as the input, so that data
    # read from a file opened in binary mode is handled as well as text.
    nul = b'\0' if isinstance(line, bytes) else u'\0'
    p = line.find(nul)
    if p < 0:
        return line
    return line[:p]
71
 
72
def get_par2_filenames(filename):
    """Get all of the filenames that are protected by the par2 file given
    as the filename.

    The file is read one packet at a time.  The MD5 hash of every packet is
    checked, each file description packet contributes one filename, and the
    main packet supplies the list of file ids that must have a description.

    Arguments:
        filename -- path of the par2 file to parse.

    Returns:
        The list of protected filenames, in the order their description
        packets were first seen.  An empty list is returned (and a message
        printed) if the par2 file cannot be opened.

    Raises:
        EnvironmentError (errno.EINVAL) if a packet is truncated or fails
        its hash check, if no main packet is found, or if a file id listed
        in the main packet has no file description packet."""

    # The md5 module no longer exists on Python 3; hashlib provides the same
    # hash.  Fall back to md5 for interpreters that predate hashlib.
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        from md5 import new as new_md5

    try:
        parfile = open(filename, 'rb')
    except (IOError, OSError):
        print('Could not open %s' % (filename, ))
        return []

    pkt_header_fmt = '< 8s Q 16s 16s 16s'
    pkt_header_size = struct.calcsize(pkt_header_fmt)
    file_pkt_fmt = '< 16s 16s 16s Q'
    file_pkt_size = struct.calcsize(file_pkt_fmt)
    main_pkt_fmt = '< Q I'
    main_pkt_size = struct.calcsize(main_pkt_fmt)

    # Packet types are compared as byte strings because the file is read in
    # binary mode.
    file_desc_type = b'PAR 2.0\0FileDesc'
    main_type = b'PAR 2.0\0Main\0\0\0\0'

    seen_file_ids = set()
    expected_file_ids = None
    filenames = []

    try:
        while True:
            header = parfile.read(pkt_header_size)
            if not header:
                break

            # A partial header would make struct.unpack() raise struct.error,
            # which callers do not expect; report it as a corrupt file.
            if len(header) < pkt_header_size:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - truncated packet header")

            _magic, pkt_len, pkt_md5, _set_id, pkt_type = \
                struct.unpack(pkt_header_fmt, header)

            # The packet length includes the header, so anything smaller
            # cannot be valid (and would turn into a negative read size).
            if pkt_len < pkt_header_size:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - bad packet length")

            body = parfile.read(pkt_len - pkt_header_size)

            # The hash covers the header from offset 0x20 onwards (everything
            # after the hash field itself) followed by the packet body.
            control_md5 = new_md5()
            control_md5.update(header[0x20:])
            control_md5.update(body)

            if control_md5.digest() != pkt_md5:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - bad packet hash")

            if pkt_type == file_desc_type:
                if len(body) < file_pkt_size:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - short file description packet")

                file_id = struct.unpack(file_pkt_fmt, body[:file_pkt_size])[0]

                # The same description may appear more than once; only the
                # first occurrence contributes a filename.
                if file_id not in seen_file_ids:
                    seen_file_ids.add(file_id)

                    # The name is NUL padded; keep what precedes the first NUL.
                    name = body[file_pkt_size:].split(b'\0', 1)[0]

                    # On Python 3 the name is bytes; turn it into text so it
                    # can be used with the text regexes and os.path calls
                    # elsewhere.  On Python 2 it already is a str.
                    if not isinstance(name, str):
                        name = name.decode('utf-8', 'surrogateescape')

                    filenames.append(name)

            elif pkt_type == main_type:
                # Only the first main packet is used.
                if expected_file_ids is None:
                    if len(body) < main_pkt_size:
                        raise EnvironmentError(errno.EINVAL,
                            "corrupt par2 file - short main packet")

                    # Everything after the fixed fields is a run of 16-byte
                    # file ids; all of them are expected to be described.
                    id_count = (len(body) - main_pkt_size) // 16
                    id_end = main_pkt_size + id_count * 16
                    expected_file_ids = [body[i:i + 16] for i in
                                         range(main_pkt_size, id_end, 16)]

            # Packets of any other type are read (for the hash check) and
            # otherwise ignored.
    finally:
        parfile.close()

    if expected_file_ids is None:
        raise EnvironmentError(errno.EINVAL,
            "corrupt or unsupported par2 file - no main packet found")

    for file_id in expected_file_ids:
        if file_id not in seen_file_ids:
            raise EnvironmentError(errno.EINVAL,
                "corrupt or unsupported par2 file - "
                "expected file description packet not found")

    return filenames
153
 
154
################################################################################
155
# The parset object
156
#
157
# This is an object based representation of a parset, and will verify itself
158
# and extract itself, if possible.
159
################################################################################
160
 
132 ira 161
import os, glob, re
129 ira 162
 
163
class parset:
    """A par2 set: one main par2 file, any extra par2 files that protect the
    same files, and the protected files themselves.

    A set can verify (and, if needed, repair) itself with verify() and then
    unpack itself with extract()."""

    def __init__(self, par_filename):
        """Create a set whose main par2 file is `par_filename`.

        `files` starts out as False; it has to be assigned the list of
        protected filenames before verify() is called."""
        self.parfile = par_filename
        self.extra_pars = []
        self.files = False
        self.used_parjoin = False
        self.verified = False
        self.extracted = False

    def get_filenames(self):
        """Return the filenames protected by this set's main par2 file."""
        return get_par2_filenames(self.parfile)

    def all_there(self):
        """Check if all the files for the parset are present.
        This will help us decide which par2 checker to use first"""
        return all(os.path.isfile(f) for f in self.files)

    def verify(self):
        """Verify the parset by the most efficient method first, and move to
        a slower method if that one fails.

        Returns True if the set verified, otherwise False.  The result is
        also stored in self.verified."""

        if self.all_there():
            # Everything is present, so try the fast check; if that fails the
            # files may need repair, which the slow check attempts.
            retval = self.__fast_verify() or self.__slow_verify()

        # If we've got a video file, maybe we should try to parjoin it
        elif self.__has_video_file():
            retval = self.__parjoin()

        else: #not all there, maybe we can slow-repair
            retval = self.__slow_verify()

        self.verified = retval
        return self.verified

    def __run(self, argv):
        """Run the program given as the argument list `argv` and return True
        if it exited with status 0.

        The arguments are handed to the program directly, without going
        through a shell, so a filename containing quotes, '$' or backticks
        cannot be interpreted as shell syntax.  A program that cannot be
        started counts as a failure."""
        import subprocess

        try:
            return subprocess.call(argv) == 0
        except OSError:
            return False

    def __fast_verify(self):
        """Check the set with cfv; True on success."""
        return self.__run(['cfv', '-v', '-f', self.parfile])

    def __slow_verify(self):
        """Check (and repair if necessary) the set with par2repair; True on
        success."""
        return self.__run(['par2repair', self.parfile])

    def __parjoin(self):
        """Join the split pieces of the first file with lxsplit, then verify
        the set.

        Returns True if the set verified after joining."""
        target = self.files[0]
        existed_before = os.path.exists(target)

        # lxsplit's own exit status is deliberately ignored: the verification
        # below decides whether the join produced a good file.
        self.__run(['lxsplit', '-j', '%s.001' % (target, )])

        # Try it fast first, then slow, maybe it needs repair
        retval = self.__fast_verify() or self.__slow_verify()

        # Failed to verify, so remove the lxsplit created file -- but never a
        # file that was already there before the join, and don't fail when
        # lxsplit did not create anything.
        if not retval and not existed_before and os.path.isfile(target):
            os.remove(target)

        self.used_parjoin = retval
        self.verified = retval
        return self.verified

    def __has_video_file(self):
        """Return True if any protected file looks like a video file."""
        regex = re.compile(config.VIDEO_FILE_REGEX)
        return any(regex.match(f) for f in self.files)

    def __remove_currentset(self):
        """Remove all of the files that are extractable, as well as the pars.
        Leave everything else alone"""

        if not self.extracted:
            print('Did not extract yet, not removing currentset')
            return

        # remove the main par
        os.remove(self.parfile)

        # remove all of the extra pars
        for extra in self.extra_pars:
            os.remove(extra)

        # remove any rars that are associated (leave EVERYTHING else)
        # This regex matches both old and new style rar(s) by default.
        regex = re.compile(config.DELETEABLE_TYPES_REGEX)

        for name in self.files:
            if regex.match(name):
                os.remove(name)

        # remove any .{001,002,...} files (from parjoin).  Only names made of
        # the joined file's name plus a numeric extension are removed, so
        # unrelated files that merely contain that name are left alone.
        if self.used_parjoin:
            piece_regex = re.compile(re.escape(self.files[0]) + r'\.\d+$')

            for name in os.listdir(os.getcwd()):
                if piece_regex.match(name):
                    os.remove(name)

        # remove any temp repair files
        regex = re.compile(config.TEMP_REPAIR_REGEX)

        for name in os.listdir(os.getcwd()):
            if regex.match(name):
                os.remove(name)

    def __get_extract_file(self):
        """Find the first extractable file (the first one with a .rar
        extension); None if there is none."""
        for name in self.files:
            if os.path.splitext(name)[1] == '.rar':
                return name

        return None

    def extract(self):
        """Attempt to extract all of the files related to this parset.

        Returns True if the set was extracted (after which the set's files
        are cleaned up), False if it was not verified or extraction failed.
        The result is also stored in self.extracted."""
        if not self.verified:
            self.extracted = False
            print('Not (successfully) verified, not extracting')
            return False #failed to extract

        extract_file = self.__get_extract_file()

        if extract_file is not None:
            if not self.__run(['rar', 'e', '-o+', extract_file]):
                print('Failed to extract')
                self.extracted = False
                return self.extracted

        # we extracted ok (or there was nothing to unpack), so remove the
        # currentset
        self.extracted = True
        self.__remove_currentset()

        return self.extracted
313
 
314
 
315
################################################################################
316
# The rarslave program itself
317
################################################################################
318
 
134 ira 319
import os, sys
129 ira 320
from optparse import OptionParser
321
 
322
def check_required_progs():
    """Check if the required programs are installed.

    Each program is started with --help through the shell (output
    discarded).  A program counts as missing when the shell reports that it
    could not find the command.  Every missing program is reported on
    stdout, and the process then exits with status 1.  Nothing happens when
    all programs are found."""

    # os.system() returns a wait status: 32512 is exit code 127, the shell's
    # "command not found", shifted left by eight bits.
    shell_not_found = 127 << 8
    required = ('cfv', 'par2repair', 'lxsplit', 'rar')

    needed = []

    for prog in required:
        status = os.system('%s --help > /dev/null 2>&1' % (prog, ))

        if status == shell_not_found:
            needed.append(prog)

    if needed:
        for n in needed:
            print('Needed program "%s" not found in $PATH' % (n, ))

        sys.exit(1)
345
 
346
def get_parsets():
    """Get a representation of each parset in the current directory, and
    return them as a list of parset instances.

    Every file in the current directory whose name matches
    config.PAR2_REGEX is parsed.  Par2 files that protect the same list of
    files are grouped: the first one seen becomes the set's main par2 file
    and the others are recorded in its extra_pars.  Par2 files that cannot
    be parsed are reported and skipped."""

    regex = re.compile(config.PAR2_REGEX)
    par2files = [f for f in os.listdir(os.getcwd()) if regex.match(f)]

    parsets = []

    for par2file in par2files:
        try:
            filenames = get_par2_filenames(par2file)
        except (EnvironmentError, struct.error):
            # struct.error is what a truncated packet produces while being
            # unpacked; treat it like any other corrupt par2 file instead of
            # letting it abort the whole run.
            print('CORRUPT PARFILE: %s' % (par2file, ))
            continue

        # if we already have an instance for this set, append
        # this par file to the extra_pars field
        for known in parsets:
            if known.files == filenames:
                known.extra_pars.append(par2file)
                break
        else:
            # we haven't seen this set yet, so we'll create it now
            cur = parset(par2file)
            cur.files = filenames
            parsets.append(cur)

    return parsets
377
 
378
def directory_worker(dir):
    """Attempts to find, verify, and extract every parset in the directory
    given as a parameter.

    The process changes into `dir` while it works and changes back to the
    previous working directory afterwards, even if verifying or extracting
    raises an exception."""

    cwd = os.getcwd()
    os.chdir(dir)

    try:
        parsets = get_parsets()

        # Verify each parset
        for p in parsets:
            p.verify()

        # Attempt to extract each parset
        for p in parsets:
            p.extract()
    finally:
        # Always go back, so a failure here does not leave the caller in an
        # unexpected directory.
        os.chdir(cwd)
396
 
397
def main():
    """Parse the command line and run rarslave.

    Defaults for every option come from the global `config` object.  The
    work directory has '~' expanded and is made absolute; it is then
    processed on its own, or together with every directory below it when
    running recursively."""

    # Build the OptionParser
    parser = OptionParser()
    parser.add_option('-n', '--not-recursive',
                      action='store_false', dest='recursive',
                      default=config.RECURSIVE, help="Don't run recursively")

    parser.add_option('-d', '--work-dir',
                      dest='work_dir', default=config.WORK_DIR,
                      help="Start running at DIR", metavar='DIR')

    # This flag turns the check ON (store_true), so the help text says so.
    parser.add_option('-p', '--check-required-programs',
                      action='store_true', dest='check_progs',
                      default=config.CHECK_REQ_PROGS,
                      help="Check for required programs before starting")

    # Parse the given options
    (options, args) = parser.parse_args()

    # Fix up the working directory
    options.work_dir = os.path.abspath(os.path.expanduser(options.work_dir))

    # Check that we have the required programs installed
    if options.check_progs:
        check_required_progs()

    # Run rarslave!
    if options.recursive:
        for root, dirs, files in os.walk(options.work_dir):
            directory_worker(root)
    else:
        directory_worker(options.work_dir)


if __name__ == '__main__':
    main()
432