Subversion Repositories programming

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
129 ira 1
#!/usr/bin/env python
2
 
3
# Copyright: Ira W. Snyder (devel@irasnyder.com)
4
# Start Date: 2005-10-13
5
# End Date:
6
# License: GNU General Public License v2 (or at your option, any later version)
7
#
8
# Changelog Follows:
9
# - 2005-10-13
10
# - Added get_par2_filenames() to parse par2 files
11
# - Added the parset object to represent each parset.
12
#
13
# - 2005-10-14
14
# - Finished the parset object. It will now verify and extract parsets.
15
# - Small changes to the parset object. This makes the parjoin part
16
#   much more reliable.
17
# - Added the OptionParser to make this nice to run at the command line.
18
# - Made recursiveness an option.
19
# - Made start directory an option.
20
# - Check for appropriate programs before starting.
21
#
134 ira 22
# - 2005-10-17
23
# - Use a regular expression to handle the deletable types.
24
#
25
# - 2005-10-18
26
# - Use regular expressions to handle all finding of files, instead of
27
#   using the glob module.
28
# - Add a config class to handle all the default config stuff sanely.
29
#   This makes it easier to change some of the main parts of the program to
30
#   your specific configuration.
31
# - Move the docrcchecks variable inside the get_par2_filenames() function,
32
#   which is where it belongs anyway.
33
# - Added command-line option to check for required programs at start.
34
#
139 ira 35
# - 2005-10-20
36
# - Added a config option to extract with full path.
37
#
129 ira 38
 
39
################################################################################
40
# REQUIREMENTS:
41
#
42
# This code requires the programs cfv, par2repair, lxsplit, and rar to be able
43
# to function properly. I will attempt to check that these are in your path.
44
################################################################################
45
 
134 ira 46
class rarslave_config:
    """Holds the program-wide default settings as class attributes."""

    # Directory in which work starts ('~' is expanded by main()).
    WORK_DIR = '~/downloads/usenet'

    # Names matching this pattern are deleted after a successful extraction.
    DELETEABLE_TYPES_REGEX = r'^.*\.(rar|r\d\d)$'

    # Names matching this pattern are treated as temporary repair files.
    TEMP_REPAIR_REGEX = r'.*\.1$'

    # Names matching this pattern are treated as par2 files.
    PAR2_REGEX = r'.*\.(PAR|par)2$'

    # Names matching this pattern are treated as video files.
    VIDEO_FILE_REGEX = r'.*\.(AVI|avi|OGM|ogm|MKV|mkv)$'

    # Default for descending into sub-directories of the work directory.
    RECURSIVE = True

    # Default for checking that the external programs exist at startup.
    CHECK_REQ_PROGS = False

    # True extracts with 'rar x' (full paths), False with 'rar e'.
    EXTRACT_WITH_FULL_PATH = False

    def __init__(self):
        """Nothing to set up; every setting lives on the class."""


# The single shared configuration instance used by the rest of the program.
config = rarslave_config()
62
 
129 ira 63
################################################################################
64
# The PAR2 Parser
65
#
66
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
67
################################################################################
68
 
69
import struct, errno
70
 
71
def chompnulls(line):
    """Return `line` cut off just before its first NUL character.

    A string without any NUL character is returned whole.
    """
    # Splitting once on NUL leaves everything before the first NUL in
    # element 0 (or the entire string when there is no NUL at all).
    return line.split('\0', 1)[0]
75
 
76
def get_par2_filenames(filename):
    """Get all of the filenames that are protected by the par2
    file given as the filename.

    The file is read as a sequence of packets.  Each packet's MD5 hash is
    always verified.  Every file description packet contributes one name
    (a file id that shows up more than once is only reported the first
    time) and the first main packet supplies the file ids that must have a
    description packet.

    Returns the names in the order their description packets first appear.
    Names are the raw byte strings stored in the packets, cut at the first
    NUL byte (a plain `str` under Python 2).  When `filename` cannot be
    opened a message is printed and an empty list is returned.

    Raises EnvironmentError (errno.EINVAL) when a packet is truncated, has
    an impossible length or a bad hash, when no main packet is present, or
    when an expected file description packet is missing.
    """

    # hashlib replaces the md5 module, which is deprecated and no longer
    # exists in Python 3.  Imported here so this function is self-contained.
    import hashlib

    try:
        par2 = open(filename, 'rb')
    except EnvironmentError:
        print('Could not open %s' % (filename, ))
        return []

    pkt_header_fmt = '< 8s Q 16s 16s 16s'
    pkt_header_size = struct.calcsize(pkt_header_fmt)
    file_pkt_fmt = '< 16s 16s 16s Q'
    file_pkt_size = struct.calcsize(file_pkt_fmt)
    main_pkt_fmt = '< Q I'
    main_pkt_size = struct.calcsize(main_pkt_fmt)
    file_id_size = 16

    seen_file_ids = set()
    expected_file_ids = None
    filenames = []

    try:
        while True:
            header = par2.read(pkt_header_size)
            if not header:
                break

            # A short read would make struct.unpack raise struct.error,
            # which callers looking for EnvironmentError do not handle.
            if len(header) < pkt_header_size:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - truncated packet header")

            _magic, pkt_len, pkt_md5, _set_id, pkt_type = \
                struct.unpack(pkt_header_fmt, header)

            # The length counts the header too; anything smaller would turn
            # into a negative read size (which means "read everything").
            if pkt_len < pkt_header_size:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - bad packet length")

            body = par2.read(pkt_len - pkt_header_size)

            # The hash covers the header from offset 0x20 on, plus the body.
            control_md5 = hashlib.md5(header[0x20:])
            control_md5.update(body)

            if control_md5.digest() != pkt_md5:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - bad packet hash")

            if pkt_type == b'PAR 2.0\0FileDesc':
                if len(body) < file_pkt_size:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - truncated file description packet")

                file_id, _file_md5, _file_md5_16k, _file_size = \
                    struct.unpack(file_pkt_fmt, body[:file_pkt_size])

                if file_id not in seen_file_ids:
                    seen_file_ids.add(file_id)
                    # The name follows the fixed fields and is NUL padded.
                    filenames.append(body[file_pkt_size:].split(b'\0', 1)[0])

            elif pkt_type == b'PAR 2.0\0Main\0\0\0\0':
                # Only the first main packet is used.
                if expected_file_ids is None:
                    if len(body) < main_pkt_size:
                        raise EnvironmentError(errno.EINVAL,
                            "corrupt par2 file - truncated main packet")

                    # Every complete 16-byte id after the fixed fields is
                    # expected to have a file description packet.
                    id_count = (len(body) - main_pkt_size) // file_id_size
                    ids_end = main_pkt_size + id_count * file_id_size
                    expected_file_ids = [
                        body[start:start + file_id_size]
                        for start in range(main_pkt_size, ids_end, file_id_size)]

            # Any other packet type has already been read past; ignore it.
    finally:
        par2.close()

    if expected_file_ids is None:
        raise EnvironmentError(errno.EINVAL,
            "corrupt or unsupported par2 file - no main packet found")

    for expected_id in expected_file_ids:
        if expected_id not in seen_file_ids:
            raise EnvironmentError(errno.EINVAL,
                "corrupt or unsupported par2 file - "
                "expected file description packet not found")

    return filenames
157
 
158
################################################################################
159
# The parset object
160
#
161
# This is an object based representation of a parset, and will verify itself
162
# and extract itself, if possible.
163
################################################################################
164
 
132 ira 165
import os, glob, re
129 ira 166
 
167
class parset:
    """An object based representation of one parset, able to verify and
    extract itself.

    Attributes:
      parfile      -- name of the main par2 file of the set
      extra_pars   -- names of further par2 files describing the same set
      files        -- names of the protected files; starts out as False and
                      is assigned by the code creating the instance
                      (get_parsets() does this)
      used_parjoin -- True once a join with lxsplit led to a verified set
      verified     -- result of the last verify()
      extracted    -- result of the last extract()

    All file names are used relative to the current working directory.
    """

    def __init__(self, par_filename):
        self.parfile = par_filename
        self.extra_pars = []
        self.files = False
        self.used_parjoin = False
        self.verified = False
        self.extracted = False

    def __run(self, argv):
        """Run the external program given as the list `argv` (program name
        followed by its arguments) and return True if it exited with 0.

        The arguments are handed to the program directly, without a shell.
        File names come out of downloaded par2 files, so they must never be
        interpreted as shell syntax.
        """
        import subprocess

        try:
            return subprocess.call(argv) == 0
        except OSError:
            # The program is not installed or could not be started.
            print('Could not run "%s"' % (argv[0], ))
            return False

    def get_filenames(self):
        """Return the names of the files protected by the main par2 file"""
        return get_par2_filenames(self.parfile)

    def all_there(self):
        """Check if all the files for the parset are present.
        This will help us decide which par2 checker to use first"""
        for f in self.files:
            if not os.path.isfile(f):
                return False

        # The files were all there
        return True

    def verify(self):
        """This will verify the parset by the most efficient method first,
        and then move to a slower method if that one fails.

        Stores the outcome in self.verified and returns it."""

        if self.all_there():
            # Everything is present: try the fast check, and fall back to
            # par2repair in case the files need repairing.
            retval = self.__fast_verify()

            if not retval:
                retval = self.__slow_verify()

        # If we've got a video file, maybe we should try to parjoin it
        elif self.__has_video_file():
            retval = self.__parjoin()

        else: # not all there, maybe we can slow-repair
            retval = self.__slow_verify()

        self.verified = retval
        return self.verified

    def __fast_verify(self):
        """Check the set with cfv; True on success"""
        return self.__run(['cfv', '-v', '-f', self.parfile])

    def __slow_verify(self):
        """Check (and possibly repair) the set with par2repair; True on
        success"""
        return self.__run(['par2repair', self.parfile])

    def __parjoin(self):
        """Join the pieces of the first file with lxsplit, then verify.

        The file created by the join is removed again when verification
        fails.  Returns True if the set verified."""
        joined = self.files[0]

        # The outcome of the join itself is not inspected; the verification
        # below decides whether it worked.
        self.__run(['lxsplit', '-j', '%s.001' % (joined, )])

        retval = self.__fast_verify()

        if not retval:
            # Failed to verify fast, so try it slow, maybe it needs repair
            retval = self.__slow_verify()

        if not retval:
            # Failed to verify, so remove the lxsplit created file.  It may
            # not exist at all if the join never produced anything.
            try:
                os.remove(joined)
            except OSError:
                pass

        self.used_parjoin = retval
        self.verified = retval
        return self.verified

    def __has_video_file(self):
        """True if any protected file name matches config.VIDEO_FILE_REGEX"""
        regex = re.compile(config.VIDEO_FILE_REGEX)

        for f in self.files:
            if regex.match(f):
                return True

        return False

    def __remove_currentset(self):
        """Remove all of the files that are extractable, as well as the pars.
        Leave everything else alone"""

        if not self.extracted:
            print('Did not extract yet, not removing currentset')
            return

        # remove the main par
        os.remove(self.parfile)

        # remove all of the extra pars
        for par in self.extra_pars:
            os.remove(par)

        # remove any protected file matching config.DELETEABLE_TYPES_REGEX
        # (leave EVERYTHING else)
        regex = re.compile(config.DELETEABLE_TYPES_REGEX)

        for name in self.files:
            if regex.match(name):
                os.remove(name)

        # After a parjoin, remove every entry of the current directory whose
        # name contains the joined file's name, except the joined file itself
        # (this is what catches the .001, .002, ... pieces).
        if self.used_parjoin:
            joined = self.files[0]

            for entry in os.listdir(os.getcwd()):
                if entry != joined and joined in entry:
                    os.remove(entry)

        # remove any temp repair files
        regex = re.compile(config.TEMP_REPAIR_REGEX)

        for entry in os.listdir(os.getcwd()):
            if regex.match(entry):
                os.remove(entry)

    def __get_extract_file(self):
        """Find the first extractable file (the first name ending in .rar),
        or None if there is none"""
        for name in self.files:
            if os.path.splitext(name)[1] == '.rar':
                return name

        return None

    def extract(self):
        """Attempt to extract all of the files related to this parset.

        Nothing is done unless the set has been verified.  When the set
        contains no .rar file there is nothing to unpack, and the set still
        counts as extracted.  After a successful extraction the par files,
        archives and temporary files of the set are removed.

        Stores the outcome in self.extracted and returns it."""
        if not self.verified:
            self.extracted = False
            print('Not (successfully) verified, not extracting')
            return False # failed to extract

        extract_file = self.__get_extract_file()

        if extract_file is not None:
            # 'x' keeps the paths stored in the archive, 'e' drops them
            if config.EXTRACT_WITH_FULL_PATH:
                command = 'x'
            else:
                command = 'e'

            if not self.__run(['rar', command, '-o+', extract_file]):
                print('Failed to extract')
                self.extracted = False
                return self.extracted

        # we extracted ok, so remove the currentset
        self.extracted = True
        self.__remove_currentset()

        return self.extracted
320
 
321
 
322
################################################################################
323
# The rarslave program itself
324
################################################################################
325
 
134 ira 326
import os, sys
129 ira 327
from optparse import OptionParser
328
 
329
def check_required_progs():
    """Check if the required programs are installed.

    Each of cfv, par2repair, lxsplit and rar is started once through the
    shell with --help (output discarded).  Every program the shell could
    not find is reported, and if any is missing the process exits with
    status 1.  Returns None when all of them were found.
    """

    # Value os.system() returns when the shell exits with status 127
    # ("command not found"): 127 << 8.
    shell_not_found = 32512

    needed = []

    for prog in ('cfv', 'par2repair', 'lxsplit', 'rar'):
        if os.system('%s --help > /dev/null 2>&1' % (prog, )) == shell_not_found:
            needed.append(prog)

    if needed:
        for n in needed:
            print('Needed program "%s" not found in $PATH' % (n, ))

        sys.exit(1)
352
 
353
def get_parsets():
    """Build one parset instance per distinct parset found in the current
    directory and return them as a list.

    Par2 files that protect the same list of files are grouped: the first
    one becomes the parset's main par file, later ones are recorded in its
    extra_pars.  Par2 files that cannot be parsed are reported and skipped.
    """

    par2_matcher = re.compile(config.PAR2_REGEX)
    candidates = [entry for entry in os.listdir(os.getcwd())
                  if par2_matcher.match(entry)]

    found = []

    for par2_name in candidates:
        try:
            protected = get_par2_filenames(par2_name)
        except EnvironmentError:
            print('CORRUPT PARFILE: %s' % (par2_name, ))
            continue

        for known in found:
            if known.files == protected:
                # Already have this set: just remember the extra par file.
                known.extra_pars.append(par2_name)
                break
        else:
            # First par file seen for this set of files.
            fresh = parset(par2_name)
            fresh.files = protected
            found.append(fresh)

    return found
384
 
385
def directory_worker(dir):
    """Attempts to find, verify, and extract every parset in the directory
    given as a parameter.

    The process changes into `dir` while working, because the parsets use
    file names relative to the current directory.  The previous working
    directory is restored afterwards, even if an error occurs.
    """

    cwd = os.getcwd()
    os.chdir(dir)

    try:
        parsets = get_parsets()

        # Verify each parset
        for p in parsets:
            p.verify()

        # Attempt to extract each parset
        for p in parsets:
            p.extract()
    finally:
        # Always go back, so a failure does not leave the process in `dir`.
        os.chdir(cwd)
403
 
404
def main():
    """Parse the command line and run rarslave.

    Options:
      -n / --not-recursive            only process the work directory itself
      -d / --work-dir DIR             directory to start in
      -p / --check-required-programs  check for the external programs first

    The defaults for all three come from the config object.
    """

    # Build the OptionParser
    parser = OptionParser()
    parser.add_option('-n', '--not-recursive',
                      action='store_false', dest='recursive',
                      default=config.RECURSIVE, help="Don't run recursively")

    parser.add_option('-d', '--work-dir',
                      dest='work_dir', default=config.WORK_DIR,
                      help="Start running at DIR", metavar='DIR')

    # Giving this option turns the check ON (store_true), so the help text
    # has to say so.
    parser.add_option('-p', '--check-required-programs',
                      action='store_true', dest='check_progs',
                      default=config.CHECK_REQ_PROGS,
                      help="Check for required programs before starting")

    # Parse the given options
    (options, args) = parser.parse_args()

    # Fix up the working directory: expand '~' and make it absolute
    options.work_dir = os.path.abspath(os.path.expanduser(options.work_dir))

    # Check that we have the required programs installed
    if options.check_progs:
        check_required_progs()

    # Run rarslave!
    if options.recursive:
        for root, dirs, files in os.walk(options.work_dir):
            directory_worker(root)
    else:
        directory_worker(options.work_dir)


if __name__ == '__main__':
    main()
439