Subversion Repositories programming

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
129 ira 1
#!/usr/bin/env python
2
 
3
# Copyright: Ira W. Snyder (devel@irasnyder.com)
4
# Start Date: 2005-10-13
5
# End Date:
6
# License: GNU General Public License v2 (or at your option, any later version)
7
#
8
# Changelog Follows:
9
# - 2005-10-13
10
# - Added get_par2_filenames() to parse par2 files
11
# - Added the parset object to represent each parset.
12
#
13
# - 2005-10-14
14
# - Finished the parset object. It will now verify and extract parsets.
15
# - Small changes to the parset object. This makes the parjoin part
16
#   much more reliable.
17
# - Added the OptionParser to make this nice to run at the command line.
18
# - Made recursiveness an option.
19
# - Made start directory an option.
20
# - Check for appropriate programs before starting.
21
#
134 ira 22
# - 2005-10-17
23
# - Use a regular expression to handle the deletable types.
24
#
25
# - 2005-10-18
26
# - Use regular expressions to handle all finding of files, instead of
27
#   using the glob module.
28
# - Add a config class to handle all the default config stuff sanely.
29
#   This makes it easier to change some of the main parts of the program to
30
#   your specific configuration.
31
# - Move the docrcchecks variable inside the get_par2_filenames() function,
32
#   which is where it belongs anyway.
33
# - Added command-line option to check for required programs at start.
34
#
139 ira 35
# - 2005-10-20
36
# - Added a config option to extract with full path.
37
#
141 ira 38
# - 2005-10-22
39
# - Re-wrote the config class so that there is a config file, which
40
#   resides at ~/.config/rarslave/rarslave.conf by default.
41
# - Added the command-line option -c to write out an updated version
42
#   of the config file (to fill in any missing options with the defaults)
43
# - Added the command-line option -f to write out a new default config file,
44
#   which overwrites any user changes.
45
# - Made all regexes case insensitive.
46
# - Made all command-line options override the config file.
47
#
129 ira 48
 
49
################################################################################
50
# REQUIREMENTS:
51
#
52
# This code requires the programs cfv, par2repair, lxsplit, and rar to be able
53
# to function properly. I will attempt to check that these are in your path.
54
################################################################################
55
 
141 ira 56
try:
    import ConfigParser
except ImportError:
    # Python 3 renamed the module
    import configparser as ConfigParser
import os
import sys
57
 
134 ira 58
class rarslave_config:
    """A simple class to hold the configuration for the whole program

    Values come from two layers: the built-in defaults defined in
    __init__(), and the user's config file, which overrides them. Both
    layers are dictionaries keyed by (section, option) tuples."""

    def __init__(self, filename='~/.config/rarslave/rarslave.conf'):
        """Set up the built-in defaults and load the user's config file.

        filename -- the config file to read. It is created with the default
                    values if it does not exist yet."""

        self.__defaults = {
            ('directories', 'working_directory') : '~/downloads/usenet',
            ('options', 'recursive') : True,
            ('options', 'check_required_programs') : False,
            ('options', 'extract_with_full_path') : False,
            # Raw strings, so the backslashes reach the regex engine untouched
            ('regular expressions', 'par2_regex') : r'.*\.par2$',
            ('regular expressions', 'video_file_regex') : r'.*\.(avi|ogm|mkv|mp4)$',
            ('regular expressions', 'temp_repair_regex') : r'.*\.1$',
            ('regular expressions', 'remove_regex') : r'^.*\.(rar|r\d\d)$' }

        self.__user_config = self.__read_config(filename)

    def __read_config(self, filename='~/.config/rarslave/rarslave.conf'):
        """Attempt to open and read the rarslave config file.

        Returns a dictionary mapping (section, option) to the string value
        found in the file."""

        # Make sure the filename is corrected
        filename = os.path.abspath(os.path.expanduser(filename))

        # Write the default config if it doesn't exist. The file is written to
        # the very path we are about to read, not to the built-in location.
        if not os.path.isfile(filename):
            self.write_config(filename, default=True)

        config = ConfigParser.ConfigParser()
        config.read(filename)

        user_config = {}
        for section in config.sections():
            for option in config.options(section):
                user_config[(section, option)] = config.get(section, option)

        return user_config

    def write_config(self, filename='~/.config/rarslave/rarslave.conf', default=False):
        """Write out the current config to the config file. If you set default=True, then
        the default config file will be written, and any values loaded from
        the user's config are forgotten.

        The program exits with status 1 if the directory or the file cannot
        be written."""

        config = ConfigParser.ConfigParser()

        # Correct filename
        filename = os.path.abspath(os.path.expanduser(filename))

        # Reset all config to make sure we write the default one, if necessary
        if default:
            self.__user_config = {}
            print('Writing default config to %s' % (filename, ))

        # One pass per section, in the order they should appear in the file.
        # Values are stored as strings, which is what ends up in the file
        # anyway; get_value() turns 'True' and 'False' back into booleans.
        for section in ('directories', 'options', 'regular expressions'):
            config.add_section(section)
            for (s, k) in sorted(self.__defaults.keys()):
                if s == section:
                    config.set(s, k, str(self.get_value(s, k)))

        # Try to make the directory that will hold the config file
        directory = os.path.dirname(filename)
        if not os.path.isdir(directory):
            try:
                os.makedirs(directory)
            except OSError:
                print('Could not make directory: %s' % (directory, ))
                sys.exit(1)

        # Try to write the config file to disk
        try:
            fsock = open(filename, 'w')
            try:
                config.write(fsock)
            finally:
                fsock.close()
        except EnvironmentError:
            print('Could not open: %s for writing' % (filename, ))
            sys.exit(1)

    def __get_default_val(self, section, key):
        """Return the built-in default; raises KeyError for an unknown option"""
        return self.__defaults[(section, key)]

    def get_value(self, section, key):
        """Get a config value. Attempts to get the value from the user's
        config first, and then uses the default.

        The strings 'True' and 'False' are returned as booleans."""

        try:
            value = self.__user_config[(section, key)]
        except KeyError:
            # This should work, unless you write something stupid
            # into the code, so DON'T DO IT
            value = self.__get_default_val(section, key)

        # Convert config options to booleans for easier use
        if value == 'True':
            value = True

        if value == 'False':
            value = False

        return value
168
 
169
# This is the global config variable. Creating it runs
# rarslave_config.__init__, which reads the user's config file and first
# writes a default one if that file does not exist yet.
config = rarslave_config()
141 ira 171
 
129 ira 172
################################################################################
173
# The PAR2 Parser
174
#
175
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
176
################################################################################
177
 
178
import struct, errno
179
 
180
def chompnulls(line):
    """Return line cut off at its first NUL character.

    Works on byte strings (what a par2 file opened in binary mode yields)
    as well as on text strings. A line without a NUL is returned unchanged."""

    if isinstance(line, bytes):
        nul = b'\0'
    else:
        nul = u'\0'

    p = line.find(nul)
    if p < 0:
        return line
    return line[:p]

def get_par2_filenames(filename):
    """Get all of the filenames that are protected by the par2
    file given as the filename.

    The names are returned in the order their file description packets
    appear, exactly as stored in the file (byte strings, since the file is
    read in binary mode).

    Returns an empty list (after printing a message) if the file cannot be
    opened. Raises EnvironmentError with errno.EINVAL if the file is
    corrupt: a truncated or malformed packet, a bad packet hash, no main
    packet, or a file named by the main packet but never described."""

    import hashlib # replaces the md5 module, which newer Pythons lack

    try:
        par2 = open(filename, 'rb')
    except EnvironmentError:
        print('Could not open %s' % (filename, ))
        return []

    pkt_header_fmt = '< 8s Q 16s 16s 16s'
    pkt_header_size = struct.calcsize(pkt_header_fmt)
    file_pkt_fmt = '< 16s 16s 16s Q'
    file_pkt_size = struct.calcsize(file_pkt_fmt)
    main_pkt_fmt = '< Q I'
    main_pkt_size = struct.calcsize(main_pkt_fmt)

    seen_file_ids = {}
    expected_file_ids = None
    filenames = []

    try:
        # Used to reject packet lengths that cannot possibly fit in the file,
        # before trying to read that many bytes.
        par2.seek(0, 2)
        total_size = par2.tell()
        par2.seek(0)

        while True:
            header = par2.read(pkt_header_size)
            if not header:
                break

            if len(header) < pkt_header_size:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - truncated packet header")

            magic, pkt_len, pkt_md5, set_id, pkt_type = \
                struct.unpack(pkt_header_fmt, header)

            if pkt_len < pkt_header_size or pkt_len > total_size:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - bad packet length")

            body = par2.read(pkt_len - pkt_header_size)

            # We always want to do crc checks. The hash covers everything
            # after the hash field itself: the rest of the header from
            # offset 0x20 on, plus the packet body.
            control_md5 = hashlib.md5()
            control_md5.update(header[0x20:])
            control_md5.update(body)

            if control_md5.digest() != pkt_md5:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - bad packet hash")

            if pkt_type == b'PAR 2.0\0FileDesc':
                if len(body) < file_pkt_size:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - short file description packet")

                file_id, file_md5, file_md5_16k, file_size = \
                    struct.unpack(file_pkt_fmt, body[:file_pkt_size])

                # The same file may be described more than once; keep the
                # first description only.
                if seen_file_ids.get(file_id) is None:
                    seen_file_ids[file_id] = 1
                    filenames.append(chompnulls(body[file_pkt_size:]))

            elif pkt_type == b'PAR 2.0\0Main\0\0\0\0':
                # Only the first main packet is used
                if expected_file_ids is None:
                    if len(body) < main_pkt_size:
                        raise EnvironmentError(errno.EINVAL,
                            "corrupt par2 file - short main packet")

                    # Everything after the fixed part is a list of 16 byte
                    # file ids (recoverable and non-recoverable alike).
                    id_count = (len(body) - main_pkt_size) // 16
                    expected_file_ids = []

                    for i in range(main_pkt_size, main_pkt_size + id_count * 16, 16):
                        expected_file_ids.append(body[i:i + 16])

            # Any other packet type has been read and hash-checked above;
            # its contents are not needed.
    finally:
        par2.close()

    if expected_file_ids is None:
        raise EnvironmentError(errno.EINVAL,
            "corrupt or unsupported par2 file - no main packet found")

    for expected_id in expected_file_ids:
        if expected_id not in seen_file_ids:
            raise EnvironmentError(errno.EINVAL,
                "corrupt or unsupported par2 file - "
                "expected file description packet not found")

    return filenames
266
 
267
################################################################################
268
# The parset object
269
#
270
# This is an object based representation of a parset, and will verify itself
271
# and extract itself, if possible.
272
################################################################################
273
 
132 ira 274
import os, glob, re
129 ira 275
 
276
class parset:
    """An object based representation of a parset, which can verify
    itself and extract itself.

    Attributes:
    parfile      -- the main par2 file of the set
    extra_pars   -- further par2 files that protect the same files
    files        -- the protected file names; starts out as False and must
                    be assigned by the caller before the set is used
    used_parjoin -- True once the set was successfully joined with lxsplit
    verified     -- True once verify() has succeeded
    extracted    -- True once extract() has succeeded"""

    def __init__(self, par_filename):
        self.parfile = par_filename
        self.extra_pars = []
        self.files = False
        self.used_parjoin = False
        self.verified = False
        self.extracted = False

    def __run(self, argv):
        """Run an external program and return True if it exited with status 0.

        The program is started directly from the argument list, without a
        shell, so file names containing quotes or other shell
        metacharacters are passed through literally. A program that cannot
        be started counts as a failure."""

        import subprocess

        try:
            return subprocess.call(argv) == 0
        except OSError:
            return False

    def get_filenames(self):
        """Return the names of the files protected by the main par2 file"""
        return get_par2_filenames(self.parfile)

    def all_there(self):
        """Check if all the files for the parset are present.
        This will help us decide which par2 checker to use first"""
        for f in self.files:
            if not os.path.isfile(f):
                return False

        # The files were all there
        return True

    def verify(self):
        """This will verify the parset by the most efficient method first,
        and then move to a slower method if that one fails.

        Returns True if the set verified, and stores that in self.verified."""

        retval = False #not verified yet

        # if all the files are there, try verifying fast
        if self.all_there():
            retval = self.__fast_verify()

            if not retval:
                # Failed to verify fast, so try it slow, maybe it needs repair
                retval = self.__slow_verify()

        # If we've got a video file, maybe we should try to parjoin it
        elif self.__has_video_file():
            retval = self.__parjoin()

        else: #not all there, maybe we can slow-repair
            retval = self.__slow_verify()

        self.verified = retval
        return self.verified

    def __fast_verify(self):
        """Check the set with cfv; True on success"""
        return self.__run(['cfv', '-v', '-f', self.parfile])

    def __slow_verify(self):
        """Check (and repair, if needed) the set with par2repair; True on success"""
        return self.__run(['par2repair', self.parfile])

    def __parjoin(self):
        """Join the split pieces of the first file with lxsplit, then verify.

        If the joined result does not verify, the joined file is removed
        again. Returns True on success."""

        # The result of the join itself is not checked: the verification
        # below decides whether it worked.
        self.__run(['lxsplit', '-j', '%s.001' % (self.files[0], )])

        retval = self.__fast_verify()

        if not retval:
            # Failed to verify fast, so try it slow, maybe it needs repair
            retval = self.__slow_verify()

        # failed to verify, so remove the lxsplit created file (if lxsplit
        # got far enough to create one)
        if not retval and os.path.isfile(self.files[0]):
            os.remove(self.files[0])

        self.used_parjoin = retval
        self.verified = retval
        return self.verified

    def __has_video_file(self):
        """Return True if any file of the set matches the video file regex"""
        regex = re.compile(
                config.get_value('regular expressions', 'video_file_regex'),
                re.IGNORECASE)

        for f in self.files:
            if regex.match(f):
                return True

        return False

    def __remove_currentset(self):
        """Remove all of the files that are extractable, as well as the pars.
        Leave everything else alone"""

        if not self.extracted:
            print('Did not extract yet, not removing currentset')
            return

        # remove the main par
        os.remove(self.parfile)

        # remove all of the extra pars
        for i in self.extra_pars:
            os.remove(i)

        # remove any rars that are associated (leave EVERYTHING else)
        # This regex matches both old and new style rar(s) by default.
        regex = re.compile(
                config.get_value('regular expressions', 'remove_regex'),
                re.IGNORECASE)

        for i in self.files:
            if regex.match(i):
                os.remove(i)

        # remove any .{001,002,...} files (from parjoin). Only the joined
        # file's name followed by a numeric extension counts as a piece, so
        # unrelated files that merely contain the name are left alone.
        if self.used_parjoin:
            piece_regex = re.compile(re.escape(self.files[0]) + r'\.\d+$')

            for i in os.listdir(os.getcwd()):
                if piece_regex.match(i):
                    os.remove(i)

        # remove any temp repair files
        regex = re.compile(
                config.get_value('regular expressions', 'temp_repair_regex'),
                re.IGNORECASE)

        for f in os.listdir(os.getcwd()):
            if regex.match(f):
                os.remove(f)

    def __get_extract_file(self):
        """Find the first extractable file, or None if there is none.

        The extension is compared without regard to case, like the regex
        that later decides which files get removed."""
        for i in self.files:
            if os.path.splitext(i)[1].lower() == '.rar':
                return i

        return None

    def extract(self):
        """Attempt to extract all of the files related to this parset.

        Nothing is extracted unless the set has been verified. After a
        successful extraction (or when the set holds nothing to extract)
        the par files, rars and temporary files of the set are removed.
        Returns True on success, and stores that in self.extracted."""
        if not self.verified:
            self.extracted = False
            print('Not (successfully) verified, not extracting')
            return False #failed to extract

        extract_file = self.__get_extract_file()

        if extract_file is not None:
            # 'x' keeps the paths stored in the archive, 'e' flattens them
            if config.get_value('options', 'extract_with_full_path'):
                mode = 'x'
            else:
                mode = 'e'

            if not self.__run(['rar', mode, '-o+', extract_file]):
                print('Failed to extract')
                self.extracted = False
                return self.extracted

        # we extracted ok, so remove the currentset
        self.extracted = True
        self.__remove_currentset()

        return self.extracted
435
 
436
 
437
################################################################################
438
# The rarslave program itself
439
################################################################################
440
 
134 ira 441
import os, sys
129 ira 442
from optparse import OptionParser
443
 
444
def check_required_progs(programs=('cfv', 'par2repair', 'lxsplit', 'rar')):
    """Check if the required programs are installed.

    programs -- the command names to look for. Each one is run through the
                shell with --help, so only pass trusted names.

    Prints one line per missing program and exits with status 1 if any
    of them is missing. Returns None otherwise."""

    # What os.system() returns when the shell could not find the command:
    # exit status 127 in the high byte of the wait status (127 << 8)
    shell_not_found = 32512
    needed = []

    for prog in programs:
        if os.system('%s --help > /dev/null 2>&1' % (prog, )) == shell_not_found:
            needed.append(prog)

    if needed:
        for n in needed:
            print('Needed program "%s" not found in $PATH' % (n, ))

        sys.exit(1)
467
 
468
def get_parsets():
    """Build one parset instance for every distinct set of protected files
    described by the par2 files in the current directory, and return the
    instances as a list.

    Par2 files that describe an already known set are recorded in that
    set's extra_pars instead. Corrupt par2 files are reported and skipped."""

    par2_pattern = re.compile(
            config.get_value('regular expressions', 'par2_regex'),
            re.IGNORECASE)

    found = []

    for entry in os.listdir(os.getcwd()):
        if not par2_pattern.match(entry):
            continue

        try:
            names = get_par2_filenames(entry)
        except EnvironmentError:
            print('CORRUPT PARFILE: %s' % (entry, ))
            continue

        for known in found:
            # same protected files: this is just another par of a known set
            if known.files == names:
                known.extra_pars.append(entry)
                break
        else:
            # first par2 file seen for this set
            fresh = parset(entry)
            fresh.files = names
            found.append(fresh)

    return found
501
 
502
def directory_worker(dir):
    """Attempts to find, verify, and extract every parset in the directory
    given as a parameter.

    The process changes into dir while it works. The previous working
    directory is restored afterwards, even if processing raises."""

    cwd = os.getcwd()
    os.chdir(dir)

    try:
        parsets = get_parsets()

        # Verify each parset
        for p in parsets:
            p.verify()

        # Attempt to extract each parset
        for p in parsets:
            p.extract()
    finally:
        os.chdir(cwd)
520
 
521
def main():
    """Parse the command line and run rarslave.

    Command-line options override the values from the config file, which
    supplies their defaults."""

    # Every command-line option: its flags, then the add_option() settings
    option_table = (
        (('-n', '--not-recursive'),
         dict(action='store_false', dest='recursive',
              default=config.get_value('options', 'recursive'),
              help="Don't run recursively")),
        (('-d', '--work-dir'),
         dict(dest='work_dir',
              default=config.get_value('directories', 'working_directory'),
              help="Start running at DIR", metavar='DIR')),
        (('-p', '--check-required-programs'),
         dict(action='store_true', dest='check_progs',
              default=config.get_value('options', 'check_required_programs'),
              help="Check for required programs")),
        (('-f', '--write-default-config'),
         dict(action='store_true', dest='write_def_config',
              default=False, help="Write out a new default config")),
        (('-c', '--write-new-config'),
         dict(action='store_true', dest='write_config',
              default=False, help="Write out the current config")),
    )

    cli = OptionParser()
    for flags, settings in option_table:
        cli.add_option(*flags, **settings)

    opts, leftover = cli.parse_args()

    # Expand ~ and make the working directory absolute
    opts.work_dir = os.path.abspath(os.path.expanduser(opts.work_dir))

    # Make sure the external programs are available, if asked to
    if opts.check_progs:
        check_required_progs()

    # Replace the config file with the defaults, if asked to
    if opts.write_def_config:
        config.write_config(default=True)

    # Write out the current config (adds new options to an existing config)
    if opts.write_config:
        config.write_config()

    # Run rarslave!
    if not opts.recursive:
        directory_worker(opts.work_dir)
        return

    for root, dirs, files in os.walk(opts.work_dir):
        directory_worker(root)

if __name__ == '__main__':
    main()
575