Subversion Repositories programming

Rev

Rev 179 | Rev 275 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
129 ira 1
#!/usr/bin/env python
2
 
3
# Copyright: Ira W. Snyder (devel@irasnyder.com)
4
# Start Date: 2005-10-13
5
# End Date:
6
# License: GNU General Public License v2 (or at your option, any later version)
7
#
8
# Changelog Follows:
9
# - 2005-10-13
10
# - Added get_par2_filenames() to parse par2 files
11
# - Added the parset object to represent each parset.
12
#
13
# - 2005-10-14
14
# - Finished the parset object. It will now verify and extract parsets.
15
# - Small changes to the parset object. This makes the parjoin part
16
#   much more reliable.
17
# - Added the OptionParser to make this nice to run at the command line.
18
# - Made recursiveness an option.
19
# - Made start directory an option.
20
# - Check for appropriate programs before starting.
21
#
134 ira 22
# - 2005-10-17
23
# - Use a regular expression to handle the deletable types.
24
#
25
# - 2005-10-18
26
# - Use regular expressions to handle all finding of files, instead of
27
#   using the glob module.
28
# - Add a config class to handle all the default config stuff sanely.
29
#   This makes it easier to change some of the main parts of the program to
30
#   your specific configuration.
31
# - Move the docrcchecks variable inside the get_par2_filenames() function,
32
#   which is where it belongs anyway.
33
# - Added command-line option to check for required programs at start.
34
#
139 ira 35
# - 2005-10-20
36
# - Added a config option to extract with full path.
37
#
141 ira 38
# - 2005-10-22
39
# - Re-wrote the config class so that there is a config file, which
40
#   resides at ~/.config/rarslave/rarslave.conf by default.
41
# - Added the command-line option -c to write out an updated version
42
#   of the config file (to fill in any missing options with the defaults)
43
# - Added the command-line option -f to write out a new default config file,
44
#   which overwrites any user changes.
45
# - Made all regexes case insensitive.
46
# - Made all command-line options override the config file.
47
#
150 ira 48
# - 2005-10-30
49
# - Added the '-o' option, to output debugging info. Hopefully next time
50
#   someone finds a bug, they can output this and send it to me with a
51
#   description of the bug they're seeing.
52
#
152 ira 53
# - 2005-11-05
54
# - Added an output system to rarslave. This makes a nice status report
55
#   possible at the end of the program run.
56
#
153 ira 57
# - 2005-11-06
58
# - Fixed the rar command so that it can extract files whose names begin
59
#   with a hyphen.
60
#
129 ira 61
 
62
################################################################################
63
# REQUIREMENTS:
64
#
65
# This code requires the programs cfv, par2repair, lxsplit, and rar to be able
66
# to function properly. I will attempt to check that these are in your path.
67
################################################################################
68
 
141 ira 69
try:
    import ConfigParser
except ImportError:
    # Python 3 renamed the module; keep the old name so the code below is unchanged.
    import configparser as ConfigParser
import os
import subprocess
import sys
70
 
134 ira 71
class rarslave_config:
    """Configuration store for rarslave.

    Every setting is addressed by a (section, option) pair. Built-in defaults
    are defined in __init__(); values read from the user's config file
    override them.
    """

    def __init__(self):
        self.__defaults = {
            ('directories', 'working_directory') : '~/downloads/usenet',
            ('options', 'recursive') : True,
            ('options', 'check_required_programs') : False,
            ('options', 'extract_with_full_path') : False,
            ('regular expressions', 'par2_regex') : r'.*\.par2$',
            ('regular expressions', 'video_file_regex') : r'.*\.(avi|ogm|mkv|mp4)$',
            ('regular expressions', 'temp_repair_regex') : r'.*\.1$',
            ('regular expressions', 'remove_regex') : r'^.*\.(rar|r\d\d)$' }

        # Start with no overrides so get_value() is usable while the config
        # file is being created and read.
        self.__user_config = {}
        self.__user_config = self.__read_config()

    def __read_config(self, filename='~/.config/rarslave/rarslave.conf'):
        """Read the config file and return its contents.

        If the file does not exist, a default config is written to that same
        path first. Returns a dict mapping (section, option) to the string
        value found in the file.
        """

        filename = os.path.abspath(os.path.expanduser(filename))

        # Write the default config to the path we were asked to read
        if not os.path.isfile(filename):
            self.write_config(filename, default=True)

        parser = ConfigParser.ConfigParser()
        parser.read(filename)

        user_config = {}
        for section in parser.sections():
            for option in parser.options(section):
                user_config[(section, option)] = parser.get(section, option)

        return user_config

    def write_config(self, filename='~/.config/rarslave/rarslave.conf', default=False):
        """Write the current config to the config file.

        With default=True all user overrides are discarded first, so the
        file (and this object) end up holding only the built-in defaults.
        The parent directory is created if necessary. Exits the program with
        status 1 if the directory or the file cannot be written.
        """

        filename = os.path.abspath(os.path.expanduser(filename))

        # Reset all config to make sure we write the default one, if necessary
        if default:
            self.__user_config = {}
            print('Writing default config to %s' % (filename, ))

        parser = ConfigParser.ConfigParser()
        for section in ('directories', 'options', 'regular expressions'):
            parser.add_section(section)
            for (s, k) in self.__defaults.keys():
                if s == section:
                    # Store text explicitly; the parser writes str(value) anyway
                    parser.set(s, k, str(self.get_value(s, k)))

        # Try to make the ~/.config/rarslave/ directory
        directory = os.path.dirname(filename)
        if not os.path.isdir(directory):
            try:
                os.makedirs(directory)
            except OSError:
                print('Could not make directory: %s' % (directory, ))
                sys.exit(1)

        # Try to write the config file to disk
        try:
            fsock = open(filename, 'w')
            try:
                parser.write(fsock)
            finally:
                fsock.close()
        except (IOError, OSError):
            print('Could not open: %s for writing' % (filename, ))
            sys.exit(1)

    def __get_default_val(self, section, key):
        """Return the built-in default; raises KeyError for unknown settings."""
        return self.__defaults[(section, key)]

    def get_value(self, section, key):
        """Return a config value, preferring the user's config over the default.

        The strings 'True' and 'False' (in any letter case) are converted to
        real booleans, because values read from the config file are always
        text. Raises KeyError if the setting exists neither in the user
        config nor in the defaults.
        """

        try:
            value = self.__user_config[(section, key)]
        except KeyError:
            value = self.__get_default_val(section, key)

        if isinstance(value, str):
            lowered = value.strip().lower()
            if lowered == 'true':
                return True
            if lowered == 'false':
                return False

        return value
181
 
182
# This is the global config variable.
134 ira 183
# Created at import time: the constructor reads the user's config file,
# writing a default one first if it does not exist yet.
config = rarslave_config()
141 ira 184
 
129 ira 185
################################################################################
152 ira 186
# The rarslave_output class
187
#
188
# This class handles the nice output summary which is printed at the end
189
# of a run
190
################################################################################
191
 
192
class rarslave_output:
    """Collects the result of every parset and prints a summary at the end.

    Data structure: list of lists
    [ [status, filename], ... ]

    Where status is one of:
    0: Verified and Extracted Perfectly
    1: Failed to Verify (and therefore Extract)
    2: Verified correctly, but failed to Extract
    3: The par2 file itself was corrupt
    """

    # (status code, name of the counter attribute, heading of the table)
    _TABLES = (
        (0, 'good_files', 'Files that were extracted perfectly'),
        (1, 'unverified', 'Files that failed to verify (and extract)'),
        (2, 'unextractable', 'Files that were verified, but failed to extract'),
        (3, 'corrupt_par2', 'Files that had corrupt par2 files'),
    )

    def __init__(self):
        self.output_list    = []
        self.good_files     = 0
        self.unverified     = 0
        self.unextractable  = 0
        self.corrupt_par2   = 0

    def print_equal_line(self, size=80):
        """Print a line of equal signs, 80 characters wide by default"""

        print('=' * size)

    def print_results_table(self):
        """Print one table per status that has at least one entry,
        followed by a blank line"""

        for status, counter, title in self._TABLES:
            if getattr(self, counter) > 0:
                print('')
                self.print_equal_line()
                print(title)
                self.print_equal_line()

                for entry in self.output_list:
                    if entry[0] == status:
                        print('%s' % (entry[1], ))

        # Print a blank line at the end
        print('')

    def add_file(self, status, filename):
        """Record the result for one parset.

        status must be one of the codes listed in the class docstring;
        anything else raises ValueError and records nothing.
        """

        for code, counter, _title in self._TABLES:
            if status == code:
                setattr(self, counter, getattr(self, counter) + 1)
                break
        else:
            # We have a bad value, so raise a ValueError
            raise ValueError('unknown status code: %r' % (status, ))

        self.output_list.append([status, filename])
284
 
285
# This is the global output variable
286
# Shared result collector: parset.extract() and get_parsets() record into it,
# and main() prints its summary table at the end of the run.
output = rarslave_output()
287
 
288
################################################################################
129 ira 289
# The PAR2 Parser
290
#
291
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
292
################################################################################
293
 
294
import struct, errno
295
 
296
def chompnulls(line):
    """Return line cut off just before its first NUL character
    (the whole line if it contains none)"""
    return line.split('\0', 1)[0]
300
 
301
def get_par2_filenames(filename):
    """Get all of the filenames that are protected by the par2
    file given as the filename.

    Every packet's MD5 hash is checked while parsing.

    Returns the list of protected file names in the order their file
    description packets appear, or an empty list (after printing a message)
    if the par2 file cannot be opened.

    Raises EnvironmentError (errno.EINVAL) if the file is truncated or
    corrupt, has no main packet, or lacks a file description packet for a
    file listed in the main packet.
    """

    # hashlib replaced the md5 module in Python 2.5; keep working on both.
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        from md5 import new as new_md5

    try:
        par2_file = open(filename, 'rb')
    except (IOError, OSError):
        print('Could not open %s' % (filename, ))
        return []

    # Byte-string constants, built so that they compare equal to data read
    # from a binary file on both Python 2 and Python 3.
    nul = '\0'.encode('ascii')
    packet_magic = 'PAR2\0PKT'.encode('ascii')
    filedesc_type = 'PAR 2.0\0FileDesc'.encode('ascii')
    main_type = 'PAR 2.0\0Main\0\0\0\0'.encode('ascii')

    pkt_header_fmt = '< 8s Q 16s 16s 16s'
    pkt_header_size = struct.calcsize(pkt_header_fmt)
    file_pkt_fmt = '< 16s 16s 16s Q'
    file_pkt_size = struct.calcsize(file_pkt_fmt)
    main_pkt_fmt = '< Q I'
    main_pkt_size = struct.calcsize(main_pkt_fmt)

    seen_file_ids = {}
    expected_file_ids = None
    filenames = []

    try:
        try:
            while True:
                header = par2_file.read(pkt_header_size)
                if not header:
                    break

                if len(header) < pkt_header_size:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - truncated packet header")

                magic, pkt_len, pkt_md5, _set_id, pkt_type = \
                    struct.unpack(pkt_header_fmt, header)

                if magic != packet_magic or pkt_len < pkt_header_size:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - bad packet header")

                body = par2_file.read(pkt_len - pkt_header_size)

                # The packet hash covers everything after the hash field
                # itself: the rest of the header (offset 0x20 on) plus the body.
                control_md5 = new_md5()
                control_md5.update(header[0x20:])
                control_md5.update(body)

                if control_md5.digest() != pkt_md5:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - bad packet hash")

                if pkt_type == filedesc_type:
                    file_id = struct.unpack(file_pkt_fmt, body[:file_pkt_size])[0]

                    if file_id not in seen_file_ids:
                        seen_file_ids[file_id] = 1
                        # The name is NUL-padded; keep what precedes the first NUL
                        name = body[file_pkt_size:].split(nul, 1)[0]
                        if not isinstance(name, str):
                            # Python 3 reads bytes; hand callers text so the
                            # names work with their str regexes.
                            name = name.decode('utf-8', 'surrogateescape')
                        filenames.append(name)

                elif pkt_type == main_type:
                    if expected_file_ids is None:
                        if len(body) < main_pkt_size:
                            raise EnvironmentError(errno.EINVAL,
                                "corrupt par2 file - truncated main packet")

                        # After the fixed fields the main packet is a list of
                        # 16-byte file ids (recovery and non-recovery files).
                        id_count = (len(body) - main_pkt_size) // 16
                        expected_file_ids = [
                            body[i:i+16]
                            for i in range(main_pkt_size,
                                           main_pkt_size + id_count * 16, 16)]

                # Every other packet type only needed its hash verified.

        except struct.error:
            # A packet too short for its fixed fields; report it the same way
            # as the other kinds of corruption so callers need one handler.
            raise EnvironmentError(errno.EINVAL,
                "corrupt par2 file - truncated packet")
    finally:
        par2_file.close()

    if expected_file_ids is None:
        raise EnvironmentError(errno.EINVAL,
            "corrupt or unsupported par2 file - no main packet found")

    for file_id in expected_file_ids:
        if file_id not in seen_file_ids:
            raise EnvironmentError(errno.EINVAL,
                "corrupt or unsupported par2 file - "
                "expected file description packet not found")

    return filenames
382
 
383
################################################################################
384
# The parset object
385
#
386
# This is an object based representation of a parset, and will verify itself
387
# and extract itself, if possible.
388
################################################################################
389
 
132 ira 390
import os, glob, re
129 ira 391
 
392
class parset:
    """One par2 set: a main par2 file, any further par2 files describing the
    same files, and the list of files they protect.

    The set can verify itself (cfv first, par2repair as the slower fallback,
    lxsplit for split video files) and then extract the rar archive it
    contains. External programs are started without a shell, so file names
    taken from par2 files are never interpreted as shell syntax.
    """

    def __init__(self, par_filename):
        self.parfile = par_filename
        self.extra_pars = []
        # Filled in by the caller; an empty list keeps the loops below safe
        # when it never is.
        self.files = []
        self.used_parjoin = False
        self.verified = False
        self.extracted = False

    def __run(self, argv):
        """Run an external program; return True only if it exits with
        status 0. A program that cannot be started counts as a failure."""
        try:
            return subprocess.call(argv) == 0
        except OSError:
            print('Could not run program: %s' % (argv[0], ))
            return False

    def __remove_file(self, path):
        """Remove one file, reporting (not raising) a failure so that the
        rest of the cleanup still happens"""
        try:
            os.remove(path)
        except OSError:
            print('Failed to remove file: %s' % (path, ))

    def print_debug_info(self):
        """Special function for debugging"""
        print('========== DEBUG INFO STARTS HERE ==========')
        print('=== parfile ===')
        print(self.parfile)
        print('')

        print('=== extra_pars ===')
        for f in self.extra_pars:
            print(f)

        print('')

        print('=== files ===')
        for f in self.files:
            print(f)

        print('=========== DEBUG INFO ENDS HERE ===========')

    def get_filenames(self):
        """Parse the main par2 file and return the names it protects"""
        return get_par2_filenames(self.parfile)

    def all_there(self):
        """Check if all the files for the parset are present.
        This will help us decide which par2 checker to use first"""
        for f in self.files:
            if not os.path.isfile(f):
                return False

        # The files were all there
        return True

    def verify(self):
        """This will verify the parset by the most efficient method first,
        and then move to a slower method if that one fails.

        Stores the result in self.verified and returns it."""

        # if all the files are there, try verifying fast
        if self.all_there():
            retval = self.__fast_verify()

            if not retval:
                # Failed to verify fast, so try it slow, maybe it needs repair
                retval = self.__slow_verify()

        # If we've got a video file, maybe we should try to parjoin it
        elif self.__has_video_file():
            retval = self.__parjoin()

        else: #not all there, maybe we can slow-repair
            retval = self.__slow_verify()

        self.verified = retval
        return self.verified

    def __fast_verify(self):
        """Verify with cfv; True on success"""
        return self.__run(['cfv', '-v', '-f', self.parfile])

    def __slow_verify(self):
        """Verify (and repair if needed) with par2repair; True on success"""
        return self.__run(['par2repair', self.parfile])

    def __parjoin(self):
        """Join a split file with lxsplit, then verify the result.

        If verification fails, the file created by lxsplit is removed again.
        """
        joined = self.files[0]

        # lxsplit's own exit status is not used: the verification below
        # decides whether the join produced a good file.
        self.__run(['lxsplit', '-j', joined + '.001'])

        retval = self.__fast_verify()

        if not retval:
            # Failed to verify fast, so try it slow, maybe it needs repair
            retval = self.__slow_verify()

        if not retval: # failed to verify, so remove the lxsplit created file
            self.__remove_file(joined)

        self.used_parjoin = retval
        self.verified = retval
        return self.verified

    def __has_video_file(self):
        """True if any protected file matches the configured video regex"""
        regex = re.compile(
                config.get_value('regular expressions', 'video_file_regex'),
                re.IGNORECASE)

        for f in self.files:
            if regex.match(f):
                return True

        return False

    def __remove_currentset(self):
        """Remove all of the files that are extractable, as well as the pars.
        Leave everything else alone. A file that cannot be removed is
        reported and skipped."""

        if not self.extracted:
            print('Did not extract yet, not removing currentset')
            return

        # remove the main par
        self.__remove_file(self.parfile)

        # remove all of the extra pars
        for i in self.extra_pars:
            self.__remove_file(i)

        # remove any rars that are associated (leave EVERYTHING else)
        # This regex matches both old and new style rar(s) by default.
        regex = re.compile(
                config.get_value('regular expressions', 'remove_regex'),
                re.IGNORECASE)

        for i in self.files:
            if regex.match(i):
                self.__remove_file(i)

        # remove any .{001,002,...} files (from parjoin): only names made of
        # the joined file's name plus a purely numeric extension
        if self.used_parjoin:
            prefix = self.files[0] + '.'
            for i in os.listdir(os.getcwd()):
                if i.startswith(prefix) and i[len(prefix):].isdigit():
                    self.__remove_file(i)

        # remove any temp repair files
        regex = re.compile(
                config.get_value('regular expressions', 'temp_repair_regex'),
                re.IGNORECASE)

        for f in os.listdir(os.getcwd()):
            if regex.match(f):
                self.__remove_file(f)

    def __get_extract_file(self):
        """Find the file to hand to rar, or None if the set has no rar.

        The extension is compared case-insensitively, like the removal
        regex, so an archive is never deleted without having been extracted.
        Of several .rar files the alphabetically first is chosen, which is
        the first volume of a "partNN.rar" series.
        """
        candidates = [i for i in self.files
                      if os.path.splitext(i)[1].lower() == '.rar']

        if candidates:
            return min(candidates)

        return None

    def extract(self):
        """Attempt to extract all of the files related to this parset.

        Records the outcome in the global output object, removes the set's
        par and rar files after a successful extraction, and returns True
        on success and False otherwise."""
        if not self.verified:
            self.extracted = False
            output.add_file(1, self.parfile)
            return False #failed to extract

        extract_file = self.__get_extract_file()

        if extract_file is not None:
            # 'x' keeps the paths stored in the archive, 'e' flattens them
            if config.get_value('options', 'extract_with_full_path'):
                mode = 'x'
            else:
                mode = 'e'

            # '--' ends the options, so names beginning with a hyphen work
            if not self.__run(['rar', mode, '-o+', '--', extract_file]):
                output.add_file(2, self.parfile)
                self.extracted = False
                return self.extracted

        # we extracted ok, so remove the currentset
        self.extracted = True
        self.__remove_currentset()

        output.add_file(0, self.parfile)

        return self.extracted
575
 
576
 
577
################################################################################
578
# The rarslave program itself
579
################################################################################
580
 
134 ira 581
import os, sys
129 ira 582
from optparse import OptionParser
583
 
584
def check_required_progs(programs=('cfv', 'par2repair', 'lxsplit', 'rar')):
    """Check if the required programs are installed.

    Each name in programs is run through the shell with --help. Every
    program the shell cannot find is reported, and the process then exits
    with status 1. Returns None when all programs were found.

    The names are inserted into a shell command, so they must be trusted
    values (the default is the fixed list of tools rarslave uses).
    """

    # os.system() result when the shell exits with 127 ("command not found")
    shell_not_found = 32512

    needed = [name for name in programs
              if os.system('%s --help > /dev/null 2>&1' % (name, )) == shell_not_found]

    if needed:
        for n in needed:
            print('Needed program "%s" not found in $PATH' % (n, ))

        sys.exit(1)
607
 
608
def get_parsets():
    """Get a representation of each parset in the current directory, and
    return them as a list of parset instances.

    Par2 files that protect the same list of files are grouped into one
    parset: the first becomes its main par file, the others are added to
    extra_pars. A par2 file that cannot be parsed is recorded in the global
    output object as corrupt (status 3) and skipped.
    """

    regex = re.compile(
            config.get_value('regular expressions', 'par2_regex'),
            re.IGNORECASE)

    # Sorted so the result does not depend on the order os.listdir() happens
    # to return
    par2files = sorted([f for f in os.listdir(os.getcwd()) if regex.match(f)])

    parsets = []

    for par2file in par2files:
        try:
            filenames = get_par2_filenames(par2file)
        except (EnvironmentError, struct.error):
            # struct.error: a file too short to hold a complete packet
            output.add_file(3, par2file)
            continue

        # if we already have an instance for this set, append
        # this par file to the extra_pars field
        for known in parsets:
            if known.files == filenames:
                known.extra_pars.append(par2file)
                break
        else:
            # we haven't seen this set yet, so we'll create it now
            cur = parset(par2file)
            cur.files = filenames
            parsets.append(cur)

    return parsets
641
 
150 ira 642
def directory_worker(dir, options):
    """Attempts to find, verify, and extract every parset in the directory
    given as a parameter.

    If options.debug_info is set, the debug info of every parset is printed
    instead and nothing is verified or extracted. The previous working
    directory is restored afterwards, even if processing raises.
    """

    cwd = os.getcwd()
    os.chdir(dir)

    try:
        parsets = get_parsets()

        # Print debug info if we're supposed to
        if options.debug_info:
            for p in parsets:
                p.print_debug_info()

        # No debug info
        else:

            # Verify each parset
            for p in parsets:
                p.verify()

            # Attempt to extract each parset
            for p in parsets:
                p.extract()
    finally:
        os.chdir(cwd)
668
 
669
def main():
    """Command-line entry point: parse the options (whose defaults come from
    the config), do the requested housekeeping, process the working
    directory, and print the results table."""

    # Each entry: (option flags, keyword arguments for add_option)
    option_specs = [
        (('-n', '--not-recursive'),
         dict(action='store_false', dest='recursive',
              default=config.get_value('options', 'recursive'),
              help="Don't run recursively")),
        (('-d', '--work-dir'),
         dict(dest='work_dir',
              default=config.get_value('directories', 'working_directory'),
              help="Start running at DIR", metavar='DIR')),
        (('-p', '--check-required-programs'),
         dict(action='store_true', dest='check_progs',
              default=config.get_value('options', 'check_required_programs'),
              help="Check for required programs")),
        (('-f', '--write-default-config'),
         dict(action='store_true', dest='write_def_config',
              default=False, help="Write out a new default config")),
        (('-c', '--write-new-config'),
         dict(action='store_true', dest='write_config',
              default=False, help="Write out the current config")),
        (('-o', '--output-debug-info'),
         dict(action='store_true', dest='debug_info',
              default=False,
              help="Output debug info for every parset, then exit")),
    ]

    cli = OptionParser()
    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)

    (options, args) = cli.parse_args()

    # Turn the working directory into an absolute path with ~ expanded
    start_dir = os.path.abspath(os.path.expanduser(options.work_dir))
    options.work_dir = start_dir

    # Optional housekeeping requested on the command line
    if options.check_progs:
        check_required_progs()

    if options.write_def_config:
        # Overwrites any user changes with the defaults
        config.write_config(default=True)

    if options.write_config:
        # Adds new options to an existing config
        config.write_config()

    # Run rarslave!
    if not options.recursive:
        directory_worker(start_dir, options)
    else:
        for root, dirs, files in os.walk(start_dir):
            directory_worker(root, options)

    # Print the results
    output.print_results_table()
728
 
129 ira 729
# Run the command-line entry point only when executed as a script,
# not when this file is imported as a module.
if __name__ == '__main__':
    main()
731