Subversion Repositories programming

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
129 ira 1
#!/usr/bin/env python
2
 
3
# Copyright: Ira W. Snyder (devel@irasnyder.com)
4
# Start Date: 2005-10-13
5
# End Date:
6
# License: GNU General Public License v2 (or at your option, any later version)
7
#
8
# Changelog Follows:
9
# - 2005-10-13
10
# - Added get_par2_filenames() to parse par2 files
11
# - Added the parset object to represent each parset.
12
#
13
# - 2005-10-14
14
# - Finished the parset object. It will now verify and extract parsets.
15
# - Small changes to the parset object. This makes the parjoin part
16
#   much more reliable.
17
# - Added the OptionParser to make this nice to run at the command line.
18
# - Made recursiveness an option.
19
# - Made start directory an option.
20
# - Check for appropriate programs before starting.
21
#
134 ira 22
# - 2005-10-17
23
# - Use a regular expression to handle the deletable types.
24
#
25
# - 2005-10-18
26
# - Use regular expressions to handle all finding of files, instead of
27
#   using the glob module.
28
# - Add a config class to handle all the default config stuff sanely.
29
#   This makes it easier to change some of the main parts of the program to
30
#   your specific configuration.
31
# - Move the docrcchecks variable inside the get_par2_filenames() function,
32
#   which is where it belongs anyway.
33
# - Added command-line option to check for required programs at start.
34
#
139 ira 35
# - 2005-10-20
36
# - Added a config option to extract with full path.
37
#
141 ira 38
# - 2005-10-22
39
# - Re-wrote the config class so that there is a config file, which
40
#   resides at ~/.config/rarslave/rarslave.conf by default.
41
# - Added the command-line option -c to write out an updated version
42
#   of the config file (to fill in any missing options with the defaults)
43
# - Added the command-line option -f to write out a new default config file,
44
#   which overwrites any user changes.
45
# - Made all regexes case insensitive.
46
# - Made all command-line options override the config file.
47
#
150 ira 48
# - 2005-10-30
49
# - Added the '-o' option, to output debugging info. Hopefully next time
50
#   someone finds a bug, they can output this and send it to me with a
51
#   description of the bug they're seeing.
52
#
152 ira 53
# - 2005-11-05
54
# - Added an output system to rarslave. This makes a nice status report
55
#   possible at the end of the program run.
56
#
129 ira 57
 
58
################################################################################
59
# REQUIREMENTS:
60
#
61
# This code requires the programs cfv, par2repair, lxsplit, and rar to be able
62
# to function properly. I will attempt to check that these are in your path.
63
################################################################################
64
 
141 ira 65
import ConfigParser, os
66
 
134 ira 67
class rarslave_config:
    """Hold the rarslave configuration: built-in defaults plus user overrides.

    Values are addressed by (section, option) pairs.  A value found in the
    user's config file wins over the built-in default.  Creating an instance
    reads the config file, writing out a default one first if it is missing.
    """

    def __read_config(self, filename='~/.config/rarslave/rarslave.conf'):
        """Attempt to open and read the rarslave config file.

        Returns a dictionary mapping (section, option) to the string value
        stored in the file.  If the file does not exist, a default config is
        written to that same location first."""

        # Make sure the filename is corrected
        filename = os.path.abspath(os.path.expanduser(filename))

        # Write the default config if it doesn't exist.  The filename is
        # passed along so the file we create is the one we read below.
        if not os.path.isfile(filename):
            self.write_config(filename, default=True)

        parser = ConfigParser.ConfigParser()
        parser.read(filename)

        user_config = {}

        for section in parser.sections():
            for option in parser.options(section):
                user_config[(section, option)] = parser.get(section, option)

        return user_config

    def write_config(self, filename='~/.config/rarslave/rarslave.conf', default=False):
        """Write out the current config to the config file. If you set default=True, then
        the default config file will be written (all user settings are dropped).

        The directory holding the file is created if necessary.  If the
        directory or the file cannot be written, a message is printed and the
        program exits with status 1."""

        # Imported here because this method runs while the global config
        # object is being built, which happens before this file's
        # module-level 'import os, sys' statement has executed.
        import sys

        parser = ConfigParser.ConfigParser()

        # Correct filename
        filename = os.path.abspath(os.path.expanduser(filename))

        # Reset all config to make sure we write the default one, if necessary
        if default:
            self.__user_config = {}
            print('Writing default config to %s' % (filename, ))

        # One section per group of options, each filled from the known keys.
        # Values are written as strings (ConfigParser stringifies them when
        # writing anyway), so booleans end up as 'True' / 'False'.
        for section in ('directories', 'options', 'regular expressions'):
            parser.add_section(section)
            for (s, k) in self.__defaults.keys():
                if s == section:
                    parser.set(s, k, str(self.get_value(s, k)))

        # Try to make the directory that holds the config file
        directory = os.path.dirname(filename)
        if not os.path.isdir(directory):
            try:
                os.makedirs(directory)
            except OSError:
                print('Could not make directory: %s' % (directory, ))
                sys.exit(1)

        # Try to write the config file to disk
        try:
            fsock = open(filename, 'w')
            try:
                parser.write(fsock)
            finally:
                fsock.close()
        except (IOError, OSError):
            print('Could not open: %s for writing' % (filename, ))
            sys.exit(1)

    def __get_default_val(self, section, key):
        """Return the built-in default for (section, key).

        Raises KeyError for an option that has no default."""
        return self.__defaults[(section, key)]

    def get_value(self, section, key):
        """Get a config value. Attempts to get the value from the user's
        config first, and then uses the default.

        The strings 'True' and 'False' (as read back from a config file) are
        returned as real booleans.  Raises KeyError if the option is unknown
        to both the user config and the defaults."""

        try:
            value = self.__user_config[(section, key)]
        except KeyError:
            # Not set by the user, so every valid option must have a default
            value = self.__get_default_val(section, key)

        # Convert config options to booleans for easier use
        if value == 'True':
            value = True

        if value == 'False':
            value = False

        return value

    def __init__(self):
        # Raw strings keep the regex backslashes literal
        self.__defaults = {
            ('directories', 'working_directory') : '~/downloads/usenet',
            ('options', 'recursive') : True,
            ('options', 'check_required_programs') : False,
            ('options', 'extract_with_full_path') : False,
            ('regular expressions', 'par2_regex') : r'.*\.par2$',
            ('regular expressions', 'video_file_regex') : r'.*\.(avi|ogm|mkv|mp4)$',
            ('regular expressions', 'temp_repair_regex') : r'.*\.1$',
            ('regular expressions', 'remove_regex') : r'^.*\.(rar|r\d\d)$' }

        self.__user_config = self.__read_config()
177
 
178
# This is the global config variable.
134 ira 179
# Built at import time: constructing it reads the user's config file, and
# writes a default one first if none exists yet.
config = rarslave_config()
141 ira 180
 
129 ira 181
################################################################################
152 ira 182
# The rarslave_output class
183
#
184
# This class handles the nice output summary which is printed at the end
185
# of a run
186
################################################################################
187
 
188
class rarslave_output:
    """Collect the result of every parset and print a summary at the end.

    Data structure: list of lists
    [ [status, filename], ... ]

    Where status is one of:
    0: Verified and Extracted Perfectly
    1: Failed to Verify (and therefore Extract)
    2: Verified correctly, but failed to Extract
    3: The par2 file itself was corrupt
    """

    def __init__(self):
        self.output_list    = []
        self.good_files     = 0
        self.unverified     = 0
        self.unextractable  = 0
        self.corrupt_par2   = 0

    def print_equal_line(self, size=80):
        """Print a line of equal signs, size characters wide (80 by default)"""

        print('=' * size)

    def print_results_table(self):
        """Print a nice table of the results from this run.

        One section is printed for each status that has at least one file,
        followed by a blank line."""

        # (status, number of files with that status, section heading)
        sections = (
            (0, self.good_files, 'Files that were extracted perfectly'),
            (1, self.unverified, 'Files that failed to verify (and extract)'),
            (2, self.unextractable, 'Files that were verified, but failed to extract'),
            (3, self.corrupt_par2, 'Files that had corrupt par2 files'),
        )

        for (status, count, heading) in sections:
            # Skip the sections we have nothing to report for
            if count <= 0:
                continue

            print('')
            self.print_equal_line()
            print(heading)
            self.print_equal_line()

            for entry in self.output_list:
                if entry[0] == status:
                    print('%s' % (entry[1], ))

        # Print a blank line at the end
        print('')

    def add_file(self, status, filename):
        """Record the result for one parset.

        status is one of the codes listed in the class documentation and
        filename is the name to show in the results table.  Raises ValueError
        (without recording anything) for an unknown status."""

        if status == 0:
            self.good_files += 1
        elif status == 1:
            self.unverified += 1
        elif status == 2:
            self.unextractable += 1
        elif status == 3:
            self.corrupt_par2 += 1
        else:
            # We have a bad value, so raise a ValueError
            raise ValueError('unknown status: %r' % (status, ))

        self.output_list.append([status, filename])
280
 
281
# This is the global output variable
282
# Module-wide results collector: add_file() records one result per par2 set
# and print_results_table() prints the summary.
output = rarslave_output()
283
 
284
################################################################################
129 ira 285
# The PAR2 Parser
286
#
287
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
288
################################################################################
289
 
290
import struct, errno
291
 
292
def chompnulls(line):
    """Return line cut off at its first NUL character.

    A line without any NUL character is returned unchanged."""
    return line.split('\0', 1)[0]
296
 
297
def get_par2_filenames(filename):
    """Get all of the filenames that are protected by the par2
    file given as the filename.

    Returns the list of protected filenames (byte strings, exactly as stored
    in the par2 file) in the order their file description packets first
    appear.  If the par2 file cannot be opened, a message is printed and an
    empty list is returned.

    Raises EnvironmentError (errno.EINVAL) if the file is truncated, a packet
    has a bad header or hash, no main packet is present, or a file listed in
    the main packet has no file description packet."""

    # hashlib superseded the old md5 module; fall back for ancient Pythons
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        from md5 import new as new_md5

    try:
        par2_file = open(filename, 'rb')
    except (IOError, OSError):
        print('Could not open %s' % (filename, ))
        return []

    pkt_magic = b'PAR2\0PKT'
    pkt_header_fmt = '< 8s Q 16s 16s 16s'
    pkt_header_size = struct.calcsize(pkt_header_fmt)
    file_pkt_fmt = '< 16s 16s 16s Q'
    file_pkt_size = struct.calcsize(file_pkt_fmt)
    main_pkt_fmt = '< Q I'
    main_pkt_size = struct.calcsize(main_pkt_fmt)

    seen_file_ids = {}
    expected_file_ids = None
    filenames = []

    # The try/finally makes sure the file is closed however we leave the loop
    try:
        while 1:
            header = par2_file.read(pkt_header_size)
            if not header:
                break

            # A partial header would make struct.unpack() raise struct.error,
            # which callers watching for EnvironmentError would not catch
            if len(header) < pkt_header_size:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - truncated packet header")

            magic, pkt_len, pkt_md5, set_id, pkt_type = \
                struct.unpack(pkt_header_fmt, header)

            # The packet length includes the header, so it can't be smaller
            if magic != pkt_magic or pkt_len < pkt_header_size:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt or unsupported par2 file - bad packet header")

            body = par2_file.read(pkt_len - pkt_header_size)

            # We always want to do crc checks.  The hash covers everything
            # after the hash field itself: the rest of the header (from
            # offset 0x20 on) plus the packet body.
            control_md5 = new_md5()
            control_md5.update(header[0x20:])
            control_md5.update(body)

            if control_md5.digest() != pkt_md5:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - bad packet hash")

            if pkt_type == b'PAR 2.0\0FileDesc':
                if len(body) < file_pkt_size:
                    raise EnvironmentError(errno.EINVAL,
                        "corrupt par2 file - short file description packet")

                # Only the file id is needed from the fixed-size fields
                file_id = struct.unpack(file_pkt_fmt, body[:file_pkt_size])[0]

                if file_id not in seen_file_ids:
                    seen_file_ids[file_id] = 1
                    # The name follows the fixed-size fields and is NUL
                    # padded; keep everything before the first NUL
                    filenames.append(body[file_pkt_size:].split(b'\0', 1)[0])

            elif pkt_type == b'PAR 2.0\0Main\0\0\0\0':
                # Only the first main packet is used
                if expected_file_ids is None:
                    if len(body) < main_pkt_size:
                        raise EnvironmentError(errno.EINVAL,
                            "corrupt par2 file - short main packet")

                    # Everything after the fixed-size fields is a list of
                    # 16 byte file ids (recovery set and non-recovery set)
                    id_count = (len(body) - main_pkt_size) // 16
                    id_end = main_pkt_size + id_count * 16
                    expected_file_ids = [body[i:i+16]
                        for i in range(main_pkt_size, id_end, 16)]

            # Every other packet type has been read and checked; ignore it
    finally:
        par2_file.close()

    if expected_file_ids is None:
        raise EnvironmentError(errno.EINVAL,
            "corrupt or unsupported par2 file - no main packet found")

    for expected_id in expected_file_ids:
        if expected_id not in seen_file_ids:
            raise EnvironmentError(errno.EINVAL,
                "corrupt or unsupported par2 file - "
                "expected file description packet not found")

    return filenames
378
 
379
################################################################################
380
# The parset object
381
#
382
# This is an object based representation of a parset, and will verify itself
383
# and extract itself, if possible.
384
################################################################################
385
 
132 ira 386
import os, glob, re
129 ira 387
 
388
class parset:
    """One par2 set: the main par2 file, any extra par2 files that protect
    the same files, and the protected files themselves.

    A parset can verify (and repair) itself with external programs, extract
    the rar archive it contains, and clean up after a successful extract."""

    def __init__(self, par_filename):
        self.parfile = par_filename
        self.extra_pars = []
        # Must be set to the list of protected filenames before use
        self.files = False
        self.used_parjoin = False
        self.verified = False
        self.extracted = False

    def print_debug_info(self):
        """Special function for debugging"""
        print('========== DEBUG INFO STARTS HERE ==========')
        print('parfile: %s' % (self.parfile, ))
        print('extra_pars: %s' % (self.extra_pars, ))
        print('files: %s' % (self.files, ))
        print('========== DEBUG INFO ENDS HERE ==========')

    def get_filenames(self):
        """Return the filenames protected by the main par2 file"""
        return get_par2_filenames(self.parfile)

    def all_there(self):
        """Check if all the files for the parset are present.
        This will help us decide which par2 checker to use first"""
        for f in self.files:
            if not os.path.isfile(f):
                return False

        # The files were all there
        return True

    def verify(self):
        """This will verify the parset by the most efficient method first,
        and then move to a slower method if that one fails.

        Returns True if the set verified (possibly after a repair), and
        stores the same value in self.verified."""

        retval = False #not verified yet

        # if all the files are there, try verifying fast
        if self.all_there():
            retval = self.__fast_verify()

            if retval == False:
                # Failed to verify fast, so try it slow, maybe it needs repair
                retval = self.__slow_verify()

        # If we've got a video file, maybe we should try to parjoin it
        elif self.__has_video_file():
            retval = self.__parjoin()

        else: #not all there, maybe we can slow-repair
            retval = self.__slow_verify()

        self.verified = retval
        return self.verified

    def __run(self, args):
        """Run an external program and return its exit status (0 is success).

        args is the argument list, program name first.  The program is
        started directly rather than through a shell, so quotes, dollar
        signs and the like in a filename are never treated as shell syntax.
        A program that cannot be started counts as a failure."""

        # Imported here so this class does not depend on a file-level import
        import subprocess

        try:
            return subprocess.call(args)
        except OSError:
            print('Could not run program: %s' % (args[0], ))
            return 127

    def __remove_file(self, path):
        """Remove one file, printing a message instead of failing if we can't"""
        try:
            os.remove(path)
        except OSError:
            print('Failed to remove file: %s' % (path, ))

    def __fast_verify(self):
        """Verify the set with cfv. Returns True on success"""
        return self.__run(['cfv', '-v', '-f', self.parfile]) == 0

    def __slow_verify(self):
        """Verify (and repair if needed) the set with par2repair.
        Returns True on success"""
        return self.__run(['par2repair', self.parfile]) == 0

    def __parjoin(self):
        """Join the split pieces of the first file with lxsplit, then verify
        the result. Returns True if the joined file verified"""

        # The exit status is not checked: a failed join shows up as a failed
        # verification below
        self.__run(['lxsplit', '-j', '%s.001' % (self.files[0], )])

        retval = self.__fast_verify()

        if retval == False:
            # Failed to verify fast, so try it slow, maybe it needs repair
            retval = self.__slow_verify()

        if retval == False: # failed to verify, so remove the lxsplit created file
            self.__remove_file(self.files[0])

        self.used_parjoin = retval
        self.verified = retval
        return self.verified

    def __has_video_file(self):
        """Check if any protected file looks like a video file"""
        regex = re.compile(
                config.get_value('regular expressions', 'video_file_regex'),
                re.IGNORECASE)

        for f in self.files:
            if regex.match(f):
                return True

        return False

    def __remove_currentset(self):
        """Remove all of the files that are extractable, as well as the pars.
        Leave everything else alone.

        A file that cannot be removed is reported and skipped, so one
        leftover file does not abort the rest of the run."""

        if not self.extracted:
            print('Did not extract yet, not removing currentset')
            return

        # remove the main par
        self.__remove_file(self.parfile)

        # remove all of the extra pars
        for i in self.extra_pars:
            self.__remove_file(i)

        # remove any rars that are associated (leave EVERYTHING else)
        # This regex matches both old and new style rar(s) by default.
        regex = re.compile(
                config.get_value('regular expressions', 'remove_regex'),
                re.IGNORECASE)

        for i in self.files:
            if regex.match(i):
                self.__remove_file(i)

        # remove any .{001,002,...} files (from parjoin)
        # NOTE(review): this is a substring test, so any other file in the
        # directory whose name contains the joined file's name goes too
        if self.used_parjoin:
            for i in os.listdir(os.getcwd()):
                if i != self.files[0] and self.files[0] in i:
                    self.__remove_file(i)

        # remove any temp repair files
        # NOTE(review): the regex is tried against every file in the current
        # directory, not only the files that belong to this set
        regex = re.compile(
                config.get_value('regular expressions', 'temp_repair_regex'),
                re.IGNORECASE)

        for f in os.listdir(os.getcwd()):
            if regex.match(f):
                self.__remove_file(f)

    def __get_extract_file(self):
        """Find the first extractable file.

        The extension is compared without regard to case, like the regex
        that later removes the rars: a file that would be removed as a rar
        must also be recognized as one here, or it is deleted unextracted.
        Returns None if there is nothing to extract."""
        for i in self.files:
            if os.path.splitext(i)[1].lower() == '.rar':
                return i

        return None

    def extract(self):
        """Attempt to extract all of the files related to this parset.

        The outcome is recorded in the global output object.  Returns True
        if the set was extracted (or had nothing to extract) and cleaned up,
        False if it was not verified or the extraction failed."""
        if not self.verified:
            self.extracted = False
            output.add_file(1, self.parfile)
            return False #failed to extract

        extract_file = self.__get_extract_file()

        if extract_file is not None:
            # 'x' keeps the paths stored in the archive, 'e' flattens them
            if config.get_value('options', 'extract_with_full_path'):
                retval = self.__run(['rar', 'x', '-o+', extract_file])
            else:
                retval = self.__run(['rar', 'e', '-o+', extract_file])

            if retval != 0:
                output.add_file(2, self.parfile)
                self.extracted = False
                return self.extracted

        # we extracted ok, so remove the currentset
        self.extracted = True
        self.__remove_currentset()

        output.add_file(0, self.parfile)

        return self.extracted
560
 
561
 
562
################################################################################
563
# The rarslave program itself
564
################################################################################
565
 
134 ira 566
import os, sys
129 ira 567
from optparse import OptionParser
568
 
569
def check_required_progs(programs=('cfv', 'par2repair', 'lxsplit', 'rar')):
    """Check if the required programs are installed.

    programs is the sequence of program names to look for.  Each one is run
    with --help through the shell; if the shell reports that it could not
    find the program, the program counts as missing.  When anything is
    missing, every missing program is listed and we exit with status 1."""

    # What os.system() returns when the shell could not find the command:
    # exit status 127 in the high byte, i.e. 32512
    shell_not_found = 127 << 8

    needed = [prog for prog in programs
              if os.system('%s --help > /dev/null 2>&1' % (prog, )) == shell_not_found]

    if needed:
        for n in needed:
            print('Needed program "%s" not found in $PATH' % (n, ))

        sys.exit(1)
592
 
593
def get_parsets():
    """Build one parset instance for every distinct par2 set found in the
    current directory, and return the instances as a list.

    Par2 files that protect the same list of files are folded into a single
    parset.  Par2 files that cannot be parsed are recorded in the global
    output object as corrupt and skipped."""

    par2_pattern = re.compile(
            config.get_value('regular expressions', 'par2_regex'),
            re.IGNORECASE)

    found_sets = []

    for par_name in os.listdir(os.getcwd()):
        # only par2 files are interesting
        if not par2_pattern.match(par_name):
            continue

        try:
            protected = get_par2_filenames(par_name)
        except EnvironmentError:
            output.add_file(3, par_name)
            continue

        # a set we already know about just gets this par file added
        # to its extra_pars field
        same_set = [known for known in found_sets if known.files == protected]

        for known in same_set:
            known.extra_pars.append(par_name)

        # a set we haven't seen yet gets a new instance
        if not same_set:
            fresh = parset(par_name)
            fresh.files = protected
            found_sets.append(fresh)

    return found_sets
626
 
150 ira 627
def directory_worker(dir, options):
    """Attempts to find, verify, and extract every parset in the directory
    given as a parameter.

    If options.debug_info is set, the debug info of every parset is printed
    instead and nothing is verified or extracted.  The work is done from
    inside dir; the previous working directory is restored afterwards, even
    if something goes wrong."""

    cwd = os.getcwd()
    os.chdir(dir)

    try:
        parsets = get_parsets()

        # Print debug info if we're supposed to
        if options.debug_info:
            for p in parsets:
                p.print_debug_info()

        # No debug info
        else:

            # Verify each parset
            for p in parsets:
                p.verify()

            # Attempt to extract each parset
            for p in parsets:
                p.extract()

    finally:
        os.chdir(cwd)
653
 
654
def main():
    """Run rarslave from the command line.

    Parses the options (their defaults come from the config), performs the
    requested program check and config writes, processes the working
    directory (recursively unless told otherwise), and prints the results
    table."""

    # Every command-line option: (flags, keyword settings), in help order
    option_table = (
        (('-n', '--not-recursive'),
         {'action': 'store_false', 'dest': 'recursive',
          'default': config.get_value('options', 'recursive'),
          'help': "Don't run recursively"}),

        (('-d', '--work-dir'),
         {'dest': 'work_dir',
          'default': config.get_value('directories', 'working_directory'),
          'help': "Start running at DIR", 'metavar': 'DIR'}),

        (('-p', '--check-required-programs'),
         {'action': 'store_true', 'dest': 'check_progs',
          'default': config.get_value('options', 'check_required_programs'),
          'help': "Check for required programs"}),

        (('-f', '--write-default-config'),
         {'action': 'store_true', 'dest': 'write_def_config',
          'default': False, 'help': "Write out a new default config"}),

        (('-c', '--write-new-config'),
         {'action': 'store_true', 'dest': 'write_config',
          'default': False, 'help': "Write out the current config"}),

        (('-o', '--output-debug-info'),
         {'action': 'store_true', 'dest': 'debug_info',
          'default': False,
          'help': "Output debug info for every parset, then exit"}),
    )

    cmdline = OptionParser()
    for (flags, settings) in option_table:
        cmdline.add_option(*flags, **settings)

    # Positional arguments are not used
    options = cmdline.parse_args()[0]

    # Turn the working directory into an absolute path
    options.work_dir = os.path.abspath(os.path.expanduser(options.work_dir))

    # Make sure the external programs are there, if asked to
    if options.check_progs:
        check_required_progs()

    # A new default config was asked for
    if options.write_def_config:
        config.write_config(default=True)

    # The current config was asked for (adds new options to an existing config)
    if options.write_config:
        config.write_config()

    # Run rarslave!
    if not options.recursive:
        directory_worker(options.work_dir, options)
    else:
        for walk_entry in os.walk(options.work_dir):
            directory_worker(walk_entry[0], options)

    # Show what happened
    output.print_results_table()
713
 
129 ira 714
# Only run when executed as a script, not when imported as a module
if __name__ == '__main__':
    main()
716