Subversion Repositories programming

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
129 ira 1
#!/usr/bin/env python
2
 
3
# Copyright: Ira W. Snyder (devel@irasnyder.com)
4
# Start Date: 2005-10-13
5
# End Date:
6
# License: GNU General Public License v2 (or at your option, any later version)
7
#
8
# Changelog Follows:
9
# - 2005-10-13
10
# - Added get_par2_filenames() to parse par2 files
11
# - Added the parset object to represent each parset.
12
#
13
# - 2005-10-14
14
# - Finished the parset object. It will now verify and extract parsets.
15
# - Small changes to the parset object. This makes the parjoin part
16
#   much more reliable.
17
# - Added the OptionParser to make this nice to run at the command line.
18
# - Made recursiveness an option.
19
# - Made start directory an option.
20
# - Check for appropriate programs before starting.
21
#
134 ira 22
# - 2005-10-17
23
# - Use a regular expression to handle the deletable types.
24
#
25
# - 2005-10-18
26
# - Use regular expressions to handle all finding of files, instead of
27
#   using the glob module.
28
# - Add a config class to handle all the default config stuff sanely.
29
#   This makes it easier to change some of the main parts of the program to
30
#   your specific configuration.
31
# - Move the docrcchecks variable inside the get_par2_filenames() function,
32
#   which is where it belongs anyway.
33
# - Added command-line option to check for required programs at start.
34
#
139 ira 35
# - 2005-10-20
36
# - Added a config option to extract with full path.
37
#
141 ira 38
# - 2005-10-22
39
# - Re-wrote the config class so that there is a config file, which
40
#   resides at ~/.config/rarslave/rarslave.conf by default.
41
# - Added the command-line option -c to write out an updated version
42
#   of the config file (to fill in any missing options with the defaults)
43
# - Added the command-line option -f to write out a new default config file,
44
#   which overwrites any user changes.
45
# - Made all regexes case insensitive.
46
# - Made all command-line options override the config file.
47
#
150 ira 48
# - 2005-10-30
49
# - Added the '-o' option, to output debugging info. Hopefully next time
50
#   someone finds a bug, they can output this and send it to me with a
51
#   description of the bug they're seeing.
52
#
129 ira 53
 
54
################################################################################
55
# REQUIREMENTS:
56
#
57
# This code requires the programs cfv, par2repair, lxsplit, and rar to be able
58
# to function properly. I will attempt to check that these are in your path.
59
################################################################################
60
 
141 ira 61
import ConfigParser
import os
import subprocess
import sys
62
 
134 ira 63
class rarslave_config:
    """Hold the program configuration.

    Values are looked up by (section, key). A value found in the user's
    config file takes precedence over the built-in default."""

    def __read_config(self, filename='~/.config/rarslave/rarslave.conf'):
        """Read the config file and return its contents as a dictionary
        mapping (section, option) to the string value.

        If the file does not exist yet, a default config file is written
        to that same location first."""

        # Make sure the filename is corrected
        filename = os.path.abspath(os.path.expanduser(filename))

        # Write the default config if it doesn't exist. Pass the filename
        # along so the file we create is the one we are about to read.
        if not os.path.isfile(filename):
            self.write_config(filename, default=True)

        parser = ConfigParser.ConfigParser()
        parser.read(filename)

        user_config = {}

        for section in parser.sections():
            for option in parser.options(section):
                user_config[(section, option)] = parser.get(section, option)

        return user_config

    def write_config(self, filename='~/.config/rarslave/rarslave.conf', default=False):
        """Write out the current config to the config file. If you set
        default=True, the user's settings are discarded and the default
        config is written instead.

        Exits the program with status 1 if the directory or the file
        cannot be created."""

        parser = ConfigParser.ConfigParser()

        # Correct filename
        filename = os.path.abspath(os.path.expanduser(filename))

        # Reset all config to make sure we write the default one, if necessary
        if default:
            self.__user_config = {}
            print('Writing default config to %s' % (filename, ))

        # One section at a time, in a fixed order
        for section in ('directories', 'options', 'regular expressions'):
            parser.add_section(section)
            for (s, k) in self.__defaults.keys():
                if s == section:
                    # Booleans are stored as the strings 'True' / 'False',
                    # which get_value() converts back when reading.
                    parser.set(s, k, str(self.get_value(s, k)))

        # Try to make the directory that will hold the config file
        directory = os.path.split(filename)[0]
        if not os.path.isdir(directory):
            try:
                os.makedirs(directory)
            except OSError:
                print('Could not make directory: %s' % (directory, ))
                sys.exit(1)

        # Try to write the config file to disk
        try:
            fsock = open(filename, 'w')
            try:
                parser.write(fsock)
            finally:
                fsock.close()
        except (IOError, OSError):
            print('Could not open: %s for writing' % (filename, ))
            sys.exit(1)

    def __get_default_val(self, section, key):
        """Return the built-in default for (section, key).
        Raises KeyError if there is no such default."""
        return self.__defaults[(section, key)]

    def get_value(self, section, key):
        """Get a config value. Attempts to get the value from the user's
        config first, and then uses the default.

        The strings 'True' and 'False' are returned as booleans.
        Raises KeyError if the key is in neither the user's config nor
        the defaults."""

        try:
            value = self.__user_config[(section, key)]
        except KeyError:
            # This should work, unless you write something stupid
            # into the code, so DON'T DO IT
            value = self.__get_default_val(section, key)

        # Convert config options to booleans for easier use
        if value == 'True':
            value = True

        if value == 'False':
            value = False

        return value

    def __init__(self):
        """Set up the defaults, then load the user's config file (writing
        a default one first if none exists)."""
        self.__defaults = {
            ('directories', 'working_directory') : '~/downloads/usenet',
            ('options', 'recursive') : True,
            ('options', 'check_required_programs') : False,
            ('options', 'extract_with_full_path') : False,
            ('regular expressions', 'par2_regex') : r'.*\.par2$',
            ('regular expressions', 'video_file_regex') : r'.*\.(avi|ogm|mkv|mp4)$',
            ('regular expressions', 'temp_repair_regex') : r'.*\.1$',
            ('regular expressions', 'remove_regex') : r'^.*\.(rar|r\d\d)$' }

        self.__user_config = self.__read_config()
173
 
174
# This is the global config variable.
134 ira 175
config = rarslave_config()
141 ira 176
 
129 ira 177
################################################################################
178
# The PAR2 Parser
179
#
180
# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
181
################################################################################
182
 
183
import struct, errno
184
 
185
def chompnulls(line):
    """Return line cut off just before its first NUL character, or line
    unchanged if it contains no NUL."""
    return line.split('\0', 1)[0]
189
 
190
def get_par2_filenames(filename):
    """Get all of the filenames that are protected by the par2
    file given as the filename.

    The MD5 stored in each packet header is checked against the packet
    contents before the packet is used.

    Returns the list of names, in the order their file description
    packets first appear. If the par2 file cannot be opened, a message
    is printed and [] is returned.

    Raises EnvironmentError (errno.EINVAL) if a packet is truncated,
    malformed or fails its hash check, if there is no main packet, or if
    the main packet lists a file id that has no file description packet."""

    # hashlib replaced the md5 module in Python 2.5
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        from md5 import new as new_md5

    try:
        par2 = open(filename, 'rb')
    except (IOError, OSError):
        print('Could not open %s' % (filename, ))
        return []

    pkt_header_fmt = '< 8s Q 16s 16s 16s'
    pkt_header_size = struct.calcsize(pkt_header_fmt)
    file_pkt_fmt = '< 16s 16s 16s Q'
    file_pkt_size = struct.calcsize(file_pkt_fmt)
    main_pkt_fmt = '< Q I'
    main_pkt_size = struct.calcsize(main_pkt_fmt)

    seen_file_ids = {}
    expected_file_ids = None
    filenames = []

    try:
        while 1:
            header = par2.read(pkt_header_size)
            if not header:
                break

            # A short read makes unpack() raise struct.error, which is
            # turned into EnvironmentError below.
            magic, pkt_len, pkt_md5, set_id, pkt_type = \
                struct.unpack(pkt_header_fmt, header)

            body = par2.read(pkt_len - pkt_header_size)

            # We always want to do crc checks. The hash covers the header
            # from offset 0x20 on (set id and packet type) plus the body.
            control_md5 = new_md5()
            control_md5.update(header[0x20:])
            control_md5.update(body)

            if control_md5.digest() != pkt_md5:
                raise EnvironmentError(errno.EINVAL,
                    "corrupt par2 file - bad packet hash")

            if pkt_type == b'PAR 2.0\0FileDesc':
                file_id, file_md5, file_md5_16k, file_size = \
                    struct.unpack(file_pkt_fmt, body[:file_pkt_size])

                if file_id not in seen_file_ids:
                    seen_file_ids[file_id] = 1
                    # The name follows the fixed fields and is NUL padded
                    name = body[file_pkt_size:].split(b'\0', 1)[0]
                    if not isinstance(name, str):
                        # Python 3 reads bytes; hand back text there too
                        name = name.decode('utf-8', 'replace')
                    filenames.append(name)

            elif pkt_type == b"PAR 2.0\0Main\0\0\0\0":
                # Only the first main packet is used
                if expected_file_ids is None:
                    expected_file_ids = []
                    slice_size, num_files = \
                        struct.unpack(main_pkt_fmt, body[:main_pkt_size])
                    num_nonrecovery = (len(body) - main_pkt_size) // 16 - num_files

                    # The rest of the packet is a run of 16 byte file ids
                    end = main_pkt_size + (num_files + num_nonrecovery) * 16
                    for i in range(main_pkt_size, end, 16):
                        expected_file_ids.append(body[i:i+16])

            # Any other packet type has been read and checked; ignore it.

    except struct.error:
        raise EnvironmentError(errno.EINVAL,
            "corrupt par2 file - truncated or malformed packet")
    finally:
        par2.close()

    if expected_file_ids is None:
        raise EnvironmentError(errno.EINVAL,
            "corrupt or unsupported par2 file - no main packet found")

    for expected_id in expected_file_ids:
        if expected_id not in seen_file_ids:
            raise EnvironmentError(errno.EINVAL,
                "corrupt or unsupported par2 file - "
                "expected file description packet not found")

    return filenames
271
 
272
################################################################################
273
# The parset object
274
#
275
# This is an object based representation of a parset, and will verify itself
276
# and extract itself, if possible.
277
################################################################################
278
 
132 ira 279
import os, glob, re
129 ira 280
 
281
class parset:
    """One par2 set: a par2 file, any extra par2 files that describe the
    same files, and the files they protect.

    Attributes:
        parfile      - the par2 file handed to the verify programs
        extra_pars   - other par2 files belonging to the same set
        files        - names of the protected files (set by the caller)
        used_parjoin - True if lxsplit was used and the result verified
        verified     - result of the last verify()
        extracted    - result of the last extract()"""

    def __init__(self, par_filename):
        self.parfile = par_filename
        self.extra_pars = []
        # An empty list rather than False, so the methods that loop over
        # the files also work before the caller has filled this in.
        self.files = []
        self.used_parjoin = False
        self.verified = False
        self.extracted = False

    def print_debug_info(self):
        """Special function for debugging"""
        print('========== DEBUG INFO STARTS HERE ==========')
        print('parfile: %s' % (self.parfile, ))
        print('extra_pars: %s' % (self.extra_pars, ))
        print('files: %s' % (self.files, ))
        print('========== DEBUG INFO ENDS HERE ==========')

    def get_filenames(self):
        """Return the filenames listed in this set's par2 file."""
        return get_par2_filenames(self.parfile)

    def all_there(self):
        """Check if all the files for the parset are present.
        This will help us decide which par2 checker to use first"""
        for f in self.files:
            if not os.path.isfile(f):
                return False

        # The files were all there
        return True

    def verify(self):
        """This will verify the parset by the most efficient method first,
        and then move to a slower method if that one fails.

        Returns True if the set verified, and records that in
        self.verified."""

        retval = False #not verified yet

        # if all the files are there, try verifying fast
        if self.all_there():
            retval = self.__fast_verify()

            if retval == False:
                # Failed to verify fast, so try it slow, maybe it needs repair
                retval = self.__slow_verify()

        # If we've got a video file, maybe we should try to parjoin it
        elif self.__has_video_file():
            retval = self.__parjoin()

        else: #not all there, maybe we can slow-repair
            retval = self.__slow_verify()

        self.verified = retval
        return self.verified

    def __run(self, argv):
        """Run the program given as an argument list. Returns True if it
        exited with status 0, False otherwise (including when it could
        not be started).

        No shell is involved, so quotes, '$' or backticks in a file name
        are passed to the program as-is. The names in self.files come
        out of the par2 file, so they cannot be trusted."""
        try:
            return subprocess.call(argv) == 0
        except OSError:
            print('Could not run program: %s' % (argv[0], ))
            return False

    def __fast_verify(self):
        """Check the set with cfv. True on success."""
        return self.__run(['cfv', '-v', '-f', self.parfile])

    def __slow_verify(self):
        """Check (and repair, if needed) the set with par2repair.
        True on success."""
        return self.__run(['par2repair', self.parfile])

    def __parjoin(self):
        """Join the <first file>.001, ... pieces with lxsplit, then verify
        the result. The joined file is removed again if it does not
        verify."""
        # The result of the join itself is not checked; the verify
        # below decides whether it worked.
        self.__run(['lxsplit', '-j', '%s.001' % (self.files[0], )])

        retval = self.__fast_verify()

        if retval == False:
            # Failed to verify fast, so try it slow, maybe it needs repair
            retval = self.__slow_verify()

        if retval == False: # failed to verify, so remove the lxsplit created file
            try:
                os.remove(self.files[0])
            except OSError:
                print('Failed to remove file: %s' % (self.files[0], ))

        self.used_parjoin = retval
        self.verified = retval
        return self.verified

    def __has_video_file(self):
        """True if any file in the set matches the video_file_regex."""
        regex = re.compile(
                config.get_value('regular expressions', 'video_file_regex'),
                re.IGNORECASE)

        for f in self.files:
            if regex.match(f):
                return True

        return False

    def __remove_currentset(self):
        """Remove all of the files that are extractable, as well as the pars.
        Leave everything else alone"""

        if not self.extracted:
            print('Did not extract yet, not removing currentset')
            return

        # remove the main par
        os.remove(self.parfile)

        # remove all of the extra pars
        for i in self.extra_pars:
            os.remove(i)

        # remove any rars that are associated (leave EVERYTHING else)
        # This regex matches both old and new style rar(s) by default.
        regex = re.compile(
                config.get_value('regular expressions', 'remove_regex'),
                re.IGNORECASE)

        for i in self.files:
            if regex.match(i):
                os.remove(i)

        # remove any .{001,002,...} files (from parjoin)
        if self.used_parjoin:
            for i in os.listdir(os.getcwd()):
                if i != self.files[0] and self.files[0] in i:
                    os.remove(i)

        # remove any temp repair files
        regex = re.compile(
                config.get_value('regular expressions', 'temp_repair_regex'),
                re.IGNORECASE)

        for f in os.listdir(os.getcwd()):
            if regex.match(f):
                os.remove(f)

    def __get_extract_file(self):
        """Find the first extractable file, or None if there is none.

        The extension is compared without regard to case, like the
        regexes are. Otherwise a set named *.RAR would not be extracted
        but would still be deleted by the default remove_regex."""
        for i in self.files:
            if os.path.splitext(i)[1].lower() == '.rar':
                return i

        return None

    def extract(self):
        """Attempt to extract all of the files related to this parset.

        Returns True if the set was handled and its par/rar files were
        removed, False if it was not verified or rar failed. A set with
        no rar file in it counts as handled."""
        if not self.verified:
            self.extracted = False
            print('Not (successfully) verified, not extracting')
            return False #failed to extract

        extract_file = self.__get_extract_file()

        if extract_file is not None:
            # 'x' keeps the paths stored in the archive, 'e' drops them
            if config.get_value('options', 'extract_with_full_path'):
                rar_command = 'x'
            else:
                rar_command = 'e'

            if not self.__run(['rar', rar_command, '-o+', extract_file]):
                print('Failed to extract')
                self.extracted = False
                return self.extracted

        # we extracted ok, so remove the currentset
        self.extracted = True
        self.__remove_currentset()

        return self.extracted
451
 
452
 
453
################################################################################
454
# The rarslave program itself
455
################################################################################
456
 
134 ira 457
import os, sys
129 ira 458
from optparse import OptionParser
459
 
460
def check_required_progs():
    """Check if the required programs are installed.

    Prints the name of every program that was not found and then exits
    with status 1. Returns normally if nothing is missing."""

    # The value os.system() returns when the shell exits with status 127
    # (127 << 8), which is what a shell reports for a command it cannot
    # find.
    shell_not_found = 32512
    programs = ('cfv', 'par2repair', 'lxsplit', 'rar')

    needed = []

    for prog in programs:
        if os.system('%s --help > /dev/null 2>&1' % (prog, )) == shell_not_found:
            needed.append(prog)

    if needed:
        for n in needed:
            print('Needed program "%s" not found in $PATH' % (n, ))

        sys.exit(1)
483
 
484
def get_parsets():
    """Get a representation of each parset in the current directory, and
    return them as a list of parset instances.

    par2 files that list the same files are grouped into one parset: the
    first one found becomes its parfile, the rest go into extra_pars.
    A par2 file that cannot be parsed is reported and skipped."""

    regex = re.compile(
            config.get_value('regular expressions', 'par2_regex'),
            re.IGNORECASE)
    par2files = [f for f in os.listdir(os.getcwd()) if regex.match(f)]

    parsets = []

    for par2file in par2files:
        try:
            filenames = get_par2_filenames(par2file)
        except (EnvironmentError, struct.error):
            # struct.error is what unpacking a truncated packet raises;
            # one broken par2 file should not stop the whole run.
            print('CORRUPT PARFILE: %s' % (par2file, ))
            continue

        # if we already have an instance for this set, append
        # this par file to the extra_pars field
        for known in parsets:
            if known.files == filenames:
                known.extra_pars.append(par2file)
                break

        # we haven't seen this set yet, so we'll create it now
        else:
            cur = parset(par2file)
            cur.files = filenames
            parsets.append(cur)

    return parsets
517
 
150 ira 518
def directory_worker(dir, options):
    """Attempts to find, verify, and extract every parset in the directory
    given as a parameter.

    If options.debug_info is set, the debug info of every parset is
    printed instead and nothing is verified or extracted.

    The current directory is changed to dir while working and is put
    back afterwards, even if something goes wrong."""

    cwd = os.getcwd()
    os.chdir(dir)

    try:
        parsets = get_parsets()

        # Print debug info if we're supposed to
        if options.debug_info:
            for p in parsets:
                p.print_debug_info()

        # No debug info
        else:

            # Verify each parset
            for p in parsets:
                p.verify()

            # Attempt to extract each parset
            for p in parsets:
                p.extract()

    finally:
        os.chdir(cwd)
544
 
545
def main():
    """Parse the command line and run rarslave on the working directory.

    The defaults of the options come from the config, so anything given
    on the command line overrides the config file."""

    # Build the OptionParser
    parser = OptionParser()
    parser.add_option('-n', '--not-recursive',
                      action='store_false', dest='recursive',
                      default=config.get_value('options', 'recursive'),
                      help="Don't run recursively")

    parser.add_option('-d', '--work-dir',
                      dest='work_dir',
                      default=config.get_value('directories', 'working_directory'),
                      help="Start running at DIR", metavar='DIR')

    parser.add_option('-p', '--check-required-programs',
                       action='store_true', dest='check_progs',
                       default=config.get_value('options', 'check_required_programs'),
                       help="Check for required programs")

    parser.add_option('-f', '--write-default-config',
                      action='store_true', dest='write_def_config',
                      default=False, help="Write out a new default config")

    parser.add_option('-c', '--write-new-config',
                      action='store_true', dest='write_config',
                      default=False, help="Write out the current config")

    parser.add_option('-o', '--output-debug-info',
                       action='store_true', dest='debug_info',
                       default=False,
                       help="Output debug info for every parset, then exit")

    # Parse the given options
    (options, args) = parser.parse_args()

    # Fix up the working directory
    options.work_dir = os.path.abspath(os.path.expanduser(options.work_dir))

    # Check that we have the required programs installed
    if options.check_progs:
        check_required_progs()

    # Write out a new default config, if we need it
    if options.write_def_config:
        config.write_config(default=True)

    # Write out the current config (adds new options to an existing config)
    if options.write_config:
        config.write_config()

    # Without this check a missing directory is silently skipped by
    # os.walk(), or makes os.chdir() fail, depending on the mode. It is
    # done after the config writes so those still happen.
    if not os.path.isdir(options.work_dir):
        print('Working directory does not exist: %s' % (options.work_dir, ))
        sys.exit(1)

    # Run rarslave!
    if options.recursive:
        for root, dirs, files in os.walk(options.work_dir):
            directory_worker(root, options)
    else:
        directory_worker(options.work_dir, options)
129 ira 601
 
602
if __name__ == '__main__':
603
    main()
604