rarslave.py

   1 #!/usr/bin/env python
   2 # vim: set ts=4 sts=4 sw=4 textwidth=112 :
   3
   4 import re, os, sys
   5
   6 # Global Variables
   7 (TYPE_OLDRAR, TYPE_NEWRAR, TYPE_ZIP, TYPE_NOEXTRACT) = range (4)
   8
   9 class RarslaveExtractor (object):
  10
  11         def __init__ (self, type):
  12                 self.type = type
  13                 self.heads = []
  14
  15         def addHead (self, head):
  16                 assert not os.path.isfile (head)
  17
  18                 self.heads.append (head)
  19
  20         def extract (self, todir):
  21                 # Extract all heads
  22
  23                 extraction_func = \
  24                         { TYPE_OLDRAR : self.__extract_rar,
  25                           TYPE_NEWRAR : self.__extract_rar,
  26                           TYPE_ZIP    : self.__extract_zip,
  27                           TYPE_NOEXTRACT : self.__extract_noextract }[self.type]
  28
  29                 # Call the extraction function on each head
  30                 for h in self.heads:
  31                         extraction_func (h, todir)
  32
  33         def __extract_rar (self, file, todir):
  34                 print 'Extracting (%s, %s)' % (file, todir)
  35
  36         def __extract_zip (self, file, todir):
  37                 print 'Extracting (%s, %s)' % (file, todir)
  38
  39         def __extract_noextract (self, file, todir):
  40                 print 'Extracting (%s, %s)' % (file, todir)
  41
  42
  43 def get_basename (name):
  44         """Strips most kinds of endings from a filename"""
  45
  46         regex = '^(.+)\.(par2|vol\d+\+\d+|\d\d\d|part\d+|rar|zip|avi|mp4|mkv|ogm)$'
  47         r = re.compile (regex, re.IGNORECASE)
  48         done = False
  49
  50         while not done:
  51                 done = True
  52
  53                 if r.match (name):
  54                         g = r.match (name).groups()
  55                         name = g[0]
  56                         done = False
  57
  58         return name
  59
  60 def find_likely_files (name, dir):
  61         """Finds files which are likely to be part of the set corresponding
  62            to $name in the directory $dir"""
  63
  64         if not os.path.isdir (os.path.abspath (dir)):
  65                 raise ValueError # bad directory given
  66
  67         dir = os.path.abspath (dir)
  68         ename = re.escape (name)
  69         regex = re.compile ('^%s.*$' % (ename, ))
  70
  71         return [os.path.abspath(f) for f in os.listdir (dir) if regex.match (f)]
  72
  73 def find_all_par2_files (dir):
  74         """Finds all par2 files in a directory"""
  75
  76         if not os.path.isdir (os.path.abspath (dir)):
  77                 raise ValueError # bad directory given
  78
  79         dir = os.path.abspath (dir)
  80         regex = re.compile ('^.*\.par2$', re.IGNORECASE)
  81
  82         # Find all files
  83         return [f for f in os.listdir (dir) if regex.match (f)]
  84
  85 def has_extension (f, ext):
  86         """Checks if f has the extension ext"""
  87
  88         if ext[0] != '.':
  89                 ext = '.' + ext
  90
  91         ext = re.escape (ext)
  92         regex = re.compile ('^.*%s$' % (ext, ), re.IGNORECASE)
  93         return regex.match (f)
  94
  95 def find_extraction_heads (files):
  96         """Takes a list of possible files and finds likely heads of
  97            extraction."""
  98
  99         # NOTE: perhaps this should happen AFTER repair is
 100         # NOTE: successful. That way all files would already exist
 101
 102         # According to various sources online:
 103         # 1) pre rar-3.0: .rar .r00 .r01 ...
 104         # 2) post rar-3.0: .part01.rar .part02.rar
 105         # 3) zip all ver: .zip
 106
 107         extractor = None
 108
 109         # Old RAR type, find all files ending in .rar
 110         if is_oldrar (files):
 111                 extractor = RarslaveExtractor (TYPE_OLDRAR)
 112                 regex = re.compile ('^.*\.rar$', re.IGNORECASE)
 113                 for f in files:
 114                         if regex.match (f):
 115                                 extractor.addHead (f)
 116
 117         if is_newrar (files):
 118                 extractor = RarslaveExtractor (TYPE_NEWRAR)
 119                 regex = re.compile ('^.*\.part01.rar$', re.IGNORECASE)
 120                 for f in files:
 121                         if regex.match (f):
 122                                 extractor.addHead (f)
 123
 124         if is_zip (files):
 125                 extractor = RarslaveExtractor (TYPE_ZIP)
 126                 regex = re.compile ('^.*\.zip$', re.IGNORECASE)
 127                 for f in files:
 128                         if regex.match (f):
 129                                 extractor.addHead (f)
 130
 131         if is_noextract (files):
 132                 extractor = RarslaveExtractor (TYPE_NOEXTRACT)
 133
 134         # Make sure we found the type
 135         assert extractor != None
 136
 137         return extractor
 138
 139 def is_oldrar (files):
 140         for f in files:
 141                 if has_extension (f, '.r00'):
 142                         return True
 143
 144 def is_newrar (files):
 145         for f in files:
 146                 if has_extension (f, '.part01.rar'):
 147                         return True
 148
 149 def is_zip (files):
 150         for f in files:
 151                 if has_extension (f, '.zip'):
 152                         return True
 153
 154 def is_noextract (files):
 155         # Type that needs no extraction.
 156         # TODO: Add others ???
 157         for f in files:
 158                 if has_extension (f, '.001'):
 159                         return True
 160
 161 def find_deleteable_files (files):
 162         # Deleteable types regex should come from the config
 163         dfiles = []
 164         dregex = re.compile ('^.*\.(par2|\d|\d\d\d|rar|r\d\d|zip)$', re.IGNORECASE)
 165
 166         return [f for f in files if dregex.match (f)]
 167
 168 def printlist (li):
 169         for f in li:
 170                 print f
 171
 172 def main ():
 173         DIR = os.path.abspath ('test_material/01/')
 174         p2files = find_all_par2_files (DIR)
 175         files = find_likely_files (get_basename (p2files[0]), DIR)
 176         find_extraction_heads (files).extract('nodir')
 177         print 'DELETEABLE_FILES:'
 178         printlist ( find_deleteable_files (files) )
 179
 180         print
 181
 182         DIR = os.path.abspath ('test_material/13/')
 183         p2files = find_all_par2_files (DIR)
 184         files = find_likely_files (get_basename (p2files[0]), DIR)
 185         find_extraction_heads (files).extract ('nodir')
 186         print 'DELETEABLE_FILES:'
 187         printlist ( find_deleteable_files (files) )
 188
 189 if __name__ == '__main__':
 190         main ()