[RARSLAVE] Add PAR2Set class

[rarslave2.git] / rarslave.py
diff --git a/rarslave.py b/rarslave.py

index 093d5fa..fab1f69 100644 (file)
--- a/rarslave.py
+++ b/rarslave.py
@@ -2,6 +2,7 @@
  # vim: set ts=4 sts=4 sw=4 textwidth=112 :
  
  import re, os, sys
+import par2parser
  
  # Global Variables
  (TYPE_OLDRAR, TYPE_NEWRAR, TYPE_ZIP, TYPE_NOEXTRACT) = range (4)
@@ -12,14 +13,29 @@ class RarslaveExtractor (object):
                 self.type = type
                 self.heads = []
  
-       def addHead (self, head):
-               assert not os.path.isfile (head)
+       def addHead (self, dir, head):
+               assert os.path.isdir (dir)
+               # REQUIRES that the dir is valid, but not that the file is valid, so that
+               # we can move a file that doesn't exist yet.
+               # FIXME: probably CAN add this back, since we should be running this AFTER repair.
+               #assert os.path.isfile (os.path.join (dir, head))
  
-               self.heads.append (head)
+               self.heads.append (os.path.join (dir, head))
  
         def extract (self, todir):
-               # Extract all heads
+               # Extract all heads of this set
+
+               # Create the directory $todir if it doesn't exist
+               if not os.path.isdir (todir):
+                       # TODO: LOGGER
+                       try:
+                               os.makedirs (todir)
+                       except OSError:
+                               # TODO: LOGGER
+                               # Failed mkdir -p, clean up time ...
+                               pass # FIXME: temporary for syntax
  
+               # Extract all heads
                 extraction_func = \
                         { TYPE_OLDRAR : self.__extract_rar,
                           TYPE_NEWRAR : self.__extract_rar,
@@ -31,14 +47,93 @@ class RarslaveExtractor (object):
                         extraction_func (h, todir)
  
         def __extract_rar (self, file, todir):
-               print 'Extracting (%s, %s)' % (file, todir)
+               assert os.path.isfile (file)
+               assert os.path.isdir (todir)
+
+               RAR_CMD = 'unrar x -o+ -- '
+
+               #file = full_abspath (file)
+               #todir = full_abspath (todir)
+
+               cmd = '%s \"%s\"' % (RAR_CMD, file)
+               ret = run_command (cmd, todir)
  
         def __extract_zip (self, file, todir):
-               print 'Extracting (%s, %s)' % (file, todir)
+               ZIP_CMD = 'unzip \"%s\" -d \"%s\"'
+
+               cmd = ZIP_CMD % (file, todir)
+               ret = run_command (cmd)
  
         def __extract_noextract (self, file, todir):
-               print 'Extracting (%s, %s)' % (file, todir)
+               # Just move this file to the $todir, since no extraction is needed
+               # FIXME: NOTE: mv will fail by itself if you're moving to the same dir!
+               cmd = 'mv \"%s\" \"%s\"' % (file, todir)
+               ret = run_command (cmd)
+
+
+
+class RarslaveRepairer (object):
+       """Verifies (and, if needed, repairs) a PAR2 set by running par2repair.
+
+       The command line is built from the main par2 file, the other par2 files
+       among the likely files of the set and, when join is set, every remaining
+       likely file as well."""
+
+       def __init__ (self, dir, file, join=False):
+               """dir  -- the directory containing the par2 file
+                  file -- the par2 file (a name inside dir)
+                  join -- True if the par2 set is 001 002 ..."""
+
+               # Refuse to build a repairer for something that is not there
+               assert os.path.isdir (dir)
+               assert os.path.isfile (os.path.join (dir, file))
+
+               self.dir  = dir
+               self.file = file
+               self.join = join
+
+       def checkAndRepair (self):
+               """Assembles the par2repair command, runs it inside self.dir and
+                  returns whatever run_command () returned, so that the caller can
+                  tell whether the repair worked."""
+
+               # Form the command:
+               # par2repair -- PAR2 PAR2_EXTRA [JOIN_FILES]
+               PAR2_CMD = 'par2repair -- '
+
+               # Get set up
+               basename = get_basename (self.file)
+               all_files = find_likely_files (basename, self.dir)
+               all_files.sort ()
+               par2_files = find_par2_files (all_files)
+
+               # assemble the command, main par2 file first
+               command = "%s \"%s\" " % (PAR2_CMD, self.file)
+
+               # the extra par2 files (the main one is already on the command line)
+               for f in par2_files:
+                       if f != self.file:
+                               command += "\"%s\" " % get_filename(f)
+
+               # for split sets, every non-par2 file has to be named explicitly
+               if self.join:
+                       for f in all_files:
+                               if f not in par2_files:
+                                       command += "\"%s\" " % get_filename(f)
  
+               # run the command and hand its result back instead of dropping it
+               return run_command (command, self.dir)
+
+def run_command (cmd, indir=None):
+       """Runs the specified command-line in the directory given (or, in the current
+          directory if none is given).
+
+          The previous working directory is restored before returning, so relative
+          paths used by the caller afterwards keep resolving as before.
+
+          NOTE: execution is still disabled for testing; the command is only
+          printed and None is returned. Once os.system () is re-enabled below, the
+          status code given by the application is returned."""
+
+       pwd = os.getcwd ()
+
+       if indir is not None:
+               assert os.path.isdir (indir) # MUST be a directory!
+               os.chdir (indir) # was chdir (pwd), which never left the current dir
+
+       try:
+               # FIXME: re-enable this after testing
+               print 'RUNNING (%s): %s' % (indir, cmd)
+               # return os.system (cmd)
+       finally:
+               # Always go back to where we started, even if the command blew up
+               os.chdir (pwd)
+
+
+def full_abspath (p):
+       """Expands a leading ~ in p and returns the result as an absolute path"""
+       expanded = os.path.expanduser (p)
+       return os.path.abspath (expanded)
+
+def get_filename (f):
+       """Returns the last component of the path f (empty if f ends in a separator).
+
+          Does not check that f exists on disk.
+          TODO: decide whether an os.path.isfile (f) check belongs here; it is left
+          out for now so the name can be taken whether or not f is a legit file
+          RIGHT NOW."""
+       head, tail = os.path.split (f)
+       return tail
  
  def get_basename (name):
         """Strips most kinds of endings from a filename"""
@@ -68,19 +163,25 @@ def find_likely_files (name, dir):
         ename = re.escape (name)
         regex = re.compile ('^%s.*$' % (ename, ))
  
-       return [os.path.abspath(f) for f in os.listdir (dir) if regex.match (f)]
+       return [f for f in os.listdir (dir) if regex.match (f)]
+
+def find_par2_files (files):
+       """Returns the entries of the list $files that end in .par2 (in any case),
+          in the order in which they were given"""
+
+       par2_regex = re.compile ('^.*\.par2$', re.IGNORECASE)
+
+       matches = []
+       for name in files:
+               if par2_regex.match (name):
+                       matches.append (name)
+
+       return matches
  
  def find_all_par2_files (dir):
         """Finds all par2 files in a directory"""
+       # NOTE: does NOT return absolute paths
  
         if not os.path.isdir (os.path.abspath (dir)):
                 raise ValueError # bad directory given
  
         dir = os.path.abspath (dir)
-       regex = re.compile ('^.*\.par2$', re.IGNORECASE)
+       files = os.listdir (dir)
  
-       # Find all files
-       return [f for f in os.listdir (dir) if regex.match (f)]
+       return find_par2_files (files)
  
  def has_extension (f, ext):
         """Checks if f has the extension ext"""
@@ -92,7 +193,7 @@ def has_extension (f, ext):
         regex = re.compile ('^.*%s$' % (ext, ), re.IGNORECASE)
         return regex.match (f)
  
-def find_extraction_heads (files):
+def find_extraction_heads (dir, files):
         """Takes a list of possible files and finds likely heads of
            extraction."""
  
@@ -105,6 +206,7 @@ def find_extraction_heads (files):
         # 3) zip all ver: .zip 
  
         extractor = None
+       p2files = find_par2_files (files)
  
         # Old RAR type, find all files ending in .rar
         if is_oldrar (files):
@@ -112,25 +214,46 @@ def find_extraction_heads (files):
                 regex = re.compile ('^.*\.rar$', re.IGNORECASE)
                 for f in files:
                         if regex.match (f):
-                               extractor.addHead (f)
+                               extractor.addHead (dir, f)
  
         if is_newrar (files):
                 extractor = RarslaveExtractor (TYPE_NEWRAR)
                 regex = re.compile ('^.*\.part01.rar$', re.IGNORECASE)
                 for f in files:
                         if regex.match (f):
-                               extractor.addHead (f)
+                               extractor.addHead (dir, f)
  
         if is_zip (files):
                 extractor = RarslaveExtractor (TYPE_ZIP)
                 regex = re.compile ('^.*\.zip$', re.IGNORECASE)
                 for f in files:
                         if regex.match (f):
-                               extractor.addHead (f)
+                               extractor.addHead (dir, f)
  
         if is_noextract (files):
+               # Use the Par2 Parser (from cfv) here to find out what files are protected.
+               # Since these are not being extracted, they will be mv'd to another directory
+               # later.
                 extractor = RarslaveExtractor (TYPE_NOEXTRACT)
  
+               for f in p2files:
+                       done = False
+                       try:
+                               prot_files = par2parser.get_protected_files (dir, f)
+                               done = True
+                       except: #FIXME: add the actual exceptions
+                               print 'ERROR PARSING P2FILE ...', f
+                               continue
+
+                       if done:
+                               break
+
+               if done:
+                       for f in prot_files:
+                               extractor.addHead (dir, f)
+               else:
+                       print 'BADNESS'
+
         # Make sure we found the type
         assert extractor != None
  
@@ -169,22 +292,121 @@ def printlist (li):
         for f in li:
                 print f
  
+class PAR2Set (object):
+       """A par2 file together with the files that likely belong to its set.
+
+       Two sets compare equal when they hold the same likely files, no matter
+       which par2 file they were created from."""
+
+       # Class-level defaults; __init__ rebinds all three on every instance
+       dir = None
+       file = None
+       likely_files = []
+
+       def __init__ (self, dir, file):
+               """dir  -- the directory containing the par2 file
+                  file -- the par2 file (a name inside dir)"""
+               assert os.path.isdir (dir)
+               assert os.path.isfile (os.path.join (dir, file))
+
+               self.dir = dir
+               self.file = file
+
+               # Everything in dir that starts with the stripped-down name of file
+               basename = get_basename (file)
+               self.likely_files = find_likely_files (basename, dir)
+
+       def __list_eq (self, l1, l2):
+               """True if l1 and l2 have the same length and every element of l1
+                  is also in l2 (the order does not matter)"""
+
+               if len(l1) != len(l2):
+                       return False
+
+               for e in l1:
+                       if e not in l2:
+                               return False
+
+               return True
+
+       def __eq__ (self, rhs):
+               # Let Python deal with foreign types instead of dying on a missing
+               # likely_files attribute
+               if not isinstance (rhs, PAR2Set):
+                       return NotImplemented
+
+               return self.__list_eq (self.likely_files, rhs.likely_files)
+
+       def __ne__ (self, rhs):
+               # Python 2 does not derive != from ==, so it has to be spelled out
+               result = self.__eq__ (rhs)
+               if result is NotImplemented:
+                       return result
+
+               return not result
+
+def generate_all_parsets (dir):
+       """Returns a list with one PAR2Set per distinct set in the directory $dir.
+
+          Every par2 file found in $dir yields a candidate set; a candidate that
+          compares equal to one already collected is dropped."""
+
+       assert os.path.isdir (dir) # Directory MUST be valid
+
+       unique_sets = []
+
+       for p2file in find_all_par2_files (dir):
+               candidate = PAR2Set (dir, p2file)
+               if candidate in unique_sets:
+                       continue
+
+               unique_sets.append (candidate)
+
+       return unique_sets
+
  def main ():
+       # Setup stage
+       print '\nSETUP STAGE'
         DIR = os.path.abspath ('test_material/01/')
         p2files = find_all_par2_files (DIR)
-       files = find_likely_files (get_basename (p2files[0]), DIR)
-       find_extraction_heads (files).extract('nodir')
-       print 'DELETEABLE_FILES:'
+       p2file = p2files[0]
+
+       # Repair stage
+       print '\nREPAIR STAGE'
+       repairer = RarslaveRepairer (DIR, p2file)
+       repairer.checkAndRepair ()
+
+       # Extraction stage
+       print '\nEXTRACTION STAGE'
+       files = find_likely_files (get_basename (p2file), DIR)
+       extractor = find_extraction_heads (DIR, files)
+       extractor.extract('extract_dir')
+
+       # Deletion stage
+       print '\nDELETION STAGE'
         printlist ( find_deleteable_files (files) )
  
-       print
+       print '\n\n'
  
+       # Setup stage
+       print '\nSETUP STAGE'
         DIR = os.path.abspath ('test_material/13/')
         p2files = find_all_par2_files (DIR)
-       files = find_likely_files (get_basename (p2files[0]), DIR)
-       find_extraction_heads (files).extract ('nodir')
-       print 'DELETEABLE_FILES:'
+       p2file = p2files[0]
+
+       # Repair stage
+       print '\nREPAIR STAGE'
+       RarslaveRepairer (DIR, p2file, join=True).checkAndRepair ()
+
+       # Extraction stage
+       print '\nEXTRACTION STAGE'
+       files = find_likely_files (get_basename (p2file), DIR)
+       find_extraction_heads (DIR, files).extract ('extract_dir')
+
+       # Deletion stage
+       print '\nDELETION STAGE'
         printlist ( find_deleteable_files (files) )
  
+       print '\n\n'
+
+       # Setup stage
+       print '\nSETUP STAGE'
+       DIR = os.path.abspath ('test_material/14/')
+       p2files = find_all_par2_files (DIR)
+       p2file = p2files[0]
+
+       # Repair stage
+       print '\nREPAIR STAGE'
+       RarslaveRepairer (DIR, p2file, join=True).checkAndRepair ()
+
+       # Extraction stage
+       print '\nEXTRACTION STAGE'
+       files = find_likely_files (get_basename (p2file), DIR)
+       find_extraction_heads (DIR, files).extract ('extract_dir')
+
+       # Deletion stage
+       print '\nDELETEION STAGE'
+       printlist ( find_deleteable_files (files) )
+
+       #### TESTING ####
+       parsets = generate_all_parsets ('test_material/02/')
+       print '\n\nPARSETS LEN:', len(parsets)
+       for p in parsets:
+               print p.likely_files[0]
+
+
  if __name__ == '__main__':
         main ()