Update parser

[animesorter.git] / animesorter2.py
diff --git a/animesorter2.py b/animesorter2.py

index 9585394..24aa755 100755 (executable)
--- a/animesorter2.py
+++ b/animesorter2.py
@@ -50,58 +50,79 @@ class AnimeSorter2:
      def __init__(self, options):
          self.options = options
  
+    def __valid_dict_line (self, line):
+        if len(line) <= 0:
+            return False
+
+        if '=' not in line:
+            return False
+
+        # Comment lines and whitespace-only lines are not definitions
+        if re.match ('^(\s*#.*|\s*)$', line):
+            return False
+
+        # Require exactly one '=' separating the regex from the directory
+        try:
+            (regex, directory) = line.split('=')
+            regex = regex.strip()
+            directory = directory.strip()
+        except:
+            return False
+
+        # Make sure both the regex and the directory are non-empty
+        if len(regex) <= 0 or len(directory) <= 0:
+            return False
+
+        # All checks passed, so the line is a valid definition
+        return True
+
      def parse_dict(self):
          """Parses a dictionary file containing the sort definitions in the form:
-        DIRECTORY = PATTERN
+        REGEX_PATTERN = DIRECTORY
  
          Returns a list of tuples of the form (compiled_regex, to_directory)"""
  
          try:
              f = open(self.options.dict_file, 'r', 0)
              try:
-                data = f.read()
+                raw_lines = f.readlines()
              finally:
                  f.close()
          except IOError:
              logging.critical ('Opening dictionary: %s FAILED' % self.options.dict_file)
              sys.exit()
  
-        ### Get a LIST containing each line in the file
-        lines = [l for l in data.split('\n') if len(l) > 0]
+        ### Find all of the valid lines in the file
+        valid_lines = [l for l in raw_lines if self.__valid_dict_line (l)]
  
-        ### Remove comments / blank lines (zero length lines already removed above)
-        regex = re.compile ('^\s*#.*$')
-        lines = [l for l in lines if not re.match (regex, l)]
-        regex = re.compile ('^\s*$')
-        lines = [l for l in lines if not re.match (regex, l)]
+        # Set up variable for result
+        result = []
  
          ### Split each line into a tuple, and strip each element of spaces
-        result = self.split_lines(lines)
-        result = [(re.compile(r), d) for r, d in result]
-
-        ### Give some information about the dictionary we are using
-        logging.info ('Successfully loaded %d records from %s\n' % \
-                (len(result), self.options.dict_file))
+        for l in valid_lines:
+            (regex, directory) = l.split('=')
+            regex = regex.strip()
+            directory = directory.strip()
  
-        return tuple(result)
+            # Fix up the directory if necessary
+            if directory[0] != '/':
+                directory = os.path.join (self.options.output_dir, directory)
  
-    def split_lines(self, lines):
+            # Fix up the regex
+            if regex[0] != '^':
+                regex = '^' + regex
  
-        result = []
+            if regex[-1] != '$':
+                regex += '$'
  
-        for l in lines:
+            # Store the result
+            result.append ( (re.compile (regex), directory) )
  
-            try:
-                r, d = l.split('=')
-                r = r.strip()
-                d = d.strip()
-            except ValueError:
-                logging.warning ('Bad line in dictionary: "%s"' % l)
-                continue
-
-            result.append((r, d))
+        ### Give some information about the dictionary we are using
+        logging.info ('Successfully loaded %d records from %s\n' % \
+                (len(result), self.options.dict_file))
  
-        return result
+        return tuple (result)
  
      def get_matches(self, files, pattern):
          """get_matches(files, pattern):