def __init__(self, options):
    """Store the program options on the instance.

    options -- settings object kept as self.options; other methods read
               attributes such as dict_file and output_dir from it.
    """
    self.options = options
def __valid_dict_line(self, line):
    """Return True if line is a usable dictionary definition.

    A usable line has the form:
        REGEX_PATTERN = DIRECTORY
    It must not be blank or a '#' comment, must contain exactly one '=',
    and must have non-empty text on both sides of it once surrounding
    whitespace is stripped.

    line -- one raw line of the dictionary file (a trailing newline is fine)
    """
    # Blank lines and comment lines never define anything
    if re.match(r'^(\s*#.*|\s*)$', line):
        return False

    # Exactly one '=' must separate the pattern from the directory;
    # checking the piece count avoids a catch-all except around unpacking
    pieces = line.split('=')
    if len(pieces) != 2:
        return False

    # Both sides must have length once the padding is removed
    regex = pieces[0].strip()
    directory = pieces[1].strip()
    return bool(regex) and bool(directory)
def parse_dict(self):
    """Parses a dictionary file containing the sort definitions in the form:

        REGEX_PATTERN = DIRECTORY

    The file named by self.options.dict_file is read, and every line the
    line validator rejects is ignored. For each remaining line:
      - a DIRECTORY that does not start with '/' is joined onto
        self.options.output_dir
      - a REGEX_PATTERN gets a leading '^' and a trailing '$' added when
        it does not already have them
      - a pattern that fails to compile is logged as a warning and skipped

    Returns a tuple of tuples of the form (compiled_regex, to_directory).
    Exits the program with status 1 if the dictionary cannot be opened."""
    try:
        # No buffering argument: unbuffered text mode is rejected by Python 3
        f = open(self.options.dict_file, 'r')
        try:
            raw_lines = f.readlines()
        finally:
            f.close()
    except IOError:
        logging.critical('Opening dictionary: %s FAILED' % self.options.dict_file)
        # Non-zero status so callers of the program can see the failure
        sys.exit(1)

    ### Find all of the valid lines in the file
    valid_lines = [l for l in raw_lines if self.__valid_dict_line(l)]

    # Set up variable for result
    result = []

    ### Split each line into a tuple, and strip each element of spaces
    for l in valid_lines:
        (regex, directory) = l.split('=')
        regex = regex.strip()
        directory = directory.strip()

        # Fix up the directory if necessary
        if not directory.startswith('/'):
            directory = os.path.join(self.options.output_dir, directory)

        # Fix up the regex so that it is anchored at both ends
        # NOTE(review): a top-level alternation such as 'a|b' becomes
        # '^a|b$', which anchors each branch on one side only -- confirm
        # whether the pattern should be grouped before anchoring
        if not regex.startswith('^'):
            regex = '^' + regex

        if not regex.endswith('$'):
            regex += '$'

        # Store the result, skipping patterns that are not valid regexes
        # instead of letting one bad line abort the whole load
        try:
            compiled = re.compile(regex)
        except re.error:
            logging.warning('Bad regex in dictionary: "%s"' % regex)
            continue

        result.append((compiled, directory))

    ### Give some information about the dictionary we are using
    logging.info('Successfully loaded %d records from %s\n' %
                 (len(result), self.options.dict_file))

    return tuple(result)
def get_matches(self, files, pattern):
"""get_matches(files, pattern):