#!/usr/bin/env python """ Given a root directory, recurse in it and find all the files with the same basename. We do not verify the contents of the files. """ import os, logging, re from fnmatch import fnmatch from os.path import * from collections import defaultdict def main(): import optparse parser = optparse.OptionParser(__doc__.strip()) parser.add_option('-i', '--ignore', action='append', default=[], help="Prune files/directories with the given name.") parser.add_option('-p', '--pattern', '--match', action='append', default=[], help="Match only files with the given globbing patterns.") opts, args = parser.parse_args() if not args: args = ['.'] uniqfiles = defaultdict(list) for dn in args: if not exists(dn) or not isdir(dn): logging.warning("Not a directory: '%s'; skipping." % dn) continue for root, dirs, files in os.walk(dn): for dn in list(dirs): if dn in opts.ignore: dirs.remove(dn) for fn in files: if fn in opts.ignore: continue if opts.pattern and not any(fnmatch(fn, p) for p in opts.pattern): continue uniqfiles[fn].append(join(root, fn)) for fn, an in sorted(uniqfiles.iteritems()): if len(an) > 1: print '%s (%d)' % (fn, len(an)) for ffn in an: print ' %s' % ffn print if __name__ == '__main__': main()