#!/usr/bin/python
#
# by Erik Osheim
"""Summarise classes, functions and methods of Python sources via ctags.

The module parses tag lines in the tab-separated format written by
``exuberant-ctags`` (``symbol<TAB>path<TAB>pattern;"<TAB>kind[<TAB>fields]``),
groups the entries by package and by class, and can print, for every class,
the methods it implements directly or inherits from classes found in the
same tag data.

Run without arguments to tag every ``.py`` file below the current directory,
or pass the path of an existing tag file as the first argument.
"""
import os
import re
import subprocess
import sys
from collections import deque

# Regular expressions for whole tag lines.  The trailing letter is the ctags
# "kind": c = class, f = function, m = member (method).
class_re = re.compile(r'^(.*?)\t(.*?)\t(.*?)\tc$')
function_re = re.compile(r'^(.*?)\t(.*?)\t(.*?)\tf$')
# The search pattern (group 3) may itself contain tabs when the source line is
# tab-indented, so it is matched lazily up to the "m<TAB>class:" marker.
method_re = re.compile(
    r'^([^\t]+)\t([^\t]+)\t(.*?)\tm\tclass:([^\t]+)(?:\t.*)?$')

# Regular expressions for the ctags search pattern (``/^<source line>$/;"``).
# Group 1 is the text between the parentheses of the class/def statement.
class_supers_re = re.compile(
    r'^/\^[ \t]*class +[_A-Za-z][_A-Za-z0-9]* *\((.*?)\) *: *\$/;"$')
def_args_re = re.compile(
    r'^/\^[ \t]*def +[_A-Za-z][_A-Za-z0-9]* *\((.*?)\) *: *\$/;"$')

# One argument of a def statement: a name (optionally prefixed with * or **),
# optionally followed by "=" and either a bare token, a double-quoted string
# or a single-quoted string.  Inside a quoted string "\\." consumes a
# backslash-escaped character so that an escaped quote does not end it.
find_args_re = re.compile(
    r'''[*_a-zA-Z][*_a-zA-Z0-9]*(?:=(?:[^,'" ]+|"(?:\\.|[^\\"])*"|'(?:\\.|[^\\'])*'))?''')

# set of base python objects which can be assumed to exist
base_objects = set(['object', 'list', 'dict'])


def is_fully_qualified(s):
    """Return True if *s* is a known base object or already contains a dot."""
    return s in base_objects or '.' in s


def parse_entry(line):
    """Convert one ctags line into a ClassEntry, FunctionEntry or MethodEntry.

    Raises:
        Exception: if *line* is not a class, function or method tag.
    """
    m = class_re.match(line)
    if m:
        return ClassEntry(m.group(1), m.group(2), m.group(3))
    m = function_re.match(line)
    if m:
        return FunctionEntry(m.group(1), m.group(2), m.group(3))
    m = method_re.match(line)
    if m:
        return MethodEntry(m.group(1), m.group(2), m.group(3), m.group(4))
    raise Exception("Oh no: %s" % line)


class PythonCTagger(object):
    """Index of ctags entries, grouped by package and by class.

    Attributes (all plain dicts):
        entries:       full name -> entry, for every parsed tag.
        packages:      package name -> {symbol -> class/function entry}.
        classes:       qualified class name -> {symbol -> method entry}.
        class_methods: cache used by get_methods_for_class().
    """

    def __init__(self):
        self.entries = {}
        self.packages = {}
        self.classes = {}
        self.class_methods = {}

    def process_tagfile(self, path):
        """Read the tag file at *path* and index its contents."""
        with open(path, 'r') as f:
            data = f.read()
        self.process_data(data)

    def process_paths(self, paths=('.',)):
        """Run ``exuberant-ctags`` over every ``.py`` file below *paths*.

        ``CVS`` directories are not descended into.  File names are passed
        to ctags on its standard input, one per line.

        Raises:
            OSError: if the ``exuberant-ctags`` executable cannot be started.
        """
        sources = []
        for base in paths:
            for root, dirs, files in os.walk(base):
                # pruning in place stops os.walk from entering the directory
                if 'CVS' in dirs:
                    dirs.remove('CVS')
                for name in files:
                    if name.endswith('.py'):
                        # normpath drops a leading "./" (which would otherwise
                        # turn into a bogus leading dot in the package name)
                        # while keeping any sub-directory part of the path.
                        sources.append(
                            os.path.normpath(os.path.join(root, name)))

        proc = subprocess.Popen(
            ['exuberant-ctags', '-L', '-', '-f', '-'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True)
        # communicate() feeds stdin and drains stdout/stderr concurrently, so
        # a large amount of ctags output cannot block the writer.
        data, _ = proc.communicate(''.join('%s\n' % p for p in sources))
        self.process_data(data)

    def process_data(self, data):
        """Index the tag lines contained in the string *data*.

        Empty lines and ctags header lines (starting with ``!``) are skipped.

        Raises:
            Exception: propagated from parse_entry() for unsupported lines.
        """
        for raw in data.split('\n'):
            line = raw.rstrip('\r')
            if not line or line.startswith('!'):
                continue
            e = parse_entry(line)
            self.entries[e.fullname()] = e
            if e.type == 'method':
                # MethodEntry has already qualified its parent class name
                self.classes.setdefault(e.parent, {})[e.symbol] = e
            else:
                self.packages.setdefault(e.package(), {})[e.symbol] = e
        # cached method lists may be stale once new entries are known
        self.class_methods.clear()

    # this returns the methods available in the class
    def get_methods_for_class(self, c):
        """Return the methods of class entry *c*, including inherited ones.

        Classes are visited breadth-first starting at *c*; for each method
        symbol the first definition met wins.  Only super classes present
        in ``self.entries`` are followed.  The resulting list is cached per
        class and the cached list object itself is returned.
        """
        cn = c.fullname()
        # if we haven't determined this class's methods yet, then let's do it
        if cn not in self.class_methods:
            classes_seen = set()
            methods_seen = set()
            methods = []

            # create a queue of classes to process... this solves the ordering
            # problem for class inheritance...i.e.:
            #   class Shape
            #   class Rectangle(Shape)
            #   class Rhombus(Shape)
            #   class Square(Rectangle, Rhombus)
            # 1. [Square] --> process Square --> [Rectangle, Rhombus]
            # 2. [Rectangle, Rhombus] --> process Rectangle --> [Rhombus, Shape]
            # 3. [Rhombus, Shape] --> process Rhombus --> [Shape, Shape]
            # 4. [Shape, Shape] --> process Shape --> [Shape]
            # 5. [Shape] --> already processed Shape, skipping
            to_process = deque([c])
            while to_process:
                e = to_process.popleft()
                fn = e.fullname()

                # if we've seen this class already, then skip it
                if fn in classes_seen:
                    continue
                classes_seen.add(fn)

                # for each method in the class, add it to our list if it's
                # new; a class that defines no methods contributes nothing
                # here but its parents are still queued below.
                own_methods = self.classes.get(fn, {})
                for msymbol in own_methods:
                    if msymbol not in methods_seen:
                        methods.append(own_methods[msymbol])
                        methods_seen.add(msymbol)

                # for each parent of this class, append it to the end of the
                # queue if we know about it (entries that are not classes
                # have no supers)
                for sfn in getattr(e, 'supers', ()):
                    if sfn in self.entries:
                        to_process.append(self.entries[sfn])

            self.class_methods[cn] = methods

        return self.class_methods[cn]

    def display(self):
        """Print every package with its classes, functions and methods."""
        # for each package, print out the classes and functions in that package
        for p in self.packages:
            print('package %s' % p)
            for e in self.packages[p].values():
                print(' %s %s' % (e.type, e.prototype()))
                # for each class, print out the methods that class provides
                # (either implemented directly or inherited from a super class)
                if e.type == 'class':
                    fn = e.fullname()
                    for m in self.get_methods_for_class(e):
                        # '*' marks a method inherited from another class,
                        # ' ' one implemented by the class itself
                        status = ' ' if fn == m.parent else '*'
                        print(' %s %s' % (status, m.dump()))
            print('')


class Entry(object):
    """A tagged symbol together with the path of the file defining it."""

    type = 'generic'

    def __init__(self, symbol, path):
        self.symbol = symbol
        self.path = path

    def __repr__(self):
        return '<%s %s.%s>' % (self.type.title(), self.package(), self.symbol)

    def package(self):
        """Return the dotted package name derived from the file path."""
        # drop redundant "./" components and the file extension, then turn
        # path separators into dots
        module_path = os.path.splitext(os.path.normpath(self.path))[0]
        return module_path.replace(os.sep, '.').replace('/', '.')

    def fullname(self):
        """Return the symbol qualified with its package."""
        return '%s.%s' % (self.package(), self.symbol)

    def prototype(self):
        """Return a printable signature; the full name for a plain entry."""
        return self.fullname()

    def dump(self):
        """Return the entry type followed by its prototype."""
        return '%s %s' % (self.type, self.prototype())


class ClassEntry(Entry):
    """A class tag; ``supers`` lists its qualified base class names."""

    type = 'class'

    def __init__(self, symbol, path, match):
        Entry.__init__(self, symbol, path)
        self.match = match
        self.supers = []
        m = class_supers_re.match(match)
        if m:
            for name in m.group(1).split(','):
                name = name.strip()
                # skip the empty item produced by "class Foo():" or a
                # trailing comma, and keyword arguments such as metaclass=...
                if not name or '=' in name:
                    continue
                if not is_fully_qualified(name):
                    name = '%s.%s' % (self.package(), name)
                self.supers.append(name)

    def prototype(self):
        """Return the full name followed by the base classes."""
        return '%s(%s)' % (self.fullname(), ', '.join(self.supers))


class FunctionEntry(Entry):
    """A function tag; ``args`` lists the arguments found in the pattern."""

    type = 'function'

    def __init__(self, symbol, path, match):
        Entry.__init__(self, symbol, path)
        self.match = match
        self.args = []
        m = def_args_re.match(match)
        if m:
            self.args = find_args_re.findall(m.group(1))

    def prototype(self):
        """Return the full name followed by the argument list."""
        return '%s(%s)' % (self.fullname(), ', '.join(self.args))


class MethodEntry(FunctionEntry):
    """A method tag; ``parent`` is the qualified name of its class."""

    type = 'method'

    def __init__(self, symbol, path, match, parent):
        FunctionEntry.__init__(self, symbol, path, match)
        if is_fully_qualified(parent):
            self.parent = parent
        else:
            self.parent = '%s.%s' % (self.package(), parent)

    def fullname(self):
        """Return the symbol qualified with its parent class."""
        return '%s.%s' % (self.parent, self.symbol)


def main(argv=None):
    """Tag the current directory, or load the tag file named in *argv*."""
    if argv is None:
        argv = sys.argv[1:]
    ct = PythonCTagger()
    if argv:
        ct.process_tagfile(argv[0])
    else:
        ct.process_paths()
    ct.display()


if __name__ == "__main__":
    main()