""" Etags parser :author: Dan Williams """ import os import re from stat import ST_MTIME from subprocess import Popen, PIPE, STDOUT class EtagsRunError(Exception): pass class TagManager(object): lang = None prune = set(['SCCS', 'RCS', 'CVS', '.svn', '.hg', '.git', '.bzr']) exts = set() def __init__(self, base='.'): self.db = None self.base = base self.path = os.path.join(self.base, 'TAGS') self.update() def has(self, name): return name in self.db.tags def get(self, name): return self.db.get(name, []) def list(self): return self.db.records def update(self): if self.is_outdated(): self.run() self.db = Etags(self.path) self.db.parse() def run(self): lf = '--language-force=%s' % self.lang args = ['ctags', '-e', '-f', self.path, lf, '-L-'] pipe = Popen(args, stdin=PIPE, stdout=PIPE, stderr=STDOUT) indata = '\n'.join(self.get_paths()) + '\n' outdata = pipe.communicate(indata) if pipe.returncode != 0: raise EtagsRunError(outdata) def get_paths(self): return list(self._walk(mtime=0)) def is_outdated(self): if not os.path.exists(self.path): return True mtime = os.stat(self.path)[ST_MTIME] itr = self._walk(mtime) try: itr.next() return True except StopIteration: return False def _walk(self, mtime=0): paths = [] for root, dirs, files in os.walk(self.base): for d in dirs: if d in self.prune: dirs.remove(d) for f in files: path = os.path.join(root, f) if not self._match(path): continue elif os.stat(path)[ST_MTIME] < mtime: continue else: yield os.path.join(root, f) raise StopIteration def _match(self, path): _, ext = os.path.splitext(path) return ext in self.exts class Etags(object): def __init__(self, fname=None): self.fname = fname self.rawdata = None self.records = [] self.tags = {} def _load(self): fd = file(self.fname, 'r') self.rawdata = fd.read() fd.close() def get(self, name): return self.tags.get(name, []) def parse(self, fname=None): """ Parser is based on the little info found in Wikipedia: http://en.wikipedia.org/wiki/Ctags """ if fname: self.fname = fname self._load() i = 0 data_len = len(self.rawdata) data = self.rawdata while i < data_len: if ord(data[i]) == 0xc: i = self._parse_block(data, i+2) def _add_record(self, record): self.records.append(record) name = record.name if name is None: return self.tags.setdefault(name, []) self.tags[name].append(record) def _parse_block(self, data, i): n = data[i:].find('\n') + i l = data[i:n] try: filename, size = l.split(',') except ValueError, e: raise Exception("parse failed(%s): %r %r %r" % (i, l, e, data)) size = int(size) subblock = data[n+1:n+size+1] # ... for lineitem in subblock.split('\n'): if len(lineitem) == 0: continue record = self._parse_record(lineitem, filename) self._add_record(record) return n + size + 1 def _parse_record(self, lineitem, filename): try: defn, rest = lineitem.split(chr(0x7f)) except ValueError: print lineitem raise name = None if chr(0x01) in rest: name, rest = rest.split(chr(0x01)) else: txt = defn.strip() sp = re.split('[ ,;*()\t&=]', txt) sp = [x for x in sp if x != ''] if len(sp): name = sp[-1] tokens = rest.split(',') line = int(tokens[0]) byte = int(tokens[1]) record = EtagRecord(path=filename, defn=defn, name=name, line=line, byte=byte) return record class EtagRecord(object): def __init__(self, **kwargs): self.path = None self.defn = None self.name = None self.line = -1 self.byte = None self.__dict__.update(kwargs) def __repr__(self): return "%s [%s:%d]" % (self.name, self.path, self.line) if __name__ == '__main__': import sys from pprint import pprint etags = Etags(sys.argv[1]) etags.parse() if len(sys.argv) > 2: print etags.get(sys.argv[2]) else: pprint(etags.records)