#!/usr/bin/python
"""Count identical lines read from stdin and print a histogram of them.

Each distinct input line (an "item") is counted; the result is written to
stdout one item per line, as ``<value><delim><item>`` (or, with -S/--swap,
``<item><delim><value>``).  The value is the raw count by default, a
percentage of the total with -p, or a bar of stars with -d.

The script runs unchanged on Python 2 and Python 3.
"""

import optparse
import os
import sys

# escape sequences accepted by -D/--delim, mapped to the character they name
DELIM_ESCAPES = {'\\t': '\t', '\\v': '\v', '\\0': '\0'}

# screen width assumed when COLUMNS is unset or is not a number
DEFAULT_COLUMNS = 80

# we should probably never try to draw more than this many stars by default
MAX_AUTO_GRAPH_WIDTH = 40


def build_parser():
    """Return the optparse parser describing every command-line option."""
    parser = optparse.OptionParser()
    parser.set_defaults(draw=False, nullok=False, valsort=False, reverse=False,
                        hwidth=None, percent=False, strip=False, delim=' ',
                        swap=False, josh=False, nopad=False)
    parser.add_option('-d', '--draw', action='store_true', dest='draw',
                      help='draw histogram with horizontal graph')
    parser.add_option('-D', '--delim', action='store', type="string",
                      dest='delim',
                      help='use STR as delimiter (default: " ")',
                      metavar='STR')
    parser.add_option('-f', '--frequency', action='store_true',
                      dest='valsort', help='sort by frequency')
    parser.add_option('-J', '--josh', action='store_true', dest='josh',
                      help='josh rosenberg compatibility mode')
    parser.add_option('-N', '--nopad', action='store_true', dest='nopad',
                      help='do not column-align output')
    parser.add_option('-p', '--percent', action='store_true', dest='percent',
                      help='show percent rather than count')
    parser.add_option('-r', '--reverse', action='store_true', dest='reverse',
                      help='reverse sort')
    parser.add_option('-s', '--strip', action='store_true', dest='strip',
                      help='strip input lines')
    parser.add_option('-S', '--swap', action='store_true', dest='swap',
                      help='swap output fields')
    parser.add_option('-w', '--width', action='store', type='int',
                      dest='hwidth',
                      help='use NUM characters for graph drawing',
                      metavar="NUM")
    parser.add_option('-z', '--null', action='store_true', dest='nullok',
                      help='allow null lines')
    return parser


def parse_options(argv=None):
    """Parse *argv* (default: ``sys.argv[1:]``) and return the options object.

    The delimiter is translated if it is one of the special escapes, and
    josh rosenberg compatibility mode is expanded into the options it implies.
    """
    opts, _args = build_parser().parse_args(argv)

    # allow some special delimiters to be translated
    opts.delim = DELIM_ESCAPES.get(opts.delim, opts.delim)

    # josh rosenberg compatibility mode
    if opts.josh:
        opts.delim = '\t'
        opts.swap = True
        opts.nopad = True
    return opts


def count_lines(lines, strip=False, nullok=False):
    """Return a dict mapping each item in *lines* to how often it was seen.

    A single trailing newline is always removed from each line; surrounding
    whitespace is removed as well when *strip* is true.  Empty items are
    ignored unless *nullok* is true.
    """
    seen = {}
    for line in lines:
        # only drop a newline that is really there: the final line of the
        # input may not be newline-terminated and must not lose a character
        key = line[:-1] if line.endswith('\n') else line
        if strip:
            key = key.strip()
        if key or nullok:
            seen[key] = seen.get(key, 0) + 1
    return seen


def sort_keys(seen, valsort=False, reverse=False):
    """Return the items of *seen* in output order.

    Items are sorted by name, or by descending count when *valsort* is true
    (items with equal counts stay in name order, so output is deterministic).
    *reverse* flips the final order.
    """
    keys = sorted(seen)
    if valsort:
        # the sort is stable, so ties keep the name order established above
        keys.sort(key=seen.get, reverse=True)
    if reverse:
        keys.reverse()
    return keys


def guess_graph_width(kmax, environ=None):
    """Guess how many characters to use for the graph.

    The screen width is taken from ``COLUMNS`` in *environ* (default:
    ``os.environ``), falling back to 80 when it is unset or not an integer.
    The result is the distance between that width and *kmax* (the longest
    item), capped at 40.
    """
    if environ is None:
        environ = os.environ
    try:
        # environment values are strings, so convert before doing arithmetic
        width = int(environ.get("COLUMNS", DEFAULT_COLUMNS))
    except (TypeError, ValueError):
        width = DEFAULT_COLUMNS
    return min(abs(width - kmax), MAX_AUTO_GRAPH_WIDTH)


def format_rows(seen, keys, opts):
    """Return the list of output lines (without newlines) for *keys*.

    *seen* maps items to counts, *keys* gives the output order and *opts* is
    the options object from parse_options().
    """
    if not keys:
        return []

    # max key length, max count and total count; also the printed length of
    # the max count
    kmax = max(len(key) for key in keys)
    cmax = max(seen[key] for key in keys)
    total = sum(seen[key] for key in keys)
    smax = len(str(cmax))

    # if we need to draw and were not told a width, guess one from the screen
    hwidth = opts.hwidth
    if opts.draw and hwidth is None:
        hwidth = guess_graph_width(kmax)

    rows = []
    for key in keys:
        count = seen[key]

        # figure out how to draw the value
        if opts.draw:
            nstar = int(round(hwidth * (float(count) / cmax)))
            value = '%*s' % (hwidth, '*' * nstar)
        elif opts.percent:
            value = '%5.1f%%' % (float(count) * 100 / total)
        else:
            value = '%*d' % (smax, count)

        # figure out which order to write them in; the key is only padded
        # when it comes first, since trailing padding would be pointless
        if opts.swap:
            vkey = key if opts.nopad else '%-*s' % (kmax, key)
            rows.append("%s%s%s" % (vkey, opts.delim, value))
        else:
            rows.append("%s%s%s" % (value, opts.delim, key))
    return rows


def main(argv=None, infile=None, outfile=None):
    """Run the tool: read items from *infile*, write the table to *outfile*.

    *argv* defaults to ``sys.argv[1:]``, *infile* to ``sys.stdin`` and
    *outfile* to ``sys.stdout``.  Returns the process exit status (0).
    """
    opts = parse_options(argv)
    if infile is None:
        infile = sys.stdin
    if outfile is None:
        outfile = sys.stdout

    seen = count_lines(infile, strip=opts.strip, nullok=opts.nullok)
    keys = sort_keys(seen, valsort=opts.valsort, reverse=opts.reverse)
    for row in format_rows(seen, keys, opts):
        outfile.write(row + '\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())