bin/histo

106 lines
3.9 KiB
Plaintext
Raw Permalink Normal View History

2010-08-02 11:14:18 -04:00
#!/usr/bin/python
import optparse
import os
import sys
# maps every "item" (input line) that has been seen to the number of times it
# has been seen so far.
seen = {}

# every command-line switch, as (short flag, long flag, add_option keywords).
# the order of this table is the order in which --help lists the options.
option_table = [
    ('-d', '--draw',
     dict(action='store_true', dest='draw', default=False,
          help='draw histogram with horizontal graph')),
    ('-D', '--delim',
     dict(action='store', type="string", dest='delim', default=' ',
          help='use STR as delimiter (default: " ")', metavar='STR')),
    ('-f', '--frequency',
     dict(action='store_true', dest='valsort', default=False,
          help='sort by frequency')),
    ('-J', '--josh',
     dict(action='store_true', dest='josh', default=False,
          help='josh rosenberg compatibility mode')),
    ('-N', '--nopad',
     dict(action='store_true', dest='nopad', default=False,
          help='do not column-align output')),
    ('-p', '--percent',
     dict(action='store_true', dest='percent', default=False,
          help='show percent rather than count')),
    ('-r', '--reverse',
     dict(action='store_true', dest='reverse', default=False,
          help='reverse sort')),
    ('-s', '--strip',
     dict(action='store_true', dest='strip', default=False,
          help='strip input lines')),
    ('-S', '--swap',
     dict(action='store_true', dest='swap', default=False,
          help='swap output fields')),
    ('-w', '--width',
     dict(action='store', type='int', dest='hwidth', default=None,
          help='use NUM characters for graph drawing', metavar="NUM")),
    ('-z', '--null',
     dict(action='store_true', dest='nullok', default=False,
          help='allow null lines')),
]

parser = optparse.OptionParser()
for short_flag, long_flag, settings in option_table:
    parser.add_option(short_flag, long_flag, **settings)
opts, args = parser.parse_args()

# the shell makes it awkward to pass a literal tab, vertical tab or NUL, so
# the two-character spellings \t, \v and \0 are translated to the real thing.
delims = {'\\t': '\t', '\\v': '\v', '\\0': '\0'}
if opts.delim in delims:
    opts.delim = delims[opts.delim]

# josh rosenberg compatibility mode: tab-delimited, key first, no padding.
# this overrides whatever -D, -S and -N said.
if opts.josh:
    opts.delim, opts.swap, opts.nopad = '\t', True, True
# process each line on stdin and tally it in `seen`. the trailing newline is
# always removed, and --strip additionally removes surrounding whitespace.
# by default empty lines are ignored, but --null causes them to be counted
# as well.
for line in sys.stdin:
    # only drop the final character when it really is a newline: the last
    # line of the input may not be newline-terminated, and blindly slicing
    # would eat a real character of the key (e.g. "abc" counted as "ab").
    if line.endswith('\n'):
        key = line[:-1]
    else:
        key = line
    if opts.strip:
        key = key.strip()
    if key or opts.nullok:
        seen[key] = seen.get(key, 0) + 1
# sort either by key or by count, depending on what the user wanted. with
# --frequency the most common keys come first; sorted() is stable, so keys
# with equal counts stay in the order the dictionary yields them.
if opts.valsort:
    keys = sorted(seen, key=seen.get, reverse=True)
else:
    keys = sorted(seen)
# optionally flip the whole ordering if the user wanted
if opts.reverse:
    keys = keys[::-1]
# determine the max key length (kmax), the max count (cmax) and the total
# count (total); smax is the printed length of the max count, used to
# right-align the count column.
kmax, cmax, total = 0, 0, 0
for key in keys:
    cmax = max(seen[key], cmax)
    kmax = max(len(key), kmax)
    total += seen[key]
smax = len(str(cmax))
# if we need to draw, we should make some guesses about the screen size. we
# should probably never try to draw more than 40 characters worth of stars.
# environment values are always strings, so COLUMNS must be converted before
# doing arithmetic with it; fall back to 80 when it is unset or not a number.
try:
    width = int(os.environ.get("COLUMNS", 80))
except ValueError:
    width = 80
# an explicit --width always wins over the guess
if opts.hwidth is None:
    opts.hwidth = min(abs(width - kmax), 40)
# emit one output row per key, in the order chosen above.
for key in keys:
    count = seen[key]

    # the value column is a bar of stars, a percentage, or the plain count
    if opts.draw:
        # scale the bar so that the most frequent key fills the full width
        ratio = float(count) / cmax
        nstar = int(round(opts.hwidth * ratio))
        value = '%*s' % (opts.hwidth, '*' * nstar)
    elif opts.percent:
        perc = float(count) * 100 / total
        value = '%5.1f%%' % perc
    else:
        value = '%*d' % (smax, count)

    # the key column is padded to the longest key unless --nopad was given
    if opts.nopad:
        vkey = key
    else:
        vkey = '%-*s' % (kmax, key)

    # figure out which order to write them in. the padded key is only used
    # when the key comes first; in the default layout the key is the last
    # field, so it is written as-is.
    if opts.swap:
        print("%s%s%s" % (vkey, opts.delim, value))
    else:
        print("%s%s%s" % (value, opts.delim, key))