106 lines
3.9 KiB
Plaintext
106 lines
3.9 KiB
Plaintext
|
#!/usr/bin/python
"""Count identical lines read from stdin and print one row per distinct line.

Each row pairs the line (the "key") with either its count, its share of all
counted lines as a percentage, or a bar of '*' characters scaled against the
most frequent line.  The code avoids Python-2-only constructs so the same
file runs under Python 2 and Python 3.
"""

import optparse
import os
import sys

# allow some special delimiters to be translated
DELIMS = {'\\t': '\t', '\\v': '\v', '\\0': '\0'}

# screen width assumed when $COLUMNS is unset or is not a number
DEFAULT_COLUMNS = 80

# we should probably never try to draw more than 40 characters worth of stars
MAX_AUTO_STARS = 40


def _build_parser():
    """Return the option parser describing every command-line switch."""
    parser = optparse.OptionParser()
    parser.set_defaults(draw=False, nullok=False, valsort=False, reverse=False,
                        hwidth=None, percent=False, strip=False, delim=' ',
                        swap=False, josh=False, nopad=False)
    parser.add_option('-d', '--draw', action='store_true', dest='draw',
                      help='draw histogram with horizontal graph')
    parser.add_option('-D', '--delim', action='store', type="string",
                      dest='delim',
                      help='use STR as delimiter (default: " ")',
                      metavar='STR')
    parser.add_option('-f', '--frequency', action='store_true', dest='valsort',
                      help='sort by frequency')
    parser.add_option('-J', '--josh', action='store_true', dest='josh',
                      help='josh rosenberg compatibility mode')
    parser.add_option('-N', '--nopad', action='store_true', dest='nopad',
                      help='do not column-align output')
    parser.add_option('-p', '--percent', action='store_true', dest='percent',
                      help='show percent rather than count')
    parser.add_option('-r', '--reverse', action='store_true', dest='reverse',
                      help='reverse sort')
    parser.add_option('-s', '--strip', action='store_true', dest='strip',
                      help='strip input lines')
    parser.add_option('-S', '--swap', action='store_true', dest='swap',
                      help='swap output fields')
    parser.add_option('-w', '--width', action='store', type='int',
                      dest='hwidth',
                      help='use NUM characters for graph drawing',
                      metavar="NUM")
    parser.add_option('-z', '--null', action='store_true', dest='nullok',
                      help='allow null lines')
    return parser


def _count_lines(lines, strip=False, nullok=False):
    """Return a dict mapping each distinct line to how often it was seen.

    lines  -- iterable of input lines, each optionally ending in a newline
    strip  -- also strip surrounding whitespace from every line
    nullok -- count empty lines too (they are ignored by default)
    """
    seen = {}
    for line in lines:
        # Remove the newline only when it is really there: the last line of
        # the input may be unterminated, and blindly chopping one character
        # would eat the final character of its text.
        key = line[:-1] if line.endswith('\n') else line
        if strip:
            key = key.strip()
        if key or nullok:
            seen[key] = seen.get(key, 0) + 1
    return seen


def _sort_keys(seen, valsort=False, reverse=False):
    """Return the keys of `seen` in output order.

    Keys are sorted alphabetically, or by descending count when `valsort` is
    set (ties are broken alphabetically so the result never depends on dict
    ordering).  `reverse` flips whichever order was chosen.
    """
    if valsort:
        keys = sorted(seen, key=lambda k: (-seen[k], k))
    else:
        keys = sorted(seen)
    # optionally reverse the sort if the user wanted
    if reverse:
        keys.reverse()
    return keys


def _screen_width(environ):
    """Return the terminal width taken from $COLUMNS, defaulting to 80.

    Environment values are strings, so the value is converted to int here;
    anything that does not parse as an integer falls back to the default.
    """
    try:
        return int(environ.get("COLUMNS", DEFAULT_COLUMNS))
    except (TypeError, ValueError):
        return DEFAULT_COLUMNS


def _format_rows(seen, keys, opts, kmax, cmax, total):
    """Yield one formatted output line (without newline) per key.

    kmax  -- length of the longest key, used to pad keys into a column
    cmax  -- largest count, used to scale the graph and size the count column
    total -- sum of all counts, used for percentages
    """
    smax = len(str(cmax))  # printed length of the max count
    for key in keys:
        count = seen[key]

        # figure out how to draw the key
        if opts.nopad:
            vkey = key
        else:
            vkey = '%-*s' % (kmax, key)

        # figure out how to draw the value
        if opts.draw:
            # hwidth * count / cmax rounded half-up, done in integers so the
            # result is the same on every Python version (round() treats
            # exact halves differently on Python 2 and Python 3).
            nstar = (2 * opts.hwidth * count + cmax) // (2 * cmax)
            value = '%*s' % (opts.hwidth, '*' * nstar)
        elif opts.percent:
            value = '%5.1f%%' % (float(count) * 100 / total)
        else:
            value = '%*d' % (smax, count)

        # figure out which order to write them in; the unpadded key is used
        # when it comes last because trailing padding would serve no purpose
        if opts.swap:
            yield "%s%s%s" % (vkey, opts.delim, value)
        else:
            yield "%s%s%s" % (value, opts.delim, key)


def main(argv=None, stdin=None, stdout=None, environ=None):
    """Run the histogram tool and return the process exit status (0).

    argv    -- option list without the program name (default: sys.argv[1:])
    stdin   -- iterable of input lines (default: sys.stdin)
    stdout  -- object with a write() method (default: sys.stdout)
    environ -- mapping consulted for COLUMNS (default: os.environ)
    """
    if stdin is None:
        stdin = sys.stdin
    if stdout is None:
        stdout = sys.stdout
    if environ is None:
        environ = os.environ

    (opts, args) = _build_parser().parse_args(argv)

    opts.delim = DELIMS.get(opts.delim, opts.delim)

    # josh rosenberg compatibility mode
    if opts.josh:
        opts.delim = '\t'
        opts.swap = True
        opts.nopad = True

    # process each line on stdin. we can optionally strip whitespace, and we
    # always strip a trailing newline. by default we ignore empty lines, but
    # nullok causes us to count them as well.
    seen = _count_lines(stdin, strip=opts.strip, nullok=opts.nullok)

    # sort either by key or by count, depending on what the user wanted
    keys = _sort_keys(seen, valsort=opts.valsort, reverse=opts.reverse)

    # determine max key length, max count, and total count
    kmax, cmax, total = 0, 0, 0
    for key in keys:
        kmax = max(len(key), kmax)
        cmax = max(seen[key], cmax)
        total += seen[key]

    # if we need to draw, we should make some guesses about the screen size.
    if opts.hwidth is None:
        opts.hwidth = min(abs(_screen_width(environ) - kmax), MAX_AUTO_STARS)

    for row in _format_rows(seen, keys, opts, kmax, cmax, total):
        stdout.write(row + '\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())