Skip to main content

Get metrics for text read from stdin: line count, word count, and character count. Also shows how to list the 100 most common words.

#!/usr/bin/env python

# fileinput will read stdin
# @link http://stackoverflow.com/questions/1450393/how-do-you-read-from-stdin-in-python
# @link http://docs.python.org/2/library/fileinput.html
import fileinput

# Counter can count things :D
# @link http://stackoverflow.com/questions/3594514/how-to-find-most-common-elements-of-a-list
from collections import Counter

# Get metrics for text files
#
# Number of lines
# Number of words
# Number of characters
#
# Usage:
#     $ cat file-1 file-2...file-n | count.py

def count_metrics(lines):
    """Tally line, word and character counts plus per-word frequencies.

    ``lines`` is any iterable of text lines (for example
    ``fileinput.input()``). Words are whatever ``str.split()`` yields, i.e.
    runs of non-whitespace characters. The character count is the sum of
    ``len(line)`` over the lines as delivered, so it includes any line
    terminators they carry.

    Returns a tuple ``(line_count, word_count, char_count, frequencies)``
    where ``frequencies`` is a ``collections.Counter`` keyed by word.
    """
    line_count = word_count = char_count = 0
    frequencies = Counter()

    for line in lines:
        tokens = line.split()
        line_count += 1
        word_count += len(tokens)
        char_count += len(line)
        # Feed the Counter incrementally instead of concatenating every
        # line's words into one ever-growing list: list + list copies the
        # whole accumulated list on each line, which is quadratic.
        frequencies.update(tokens)

    return line_count, word_count, char_count, frequencies


def format_report(line_count, word_count, char_count, frequencies, top=100):
    """Build the two text sections of the report.

    Returns ``(summary, ranking)``: ``summary`` is the three-line totals
    block and ``ranking`` lists the ``top`` most common words, one
    ``"<count> : <word>"`` entry per line (an empty string when there are
    no words).
    """
    summary = "Lines:      %d\nWords:      %d\nCharacters: %d" % (
        line_count, word_count, char_count)
    # most_common() yields (word, count) pairs; the report shows the count
    # first.
    # @link http://docs.python.org/2/library/string.html#format-string-syntax
    ranking = '\n'.join(
        '{} : {}'.format(count, word)
        for word, count in frequencies.most_common(top))
    return summary, ranking


def main():
    """Read stdin (or the files named on the command line) and print the report."""
    summary, ranking = format_report(*count_metrics(fileinput.input()))
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print(summary)
    print(ranking)


if __name__ == "__main__":
    main()