View difference between Paste ID: Lx4VnbzJ and 0hNciRka
SHOW: | | - or go back to the newest paste.
1
#!/usr/bin/python2
2
3
4-
import string
4+
"""Object-based word count demonstration"""
5
6
7
import sys
8
import collections
9
import csv
10
import re
11
12
13
class WordCount(object):
14
    """Count of words in a text
15
16
    Attributes:
17-
    with open(infilename, 'r') as infile:
17+
    counts -- collections.Counter of words
18-
        text = infile.read()
18+
    """
19-
    wordstring = text.translate(None, string.punctuation)
19+
    def __init__(self, counts):
20-
    wordlist = wordstring.split()
20+
        """Initializes a WordCount
21-
    counts = collections.Counter(wordlist)
21+
22-
    ordered = counts.most_common()
22+
        Do not call directly, use WordCount.fromfile or WordCount.fromstring
23-
    with open(outfilename, 'w') as outfile:
23+
        instead
24-
        writer = csv.writer(outfile)
24+
        """
25-
        writer.writerows(ordered)
25+
        self.counts = counts
26
27
    @classmethod
28
    def fromfile(cls, filename):
29
        """Creates a WordCount for the given file"""
30
        with open(filename, 'r') as infile:
31
            text = infile.read()
32
        return cls.fromstring(text)
33
34
    @classmethod
35
    def fromstring(cls, text):
36
        """Creates a WordCount for the given string"""
37
        wordlist = re.findall(r'\w+', text)
38
        counts = collections.Counter(wordlist)
39
        return cls(counts)
40
41
    @property
42
    def ordered(self):
43
        """List of (word, count) tuples in descending frequency"""
44
        items = self.counts.items()
45
        items.sort(key=lambda (word, count): (-count, word))
46
        return items
47
48
    def to_csv(self, filename):
        """Write the ordered counts to a CSV file.

        One "word,count" row is written for each entry of self.ordered, in
        that order. An existing file at filename is overwritten.

        Arguments:
        filename -- path of the CSV file to create
        """
        # The csv module emits its own line terminators, so the file must
        # not translate newlines a second time (which yields blank rows on
        # platforms that do so). Python 2 wants binary mode for that;
        # Python 3 wants text mode with newline=''.
        if sys.version_info[0] < 3:
            outfile = open(filename, 'wb')
        else:
            outfile = open(filename, 'w', newline='')
        with outfile:
            csv.writer(outfile).writerows(self.ordered)
53
54
55
56
def main():
    """Counts words (and numbers) in a file.

    Reads the input and output paths from the first two command-line
    arguments, counts the words of the input file and writes the ordered
    counts to the output file as CSV. Arguments beyond the second are
    ignored.

    When fewer than two arguments are given, a usage message is printed and
    the process exits with status 1.
    """
    try:
        infilename, outfilename = sys.argv[1:3]
    except ValueError:
        # Slicing never raises IndexError; a short argv makes the tuple
        # unpacking above fail with ValueError instead.
        print("Usage: %s INFILE.txt OUTFILE.csv" % sys.argv[0])
        sys.exit(1)
    count = WordCount.fromfile(infilename)
    count.to_csv(outfilename)
65
66
67
if __name__ == '__main__':
68
    main()