Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python2
- """Object-based word count demonstration"""
- import sys
- import collections
- import csv
- import re
class WordCount(object):
    """Count of words in a text.

    Attributes:
        counts -- collections.Counter mapping each word to the number of
            times it occurs
    """

    def __init__(self, counts):
        """Initializes a WordCount.

        Do not call directly, use WordCount.fromfile or WordCount.fromstring
        instead.
        """
        self.counts = counts

    @classmethod
    def fromfile(cls, filename):
        """Creates a WordCount for the contents of the given file."""
        with open(filename, 'r') as infile:
            text = infile.read()
        return cls.fromstring(text)

    @classmethod
    def fromstring(cls, text):
        """Creates a WordCount for the given string.

        A word is any maximal run of regex word characters, so numbers are
        counted as words too. Matching is case-sensitive.
        """
        wordlist = re.findall(r'\w+', text)
        return cls(collections.Counter(wordlist))

    @property
    def ordered(self):
        """List of (word, count) tuples in descending frequency.

        Words with the same count are listed in ascending word order.
        """
        # sorted() with an index-based key behaves the same on Python 2 and
        # Python 3; tuple-parameter lambdas and sorting the result of
        # items() in place only work on Python 2.
        return sorted(
            self.counts.items(),
            key=lambda item: (-item[1], item[0]),
        )

    def to_csv(self, filename):
        """Writes the ordered counts to file, one "word,count" row each."""
        # The csv module handles line endings itself, so the file must be
        # opened without newline translation: newline='' on Python 3 and
        # binary mode on Python 2. Plain 'w' yields "\r\r\n" on Windows.
        if sys.version_info[0] >= 3:
            outfile = open(filename, 'w', newline='')
        else:
            outfile = open(filename, 'wb')
        with outfile:
            csv.writer(outfile).writerows(self.ordered)
def main():
    """Counts words (and numbers) in a file.

    Takes the input and output file names from the first two command-line
    arguments, counts the words in the input file and writes the ordered
    counts to the output file as CSV. When fewer than two arguments are
    given, prints a usage message and exits with status 1.
    """
    try:
        # A slice never raises IndexError; with too few arguments it is the
        # two-name unpacking that fails, and it does so with ValueError.
        infilename, outfilename = sys.argv[1:3]
    except ValueError:
        # Single-argument call form prints identically on Python 2 and 3.
        print("Usage: %s INFILE.txt OUTFILE.csv" % sys.argv[0])
        sys.exit(1)
    count = WordCount.fromfile(infilename)
    count.to_csv(outfilename)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement