Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import sys
- from os import listdir
- from os.path import isfile, join
def read_number_from_file(filename):
    """Return the integer held in the second tab-separated field of a file.

    The whole file is read as one string and split on tab characters; the
    piece at index 1 is parsed with ``int`` (which tolerates surrounding
    whitespace such as a trailing newline).

    Args:
        filename: Path of the file to read.

    Returns:
        The parsed integer, or 1 when the file has no second field (for
        example it is empty or contains no tab) or when that field is not
        a valid integer.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    with open(filename, 'r') as content_file:
        fields = content_file.read().split('\t')
    try:
        return int(fields[1])
    except (IndexError, ValueError):
        # A missing field is treated like an unparsable one: fall back to 1
        # instead of aborting the caller.
        return 1
def read_input(file, separator='\t'):
    """Lazily split each line of an iterable of text lines into fields.

    Args:
        file: Any iterable yielding text lines (for example ``sys.stdin``).
        separator: Field delimiter passed to ``str.split``; defaults to a tab.

    Yields:
        A list of the fields of each line, in order, with the line
        terminator removed.
    """
    for line in file:
        # Strip only the line terminator. A bare rstrip() would also remove
        # trailing separators and silently drop empty trailing fields.
        yield line.rstrip('\r\n').split(separator)
def main():
    """Print a frequency-derived key for every word/count line on stdin.

    The total is read from "all_output.txt" via ``read_number_from_file``.
    Each stdin line is expected to hold a word and a count separated by a
    tab. For every such line the relative frequency ``count / total`` is
    computed; words whose frequency is below 0.000001 are dropped. For the
    remaining words one line is printed: a key, a tab, and the word. The key
    is ``1.0 - frequency`` formatted with ten decimals, followed by "@" and
    the word.

    Lines that do not have exactly two fields, or whose count is not an
    integer, are skipped.

    Raises:
        ZeroDivisionError: If the total read from the file is 0.
    """
    total_count = read_number_from_file("all_output.txt")
    for fields in read_input(sys.stdin):
        # Skip malformed lines instead of crashing on tuple unpacking.
        if len(fields) != 2:
            continue
        word, count = fields
        try:
            count = int(count)
        except ValueError:
            continue
        # Multiply by 1.0 to force float division on Python 2 as well.
        frequency = count / (1.0 * total_count)
        if frequency < 0.000001:
            continue
        # Presumably inverted so that an ascending sort of the keys lists the
        # most frequent words first -- confirm against the downstream sort.
        forsortfrequency = 1.0 - frequency
        key = "%.10f" % forsortfrequency + "@" + word
        # Single-argument parenthesised print is valid on Python 2 and 3.
        print('%s\t%s' % (key, word))
# Run the mapper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment