desdemona

better_freq_mapper

Jun 8th, 2016
475
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.04 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import sys
  4. from os import listdir
  5. from os.path import isfile, join
  6.  
  7. def read_number_from_file(filename):
  8.     total_count = 0
  9.     with open(filename, 'r') as content_file:
  10.         content = content_file.read()
  11.         content = content.split('\t')
  12.         content = content[1]
  13.     try:
  14.         total_count = int(content)
  15.     except ValueError:
  16.         return 1
  17.         pass
  18.     return total_count
  19.  
  20. def read_input(file, separator='\t'):
  21.     for line in file:
  22.         yield line.rstrip().split(separator)
  23.  
  24. def main():
  25.  
  26.     total_count = read_number_from_file("all_output.txt")
  27.     data = read_input(sys.stdin)
  28.  
  29.     for word, count in data:
  30.         try:
  31.             count = int(count)
  32.         except ValueError:
  33.             continue
  34.  
  35.         frequency = count/(1.0 * total_count)
  36.         if frequency < 0.000001:
  37.             continue
  38.  
  39.         forsortfrequency = 1.0 - frequency
  40.         key = "%.10f" % forsortfrequency + "@" + word
  41.         print '%s\t%s' % (key, word)
  42.  
  43. if __name__ == "__main__":
  44.     main()
Add Comment
Please, Sign In to add comment