desdemona

better_lang_mapper

Jun 8th, 2016
496
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.25 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import sys
  4. import string
  5.  
  # Set of the characters in string.printable (not referenced elsewhere in
  # the visible part of this file).
  printable = set(string.printable)
  # Presumably the size of each '*_200_most_frequent.txt' word list -- not
  # referenced elsewhere in the visible part of this file; verify before use.
  number_of_most_popular_words = 200
  # Base value of the per-language score computed in main().
  max_points = 100
  9.  
  def read_file_to_dictionary2(filename, dict):
      """Fill *dict* from a file of ``<value> <key>`` lines, skipping bad lines.

      Each line must consist of exactly two whitespace-separated fields.
      Anything from the first ``'@'`` onwards is dropped from the first field
      and the remainder is converted with ``float``; the result is stored
      under the second field: ``dict[key] = value``.

      Lines that do not have exactly two fields, or whose value is not a
      valid float, are ignored (best-effort load).

      Args:
          filename: path of the text file to read.
          dict: mapping that is updated in place (the name shadows the
              builtin but is kept for backward compatibility with callers).

      Raises:
          IOError/OSError: if the file cannot be opened.
      """
      # ``with`` guarantees the handle is closed even if reading fails.
      with open(filename) as f:
          for line in f:
              try:
                  value, key = line.split()
                  dict[key] = float(value.split('@')[0])
              except ValueError:
                  # Wrong field count or non-numeric value: skip this line
                  # only, instead of hiding every possible exception.
                  continue
  24.  
  def read_file_to_dictionary(filename, dict):
      """Fill *dict* from a file of ``<value> <key>`` lines (strict variant).

      Before splitting, every ``'``, ``[``, ``]`` and ``,`` character is
      removed from the line, so a line such as ``['0.5@x', 'the']`` is read
      as the two fields ``0.5@x`` and ``the``. Anything from the first
      ``'@'`` onwards is dropped from the first field and the remainder is
      converted with ``float``; the result is stored as ``dict[key] = value``.

      Lines that are empty after this clean-up are skipped. Any other
      malformed line is an error.

      Args:
          filename: path of the text file to read.
          dict: mapping that is updated in place (the name shadows the
              builtin but is kept for backward compatibility with callers).

      Raises:
          ValueError: if a non-empty line does not have exactly two fields
              or its value is not a valid float.
          IOError/OSError: if the file cannot be opened.
      """
      # ``with`` guarantees the handle is closed even when a line fails.
      with open(filename) as f:
          for line in f:
              for junk in "'[],":
                  line = line.replace(junk, '')
              # split() with no argument already splits on any whitespace,
              # so no space-to-tab normalisation is needed.
              fields = line.split()
              if not fields:
                  continue
              value, key = fields
              dict[key] = float(value.split('@')[0])
  37.  
  38.  
  def read_input(file):
      """Yield each line of *file* as a list of whitespace-separated fields.

      Args:
          file: any iterable of text lines (main() passes ``sys.stdin``).

      Yields:
          list of str: the fields of one line; an empty list for a blank
          line.
      """
      for line in file:
          # split() with no argument ignores leading/trailing whitespace,
          # so a separate rstrip() is unnecessary.
          yield line.split()
  42.  
  43.  
  44.  
  45.  
  46.  
  # Language codes in output-column order; the word list for each code is
  # read from '<code>_200_most_frequent.txt'.
  _LANGUAGES = ('en', 'pl', 'hu', 'de', 'sv', 'nl')


  def _language_score(reference_freq, freq):
      """Return the score for a word found in one language's word list."""
      return max_points + 1.0 - ((1.0 - reference_freq) - (1.0 - freq)) * 100.0


  def main(separator='\t'):
      """Score words read from stdin against six per-language word lists.

      Loads one word list per language (en, pl, hu, de, sv, nl) from
      ``<code>_200_most_frequent.txt`` in the current directory. Each stdin
      line is split on whitespace: the text before the first ``'@'`` of the
      first field is parsed as a float, and the second field is the word.

      For every word found in at least one list, one line is printed: the
      word followed by six scores in the order en, pl, hu, de, sv, nl,
      joined by *separator*. A language whose list does not contain the
      word gets ``0``.

      Args:
          separator: string placed between the output columns.

      Raises:
          ValueError: if the first field of an input line is not numeric,
              or a word-list file contains a malformed line.
          IOError/OSError: if a word-list file cannot be opened.
      """
      word_lists = []
      for lang in _LANGUAGES:
          words = {}
          read_file_to_dictionary(lang + '_200_most_frequent.txt', words)
          word_lists.append(words)

      for fields in read_input(sys.stdin):
          # Blank or one-field lines carry no (value, word) pair; skip them
          # rather than failing with IndexError.
          if len(fields) < 2:
              continue

          freq = float(fields[0].split('@')[0])
          word = fields[1].strip()

          # NOTE(review): this skips every word LONGER than two characters,
          # so only 1-2 character words are ever scored. It looks inverted
          # (perhaps ``< 2`` or ``<= 2`` was intended) -- behaviour is kept
          # as in the original; confirm the intent before changing it.
          if len(word) > 2:
              continue

          scores = [0] * len(word_lists)
          matched = False
          for index, words in enumerate(word_lists):
              if word in words:
                  scores[index] = _language_score(words[word], freq)
                  matched = True

          if matched:
              # Single-argument print(...) works on both Python 2 and 3.
              print(separator.join([word] + [str(score) for score in scores]))
  121.  
  # Run the mapper only when this file is executed as a script, not when it
  # is imported as a module.
  if __name__ == "__main__":
      main()
Add Comment
Please, Sign In to add comment