Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import sys
- import string
# Set of every character in string.printable.
# NOTE(review): not referenced anywhere else in the visible code.
printable = set(string.printable)
# NOTE(review): not referenced in the visible code; presumably mirrors the
# "200" in the *_200_most_frequent.txt file names -- confirm before relying on it.
number_of_most_popular_words = 200
# Base term of every per-language score computed in main().
max_points = 100
def read_file_to_dictionary2(filename, dict):
    """Load ``<value> <key>`` pairs from a text file into ``dict``.

    Each usable line holds exactly two whitespace-separated fields: a
    numeric value (anything from the first ``@`` onwards is discarded) and
    the key to store it under.  Lines that do not have that shape are
    skipped, so loading is best-effort.

    Args:
        filename: Path of the file to read.
        dict: Mapping updated in place; a later line overwrites an earlier
            one with the same key.  The name shadows the builtin but is
            kept so existing keyword callers keep working.

    Returns:
        None.  Results are delivered through ``dict``.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # ``with`` guarantees the handle is closed even if reading fails.
    with open(filename) as handle:
        for line in handle:
            try:
                value, key = line.split()
                number = float(value.split('@')[0])
            except ValueError:
                # Wrong field count or non-numeric value: skip this line.
                # Only ValueError is caught so unrelated failures (and
                # KeyboardInterrupt) are no longer silently swallowed.
                continue
            dict[key] = number
def read_file_to_dictionary(filename, dict):
    """Load a word-frequency list file into ``dict``.

    Quote, square-bracket and comma characters are removed from every line
    before parsing.  What remains must be two whitespace-separated fields:
    a numeric value (anything from the first ``@`` onwards is discarded)
    followed by the key to store it under.

    Args:
        filename: Path of the file to read.
        dict: Mapping updated in place; a later line overwrites an earlier
            one with the same key.  The name shadows the builtin but is
            kept so existing keyword callers keep working.

    Returns:
        None.  Results are delivered through ``dict``.

    Raises:
        ValueError: If a non-empty line does not contain exactly two
            fields, or its value is not numeric.
        IOError / OSError: If the file cannot be opened or read.
    """
    # ``with`` guarantees the handle is closed even if reading fails.
    with open(filename) as handle:
        for line in handle:
            for unwanted in "'[],":
                line = line.replace(unwanted, '')
            # split() already treats spaces and tabs alike, so no
            # space-to-tab normalisation is needed.
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline at end of file) used
                # to abort the whole load with an unpacking error.
                continue
            value, key = fields
            dict[key] = float(value.split('@')[0])
def read_input(file):
    """Lazily yield each line of ``file`` as a list of whitespace-split tokens.

    Trailing whitespace (including the newline) is removed before the line
    is split, so an empty or whitespace-only line yields an empty list.
    """
    for raw_line in file:
        trimmed = raw_line.rstrip()
        tokens = trimmed.split()
        yield tokens
# Frequency-list files, in the order their scores appear in the output
# columns: en, pl, hu, de, sv, nl.
_WORD_LIST_FILES = (
    'en_200_most_frequent.txt',
    'pl_200_most_frequent.txt',
    'hu_200_most_frequent.txt',
    'de_200_most_frequent.txt',
    'sv_200_most_frequent.txt',
    'nl_200_most_frequent.txt',
)


def _frequency_score(reference_freq, observed_freq):
    """Return the score for a word found in one language's frequency list."""
    # Expression kept exactly as in the original per-language branches so
    # the floating-point results are unchanged.
    return 0 + max_points + 1.0 - (
        (1.0 - reference_freq) - (1.0 - observed_freq)) * 100.0


def main(separator='\t'):
    """Score words read from stdin against six per-language frequency lists.

    Every input line is split on whitespace; the first field is the word's
    frequency (anything from the first ``@`` onwards is discarded) and the
    second field is the word.  For each word present in at least one list,
    one line is printed: the word followed by its en, pl, hu, de, sv and nl
    scores.  A language whose list lacks the word scores ``0``.

    Args:
        separator: String placed between the output columns.  The default
            tab reproduces the previous hard-coded output format.

    Raises:
        ValueError: If the frequency field of an input line is not numeric,
            or a frequency-list file contains a malformed line.
        IOError / OSError: If a frequency-list file cannot be read.
    """
    word_lists = []
    for filename in _WORD_LIST_FILES:
        frequencies = {}
        read_file_to_dictionary(filename, frequencies)
        word_lists.append(frequencies)

    for fields in read_input(sys.stdin):
        if len(fields) < 2:
            # Blank or truncated line: nothing to score (previously this
            # raised IndexError and aborted the run).
            continue

        freq = float(fields[0].split('@')[0])
        word = fields[1].strip()

        # NOTE(review): this drops every word LONGER than two characters,
        # which looks inverted for a most-frequent-words comparison.
        # Behaviour is preserved as-is; confirm the intended direction.
        if len(word) > 2:
            continue

        scores = []
        matched = False
        for frequencies in word_lists:
            # ``in`` replaces dict.has_key(), which Python 3 removed.
            if word in frequencies:
                scores.append(_frequency_score(frequencies[word], freq))
                matched = True
            else:
                scores.append(0)

        if matched:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(separator.join([word] + [str(score) for score in scores]))
# Run the scorer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment