Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
import string
import sys

# Characters kept when input lines are sanitized. string.printable is
# ASCII-only; a set gives constant-time membership tests.
printable = set(string.printable)

# Base of the per-word score; matches the "200" in the word-list file names.
number_of_most_popular_words = 200
def read_file_to_dictionary(filename, dict):
    """Load ``word weight`` records from *filename* into *dict*.

    Each line is split on whitespace. A line is stored as
    ``dict[word] = int(weight)`` only when it has exactly two fields and
    the second one parses as an integer; any other line is skipped.

    Args:
        filename: Path of the text file to read.
        dict: Mapping that is updated in place. The name shadows the
            builtin but is kept so existing keyword callers still work.

    Returns:
        None. The result is delivered by mutating *dict*.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even if reading fails midway.
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if len(fields) != 2:
                # Blank line or wrong number of columns: not a record.
                continue
            word, weight = fields
            try:
                dict[word] = int(weight)
            except ValueError:
                # Non-numeric weight: skip this record only. Other errors
                # (e.g. an unusable *dict* argument) are allowed to surface.
                continue
# Superseded by read_file_to_dictionary(); kept for reference only.
# f = open('en_200_most_frequent.txt')
# filecontent = f.readlines()
# f.close()
# en_word_list = {}
# for line in filecontent:
#     try:
#         word, weight = line.split()
#         weight = int(weight)
#         en_word_list[word] = weight
#     except:
#         pass
# One table per language, each mapping a word to the integer stored in the
# second column of its word-list file (presumably the word's frequency rank).
en_word_list = {}
pl_word_list = {}
hu_word_list = {}
de_word_list = {}
sv_word_list = {}
nl_word_list = {}

# Fill the tables in the same order as before: en, pl, hu, de, sv, nl.
for _filename, _word_list in (
    ('en_200_most_frequent.txt', en_word_list),
    ('pl_200_most_frequent.txt', pl_word_list),
    ('hu_200_most_frequent.txt', hu_word_list),
    ('de_200_most_frequent.txt', de_word_list),
    ('sv_200_most_frequent.txt', sv_word_list),
    ('nl_200_most_frequent.txt', nl_word_list),
):
    read_file_to_dictionary(_filename, _word_list)
# Tables in output-column order: en, pl, hu, de, sv, nl.
word_lists = (
    en_word_list,
    pl_word_list,
    hu_word_list,
    de_word_list,
    sv_word_list,
    nl_word_list,
)

# Read "word position" records from stdin and print, for each kept word, one
# tab-separated line: the word followed by one score per language.
for line in sys.stdin:
    # Keep only characters found in `printable`. Joining a generator yields a
    # string on both Python 2 and 3, whereas filter() returns an iterator on
    # Python 3.
    line = "".join(ch for ch in line if ch in printable)

    fields = line.split()
    if len(fields) != 2:
        # Blank or malformed record: skip it instead of aborting the run.
        continue
    word, position = fields

    try:
        position = int(position)
    except ValueError:
        # A non-numeric position cannot be scored. The word is still emitted
        # with all-zero scores rather than failing in the arithmetic below.
        position = None

    # NOTE(review): as written, only words of at most two characters are
    # scored and longer words are dropped. Behavior kept unchanged; confirm
    # that this comparison is not inverted.
    if len(word) > 2:
        continue

    scores = []
    for word_list in word_lists:
        score = 0
        if position is not None and word in word_list:
            # Base value minus the gap between the table's stored value for
            # the word and the position given on the input line.
            score = number_of_most_popular_words + 1 - (word_list[word] - position)
        scores.append(score)

    # A single parenthesized argument prints identically on Python 2 and 3.
    print("\t".join([word] + [str(score) for score in scores]))
Add Comment
Please, Sign In to add comment