# better_lang_mapper

#!/usr/bin/env python

import sys
import string

# Base number of points used by the scoring expression in main().
max_points = 100
# Not referenced in the visible part of this module; the value matches the
# "200" in the *_200_most_frequent.txt file names loaded by main().
number_of_most_popular_words = 200
# Set containing every character of string.printable.
printable = set(string.printable)

def read_file_to_dictionary2(filename, dict):
    """Load ``<value> <key>`` lines from *filename* into *dict*.

    Each usable line holds exactly two whitespace-separated fields.  Anything
    from the first ``@`` onwards in the first field is dropped, the remainder
    is converted to ``float`` and stored under the second field.  Lines that
    do not have exactly two fields, or whose value is not a number, are
    skipped (best-effort parsing).

    Args:
        filename: Path of the text file to read.
        dict: Mapping updated in place.  The name shadows the builtin but is
            kept so that callers passing it by keyword keep working.

    Returns:
        None; *dict* is mutated.

    Raises:
        IOError/OSError: If the file cannot be opened.
    """
    # The context manager closes the file even if processing a line fails.
    with open(filename) as source:
        for line in source:
            try:
                value, key = line.split()
                number = float(value.split('@')[0])
            except ValueError:
                # Wrong field count or non-numeric value: skip this line.
                # Only ValueError is swallowed so that real programming
                # errors (e.g. a non-mapping *dict*) are not hidden.
                continue
            dict[key] = number

def read_file_to_dictionary(filename, dict):
    """Load list-style ``[value, 'key']`` lines from *filename* into *dict*.

    Quotes, square brackets and commas are removed from every line, and the
    remainder is split on whitespace into ``value`` and ``key``.  Anything
    from the first ``@`` onwards in ``value`` is dropped and the rest is
    stored as a ``float`` under ``key``.  Lines that are empty once the
    punctuation is removed (for example a trailing blank line) are skipped.

    Args:
        filename: Path of the text file to read.
        dict: Mapping updated in place.  The name shadows the builtin but is
            kept so that callers passing it by keyword keep working.

    Returns:
        None; *dict* is mutated.

    Raises:
        ValueError: If a non-blank line does not yield exactly two fields or
            its value is not a number.
        IOError/OSError: If the file cannot be opened.
    """
    # The context manager closes the file even if a malformed line raises.
    with open(filename) as source:
        for line in source:
            for junk in ("'", "[", "]", ","):
                line = line.replace(junk, "")

            # split() already treats spaces and tabs alike, so no explicit
            # space-to-tab conversion is needed.
            fields = line.split()
            if not fields:
                continue

            value, key = fields
            dict[key] = float(value.split('@')[0])


def read_input(file):
    """Lazily yield each line of *file* as a list of whitespace-split fields.

    Trailing whitespace is removed from the line before it is split, so a
    blank line produces an empty list.
    """
    for raw_line in file:
        trimmed = raw_line.rstrip()
        fields = trimmed.split()
        yield fields


def main(separator='\t'):
    """Score words read from stdin against six per-language frequency lists.

    The frequency lists are loaded from ``<code>_200_most_frequent.txt`` in
    the current working directory for the codes en, pl, hu, de, sv and nl.

    Every stdin line is split on whitespace; the first field is a frequency
    (anything from the first ``@`` onwards is ignored) and the second field is
    the word.  For each language whose list contains the word a score is
    computed from ``max_points`` and the difference between the input
    frequency and the listed frequency; other languages get ``0``.  A line is
    printed only when at least one language matched, as the word followed by
    the six scores in the order en, pl, hu, de, sv, nl.

    Args:
        separator: String placed between the output fields (default: tab).

    Raises:
        ValueError: If the frequency field of an input line is not a number.
    """
    # Order matters: it defines the order of the output columns.
    language_codes = ('en', 'pl', 'hu', 'de', 'sv', 'nl')
    word_lists = []
    for code in language_codes:
        table = {}
        read_file_to_dictionary(code + '_200_most_frequent.txt', table)
        word_lists.append(table)

    for fields in read_input(sys.stdin):
        # Blank or truncated lines carry no (frequency, word) pair.
        if len(fields) < 2:
            continue

        freq = float(fields[0].split('@')[0])
        word = fields[1].strip()

        # NOTE(review): this skips every word LONGER than two characters, so
        # only words of length <= 2 are ever scored.  That may be inverted
        # (e.g. "< 2" intended) -- confirm before changing; kept as-is here.
        if len(word) > 2:
            continue

        scores = []
        matched = False
        for table in word_lists:
            if word in table:
                # Algebraically max_points + 1 - 100 * (freq - table[word]);
                # the original expression is kept verbatim so the printed
                # floating-point values do not change.
                scores.append(
                    max_points + 1.0
                    - ((1.0 - table[word]) - (1.0 - freq)) * 100.0
                )
                matched = True
            else:
                scores.append(0)

        if matched:
            # A single parenthesised argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(separator.join([word] + [str(score) for score in scores]))

# Run the mapper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()