Advertisement
Void-voiD

Untitled

Apr 5th, 2020
312
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.96 KB | None | 0 0
  1. import re
  2. import numpy as np
  3. from scipy import spatial as sp
  4.  
  5. amount_of_words = 0
  6. amount_of_sentences = 0
  7. sentences = []
  8. freq = {}
  9. text = open("2.3.1.txt", 'r')
  10.  
  11. for line in text:
  12.     amount_of_sentences += 1
  13.     current = re.split('[^a-z]', line.lower())
  14.     result = []
  15.     for cur in current:
  16.         if cur != '':
  17.             if freq.get(cur) is None:
  18.                 amount_of_words += 1
  19.                 cur_list = [0 for i in range(amount_of_sentences - 1)]
  20.                 cur_list.insert(amount_of_sentences - 1, 1)
  21.                 freq[cur] = cur_list
  22.             else:
  23.                 cur_list = freq[cur]
  24.                 length = len(cur_list)
  25.                 if length < amount_of_sentences:
  26.                     for i in range(length, amount_of_sentences - length - 1):
  27.                         cur_list.insert(i, 0)
  28.                     cur_list.insert(amount_of_sentences - 1, 1)
  29.                 else:
  30.                     cur_list[amount_of_sentences - 1] += 1
  31.                 freq[cur] = cur_list
  32.             result.append(cur)
  33.     sentences.append(result)
  34. text.close()
  35.  
  36. matrix_freq = np.zeros((amount_of_sentences, amount_of_words))
  37. i = 0
  38.  
  39. for value in freq.values():
  40.     length = len(value)
  41.     for j in range(length):
  42.         matrix_freq[j, i] = value[j]
  43.     for j in range(length, amount_of_sentences):
  44.         matrix_freq[j, i] = 0
  45.     i += 1
  46.  
  47. results = []
  48. comp = matrix_freq[0]
  49. min1 = 1
  50. min2 = 1
  51. min1_index = 0
  52. min2_index = 0
  53. for i in range(1, amount_of_sentences):
  54.     x = sp.distance.cosine(comp, matrix_freq[i])
  55.     results.insert(i, x)
  56.     if x <= min1:
  57.         min1 = x
  58.         min1_index = i
  59.     elif x <= min2:
  60.         min2 = x
  61.         min2_index = i
  62.  
  63. print(freq.keys())
  64. for i in matrix_freq:
  65.     print(i)
  66. # print(results)
  67. # results.sort()
  68. # print(results)
  69. # print(min1, min2)
  70. # print(min1_index, min2_index)
  71.  
  72. res = open('2.3.1.result.txt', 'w')
  73. res.write(str(min1_index) + ' ' + str(min2_index))
  74. res.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement