# gzip_find_nearest.py *** fixed!
#
# Source: Pastebin paste "gzip_find_nearest.py" by here2share,
# posted May 8th, 2024 (Python, 1.93 KB).

import gzip

  5. def predict(test_set, training_set):
  6.     predicted = 0
  7.     test_number = 0
  8.     for x1 in test_set:
  9.         distance_from_x1 = []
  10.         for x2 in training_set:
  11.             Cx2 = len(gzip.compress(x2.encode()))
  12.             x1x2 = " ".join([x1, x2])
  13.             Cx1x2 = len(gzip.compress(x1x2.encode()))
  14.             ncd = (Cx1x2 - min(len(x1.encode()), Cx2)) / max(len(x1.encode()), Cx2)
  15.             distance_from_x1.append((ncd, x2))
  16.         sorted_distances = sorted(distance_from_x1)
  17.        
  18.         guess = sorted_distances[0][1]
  19.        
  20.         print(f"\nAltered Test: {x1}")
  21.         match_percentage = round((100-sorted_distances[0][0]),6)
  22.         print(f"\tGuessing Nearest Match: {guess} at {match_percentage}%")
  23.         if answers[x1] == guess:
  24.             print("\t+++ CORRECT +++")
  25.             predicted = predicted + 1
  26.         else:
  27.             print(f"\t\tExpected Answer: {answers[x1]}")
  28.         test_number = test_number + 1
  29.         print(f"Predicted: {predicted} out of {test_number}  Errors: {test_number - predicted}")
  30.  
  31. # for testing
  32. import random
  33. import string
  34.  
  35. letters = list(string.ascii_letters)
  36. L = len(letters)
  37.  
  38. tests = []
  39. def generate_tests(num_tests):
  40.     i = 0
  41.     for _ in range(num_tests):
  42.         test = ''
  43.         for _ in range(9):
  44.             s = letters.pop(i%11)
  45.             letters.append(s)
  46.             i += 1
  47.             test += s
  48.         tests.append(test)
  49.         s = letters.pop(i%7)
  50.         letters.append(s)
  51.  
  52. def alter_test(test):
  53.     pos = random.sample(range(9), 2) # choose two positions to change
  54.     new_test = list(test)
  55.     new_test[pos[0]] = letters[(letters.index(new_test[pos[0]]) + random.randint(1, L - 1)) % L]
  56.     new_test[pos[1]] = letters[(letters.index(new_test[pos[1]]) + random.randint(1, L - 1)) % L]
  57.     return ''.join(new_test)
  58.  
  59. # generate a list of n random n-letter tests
  60. generate_tests(1000)
  61.  
  62. # make a copy of each test with two letters changed
  63. altered_tests = [alter_test(test) for test in tests]
  64. answers = dict(zip(altered_tests, tests))
  65. random.shuffle(tests)
  66.  
  67. # predict nearest matches for each altered test
  68. predict(altered_tests, tests)
# (Pastebin page footer -- advertisement and comment sign-in prompts -- omitted.)