Advertisement
here2share

# gzip_find_nearest.py

Jul 31st, 2023 (edited)
997
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.85 KB | None | 0 0
  1. # gzip_find_nearest.py
  2.  
  3. import gzip
  4.  
  5. def predict(test_set, training_set):
  6.     predicted = len(test_set)
  7.     for x1 in test_set:
  8.         distance_from_x1 = []
  9.         for x2 in training_set:
  10.             Cx2 = len(gzip.compress(x2.encode()))
  11.             x1x2 = " ".join([x1, x2])
  12.             Cx1x2 = len(gzip.compress(x1x2.encode()))
  13.             ncd = (Cx1x2 - min(len(x1.encode()), Cx2)) / max(len(x1.encode()), Cx2)
  14.             distance_from_x1.append((ncd, x2))
  15.         sorted_distances = sorted(distance_from_x1)
  16.        
  17.         guess = sorted_distances[0][1]
  18.        
  19.         print(f"\nAltered Test: {x1}")
  20.         match_percentage = round((100-sorted_distances[0][0]),6)
  21.         print(f"\tGuessing Nearest Match: {guess} at {match_percentage}%")
  22.         if answers[x1] == guess:
  23.             print("\t+++ CORRECT +++")
  24.         else:
  25.             print(f"\t\tExpected Answer: {answers[x1]}")
  26.             predicted = predicted - 1
  27.     print(f"\n\nPredicted: {predicted} out of {len(test_set)}")
  28.  
  29. # for testing
  30. import random
  31. import string
  32.  
  33. letters = list(string.ascii_letters)
  34. L = len(letters)
  35.  
  36. tests = []
  37. def generate_tests(num_tests):
  38.     i = 0
  39.     for _ in range(num_tests):
  40.         test = ''
  41.         for _ in range(9):
  42.             s = letters.pop(i%11)
  43.             letters.append(s)
  44.             i += 1
  45.             test += s
  46.         tests.append(test)
  47.         s = letters.pop(i%7)
  48.         letters.append(s)
  49.  
  50. def alter_test(test):
  51.     pos = random.sample(range(9), 2) # choose two positions to change
  52.     new_test = list(test)
  53.     new_test[pos[0]] = letters[(letters.index(new_test[pos[0]]) + random.randint(1, L - 1)) % L]
  54.     new_test[pos[1]] = letters[(letters.index(new_test[pos[1]]) + random.randint(1, L - 1)) % L]
  55.     return ''.join(new_test)
  56.  
  57. # generate a list of n random n-letter tests
  58. generate_tests(1000)
  59.  
  60. # make a copy of each test with two letters changed
  61. altered_tests = [alter_test(test) for test in tests]
  62. answers = dict(zip(altered_tests, tests))
  63. random.shuffle(tests)
  64.  
  65. # predict nearest matches for each altered test
  66. predict(altered_tests, tests)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement