Advertisement
here2share

# find_nearest.py

Aug 2nd, 2023 (edited)
1,210
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.40 KB | None | 0 0
  1. # find_nearest.py
  2.  
  3. import math
  4.  
  5. def levenshtein_distance(s1, s2):
  6.     if len(s1) < len(s2):
  7.         return levenshtein_distance(s2, s1)
  8.  
  9.     # len(s1) >= len(s2)
  10.     if len(s2) == 0:
  11.         return len(s1)
  12.  
  13.     # create two lists a and b for current and previous row
  14.     a = list(range(len(s2) + 1))
  15.     b = [0] * (len(s2) + 1)
  16.  
  17.     for i, c1 in enumerate(s1):
  18.         b[0] = i + 1
  19.         for j, c2 in enumerate(s2):
  20.             cost = 0 if c1 == c2 else 1
  21.             b[j + 1] = min(b[j] + 1, a[j + 1] + 1, a[j] + cost)
  22.         a, b = b, a
  23.  
  24.     return a[-1]
  25.  
  26. def predict(test_set, training_set):
  27.     predicted = len(test_set)
  28.     for x1 in test_set:
  29.         distance_from_x1 = []
  30.         for x2 in training_set:
  31.             distance = levenshtein_distance(x1, x2)
  32.             distance_from_x1.append((distance, x2))
  33.         sorted_distances = sorted(distance_from_x1)
  34.        
  35.         guess = sorted_distances[0][1]
  36.        
  37.         print(f"\nAltered Test: {x1}")
  38.         match_percentage = round((1 - sorted_distances[0][0] / max(len(x1), len(guess))) * 100, 6)
  39.         print(f"\tGuessing Nearest Match: {guess} at {match_percentage}%")
  40.         if answers[x1] == guess:
  41.             print("\t+++ CORRECT +++")
  42.         else:
  43.             print(f"\t\tExpected Answer: {answers[x1]}")
  44.             predicted = predicted - 1
  45.     print(f"\n\nPredicted: {predicted} out of {len(test_set)}")
  46.    
  47. # for testing
  48. import random
  49. import string
  50.  
  51. letters = list(string.ascii_letters)
  52. L = len(letters)
  53.  
  54. tests = []
  55. def generate_tests(num_tests):
  56.     i = 0
  57.     for _ in range(num_tests):
  58.         test = ''
  59.         for _ in range(25):
  60.             s = letters.pop(i%11)
  61.             letters.append(s)
  62.             i += 1
  63.             test += s
  64.         tests.append(test)
  65.         s = letters.pop(i%7)
  66.         letters.append(s)
  67.  
  68. def alter_test(test):
  69.     pos = random.sample(range(25), 12) # choose two positions to change
  70.     new_test = list(test)
  71.     new_test[pos[0]] = letters[(letters.index(new_test[pos[0]]) + random.randint(1, L - 1)) % L]
  72.     new_test[pos[1]] = letters[(letters.index(new_test[pos[1]]) + random.randint(1, L - 1)) % L]
  73.     return ''.join(new_test)
  74.  
  75. # generate a list of n random n-letter tests
  76. generate_tests(1000)
  77.  
  78. # make a copy of each test with two letters changed
  79. altered_tests = [alter_test(test) for test in tests]
  80. answers = dict(zip(altered_tests, tests))
  81. random.shuffle(tests)
  82.  
  83. # predict nearest matches for each altered test
  84. predict(altered_tests, tests)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement