Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''
- This problem was asked by Google.
- The edit distance between two strings refers to the minimum number of character insertions, deletions, and substitutions required to change one string
- to the other. For example, the edit distance between “kitten” and “sitting” is three: substitute the “k” for “s”, substitute the “e” for “i”, and append a “g”.
- Given two strings, compute the edit distance between them.
- '''
- def indexForMaximization(message1, message2):
- small = ""
- big = ""
- if len(message1) >= len(message2):
- big = message1
- small = message2
- else:
- big = message2
- small = message1
- # Example: "believe" and "unbelievable"
- # "unbelie", "nbeliev", "BELIEVA", "elievab", "lievabl", "ievable"
- # I will try to find the index in the big message, that maximizes the hits
- N = len(big)
- n = len(small)
- maxHits = 0
- index = -1
- for i in range(0, N-n+1):
- hits = 0
- for j in range(n):
- if big[i+j] == small[j]:
- hits += 1
- if hits > maxHits:
- maxHits = hits
- index = i + j
- # Now, j has the value of last iteration and last hit
- # So, I have to remove the length of small message
- rightIndex = index - n + 1
- return big, small, N, n, rightIndex
- def edit(message1, message2):
- big, small, N, n, rightIndex = indexForMaximization(message1, message2)
- # Now, I have the index in big message, that maximizes the hits
- # I know that if I begin from index I have many hits ----> If I dont make hits, I have to change letters
- # 1. SUBSTITUTIONS
- substitutions = 0
- subPrints = list()
- for i in range(n):
- if big[rightIndex+i] != small[i]:
- substitutions += 1
- subPrint = str(big[rightIndex+i]) + " ----> " + str(small[i])
- subPrints.append(subPrint)
- # 2a. INSERTIONS
- insertionsLeft = rightIndex
- insLeftPrints = list()
- for i in range(insertionsLeft):
- insertionLeft = str("Insertion of " + str(big[i]))
- insLeftPrints.append(insertionLeft)
- # 2b. INSERTIONS
- insertionsRight = N - (rightIndex + n)
- insRightPrints = list()
- for i in range(N - insertionsRight, N):
- insertionRight = str("Insertion of " + str(big[i]))
- insRightPrints.append(insertionRight)
- return substitutions, subPrints, insertionsLeft, insLeftPrints, insertionsRight, insRightPrints
- def prettyPrint(message1, message2):
- big, small, N, n, rightIndex = indexForMaximization(message1, message2)
- subs, subPrints, insLeft, insLeftPrints, insRight, insRightPrints = edit(message1, message2)
- changes = subs + insLeft + insRight
- print("******************************************************************")
- print("Comparing '" + str(message1) + "' and '" + str(message2) + "':")
- print("Changes = " + str(changes) + " <----> " + str(insLeft) + " insertionsLeft, " + str(subs) + " substitutions, " + str(insRight) + " insertionsRight")
- print("******************************************************************")
- print(" 1. InsertionsLeft = " + str(insLeft))
- for i in range(len(insLeftPrints)):
- print(" " + str(insLeftPrints[i]))
- print()
- print(" 2. Substitutions = " + str(subs))
- for i in range(len(subPrints)):
- print(" " + str(subPrints[i]))
- print()
- print(" 3. InsertionsRight = " + str(insRight))
- for i in range(len(insRightPrints)):
- print(" " + str(insRightPrints[i]))
- print()
- # MAIN FUNCTION
- prettyPrint("believe", "unbelievable")
- prettyPrint("kite", "able")
- prettyPrint("suffer", "unsuffer")
- prettyPrint("sales", "sure")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement