Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # matching - Finding identical copies in Python - Stackoverflow
- # https://stackoverflow.com/questions/71177696/finding-identical-copies-in-python
- # find matching not overlapping substrings
- # Open question: which positions should be reported when occurrences overlap, e.g.
- # - `ababa aba` --> it should be `"aba":[0,2]` or `"aba":[0,6]` or pairs `"aba":[(0,2), (0,6)]`
- # - `aba ababa` --> it should be `"aba":[0,3]` or `"aba":[4,6]` or pairs `"aba":[(0,3), (4,6)]`
- import pprint
def find_matches(text):
    """Find substrings of *text* that occur more than once without overlapping.

    Every substring of length 1 up to ``len(text) // 2`` is considered.  For
    each distinct substring the scan starts at its first occurrence and then
    repeatedly jumps to the next occurrence that begins at or after the end
    of the previously recorded one, so recorded occurrences never overlap.

    The scan is greedy from the first occurrence: for ``"ababa aba"`` the
    substring ``"aba"`` is reported at ``[0, 6]`` (the overlapping copy at
    index 2 is skipped).

    Args:
        text: The string to search.

    Returns:
        A dict mapping each repeated substring to the list of start indices
        of its non-overlapping occurrences, in ascending order.  Substrings
        found only once are omitted.  An empty or one-character *text*
        yields an empty dict.
    """
    results = {}
    full_len = len(text)
    # A substring longer than half of the text has no room for a second,
    # non-overlapping copy.
    max_len = full_len // 2

    # Try every substring length ...
    for length in range(1, max_len + 1):
        # ... starting at every position.
        for start in range(full_len - length):
            pattern = text[start:start + length]

            # Search only if this pattern was not searched before.
            if pattern in results:
                continue

            positions = [start]
            # Search only after the end of the previous occurrence; stepping
            # by `length` (not 1) is what keeps the matches non-overlapping.
            index = text.find(pattern, start + length)
            while index >= 0:
                positions.append(index)
                index = text.find(pattern, index + length)
            results[pattern] = positions

    # Remove patterns that were found only once.
    return {key: val for key, val in results.items() if len(val) > 1}
- # ----
# Demo: search a sample string and pretty-print every repeated substring.
text = "lowlow key keykey y k"
results = find_matches(text)
pprint.pprint(results)

# Print an index ruler (one character per line) so the reported start
# positions can be checked by eye.
for number, char in enumerate(text):
    print(f"{number:2}", char, sep="|")
- # --- Result ---
- """
- {' ': [6, 10, 17, 19],
- ' k': [6, 10, 19],
- ' ke': [6, 10],
- ' key': [6, 10],
- 'e': [8, 12, 15],
- 'ey': [8, 12, 15],
- 'ey ': [8, 15],
- 'k': [7, 11, 14, 20],
- 'ke': [7, 11, 14],
- 'key': [7, 11, 14],
- 'key ': [7, 14],
- 'l': [0, 3],
- 'lo': [0, 3],
- 'low': [0, 3],
- 'o': [1, 4],
- 'ow': [1, 4],
- 'w': [2, 5],
- 'y': [9, 13, 16, 18],
- 'y ': [9, 16, 18],
- 'y k': [9, 18]}
- 0|l
- 1|o
- 2|w
- 3|l
- 4|o
- 5|w
- 6|
- 7|k
- 8|e
- 9|y
- 10|
- 11|k
- 12|e
- 13|y
- 14|k
- 15|e
- 16|y
- 17|
- 18|y
- 19|
- 20|k
- """
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement