Advertisement
Aikiro42

Word Search using Trie

Jan 29th, 2020
286
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.11 KB | None | 0 0
  1. class Lief:
  2.     def __init__(self, e, is_end):
  3.         self.e = e
  4.         self.children = {}
  5.         self.is_end = is_end
  6.  
  7.     def add_child(self, e, is_end):
  8.         self.children[e] = Lief(e, is_end)
  9.  
  10.     def get_child(self, e):
  11.         return self.children[e]
  12.  
  13.     def remove_child(self, e):
  14.         del self.children[e]
  15.  
  16.     def is_child(self, e):
  17.         return e in self.children
  18.  
  19.     def has_children(self):
  20.         return len(self.children) > 0
  21.  
  22.     def children_count(self):
  23.         return len(self.children)
  24.  
  25.     def add_word(self, word):
  26.         if word[0] not in self.children:
  27.             # if letter not in children, create new lief
  28.             new_lief = Lief(word[0], False)
  29.             if len(word) > 1:
  30.                 new_lief.add_word(word[1:])
  31.             else:
  32.                 new_lief.is_end = True
  33.             self.children[word[0]] = new_lief
  34.         else:
  35.             self.children[word[0]].add_word(word[1:])
  36.  
  37.     def search_word(self, word):
  38.         if word[0] == self.e:
  39.             # if word initial matches lief
  40.             if len(word) == 1 and self.is_end:
  41.                 return True
  42.             elif len(word) > 1 and word[1] in self.children:
  43.                 return self.children[word[1]].search_word(word[1:])
  44.             else:
  45.                 return False
  46.         else:
  47.             return False
  48.  
  49.     def remove_word(self, word):
  50.         if word[0] == self.e:
  51.             if len(word) == 1 and self.is_end:
  52.                 self.is_end = False
  53.             elif len(word) > 1 and word[1] in self.children:
  54.                 self.children[word[1]].remove_word(word[1:])
  55.  
  56.  
  57. class Trie:
  58.  
  59.     def __init__(self, wl=None):
  60.         self.heads = {}
  61.         if wl is not None:
  62.             [self.add_word(word) for word in wl]
  63.  
  64.     def add_word(self, word):
  65.         if not self.search_word(word):
  66.             if word[0] not in self.heads:
  67.                 new_lief = Lief(word[0], False)
  68.                 if len(word) > 1:
  69.                     new_lief.add_word(word[1:])
  70.                 else:
  71.                     new_lief.is_end = True
  72.                 self.heads[word[0]] = new_lief
  73.             else:
  74.                 self.heads[word[0]].add_word(word[1:])
  75.  
  76.     def search_word(self, word):
  77.         if word[0] in self.heads:
  78.             return self.heads[word[0]].search_word(word)
  79.         else:
  80.             return False
  81.  
  82.     def remove_word(self, word):
  83.         if word[0] in self.heads:
  84.             self.heads[word[0]].remove_word(word)
  85.  
  86.     def is_head(self, char):
  87.         return char in self.heads
  88.  
  89.     def get_head(self, char):
  90.         return self.heads[char]
  91.  
  92.  
  93. # 0: right, rotation cc
  94. direction = [
  95.     (1, 0),         # 0: right
  96.     (1, 1),         # 1: right down
  97.     (0, 1),         # 2: down
  98.     (-1, 1),        # 3: left down
  99.     (-1, 0),        # 4: left
  100.     (-1, -1),       # 5: left up
  101.     (0, -1),        # 6: up
  102.     (1, -1)         # 7: right up
  103. ]
  104.  
  105. d_name = {
  106.     0: 'E',
  107.     1: 'SE',
  108.     2: 'S',
  109.     3: 'SW',
  110.     4: 'W',
  111.     5: 'NW',
  112.     6: 'N',
  113.     7: 'NE',
  114. }
  115.  
  116.  
  117. def search_grid(g, trie: Trie):
  118.     word_coors = {}
  119.     found_words = []
  120.     for y in range(len(g)):
  121.         for x in range(len(g[0])):
  122.             curr_char = g[y][x]
  123.             if trie.is_head(curr_char):
  124.                 found_words += search_cell(g, x, y, trie.get_head(g[y][x]))
  125.                 if len(found_words) > 0:
  126.                     for word in found_words:
  127.                         word_coors[word] = (y+1, x+1)
  128.                         dictionary.remove_word(word)
  129.                     found_words = []
  130.     return word_coors
  131.  
  132.  
  133. def search_cell(g, x, y, node: Lief):
  134.     found_words = []
  135.     for d in range(len(direction)):
  136.         found_words += search(g, x, y, d, node)
  137.     return found_words
  138.  
  139.  
  140. def search(g, x, y, d, node: Lief, words_found=None, word_str=''):
  141.     if words_found is None:
  142.         words_found = []
  143.     global direction
  144.     if node.e == g[y][x]:  # if current letter matches
  145.         word_str += node.e
  146.  
  147.         if node.is_end:
  148.             words_found.append(word_str)
  149.  
  150.         # increment step
  151.         x += direction[d][0]
  152.         y += direction[d][1]
  153.  
  154.         if 0 <= x < len(g[0]) and 0 <= y < len(g):  # check if next step is still within bounds
  155.  
  156.             next_char = g[y][x]
  157.  
  158.             if node.is_child(next_char):
  159.                 next_node = node.get_child(next_char)
  160.                 return search(g, x, y, d, next_node, words_found, word_str)
  161.             else:
  162.                 return words_found
  163.  
  164.         else:
  165.             # if next step is out of bounds
  166.             return words_found
  167.     else:
  168.         return words_found
  169.  
  170.  
  171. # [MAIN CODE]=======================================================================
  172.  
  173. # Get grid and word list
  174. r, c = map(int, input().split())
  175. grid = [input() for _ in range(r)]
  176. word_count = int(input())
  177. word_list = [input() for _ in range(word_count)]
  178.  
  179. # Cache words
  180. dictionary = Trie(word_list)
  181.  
  182. # Search grid
  183. word_coors = search_grid(grid, dictionary)
  184. for word in word_list:
  185.     print(*word_coors[word])
  186.  
  187. # [TEST CODE]=======================================================================
  188.  
  189. # grid = [
  190. #         'xatnysnprabvbas',
  191. #         'teehsdaerpslyug',
  192. #         'soqgknsscoklpsn',
  193. #         'ochmacaeceteeei',
  194. #         'fyasarnprrrhfak',
  195. #         'tjenngoascosprr',
  196. #         'wjnkiewtohulsco',
  197. #         'aexnteemsrowlhw',
  198. #         'rbefrnprfietset',
  199. #         'egiaeusecuritye',
  200. #         'shhetcwhisasahn',
  201. #         'ssrescriptwptku',
  202. #         'xcremmapsryaurq',
  203. #         'srevresavepmssq',
  204. #         'socialwuevsuybb'
  205. #         ]
  206. #
  207. # word_list = ['bar', 'engine', 'key', 'networking', 'save', 'scan', 'scanner', 'screen', 'screenshot', 'script',
  208. #              'scroll', 'search', 'security', 'server', 'shareware', 'shell', 'shift', 'snapshot', 'social',
  209. #              'software', 'spam', 'spammer', 'spreadsheet', 'spyware', 'status', 'storage', 'supercomputer',
  210. #              'surf', 'syntax']
  211. # dictionary = Trie(word_list)
  212. # word_coors = search_grid(grid, dictionary)
  213. # for word in word_list:
  214. #     print('{}: {}'.format(word, word_coors[word]))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement