Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
N_RESULTS = 5  # default cap on how many texts are reported per query


def search_words(texts, queries, n_results=N_RESULTS):
    """Rank texts for each query by how many query-word occurrences they hold.

    Texts and queries are split on whitespace. A text's score for a query is
    the total number of occurrences, in that text, of the query's *distinct*
    words: a word repeated in the query counts once, a word repeated in the
    text counts every time. Texts sharing no word with the query are omitted.

    Args:
        texts: Sequence of text strings.
        queries: Sequence of query strings.
        n_results: Maximum number of texts to report per query. ``None``
            disables the limit. Defaults to ``N_RESULTS``.

    Returns:
        A list with one entry per query, in query order. Each entry is a list
        of 1-based text numbers sorted by descending score, ties broken by
        ascending text number, truncated to ``n_results`` items.

    Raises:
        ValueError: If ``n_results`` is negative.
    """
    # Imported locally because the file has no module-level import block.
    from collections import Counter, defaultdict

    if n_results is not None and n_results < 0:
        raise ValueError("n_results must be non-negative or None")

    # Inverted index: word -> indices of the queries that contain it.
    # Using a set makes a word repeated inside one query count only once.
    query_ids_by_word = defaultdict(set)
    for query_i, query in enumerate(queries):
        for word in query.split():
            query_ids_by_word[word].add(query_i)

    # scores[query_i][text_i] -> accumulated occurrence count.
    scores = [Counter() for _ in queries]
    for text_i, text in enumerate(texts):
        # Count every word of the text once instead of calling list.count()
        # for each matching word.
        for word, occurrences in Counter(text.split()).items():
            # .get() avoids inserting unknown words into the defaultdict.
            for query_i in query_ids_by_word.get(word, ()):
                scores[query_i][text_i] += occurrences

    result = []
    for query_scores in scores:
        # Highest score first; equal scores keep the original text order.
        ranked = sorted(
            query_scores.items(),
            key=lambda item: (-item[1], item[0]),
        )
        result.append([text_i + 1 for text_i, _ in ranked[:n_results]])
    return result
- #[print(' '.join([str(x) for x in inds])) for inds in search_words(texts, queries)]
# Example 1: three texts against three queries. Texts that share no word
# with a query are left out of that query's result.
texts = [
    'i love coffee',
    'coffee with milk and sugar',
    'free tea for everyone',
]
queries = [
    'i like black coffee without milk',
    'everyone loves new year',
    'mary likes black coffee without milk',
]
# Expected 1-based text numbers per query, best match first.
result = [[1, 2], [3], [2, 1]]
assert search_words(texts, queries) == result
# Example 2: all six texts share words with the single query, but only the
# five best-ranked ones are expected in the output.
texts = [
    'buy flat in moscow',
    'rent flat in moscow',
    'sell flat in moscow',
    'want flat in moscow like crazy',
    'clean flat in moscow on weekends',
    'renovate flat in moscow',
]
queries = ['flat in moscow for crazy weekends']
# Expected 1-based text numbers, best match first.
result = [[4, 5, 1, 2, 3]]
assert search_words(texts, queries) == result
Add Comment
Please, Sign In to add comment