Advertisement
aqibm

Untitled

Mar 12th, 2025
19
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.12 KB | None | 0 0
  1. #include <iostream>
  2. #include <vector>
  3. #include <unordered_map>
  4. #include <string>
  5. #include <algorithm>
  6. using namespace std;
  7.  
  8. // A Trie node now stores a map of children keyed by the next word (or token),
  9. // plus an extra field bestChild which is the key of the child with the highest frequency,
  10. // and bestChildCount to store its frequency.
  11. class Trie {
  12. public:
  13. unordered_map<string, Trie*> children;
  14. bool endOfSentence = false;
  15. int count = 0;
  16. string bestChild; // The next word with the highest frequency among the children
  17. int bestChildCount = 0; // Frequency of bestChild
  18.  
  19. Trie() : endOfSentence(false), count(0), bestChild(""), bestChildCount(0) {}
  20. };
  21.  
  22. // When inserting a sentence (vector of tokens) into the trie, we update the parent's bestChild.
  23. void insertWithBest(Trie* head, vector<string>& sentence) {
  24. Trie* cur = head;
  25. for (auto& token : sentence) {
  26. // Create child if it doesn't exist.
  27. if (cur->children.find(token) == cur->children.end()) {
  28. cur->children[token] = new Trie();
  29. }
  30. // Get pointer to the child.
  31. Trie* child = cur->children[token];
  32. // Increase the frequency for that child.
  33. child->count += 1;
  34. // Update current node's best child if needed.
  35. if (child->count > cur->bestChildCount) {
  36. cur->bestChildCount = child->count;
  37. cur->bestChild = token;
  38. }
  39. // Move down the trie.
  40. cur = child;
  41. }
  42. cur->endOfSentence = true;
  43. }
  44.  
  45. // Instead of iterating over all children to find the highest frequency one,
  46. // we simply return the bestChild field of the node corresponding to the given prefix.
  47. string searchBest(Trie* head, vector<string>& prefix) {
  48. Trie* cur = head;
  49. for (auto& token : prefix) {
  50. if (cur->children.find(token) == cur->children.end())
  51. return ""; // prefix not found
  52. cur = cur->children[token];
  53. }
  54. // Return the best next word (which has been maintained during insertion)
  55. return cur->bestChild;
  56. }
  57.  
  58. // Build the trie from a list of sentences and then find the best next word for a given prefix.
  59. string findNextWord(vector<vector<string>> sentences, vector<string> prefix) {
  60. Trie* head = new Trie();
  61. for (auto& sentence : sentences) {
  62. insertWithBest(head, sentence);
  63. }
  64. return searchBest(head, prefix);
  65. }
  66.  
  67. int main(){
  68. // Example sentences (each sentence is a vector of tokens/words)
  69. vector<vector<string>> sentences = { {"i", "am", "awesome"}, {"i", "am", "a", "googler"} };
  70.  
  71. // Given prefix "i am", we want to find the next word that is most frequent.
  72. vector<string> prefix = {"i", "am"};
  73.  
  74. // With the above sentences, the next words are:
  75. // - In sentence 1: "awesome"
  76. // - In sentence 2: "a"
  77. // Suppose that, after insertion, "a" turns out to be more frequent than "awesome".
  78. // Then the expected output is "a".
  79. string ans = findNextWord(sentences, prefix);
  80. cout << "Most likely next word: " << ans << endl;
  81.  
  82. // Cleanup of allocated Trie nodes is omitted for brevity.
  83. return 0;
  84. }
  85.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement