Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <vector>
- #include <unordered_map>
- #include <string>
- #include <algorithm>
- using namespace std;
- // A Trie node now stores a map of children keyed by the next word (or token),
- // plus an extra field bestChild which is the key of the child with the highest frequency,
- // and bestChildCount to store its frequency.
- class Trie {
- public:
- unordered_map<string, Trie*> children;
- bool endOfSentence = false;
- int count = 0;
- string bestChild; // The next word with the highest frequency among the children
- int bestChildCount = 0; // Frequency of bestChild
- Trie() : endOfSentence(false), count(0), bestChild(""), bestChildCount(0) {}
- };
- // When inserting a sentence (vector of tokens) into the trie, we update the parent's bestChild.
- void insertWithBest(Trie* head, vector<string>& sentence) {
- Trie* cur = head;
- for (auto& token : sentence) {
- // Create child if it doesn't exist.
- if (cur->children.find(token) == cur->children.end()) {
- cur->children[token] = new Trie();
- }
- // Get pointer to the child.
- Trie* child = cur->children[token];
- // Increase the frequency for that child.
- child->count += 1;
- // Update current node's best child if needed.
- if (child->count > cur->bestChildCount) {
- cur->bestChildCount = child->count;
- cur->bestChild = token;
- }
- // Move down the trie.
- cur = child;
- }
- cur->endOfSentence = true;
- }
- // Instead of iterating over all children to find the highest frequency one,
- // we simply return the bestChild field of the node corresponding to the given prefix.
- string searchBest(Trie* head, vector<string>& prefix) {
- Trie* cur = head;
- for (auto& token : prefix) {
- if (cur->children.find(token) == cur->children.end())
- return ""; // prefix not found
- cur = cur->children[token];
- }
- // Return the best next word (which has been maintained during insertion)
- return cur->bestChild;
- }
- // Build the trie from a list of sentences and then find the best next word for a given prefix.
- string findNextWord(vector<vector<string>> sentences, vector<string> prefix) {
- Trie* head = new Trie();
- for (auto& sentence : sentences) {
- insertWithBest(head, sentence);
- }
- return searchBest(head, prefix);
- }
- int main(){
- // Example sentences (each sentence is a vector of tokens/words)
- vector<vector<string>> sentences = { {"i", "am", "awesome"}, {"i", "am", "a", "googler"} };
- // Given prefix "i am", we want to find the next word that is most frequent.
- vector<string> prefix = {"i", "am"};
- // With the above sentences, the next words are:
- // - In sentence 1: "awesome"
- // - In sentence 2: "a"
- // Suppose that, after insertion, "a" turns out to be more frequent than "awesome".
- // Then the expected output is "a".
- string ans = findNextWord(sentences, prefix);
- cout << "Most likely next word: " << ans << endl;
- // Cleanup of allocated Trie nodes is omitted for brevity.
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement