Advertisement
chevengur

СПРИНТ №1 | Структуры и классы | Урок 3: Структура

Sep 13th, 2023 (edited)
358
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.33 KB | None | 0 0
  1. #include <algorithm>
  2. #include <iostream>
  3. #include <set>
  4. #include <string>
  5. #include <utility>
  6. #include <vector>
  7.  
  8. using namespace std;
  9.  
  // Maximum number of documents kept in a search result.
  constexpr int MAX_RESULT_DOCUMENT_COUNT = 5;
  11.  
  // A stored document: its numeric id plus the list of words it contains.
  // Remains an aggregate, so `{ id, words }` brace-initialization keeps working.
  struct DocumentContent {
      // Identifier of the document; zero unless explicitly set, so a
      // default-constructed object never carries an indeterminate value.
      int id = 0;
      // Words of the document, in the order they were stored.
      vector<string> words;
  };
  16.  
  // Reads one line from standard input and returns it without the
  // terminating newline character.
  string ReadLine() {
      string line;
      getline(cin, line);
      return line;
  }
  22.  
  23. int ReadLineWithNumber() {
  24.     int result = 0;
  25.     cin >> result;
  26.     ReadLine();
  27.     return result;
  28. }
  29.  
  // Splits `text` into words separated by one or more space characters (' ').
  // Leading, trailing and repeated spaces never produce empty words; any other
  // character (tabs included) is treated as part of a word.
  vector<string> SplitIntoWords(const string& text) {
      vector<string> tokens;
      size_t cursor = 0;
      while (cursor < text.size()) {
          // Skip the run of spaces in front of the next word.
          const size_t word_begin = text.find_first_not_of(' ', cursor);
          if (word_begin == string::npos) {
              break;  // only spaces remain
          }
          // The word extends to the next space or to the end of the text.
          size_t word_end = text.find(' ', word_begin);
          if (word_end == string::npos) {
              word_end = text.size();
          }
          tokens.push_back(text.substr(word_begin, word_end - word_begin));
          cursor = word_end;
      }
      return tokens;
  }
  50.  
  51. set<string> ParseStopWords(const string& text) {
  52.     set<string> stop_words;
  53.     for (const string& word : SplitIntoWords(text)) {
  54.         stop_words.insert(word);
  55.     }
  56.     return stop_words;
  57. }
  58.  
  59. vector<string> SplitIntoWordsNoStop(const string& text, const set<string>& stop_words) {
  60.     vector<string> words;
  61.     for (const string& word : SplitIntoWords(text)) {
  62.         if (stop_words.count(word) == 0) {
  63.             words.push_back(word);
  64.         }
  65.     }
  66.     return words;
  67. }
  68.  
  69. void AddDocument(vector<DocumentContent>& documents, const set<string>& stop_words,
  70.     int document_id, const string& document) {
  71.     const vector<string> words = SplitIntoWordsNoStop(document, stop_words);
  72.     documents.push_back({ document_id, words });
  73. }
  74.  
  75. set<string> ParseQuery(const string& text, const set<string>& stop_words) {
  76.     set<string> query_words;
  77.     for (const string& word : SplitIntoWordsNoStop(text, stop_words)) {
  78.         query_words.insert(word);
  79.     }
  80.     return query_words;
  81. }
  82.  
  83. int MatchDocument(const DocumentContent& content, const set<string>& query_words) {
  84.     if (query_words.empty()) {
  85.         return 0;
  86.     }
  87.     set<string> matched_words;
  88.     for (const string& word : content.words) {
  89.         if (matched_words.count(word) != 0) {
  90.             continue;
  91.         }
  92.         if (query_words.count(word) != 0) {
  93.             matched_words.insert(word);
  94.         }
  95.     }
  96.     return static_cast<int>(matched_words.size());
  97. }
  98.  
  99. // Для каждого документа возвращает его релевантность и id
  100. vector<pair<int, int>> FindAllDocuments(const vector<DocumentContent>& documents,
  101.     const set<string>& query_words) {
  102.     vector<pair<int, int>> matched_documents;
  103.     for (const auto& document : documents) {
  104.         const int relevance = MatchDocument(document, query_words);
  105.         if (relevance > 0) {
  106.             matched_documents.push_back({ relevance, document.id });
  107.         }
  108.     }
  109.     return matched_documents;
  110. }
  111.  
  112. // Возвращает топ-5 самых релевантных документов в виде пар: {id, релевантность}
  113. vector<pair<int, int>> FindTopDocuments(const vector<DocumentContent>& documents,
  114.     const set<string>& stop_words, const string& raw_query) {
  115.     const set<string> query_words = ParseQuery(raw_query, stop_words);
  116.     auto matched_documents = FindAllDocuments(documents, query_words);
  117.  
  118.     sort(matched_documents.begin(), matched_documents.end());
  119.     reverse(matched_documents.begin(), matched_documents.end());
  120.     if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) {
  121.         matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT);
  122.     }
  123.     for (auto& matched_document : matched_documents) {
  124.         swap(matched_document.first, matched_document.second);
  125.     }
  126.     return matched_documents;
  127. }
  128.  
  129. int main() {
  130.     const string stop_words_joined = ReadLine();
  131.     const set<string> stop_words = ParseStopWords(stop_words_joined);
  132.  
  133.     // Read documents
  134.     vector<DocumentContent> documents;
  135.     const int document_count = ReadLineWithNumber();
  136.     for (int document_id = 0; document_id < document_count; ++document_id) {
  137.         AddDocument(documents, stop_words, document_id, ReadLine());
  138.     }
  139.  
  140.     const string query = ReadLine();
  141.     for (auto [document_id, relevance] : FindTopDocuments(documents, stop_words, query)) {
  142.         cout << "{ document_id = "s << document_id << ", relevance = "s << relevance << " }"s
  143.             << endl;
  144.     }
  145. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement