Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace std;
// Upper bound on the number of documents FindTopDocuments returns.
constexpr int MAX_RESULT_DOCUMENT_COUNT = 5;
// Reads one line from standard input and returns it without the trailing
// newline. Returns an empty string when nothing could be read.
string ReadLine() {
    string line;
    getline(cin, line);
    return line;
}
- int ReadLineWithNumber() {
- int result;
- cin >> result;
- ReadLine();
- return result;
- }
// Splits `text` on the space character (' ' only) and returns the non-empty
// pieces in their original order. Runs of spaces and leading/trailing spaces
// produce no empty words.
vector<string> SplitIntoWords(const string& text) {
    vector<string> pieces;
    size_t piece_begin = text.find_first_not_of(' ');
    while (piece_begin != string::npos) {
        const size_t piece_end = text.find(' ', piece_begin);
        if (piece_end == string::npos) {
            // Last word: it runs to the end of the text.
            pieces.push_back(text.substr(piece_begin));
            break;
        }
        pieces.push_back(text.substr(piece_begin, piece_end - piece_begin));
        piece_begin = text.find_first_not_of(' ', piece_end);
    }
    return pieces;
}
// One search result: the document id, its computed relevance for the query
// and its stored rating.
//
// Members carry default initialisers so that a default-constructed Document
// holds zeros instead of indeterminate values. The struct remains an
// aggregate, so `{id, relevance, rating}` initialisation keeps working.
struct Document {
    int id = 0;
    double relevance = 0.0;
    int rating = 0;
};
// Status tag stored with every document; SearchServer::FindTopDocuments can
// filter results by it.
enum class DocumentStatus {
    ACTUAL = 0,
    IRRELEVANT = 1,
    BANNED = 2,
    REMOVED = 3,
};
- class SearchServer {
- public:
- void SetStopWords(const string& text) {
- for (const string& word : SplitIntoWords(text)) {
- stop_words_.insert(word);
- }
- }
- void AddDocument(int document_id, const string& document, DocumentStatus status,
- const vector<int>& ratings) {
- const vector<string> words = SplitIntoWordsNoStop(document);
- const double inv_word_count = 1.0 / words.size();
- for (const string& word : words) {
- word_to_document_freqs_[word][document_id] += inv_word_count;
- }
- documents_.emplace(document_id, DocumentData{ ComputeAverageRating(ratings), status });
- }
- vector<Document> FindTopDocuments(const string& raw_query, DocumentStatus status) const {
- return FindTopDocuments(raw_query, [status](int document_id, DocumentStatus doc_status, int rating) { return doc_status == status; });
- }
- vector<Document> FindTopDocuments(const string& raw_query) const {
- return FindTopDocuments(raw_query, DocumentStatus::ACTUAL);
- }
- template<typename Predicate>
- vector<Document> FindTopDocuments(const string& raw_query, Predicate predicate) const {
- const Query query = ParseQuery(raw_query);
- auto matched_documents = FindAllDocuments(query, predicate);
- sort(matched_documents.begin(), matched_documents.end(),
- [](const Document& lhs, const Document& rhs) {
- if (abs(lhs.relevance - rhs.relevance) < 1e-6) {
- return lhs.rating > rhs.rating;
- }
- else {
- return lhs.relevance > rhs.relevance;
- }
- });
- if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) {
- matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT);
- }
- return matched_documents;
- }
- int GetDocumentCount() const {
- return documents_.size();
- }
- tuple<vector<string>, DocumentStatus> MatchDocument(const string& raw_query,
- int document_id) const {
- const Query query = ParseQuery(raw_query);
- vector<string> matched_words;
- for (const string& word : query.plus_words) {
- if (word_to_document_freqs_.count(word) == 0) {
- continue;
- }
- if (word_to_document_freqs_.at(word).count(document_id)) {
- matched_words.push_back(word);
- }
- }
- for (const string& word : query.minus_words) {
- if (word_to_document_freqs_.count(word) == 0) {
- continue;
- }
- if (word_to_document_freqs_.at(word).count(document_id)) {
- matched_words.clear();
- break;
- }
- }
- return { matched_words, documents_.at(document_id).status };
- }
- private:
- struct DocumentData {
- int rating;
- DocumentStatus status;
- };
- set<string> stop_words_;
- map<string, map<int, double>> word_to_document_freqs_;
- map<int, DocumentData> documents_;
- bool IsStopWord(const string& word) const {
- return stop_words_.count(word) > 0;
- }
- vector<string> SplitIntoWordsNoStop(const string& text) const {
- vector<string> words;
- for (const string& word : SplitIntoWords(text)) {
- if (!IsStopWord(word)) {
- words.push_back(word);
- }
- }
- return words;
- }
- static int ComputeAverageRating(const vector<int>& ratings) {
- if (ratings.empty()) {
- return 0;
- }
- int rating_sum = 0;
- for (const int rating : ratings) {
- rating_sum += rating;
- }
- return rating_sum / static_cast<int>(ratings.size());
- }
- struct QueryWord {
- string data;
- bool is_minus;
- bool is_stop;
- };
- QueryWord ParseQueryWord(string text) const {
- bool is_minus = false;
- // Word shouldn't be empty
- if (text[0] == '-') {
- is_minus = true;
- text = text.substr(1);
- }
- return { text, is_minus, IsStopWord(text) };
- }
- struct Query {
- set<string> plus_words;
- set<string> minus_words;
- };
- Query ParseQuery(const string& text) const {
- Query query;
- for (const string& word : SplitIntoWords(text)) {
- const QueryWord query_word = ParseQueryWord(word);
- if (!query_word.is_stop) {
- if (query_word.is_minus) {
- query.minus_words.insert(query_word.data);
- }
- else {
- query.plus_words.insert(query_word.data);
- }
- }
- }
- return query;
- }
- // Existence required
- double ComputeWordInverseDocumentFreq(const string& word) const {
- return log(GetDocumentCount() * 1.0 / word_to_document_freqs_.at(word).size());
- }
- template<typename Docpredicate>
- vector<Document> FindAllDocuments(const Query& query, Docpredicate predicate) const {
- map<int, double> document_to_relevance;
- for (const string& word : query.plus_words) {
- if (word_to_document_freqs_.count(word) == 0) {
- continue;
- }
- const double inverse_document_freq = ComputeWordInverseDocumentFreq(word);
- for (const auto [document_id, term_freq] : word_to_document_freqs_.at(word)) {
- if (predicate(document_id, documents_.at(document_id).status, documents_.at(document_id).rating)) {
- document_to_relevance[document_id] += term_freq * inverse_document_freq;
- }
- }
- }
- for (const string& word : query.minus_words) {
- if (word_to_document_freqs_.count(word) == 0) {
- continue;
- }
- for (const auto [document_id, _] : word_to_document_freqs_.at(word)) {
- document_to_relevance.erase(document_id);
- }
- }
- vector<Document> matched_documents;
- for (const auto [document_id, relevance] : document_to_relevance) {
- matched_documents.push_back(
- { document_id, relevance, documents_.at(document_id).rating });
- }
- return matched_documents;
- }
- };
- void PrintDocument(const Document& document) {
- cout << "{ "s
- << "document_id = "s << document.id << ", "s
- << "relevance = "s << document.relevance << ", "s
- << "rating = "s << document.rating
- << " }"s << endl;
- }
- // Тест проверяет, что поисковая система исключает стоп-слова при добавлении документов
- void TestExcludeStopWordsFromAddedDocumentContent() {
- const int doc_id = 42;
- const string content = "cat in the city"s;
- const vector<int> ratings = {1, 2, 3};
- {
- SearchServer server;
- server.AddDocument(doc_id, content, DocumentStatus::ACTUAL, ratings);
- const auto found_docs = server.FindTopDocuments("in"s);
- ASSERT_EQUAL(found_docs.size(), 1u);
- const Document& doc0 = found_docs[0];
- ASSERT_EQUAL(doc0.id, doc_id);
- }
- {
- SearchServer server;
- server.SetStopWords("in the"s);
- server.AddDocument(doc_id, content, DocumentStatus::ACTUAL, ratings);
- ASSERT_HINT(server.FindTopDocuments("in"s).empty(),
- "Stop words must be excluded from documents"s);
- }
- }
- void TestAddDocument(){
- const int doc_id = 25;
- const string content = "shaman - i'm russian";
- const vector<int>ratings = {0, 1, 2};
- const vector<int>ratings1 = {1};
- {
- SearchServer server;
- server.AddDocument(doc_id, content, DocumentStatus::ACTUAL, ratings);
- server.AddDocument(1, "FUCK THE POLICE"s, DocumentStatus::ACTUAL, ratings1);
- server.AddDocument(2, "shaman forever"s, DocumentStatus::ACTUAL, {0});
- ASSERT(!server.FindTopDocuments("shaman").empty());
- ASSERT_EQUAL_HINT(server.GetDocumentCount(), 3, "Not found this doc");
- }
- }
- void TestMinusWords(){
- {
- SearchServer server;
- server.AddDocument(1, "shaman isn't the russian"s, DocumentStatus::ACTUAL, {2,3,1});
- server.AddDocument(2, "killreal this the superman"s, DocumentStatus::ACTUAL, {5,12,3,1,1});
- server.AddDocument(3, "the famous people of great britain", DocumentStatus::ACTUAL, {3,1,2,3,1,1});
- server.AddDocument(4, "lalipop bob dad the", DocumentStatus::ACTUAL, {5});
- assert(server.GetDocumentCount() == 4);
- auto doc_vec = server.FindTopDocuments("-shaman -killreal the"s);
- ASSERT_EQUAL(doc_vec[0].id, 4);
- ASSERT_EQUAL(doc_vec[1].id, 3);
- }
- }
- void TestMatchDoc(){
- const int doc_id = 1;
- const string content = "just look in my eyes and you'll see russian paradise"s;
- const string content2 = "just russian paradise"s;
- vector<int>rating {1,3,4};
- {
- SearchServer server;
- server.AddDocument(doc_id, content, DocumentStatus::ACTUAL, rating);
- server.AddDocument(2, content2, DocumentStatus::ACTUAL, {1,3,4});
- const string raw_query = "just moment russian";
- const string min_raw_query = "-see";
- auto match_doc = server.MatchDocument(min_raw_query, doc_id);
- const auto [str, doc_st] = match_doc;
- ASSERT(str.empty());
- auto pl_match_doc = server.MatchDocument(raw_query, 2);
- const auto [s, dt] = pl_match_doc;
- ASSERT(!s.empty());
- }
- }
- void TestRelevance(){
- {
- SearchServer server;
- server.AddDocument(3, "hello world i'm from Pesua"s, DocumentStatus::ACTUAL, {2,3,4});
- server.AddDocument(2, "hello world i'm"s, DocumentStatus::ACTUAL, {2,3,4});
- server.AddDocument(1, "hello world"s, DocumentStatus::ACTUAL, {2,3,4});
- auto find_top = server.FindTopDocuments("hello world i'm from Pesua", DocumentStatus::ACTUAL);
- ASSERT_EQUAL(find_top[0].id, 3);
- ASSERT_EQUAL(find_top[1].id, 2);
- ASSERT_EQUAL(find_top[2].id, 1);
- }
- }
- void TestByRating(){
- {
- SearchServer server;
- server.AddDocument(3, "hello world i'm from Pesua"s, DocumentStatus::ACTUAL, {2,3,4});
- server.AddDocument(4, "sss psss add"s, DocumentStatus::ACTUAL, {6,1,2,7});
- auto find_top = server.FindTopDocuments("hello world i'm from Pesua sss", DocumentStatus::ACTUAL);
- ASSERT_EQUAL(find_top[0].rating, 3);
- ASSERT_EQUAL(find_top[1].rating, 4);
- }
- }
- void TestStatus(){
- const int doc_id = 1;
- const string content = "blue cat and red dog";
- vector<int>rating = {1, 1, 2, 3, 4};
- const int doc_id2 = 2;
- const string content2 = "yellow rabbit blue cat matrix";
- vector<int>rating2 = {2, 2, 3, 3, 1};
- const int doc_id3 = 3;
- const string content3 = "slut and river blue cat minner";
- vector<int>rating3 = {2, 0, 1, 2, 3, 1};
- const int doc_id4 = 4;
- const string content4 = "upload blue image";
- vector<int>rating4 = {1, 2, 3};
- const string raw_query = "blue cat";
- {
- SearchServer server;
- server.AddDocument(doc_id, content, DocumentStatus::ACTUAL, rating);
- server.AddDocument(doc_id2, content2, DocumentStatus::BANNED, rating2);
- server.AddDocument(doc_id3, content3, DocumentStatus::IRRELEVANT, rating3);
- server.AddDocument(doc_id4, content4, DocumentStatus::REMOVED, rating4);
- auto doc_act = server.FindTopDocuments(raw_query, DocumentStatus::ACTUAL);
- auto doc_ban = server.FindTopDocuments(raw_query, DocumentStatus::BANNED);
- auto doc_irr = server.FindTopDocuments(raw_query, DocumentStatus::IRRELEVANT);
- auto doc_rem = server.FindTopDocuments(raw_query, DocumentStatus::REMOVED);
- ASSERT_EQUAL(doc_act[0].id, 1);
- ASSERT_EQUAL(doc_ban[0].id, 2);
- ASSERT_EQUAL(doc_irr[0].id, 3);
- ASSERT_EQUAL(doc_rem[0].id, 4);
- }
- }
- void TestPredicate(){
- {
- SearchServer server;
- server.AddDocument(1, "blue green red bad"s, DocumentStatus::ACTUAL, {0});
- server.AddDocument(2, "sick peek world word"s, DocumentStatus::BANNED, {2});
- server.FindTopDocuments("peek"s);
- server.FindTopDocuments("raw"s, DocumentStatus::BANNED);
- server.FindTopDocuments("sick", [](int document_id, DocumentStatus doc_status, int rating){ return doc_status == DocumentStatus::BANNED;
- });
- }
- }
- void TestRelevanceNum(){
- SearchServer search_server;
- int document_id = 1;
- vector<int> rating1 = {1, 7, 13};
- DocumentStatus status = DocumentStatus::ACTUAL;
- string document1 = "tabby cat with big eyes"s;
- search_server.AddDocument(document_id, document1, status, rating1);
- document_id = 2;
- vector<int> rating2 = {2, 4, 10};
- string document2 = "small dog and tabby bird"s;
- search_server.AddDocument(document_id, document2, status, rating2);
- vector<Document> document = search_server.FindTopDocuments("cat"s);
- ASSERT_EQUAL(round(document[0].relevance*1000000)/1000000, 0.138629);
- }
/*
Place the code for the remaining tests here
*/
- // Функция TestSearchServer является точкой входа для запуска тестов
- void TestSearchServer() {
- RUN_TEST(TestExcludeStopWordsFromAddedDocumentContent);
- RUN_TEST(TestAddDocument);
- RUN_TEST(TestMinusWords);
- RUN_TEST(TestStatus);
- RUN_TEST(TestMatchDoc);
- RUN_TEST(TestRelevance);
- RUN_TEST(TestByRating);
- RUN_TEST(TestPredicate);
- RUN_TEST(TestRelevanceNum);
- // Не забудьте вызывать остальные тесты здесь
- }
// --------- End of the search server unit tests -----------
- int main() {
- TestSearchServer();
- // Если вы видите эту строку, значит все тесты прошли успешно
- cout << "Search server testing finished"s << endl;
- }
Add Comment
Please, Sign In to add comment