Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <algorithm>
#include <cmath>
#include <iostream>
#include <iterator>
#include <map>
#include <numeric>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace std;
// Upper bound on the number of documents returned by FindTopDocuments.
constexpr int MAX_RESULT_DOCUMENT_COUNT{5};
// Two relevance values closer than this are compared by rating instead.
constexpr double EPSILON{1e-6};
// Status stored with every document; search results can be filtered by it.
// Enumerator values are implicit and start at 0 in declaration order.
enum class DocumentStatus {
    ACTUAL,      // default status used by FindTopDocuments
    IRRELEVANT,
    BANNED,
    REMOVED,
};
// One search hit: the document id together with its computed relevance and
// its stored rating. A default-constructed Document has all fields zero.
struct Document {
    int id{0};
    double relevance{0};
    int rating{0};

    Document() = default;

    Document(int id, double relevance, int rating)
        : id(id)
        , relevance(relevance)
        , rating(rating) {
    }
};
- ostream& operator<<(ostream& os, Document doc){
- return os << "{ document id = " << doc.id
- << ", relevance = " << doc.relevance
- << ", rating = " << doc.rating << " }";
- }
// Reads one line from standard input and returns it without the line
// terminator. Returns an empty string when nothing could be read.
string ReadLine() {
    string line;
    getline(cin, line);
    return line;
}
- int ReadLineWithNumber() {
- int result = 0;
- cin >> result;
- ReadLine();
- return result;
- }
// Splits `text` into words separated by the space character. Consecutive,
// leading and trailing spaces never produce empty words.
// Throws invalid_argument if `text` contains any character c for which
// '\0' <= c && c < ' ' holds.
vector<string> SplitIntoWords(const string& text) {
    const auto is_special = [](const char c) {
        return '\0' <= c && c < ' ';
    };
    // Validation up front: the function has no side effects, so rejecting the
    // whole input first is indistinguishable from failing mid-scan.
    if (any_of(text.begin(), text.end(), is_special)) {
        throw invalid_argument("special symbol"s);
    }

    vector<string> words;
    string::size_type start = text.find_first_not_of(' ');
    while (start != string::npos) {
        string::size_type stop = text.find(' ', start);
        if (stop == string::npos) {
            stop = text.size();
        }
        words.push_back(text.substr(start, stop - start));
        start = text.find_first_not_of(' ', stop);
    }
    return words;
}
// Collects the elements of `strings` into a set, dropping empty strings and
// collapsing duplicates.
template <typename StringContainer>
set<string> MakeUniqueNonEmptyStrings(const StringContainer& strings) {
    set<string> unique_strings;
    for (const string& candidate : strings) {
        if (candidate.empty()) {
            continue;
        }
        unique_strings.insert(candidate);
    }
    return unique_strings;
}
- class SearchServer {
- public:
- explicit SearchServer() = default;
- template <typename T>
- explicit SearchServer(const T& stop_words)
- : stop_words_(MakeUniqueNonEmptyStrings(stop_words)) {
- for (const string& stop_word : stop_words_){
- if (!IsValidWord(stop_word)) throw invalid_argument("special symbol"s);
- }
- }
- explicit SearchServer(const string& stop_words_str) : SearchServer(SplitIntoWords(stop_words_str)) {}
- static bool IsValidWord(const string& word) {
- return none_of(word.begin(), word.end(), [](char c) {
- return c >= '\0' && c < ' ';
- });
- }
- int GetDocumentId(int ordinal_num) const {
- if (ordinal_num < 0 || ordinal_num >= static_cast<int>(order_.size()))
- throw out_of_range("invalid id"s);
- return order_.at(ordinal_num);
- }
- bool IsDocumentExist(int id) const {
- return documents_.count(id);
- }
- int GetDocumentCount() const {
- return document_count_;
- };
- tuple<vector<string>, DocumentStatus> MatchDocument(const string& raw_query, int id) const{
- auto& map_ = word_to_document_freqs_;
- if ( !IsDocumentExist(id) ){
- throw invalid_argument("invalid id"s);
- }
- const auto query = ParseQuery(SplitIntoWordsNoStop(raw_query));
- tuple<vector<string>, DocumentStatus> result;
- get<1>(result) = documents_.at(id).status;
- for (const string& word : query.minus_words){
- auto ptr = map_.find(word);
- if (ptr == map_.end()) continue;
- if (ptr->second.count(id)) return result;
- }
- for (const string& word : query.plus_words){
- auto ptr = map_.find(word);
- if (ptr == map_.end()) continue;
- if (ptr->second.count(id))
- get<0>(result).push_back(word);
- }
- return result;
- }
- void AddDocument(int id, const string& document_text
- , DocumentStatus status, const vector<int>& ratings){
- if (id < 0)
- throw invalid_argument("id is less then zero"s);
- if (IsDocumentExist(id))
- throw invalid_argument("a document with this id already exists"s);
- vector<string> words = SplitIntoWordsNoStop(document_text);
- order_.push_back(id);
- int rating = ComputeAverageRating(ratings);
- double tf = 1. / words.size();
- for (const string& word : words){
- word_to_document_freqs_[word][id] += tf;
- }
- documents_[id] = {rating, status};
- ++document_count_;
- }
- template <typename Predicate>
- vector<Document> FindTopDocuments(const string& raw_query
- , Predicate predicate) const {
- const auto query = ParseQuery(SplitIntoWordsNoStop(raw_query));
- vector<Document> matched_documents = FindAllDocuments(query, predicate);
- sort(matched_documents.begin(), matched_documents.end(),
- [](const Document& ld, const Document& rd) {
- return (ld.relevance > rd.relevance
- || (std::abs(ld.relevance - rd.relevance) < EPSILON
- && ld.rating > rd.rating));
- }
- );
- if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) {
- matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT);
- }
- return matched_documents;
- }
- vector<Document> FindTopDocuments(const string& raw_query
- , DocumentStatus status_ = DocumentStatus::ACTUAL) const {
- return FindTopDocuments(raw_query, [status_](int id, DocumentStatus status, int rating){
- return status == status_;
- });
- }
- private:
- struct Query{
- set<string> plus_words;
- set<string> minus_words;
- };
- struct DocumentData{
- int rating;
- DocumentStatus status;
- };
- int document_count_ = 0;
- set<string> stop_words_;
- map<string, map<int, double>> word_to_document_freqs_;
- map<int, DocumentData> documents_;
- vector<int> order_;
- vector<string> SplitIntoWordsNoStop(const string& text) const {
- vector<string> words;
- for (const string& word : SplitIntoWords(text)) {
- if (stop_words_.count(word) == 0) {
- words.push_back(word);
- }
- }
- return words;
- }
- Query ParseQuery(const vector<string>& words) const {
- Query query;
- for (const string& word : words) {
- if (word[0] == '-') {
- if (word.size() == 1u) throw invalid_argument("the '-' stands alone"s);
- if (word[1] == '-') throw invalid_argument("double '-'"s);
- query.minus_words.insert(string(word.begin() + 1, word.end()));
- }
- else {
- query.plus_words.insert(word);
- }
- }
- return query;
- }
- double ComputeIDF(const string& word) const {
- return log( static_cast<double>(document_count_)
- / static_cast<double>(word_to_document_freqs_.at(word).size()) );
- }
- static int ComputeAverageRating(const vector<int>& ratings) {
- if (ratings.size() == 0) return 0;
- return accumulate(ratings.begin(), ratings.end(), 0) / static_cast<int>(ratings.size());
- }
- void ExcludeMinusWords(const Query& query, map<int, double>& docs_relevance) const {
- for (const string& word : query.minus_words){
- if (word_to_document_freqs_.count(word)){
- for (const auto& [id, tf] : word_to_document_freqs_.at(word)){
- docs_relevance.erase(id);
- }
- }
- }
- }
- template <typename Predicate>
- vector<Document> FindAllDocuments(const Query& query, Predicate predicate) const {
- map<int, double> docs_relevance;
- vector<Document> matched_documents;
- for (const string& word : query.plus_words){
- if (word_to_document_freqs_.count(word)){
- const double idf = ComputeIDF(word);
- for (const auto& [id, tf] : word_to_document_freqs_.at(word)){
- docs_relevance[id] += idf * tf;
- }
- }
- }
- ExcludeMinusWords(query, docs_relevance);
- for (const auto& [id, relevance] : docs_relevance){
- if (predicate(id, documents_.at(id).status, documents_.at(id).rating)){
- matched_documents.push_back({id, relevance, documents_.at(id).rating});
- }
- }
- return matched_documents;
- }
- };
// Non-owning view over the iterator range [begin, end).
// The element count is computed once, at construction. All accessors are
// const so that a const IterRange can be inspected and used in a range-for.
template <typename It>
class IterRange {
public:
    IterRange(It begin, It end)
        : begin_(begin)
        , end_(end)
        , size_(static_cast<size_t>(distance(begin, end))) {
    }

    // First iterator of the range.
    It begin() const {
        return begin_;
    }

    // Past-the-end iterator of the range.
    It end() const {
        return end_;
    }

    // Number of elements between begin() and end().
    size_t size() const {
        return size_;
    }

private:
    It begin_;
    It end_;
    size_t size_;
};
// Streams every element of [first, end) to std::cout, one after another,
// with no separator between elements.
template <typename Iterator>
void PrintRange(Iterator first, Iterator end) {
    for (; first != end; ++first) {
        std::cout << *first;
    }
}
- template <typename It>
- ostream& operator<<(ostream& os, IterRange<It> Ir){
- for (auto it = Ir.begin(); it != Ir.end(); ++it){
- os << *it;
- }
- return os;
- }
- template<typename It>
- class Paginator{
- public:
- Paginator(It begin, It end, size_t page_size){
- It page_begin = begin;
- It page_end = begin;
- while (page_end != end){
- page_begin = page_end;
- if (static_cast<int>(page_size) <= distance(page_begin, end)){
- advance(page_end, page_size);
- }else{
- page_end = end;
- }
- pages_.push_back(IterRange(page_begin, page_end));
- }
- }
- auto begin() const {
- return pages_.begin();
- }
- auto end() const {
- return pages_.end();
- }
- private:
- vector<IterRange<It>> pages_;
- };
- template <typename Container>
- auto Paginate(const Container& c, size_t page_size){
- return Paginator(c.begin(), c.end(), page_size);
- }
- int main(){
- SearchServer search_server("and with"s);
- search_server.AddDocument(1, "funny pet and nasty rat"s, DocumentStatus::ACTUAL, {7, 2, 7});
- search_server.AddDocument(2, "funny pet with curly hair"s, DocumentStatus::ACTUAL, {1, 2, 3});
- search_server.AddDocument(3, "big cat nasty hair"s, DocumentStatus::ACTUAL, {1, 2, 8});
- search_server.AddDocument(4, "big dog cat Vladislav"s, DocumentStatus::ACTUAL, {1, 3, 2});
- search_server.AddDocument(5, "big dog hamster Borya"s, DocumentStatus::ACTUAL, {1, 1, 1});
- const auto search_results = search_server.FindTopDocuments("curly dog"s);
- int page_size = 2;
- const auto pages = Paginate(search_results, page_size);
- // Выводим найденные документы по страницам
- for (auto page = pages.begin(); page != pages.end(); ++page) {
- cout << *page << endl;
- cout << "Page break"s << endl;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement