ЯМП Лаба 2 2023 (хэш-таблица)

//Source.cpp

//Входной текст разбивается на слова, очищается от знаков препинания и
//помещается в хеш - таблицу.
//Каждый элемент хеш - таблицы – это слово и полесчетчик,
//содержащее количество раз, которое данное слово встретилось в тексте.
//При вставке нового элемента в таблицу, если данный элемент уже
//присутствует в ней, необходимо увеличить соответствующий счетчик.В
//противном случае, создается новый элемент.
//Программа должна выдавать 10 слов,
//которые встречаются чаще всего во входном тексте.

//Вариант 11. Хен-функция: метод деления. Метод разрешения коллизий: открытой адресации

#include "Hash.h"

int main()
{
    SetConsoleOutputCP(1251);
    SetConsoleCP(1251);

    int q;

    do
    {
        const size_t N = 1000;
        Hash table1(N);

        ifstream input_file_1("input_text_1.txt");
        string tmp;
        while (!input_file_1.eof())
        {
            input_file_1 >> tmp;
            tmp = remove_punctuation(tmp);
            table1.insert(tmp);
        }
        input_file_1.close();

        ofstream output_file_1("output_file_1.txt");
        table1.most_frequent_words(output_file_1);
        output_file_1.close();

        ///////////////////////////////////////////////////////////////
        ///////////////////////////////////////////////////////////////

        Hash table2(N);
        string tmp2;
        ifstream input_file_2("input_text_2.txt");

        while (!input_file_2.eof())
        {
            input_file_2 >> tmp2;
            tmp2 = remove_punctuation(tmp2);
            table2.insert(tmp2);
        }
        input_file_2.close();

        ofstream output_file_2("output_file_2.txt");
        table2.most_frequent_words(output_file_2);
        output_file_2.close();

        ///////////////////////////////////////////////////////////////
        ///////////////////////////////////////////////////////////////

        Hash table3(N);

        ifstream input_file_3("input_text_3.txt");

        while (!input_file_3.eof())
        {
            input_file_3 >> tmp;
            tmp = remove_punctuation(tmp);
            table3.insert(tmp);
        }
        input_file_3.close();

        ofstream output_file_3("output_file_3.txt");
        table3.most_frequent_words(output_file_3);
        output_file_3.close();

        cout << "\n0. Выход" << endl;
        cin >> q;
    } while (q != 0);
}

//Hash.h

#pragma once
#include <iostream>
#include <string>
#include <Windows.h>
#include <fstream>
#include <vector>

using namespace std;

struct node
{
    string word;
    int count;
    node()
    {
        word = '\0';
        count = 0;
    }
};

class Hash
{
    node* table;
    size_t size;
    size_t fullness;

public:
    int collision;
    Hash(size_t);
    ~Hash();
    int division_function(string, size_t);
    void insert(string);
    void new_table();
    void most_frequent_words(ofstream&);
};

string remove_punctuation(string);

//Hash.cpp

#include "Hash.h"

Hash::Hash(size_t n)
{
    table = new node[n];
    size = n;
    collision = 0;
    fullness = 0;
}
Hash::~Hash()
{
    delete[] table;
}
int Hash::division_function(string word, size_t size)
{
    int hash = 0;
    for (char c : word)
    {
        hash += static_cast<int>(c);
    }
    return (hash % size);
}
string remove_punctuation(string word)
{
    string result;
    for (char c : word)
    {
        if (isalpha(c))
            result += tolower(c);
    }

    return result;
}
void Hash::insert(string word)
{

    int index = division_function(word, size);

    if (table[index].count > 0 && table[index].word != word) //разрешение коллизий открытой адресацией
    {
        collision++;
        while (table[index].count > 0 && table[index].word != word)
        {
            index = (index + 1) % size;
        }
    }

    if (table[index].word == word)
        table[index].count++;
    else
    {
        table[index].word = word;
        table[index].count = 1;
    }

    fullness++;

    if (fullness == size)
        new_table();
}
void Hash::new_table()
{
    size_t old_size = size;
    size *= 2;
    node* tmp = new node[size];
    swap(tmp, table);
    for (size_t i = 0; i < old_size; i++)
    {
        table[i].word = tmp[i].word;
        table[i].count = tmp[i].count;
    }
    delete[] tmp;
}
void Hash::most_frequent_words(ofstream& file)
{
    vector <node> tmp_arr;
    for (size_t i = 0; i < size; i++)
        if (table[i].word != "\0")
            tmp_arr.push_back(table[i]);

    for (size_t i = 0; i < tmp_arr.size() - 1; i++)
    {
        int max = i;
        for (size_t j = i + 1; j < tmp_arr.size(); j++)
        {
            if (tmp_arr[j].count > tmp_arr[max].count)
                max = j;
        }

        if (max != i)
        {
            int tmp = tmp_arr[i].count;
            string word = tmp_arr[i].word;
            tmp_arr[i].word = tmp_arr[max].word;
            tmp_arr[i].count = tmp_arr[max].count;
            tmp_arr[max].word = word;
            tmp_arr[max].count = tmp;
        }
    }

    size_t top_ten = 10;
    if (top_ten > tmp_arr.size())
        top_ten = tmp_arr.size();

    for (size_t i = 0; i < top_ten; i++)
    {
        file << tmp_arr[i].word << " = " << tmp_arr[i].count << endl;
    }

    file << "\nКоличество коллизий = " << collision << endl;
}