Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #pragma once
- #pragma GCC optimize("03")
- //#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,tune=native")
- #include <iostream>
- #include <iomanip>
- #include <cstdlib>
- #include <cstdio>
- #include <string>
- #include <vector>
- #include <map>
- #include <set>
- #include <unordered_map>
- #include <unordered_set>
- #include <queue>
- #include <deque>
- #include <cmath>
- #include <numeric>
- #include <algorithm>
- #include <ctime>
- #include <chrono>
- #include <random>
- #include <functional>
- #include <math.h>
- #include <time.h>
- #include <ctime>
- using namespace std;
- using ll = long long;
- using db = double;
- using ldb = long double;
- using pii = pair<int, int>;
- using pll = pair<ll, ll>;
- using pdd = pair<ldb, ldb>;
- using vint = vector<int>;
- using vll = vector<ll>;
- #define all(x) x.begin(), x.end()
- #define size(x) int(x.size())
- #define rep(x) for(int rep_i = 0; rep_i < x; ++rep_i)
- #define forp(i, s, f) for(int i = s; i < f; ++i)
- #define form(i, s, f) for(int i = s; i > f; --i)
- #define PB push_back
- #define MP make_pair
- #define F first
- #define S second
- #define elif else if
- #define dprint(x) for (auto now: x) cout << now << " "
- const int MOD = 1e9 + 7;
- const int MOD2 = 998244353;
- const int INF = 2e9 + 7;
- const ll LINF = 1e18 + 7;
- const double EPS = 1e-9;
- const long double PI = acosl(-1.0);
- struct cluster_center {
- pdd position = { 0, 0 };
- int obj_count = 0;
- ldb x_sum = 0;
- ldb y_sum = 0;
- friend bool operator == (const cluster_center& a, const cluster_center& b);
- };
- class k_meansRBT {
- private:
- vector<cluster_center> centers;
- vector<pdd> data;
- multimap <pdd, int> objects;
- //----------------------------------------------------------------------------
- //объекты в кчд - ключи, значения - куча (расстояние до кластера, кластер)
- //----------------------------------------------------------------------------
- int K = 0;
- int iterations = 100;
- int RandNumberInt(int min, int max)
- {
- static std::mt19937 gen(time(NULL));
- std::uniform_int_distribution<> uid(min, max);
- return uid(gen);
- }
- ldb RandNumberReal(ldb min, ldb max)
- {
- static std::mt19937 gen(time(NULL));
- std::uniform_real_distribution<> uid(min, max);
- return uid(gen);
- }
- double mysqrt(double x) {
- if (x <= 0)
- return 0;
- int exp = 0;
- x = frexp(x, &exp);
- if (exp & 1) {
- exp--;
- x *= 2;
- }
- double y = (1 + x) / 2;
- double z = 0;
- while (y != z) {
- z = y;
- y = (y + x / y) / 2;
- }
- return ldexp(y, exp / 2);
- }
- ldb pow_2(ldb x) {
- return x * x;
- }
- void set_iterations(int x) {
- iterations = x;
- }
- void compute_new_center(int i, pdd point, int c) {
- if (i < 0) return;
- ldb x = centers[i].x_sum + point.first;
- ldb y = centers[i].y_sum + point.second;
- int cnt = centers[i].obj_count + c;
- centers[i].position = MP(x / cnt, y / cnt);
- centers[i].x_sum = x;
- centers[i].y_sum = y;
- centers[i].obj_count = cnt;
- }
- ldb compute_dist(pdd p1, pdd p2) {
- ldb x1 = p1.first;
- ldb y1 = p1.second;
- ldb x2 = p2.first;
- ldb y2 = p2.second;
- return mysqrt(pow_2(x1 - x2) + pow_2(y1 - y2));
- }
- void make_random_centers1(int k) {
- map <pdd, bool> used;
- ldb r = RandNumberInt(0, size(data) - 1);
- pdd center = data[r];
- centers.resize(1);
- compute_new_center(0, center, 1);
- objects.find(data[r])->second = 0;
- used[centers[0].position] = true;
- int cluster = 1;
- int ds = size(data);
- while (cluster < k) {
- vector<ldb> sp;
- ldb sm = 0;
- forp(i, 0, ds) {
- priority_queue<pair<ldb, int>, vector<pair<ldb, int>>, greater<pair<ldb, int>>> pq;
- forp(j, 0, size(centers)) {
- ldb dist = pow_2(compute_dist(data[i], centers[j].position));
- pq.push(MP(dist, j));
- }
- pair <ldb, int> point = pq.top();
- sm += point.first;
- sp.PB(point.first);
- }
- ldb rnd = RandNumberReal(0, 1 - EPS) * sm;
- ldb sm2 = 0;
- int i = 0;
- while (i < ds && sm2 < rnd) {
- sm2 += sp[i];
- if (!used[data[i]] && rnd <= sm2) {
- centers.resize(cluster + 1);
- compute_new_center(cluster, data[i], 1);
- objects.find(data[i])->second = cluster;
- used[data[i]] = true;
- cluster++;
- break;
- }
- i++;
- }
- }
- }
- void make_random_centers2(int k) {
- map <pdd, bool> used;
- sort(data.begin(), data.end());
- ldb r = 0;
- pdd center = data[r];
- centers.resize(1);
- compute_new_center(0, center, 1);
- objects.find(data[r])->second = 0;
- used[centers[0].position] = true;
- int cluster = 1;
- int ds = size(data);
- while (cluster < k) {
- priority_queue<pair<ldb, int>> sp;
- forp(i, 0, ds) {
- priority_queue<pair<ldb, int>, vector<pair<ldb, int>>, greater<pair<ldb, int>>> pq;
- forp(j, 0, size(centers)) {
- ldb dist = compute_dist(data[i], centers[j].position);
- pq.push(MP(dist, j));
- }
- pair <ldb, int> point = pq.top();
- sp.push(MP(point.first, i));
- }
- pair<ldb, int> el = sp.top();
- int i = el.second;
- if (!used[data[i]]) {
- centers.resize(cluster + 1);
- compute_new_center(cluster, data[i], 1);
- objects.find(data[i])->second = cluster;
- used[data[i]] = true;
- cluster++;
- }
- }
- }
- void load_data(vector<pdd> x) {
- data = x;
- for (auto now : x) {
- objects.insert(MP(now, -1));
- }
- }
- void k_means_mf(int k) {
- K = k;
- forp(cnt, 0, iterations) {
- bool flag = true;
- for (auto& now : objects) {
- int prev_cluster = now.second;
- priority_queue<pair<ldb, int>, vector<pair<ldb, int>>, greater<pair<ldb, int>>> pq;
- forp(j, 0, K) {
- ldb dist = compute_dist(centers[j].position, now.first);
- pq.push(MP(dist, j));
- }
- pair <ldb, int> best_center = pq.top();
- now.second = best_center.second;
- if (prev_cluster != now.second || cnt == 0) {
- flag = false;
- compute_new_center(now.second, now.first, 1);
- compute_new_center(prev_cluster, MP(-now.first.first, -now.first.second), -1);
- }
- }
- if (flag) {
- break;
- }
- }
- }
- void print_data() {
- vector<vector<pdd>> cluss(K);
- for (auto it = objects.begin(); it != objects.end(); it++) {
- cluss[it->second].push_back(it->first);
- }
- int c = 0;
- forp(i, 0, K) {
- cout << "Cluster " << i + 1 << ": ";
- for (auto now : cluss[i]) {
- cout << "( " << now.first << ", " << now.second << " ); ";
- c++;
- }
- cout << '\n';
- }
- //cout << c << '\n';
- }
- public:
- k_meansRBT() {
- }
- vector<vector<pdd>> data_to_draw() {
- vector<vector<pdd>> cluss(K);
- for (auto it = objects.begin(); it != objects.end(); it++) {
- cluss[it->second].push_back(it->first);
- }
- return cluss;
- }
- bool clusterisation(vector<pdd> data, int k) {
- if (k < 1 || k > size(data)) {
- cout << "Invalid number of clusters!\n";
- return false;
- }
- else {
- this->load_data(data);
- this->make_random_centers1(k);
- this->k_means_mf(k);
- //this->print_data();
- return true;
- }
- }
- vector<pdd> get_centers() {
- vector<pdd> x;
- for (auto now: centers) {
- x.push_back(now.position);
- }
- return x;
- }
- };
- int main() {
- ios_base::sync_with_stdio(0);
- cin.tie(0);
- cout << setprecision(6);
- cout << fixed;
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement