Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include "stdc++.h"
- #include "cpu_bench.hpp"
- //Алгосы
- using namespace std;
- void haSetIntersectionhInter(vector<int>& A,vector<int>& B, vector<int>& result)
- {
- set<int> hs;
- // Insert the elements of arr1[] to set S
- for (int i = 0; i < A.size(); ++i){
- hs.insert(A[i]);
- }
- for (int i = 0; i < B.size(); ++i){
- // If element is present in set then
- // push it to vector V
- if (hs.find(B[i]) != hs.end()){
- result.push_back(B[i]);
- }
- }
- }
- int binary_search_find_index(vector<int>::iterator beg, vector<int>::iterator en, int data) {
- vector<int>::iterator it = std::lower_bound(beg, en, data);
- if (it == en || *it != data) {
- return -1;
- } else {
- std::size_t index = std::distance(beg, it);
- return index;
- }
- }
- //Бинарный поиск с возвратом индекса на итераторах с возвратом итератора
- vector<int>::iterator binary_search_iterated(vector<int>* arr, vector<int>::iterator low, vector<int>::iterator high, int x){
- if (high >= low){
- vector<int>::iterator mid = low + (high - low)/2;
- if (*mid == x){
- return mid;
- }
- else{
- if (*mid > x){
- return binary_search_iterated(arr, low, mid - 1, x);
- }
- else{
- return binary_search_iterated(arr, mid + 1, high, x);
- }
- }
- }
- else{
- return low;
- }
- }
- /*
- The most obvious alg
- Comparing all the elements in A and B with each other
- and inserting them in C
- Complexity is O(|A|*|B|)
- */
- void simple_intersection(unordered_set<int>* A, unordered_set<int>* B, unordered_set<int>* C){
- unordered_set<int>::const_iterator it_A, it_B;
- unordered_set<int>::const_iterator it_C;
- it_A = A->begin();
- it_B = B->begin();
- while (it_A != A->end()){
- while (it_B != B->end()){
- //cout << *it_B << ' ' << *it_A <<endl;
- if(*it_A == *it_B){
- C->insert(*it_A);
- }
- ++it_B;
- }
- it_B = B->begin();
- ++it_A;
- }
- }
- //TODO: сделать обертку
- //B не должен быть мощнее A
- void BaezaYates_step(vector<int>* A, vector<int>* B, vector<int>::iterator A_begin, vector<int>::iterator A_end, vector<int>::iterator B_begin, vector<int>::iterator B_end, unordered_set<int> *C){
- if (A_end - A_begin <= 0 || B_end - B_begin <= 0){
- return;
- }
- vector<int>::iterator midB = B_begin + (B_end - B_begin)/2;
- int midBval = *midB;
- vector<int>::iterator midA = binary_search_iterated(A, A_begin, A_end, midBval);
- vector<int>::iterator a_res,b_res;
- if ((midA-A_begin) > (midB-B_begin)){
- BaezaYates_step(A,B,A_begin, midA, B_begin, midB, C);
- }
- else{
- BaezaYates_step(B,A,B_begin, midB, A_begin, midA, C);
- }
- if (*midA == midBval){
- C->insert(midBval);
- }
- if ((A_end - midA) > (B_end - midB)){
- BaezaYates_step(A,B,midA, A_end, midB+1, B_end, C);
- }
- else{
- BaezaYates_step(B,A,midB+1, B_end, midA, A_end, C);
- }
- }
- void GallopingIntersection(vector<int>& A_sorted, vector<int>& B_sorted, unordered_set<int>& C, bool isSorted = false){
- int low = 0;
- int diff, high;
- int k;
- if (!isSorted){
- //creating unsorted duplicates and sort them as vectors
- std::sort(A_sorted.begin(), A_sorted.end());
- std::sort(B_sorted.begin(), B_sorted.end());
- for(int i=0; i<B_sorted.size(); ++i){ //< or <= I dunno
- //cout << "B[i]:" << B_sorted[i] << " ";
- diff = 1;
- while (low + diff <= A_sorted.size() && A_sorted[low + diff] <= B_sorted[i]){
- diff *= 2;
- }
- high = min(int(A_sorted.size()), low + diff);
- k = binary_search_find_index((A_sorted.begin() + low), (A_sorted.begin() + high), B_sorted[i]);
- if(k != -1){
- C.insert(B_sorted[i]);
- //cout << B_sorted[i];
- }
- low = k;
- //cout << endl;
- }
- }
- }
- void split(vector<int>& s, vector<vector<int>>& sub, int n){
- vector<int>::iterator splitter = s.begin();
- while(s.end() >= splitter + n){
- sub.push_back(vector<int>(splitter ,splitter + n));
- splitter += n;
- }
- if(s.end() - splitter > 0){
- sub.push_back(vector<int>(splitter, s.end()));
- }
- }
- void preprocessHash4(vector<int>& s, vector<vector<int>>& h_m, bitset<4>& bit){
- h_m.push_back(vector<int>());
- h_m.push_back(vector<int>());
- h_m.push_back(vector<int>());
- h_m.push_back(vector<int>());
- unordered_map<int,int> h;
- vector<int>::const_iterator i = s.begin();
- int buf_h;
- while(i != s.end()){
- buf_h = *i%4;
- bit[buf_h] = true;
- h[*i] = buf_h;
- h_m[buf_h].push_back(*i);
- ++i;
- }
- }
- void preprocessor4(vector<int>& s, vector<vector<int>>& subsets, vector<bitset<4>>& bitmasks, vector<vector<vector<int>>>& h_m){
- sort(s.begin(), s.end());
- split(s, subsets, 2);
- for (int i = 0; i < subsets.size(); ++i){
- bitmasks.push_back(bitset<4>());
- }
- for (int i = 0; i < subsets.size(); ++i){
- h_m.push_back(vector<vector<int>>());
- }
- for (int i = 0; i < subsets.size(); i++){
- preprocessHash4(subsets[i], h_m[i], bitmasks[i]);
- }
- }
- void IntersectSmall4(bitset<4>& bA, vector<vector<int>>&v_mA, bitset<4>& bB, vector<vector<int>>&v_mB, unordered_set<int>& res){
- bitset<4> bAnd = bA&bB;
- string ban = bAnd.to_string();
- for(int i = ban.find_first_of('1'); i < bAnd.size(); i = ban.find_first_of('1', i+1)){
- set_intersection(v_mA[i].begin(), v_mA[i].end(), v_mB[i].begin(), v_mB[i].end(), std::inserter(res, res.begin()));
- }
- }
- void dk_intersection4(vector<vector<int>>& subsetsA, vector<vector<vector<int>>>& h_rA, vector<bitset<4>>& bitA, vector<vector<int>>& subsetsB, vector<vector<vector<int>>>& h_rB, vector<bitset<4>>& bitB, unordered_set<int>& result){
- int p,q;
- p = 0;
- q = 0;
- while(p < subsetsA.size() && q < subsetsB.size()){
- if (*subsetsA[p].begin() > *(subsetsB[q].end()-1)){
- q += 1;
- cout << *subsetsA[p].begin() << ' ' <<*subsetsB[q].end() << endl;
- }
- else if(*(subsetsA[p].end()-1) < *subsetsB[q].begin()){
- p += 1;
- }
- else{
- IntersectSmall4(bitA[p], h_rA[p], bitB[q], h_rB[q], result);
- if (*(subsetsA[p].end()-1) < *(subsetsB[q].end()-1)){
- p += 1;
- }
- else{
- q += 1;
- }
- }
- }
- }
- void preprocessHash16(vector<int>& s, vector<vector<int>>& h_m, bitset<16>& bit){
- h_m.push_back(vector<int>());
- h_m.push_back(vector<int>());
- h_m.push_back(vector<int>());
- h_m.push_back(vector<int>());
- unordered_map<int,int> h;
- vector<int>::const_iterator i = s.begin();
- int buf_h;
- while(i != s.end()){
- buf_h = *i%16;
- bit[buf_h] = true;
- h[*i] = buf_h;
- h_m[buf_h].push_back(*i);
- ++i;
- }
- }
- void preprocessor16(vector<int>& s, vector<vector<int>>& subsets, vector<bitset<16>>& bitmasks, vector<vector<vector<int>>>& h_m){
- sort(s.begin(), s.end());
- split(s, subsets, 4);
- for (int i = 0; i < subsets.size(); ++i){
- bitmasks.push_back(bitset<16>());
- }
- for (int i = 0; i < subsets.size(); ++i){
- h_m.push_back(vector<vector<int>>());
- }
- for (int i = 0; i < subsets.size(); i++){
- preprocessHash16(subsets[i], h_m[i], bitmasks[i]);
- }
- }
- void IntersectSmall16(bitset<16>& bA, vector<vector<int>>&v_mA, bitset<16>& bB, vector<vector<int>>&v_mB, unordered_set<int>& res){
- bitset<16> bAnd = bA&bB;
- string ban = bAnd.to_string();
- for(int i = ban.find_first_of('1'); i < bAnd.size(); i = ban.find_first_of('1', i+1)){
- set_intersection(v_mA[i].begin(), v_mA[i].end(), v_mB[i].begin(), v_mB[i].end(), std::inserter(res, res.begin()));
- }
- }
- void dk_intersection16(vector<vector<int>>& subsetsA, vector<vector<vector<int>>>& h_rA, vector<bitset<16>>& bitA, vector<vector<int>>& subsetsB, vector<vector<vector<int>>>& h_rB, vector<bitset<16>>& bitB, unordered_set<int>& result){
- int p,q;
- p = 0;
- q = 0;
- while(p < subsetsA.size() && q < subsetsB.size()){
- if (*subsetsA[p].begin() > *(subsetsB[q].end()-1)){
- q += 1;
- cout << *subsetsA[p].begin() << ' ' <<*subsetsB[q].end() << endl;
- }
- else if(*(subsetsA[p].end()-1) < *subsetsB[q].begin()){
- p += 1;
- }
- else{
- IntersectSmall16(bitA[p], h_rA[p], bitB[q], h_rB[q], result);
- if (*(subsetsA[p].end()-1) < *(subsetsB[q].end()-1)){
- p += 1;
- }
- else{
- q += 1;
- }
- }
- }
- }
- void preprocessHash64(vector<int>& s, vector<vector<int>>& h_m, bitset<64>& bit){
- h_m.push_back(vector<int>());
- h_m.push_back(vector<int>());
- h_m.push_back(vector<int>());
- h_m.push_back(vector<int>());
- unordered_map<int,int> h;
- vector<int>::const_iterator i = s.begin();
- int buf_h;
- while(i != s.end()){
- buf_h = *i%64;
- bit[buf_h] = true;
- h[*i] = buf_h;
- h_m[buf_h].push_back(*i);
- ++i;
- }
- }
- void preprocessor64(vector<int>& s, vector<vector<int>>& subsets, vector<bitset<64>>& bitmasks, vector<vector<vector<int>>>& h_m){
- sort(s.begin(), s.end());
- split(s, subsets, 8);
- for (int i = 0; i < subsets.size(); ++i){
- bitmasks.push_back(bitset<64>());
- }
- for (int i = 0; i < subsets.size(); ++i){
- h_m.push_back(vector<vector<int>>());
- }
- for (int i = 0; i < subsets.size(); i++){
- preprocessHash64(subsets[i], h_m[i], bitmasks[i]);
- }
- }
- void IntersectSmall64(bitset<64>& bA, vector<vector<int>>&v_mA, bitset<64>& bB, vector<vector<int>>&v_mB, unordered_set<int>& res){
- bitset<64> bAnd = bA&bB;
- string ban = bAnd.to_string();
- for(int i = ban.find_first_of('1'); i < bAnd.size(); i = ban.find_first_of('1', i+1)){
- set_intersection(v_mA[i].begin(), v_mA[i].end(), v_mB[i].begin(), v_mB[i].end(), std::inserter(res, res.begin()));
- }
- }
- void dk_intersection64(vector<vector<int>>& subsetsA, vector<vector<vector<int>>>& h_rA, vector<bitset<64>>& bitA, vector<vector<int>>& subsetsB, vector<vector<vector<int>>>& h_rB, vector<bitset<64>>& bitB, unordered_set<int>& result){
- int p,q;
- p = 0;
- q = 0;
- while(p < subsetsA.size() && q < subsetsB.size()){
- if (*subsetsA[p].begin() > *(subsetsB[q].end()-1)){
- q += 1;
- cout << *subsetsA[p].begin() << ' ' <<*subsetsB[q].end() << endl;
- }
- else if(*(subsetsA[p].end()-1) < *subsetsB[q].begin()){
- p += 1;
- }
- else{
- IntersectSmall64(bitA[p], h_rA[p], bitB[q], h_rB[q], result);
- if (*(subsetsA[p].end()-1) < *(subsetsB[q].end()-1)){
- p += 1;
- }
- else{
- q += 1;
- }
- }
- }
- }
- // генератор
- void lcg(vector<int>& r, int seed, int size, int a, int c, long m){
- if (size == 1){
- r.push_back((a*seed+c)%m);
- return;
- }
- for(int i = 0; i < size; ++i){
- r.push_back(0);
- }
- r[0] = seed;
- for(int i = 1; i < size; ++i){
- r[i] = (a*r[i-1]+c)%m;
- }
- r.erase(r.begin());
- }
- void generate(int seed, long size, double a2b, double i2u, vector<int> & a, vector<int>& b){
- //hardcoded consts for lcg generator
- //a = 50001
- //c = 49999
- //m = 2500000000
- int inter = int(size*i2u);
- int a_size = int(a2b*size*(1-i2u)/(a2b+1))+inter;
- int b_size = int(size*(1-i2u)/(a2b+1))+inter;
- lcg(a, seed, a_size, 50001, 49999, 2500000000);
- lcg(b, a[a_size - inter],b_size, 50001, 49999, 2500000000);
- }
- //тестер
- int main(int argc, char* argv[]) {
- vector<int> a;
- vector<int> b;
- /*
- 1000
- 2000
- 5000
- 10000
- 20000
- 50000
- 100000
- 200000
- 500000
- 1000000
- 2000000
- 5000000
- 10000000
- */
- int i = 100000;
- if (argc>1){
- i = stoi(argv[1]);
- }
- generate(14, i, 1, 0.1, a, b);
- unordered_set<int> v_intersection;
- /*
- sort(a.begin(), a.end());
- sort(b.begin(), b.end());
- BaezaYates_step(&a, &b, a.begin(), a.end(), b.begin(), b.end(), &v_intersection);
- */
- unordered_set<int> res;
- double start_time = getCPUTime();
- GallopingIntersection(a, b, res);
- double end_time = getCPUTime();
- cout << res.size() << endl;
- // ' ' << v_intersection.size() << endl;
- cout << i << ' ' << a.size() << ", " << b.size() << ", " << end_time - start_time << ",\n";
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement