Advertisement
HellFinger

Untitled

Dec 17th, 2021
276
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.71 KB | None | 0 0
  1. #include <iostream>
  2. #include "stdc++.h"
  3. #include "cpu_bench.hpp"
  4.  
  5. //Алгосы
  6. using namespace std;
  7.  
  8.  
  9. void haSetIntersectionhInter(vector<int>& A,vector<int>& B, vector<int>& result)
  10. {
  11. set<int> hs;
  12.  
  13. // Insert the elements of arr1[] to set S
  14. for (int i = 0; i < A.size(); ++i){
  15. hs.insert(A[i]);
  16. }
  17. for (int i = 0; i < B.size(); ++i){
  18.  
  19. // If element is present in set then
  20. // push it to vector V
  21. if (hs.find(B[i]) != hs.end()){
  22. result.push_back(B[i]);
  23. }
  24. }
  25. }
  26.  
  27. int binary_search_find_index(vector<int>::iterator beg, vector<int>::iterator en, int data) {
  28. vector<int>::iterator it = std::lower_bound(beg, en, data);
  29. if (it == en || *it != data) {
  30. return -1;
  31. } else {
  32. std::size_t index = std::distance(beg, it);
  33. return index;
  34. }
  35. }
  36.  
  37. //Бинарный поиск с возвратом индекса на итераторах с возвратом итератора
  38.  
  39. vector<int>::iterator binary_search_iterated(vector<int>* arr, vector<int>::iterator low, vector<int>::iterator high, int x){
  40. if (high >= low){
  41. vector<int>::iterator mid = low + (high - low)/2;
  42.  
  43. if (*mid == x){
  44. return mid;
  45. }
  46. else{
  47. if (*mid > x){
  48. return binary_search_iterated(arr, low, mid - 1, x);
  49. }
  50. else{
  51. return binary_search_iterated(arr, mid + 1, high, x);
  52. }
  53. }
  54.  
  55. }
  56. else{
  57. return low;
  58. }
  59.  
  60. }
  61.  
  62.  
  63. /*
  64. The most obvious alg
  65. Comparing all the elements in A and B with each other
  66. and inserting them in C
  67. Complexity is O(|A|*|B|)
  68. */
  69. void simple_intersection(unordered_set<int>* A, unordered_set<int>* B, unordered_set<int>* C){
  70. unordered_set<int>::const_iterator it_A, it_B;
  71. unordered_set<int>::const_iterator it_C;
  72. it_A = A->begin();
  73. it_B = B->begin();
  74. while (it_A != A->end()){
  75. while (it_B != B->end()){
  76. //cout << *it_B << ' ' << *it_A <<endl;
  77. if(*it_A == *it_B){
  78. C->insert(*it_A);
  79. }
  80. ++it_B;
  81. }
  82. it_B = B->begin();
  83. ++it_A;
  84. }
  85.  
  86.  
  87.  
  88.  
  89. }
  90.  
  91. //TODO: сделать обертку
  92. //B не должен быть мощнее A
  93. void BaezaYates_step(vector<int>* A, vector<int>* B, vector<int>::iterator A_begin, vector<int>::iterator A_end, vector<int>::iterator B_begin, vector<int>::iterator B_end, unordered_set<int> *C){
  94. if (A_end - A_begin <= 0 || B_end - B_begin <= 0){
  95. return;
  96. }
  97. vector<int>::iterator midB = B_begin + (B_end - B_begin)/2;
  98. int midBval = *midB;
  99. vector<int>::iterator midA = binary_search_iterated(A, A_begin, A_end, midBval);
  100. vector<int>::iterator a_res,b_res;
  101. if ((midA-A_begin) > (midB-B_begin)){
  102. BaezaYates_step(A,B,A_begin, midA, B_begin, midB, C);
  103. }
  104. else{
  105. BaezaYates_step(B,A,B_begin, midB, A_begin, midA, C);
  106. }
  107. if (*midA == midBval){
  108. C->insert(midBval);
  109. }
  110. if ((A_end - midA) > (B_end - midB)){
  111. BaezaYates_step(A,B,midA, A_end, midB+1, B_end, C);
  112. }
  113. else{
  114. BaezaYates_step(B,A,midB+1, B_end, midA, A_end, C);
  115. }
  116.  
  117. }
  118.  
  119.  
  120.  
  121. void GallopingIntersection(vector<int>& A_sorted, vector<int>& B_sorted, unordered_set<int>& C, bool isSorted = false){
  122. int low = 0;
  123. int diff, high;
  124. int k;
  125.  
  126. if (!isSorted){
  127. //creating unsorted duplicates and sort them as vectors
  128. std::sort(A_sorted.begin(), A_sorted.end());
  129. std::sort(B_sorted.begin(), B_sorted.end());
  130.  
  131.  
  132.  
  133. for(int i=0; i<B_sorted.size(); ++i){ //< or <= I dunno
  134. //cout << "B[i]:" << B_sorted[i] << " ";
  135. diff = 1;
  136. while (low + diff <= A_sorted.size() && A_sorted[low + diff] <= B_sorted[i]){
  137. diff *= 2;
  138. }
  139. high = min(int(A_sorted.size()), low + diff);
  140. k = binary_search_find_index((A_sorted.begin() + low), (A_sorted.begin() + high), B_sorted[i]);
  141. if(k != -1){
  142. C.insert(B_sorted[i]);
  143. //cout << B_sorted[i];
  144. }
  145. low = k;
  146. //cout << endl;
  147.  
  148. }
  149.  
  150.  
  151. }
  152.  
  153.  
  154.  
  155. }
  156.  
  157.  
  158.  
  159.  
  160.  
  161. void split(vector<int>& s, vector<vector<int>>& sub, int n){
  162. vector<int>::iterator splitter = s.begin();
  163. while(s.end() >= splitter + n){
  164. sub.push_back(vector<int>(splitter ,splitter + n));
  165. splitter += n;
  166. }
  167. if(s.end() - splitter > 0){
  168. sub.push_back(vector<int>(splitter, s.end()));
  169. }
  170.  
  171. }
  172.  
  173.  
  174.  
  175.  
  176.  
  177.  
  178.  
  179.  
  180.  
  181.  
  182.  
  183.  
  184. void preprocessHash4(vector<int>& s, vector<vector<int>>& h_m, bitset<4>& bit){
  185. h_m.push_back(vector<int>());
  186. h_m.push_back(vector<int>());
  187. h_m.push_back(vector<int>());
  188. h_m.push_back(vector<int>());
  189.  
  190. unordered_map<int,int> h;
  191. vector<int>::const_iterator i = s.begin();
  192. int buf_h;
  193. while(i != s.end()){
  194. buf_h = *i%4;
  195. bit[buf_h] = true;
  196. h[*i] = buf_h;
  197. h_m[buf_h].push_back(*i);
  198. ++i;
  199. }
  200.  
  201.  
  202. }
  203.  
  204.  
  205. void preprocessor4(vector<int>& s, vector<vector<int>>& subsets, vector<bitset<4>>& bitmasks, vector<vector<vector<int>>>& h_m){
  206. sort(s.begin(), s.end());
  207. split(s, subsets, 2);
  208.  
  209. for (int i = 0; i < subsets.size(); ++i){
  210. bitmasks.push_back(bitset<4>());
  211. }
  212.  
  213.  
  214. for (int i = 0; i < subsets.size(); ++i){
  215. h_m.push_back(vector<vector<int>>());
  216. }
  217. for (int i = 0; i < subsets.size(); i++){
  218. preprocessHash4(subsets[i], h_m[i], bitmasks[i]);
  219. }
  220. }
  221.  
  222. void IntersectSmall4(bitset<4>& bA, vector<vector<int>>&v_mA, bitset<4>& bB, vector<vector<int>>&v_mB, unordered_set<int>& res){
  223. bitset<4> bAnd = bA&bB;
  224. string ban = bAnd.to_string();
  225.  
  226. for(int i = ban.find_first_of('1'); i < bAnd.size(); i = ban.find_first_of('1', i+1)){
  227. set_intersection(v_mA[i].begin(), v_mA[i].end(), v_mB[i].begin(), v_mB[i].end(), std::inserter(res, res.begin()));
  228. }
  229.  
  230.  
  231. }
  232.  
  233. void dk_intersection4(vector<vector<int>>& subsetsA, vector<vector<vector<int>>>& h_rA, vector<bitset<4>>& bitA, vector<vector<int>>& subsetsB, vector<vector<vector<int>>>& h_rB, vector<bitset<4>>& bitB, unordered_set<int>& result){
  234. int p,q;
  235. p = 0;
  236. q = 0;
  237. while(p < subsetsA.size() && q < subsetsB.size()){
  238.  
  239. if (*subsetsA[p].begin() > *(subsetsB[q].end()-1)){
  240. q += 1;
  241. cout << *subsetsA[p].begin() << ' ' <<*subsetsB[q].end() << endl;
  242.  
  243.  
  244. }
  245. else if(*(subsetsA[p].end()-1) < *subsetsB[q].begin()){
  246. p += 1;
  247.  
  248. }
  249. else{
  250. IntersectSmall4(bitA[p], h_rA[p], bitB[q], h_rB[q], result);
  251.  
  252. if (*(subsetsA[p].end()-1) < *(subsetsB[q].end()-1)){
  253. p += 1;
  254.  
  255. }
  256. else{
  257. q += 1;
  258. }
  259. }
  260. }
  261.  
  262.  
  263. }
  264.  
  265. void preprocessHash16(vector<int>& s, vector<vector<int>>& h_m, bitset<16>& bit){
  266. h_m.push_back(vector<int>());
  267. h_m.push_back(vector<int>());
  268. h_m.push_back(vector<int>());
  269. h_m.push_back(vector<int>());
  270.  
  271. unordered_map<int,int> h;
  272. vector<int>::const_iterator i = s.begin();
  273. int buf_h;
  274. while(i != s.end()){
  275. buf_h = *i%16;
  276. bit[buf_h] = true;
  277. h[*i] = buf_h;
  278. h_m[buf_h].push_back(*i);
  279. ++i;
  280. }
  281.  
  282.  
  283. }
  284.  
  285. void preprocessor16(vector<int>& s, vector<vector<int>>& subsets, vector<bitset<16>>& bitmasks, vector<vector<vector<int>>>& h_m){
  286. sort(s.begin(), s.end());
  287. split(s, subsets, 4);
  288.  
  289. for (int i = 0; i < subsets.size(); ++i){
  290. bitmasks.push_back(bitset<16>());
  291. }
  292.  
  293.  
  294. for (int i = 0; i < subsets.size(); ++i){
  295. h_m.push_back(vector<vector<int>>());
  296. }
  297. for (int i = 0; i < subsets.size(); i++){
  298. preprocessHash16(subsets[i], h_m[i], bitmasks[i]);
  299. }
  300. }
  301.  
  302. void IntersectSmall16(bitset<16>& bA, vector<vector<int>>&v_mA, bitset<16>& bB, vector<vector<int>>&v_mB, unordered_set<int>& res){
  303. bitset<16> bAnd = bA&bB;
  304. string ban = bAnd.to_string();
  305.  
  306. for(int i = ban.find_first_of('1'); i < bAnd.size(); i = ban.find_first_of('1', i+1)){
  307. set_intersection(v_mA[i].begin(), v_mA[i].end(), v_mB[i].begin(), v_mB[i].end(), std::inserter(res, res.begin()));
  308. }
  309.  
  310.  
  311. }
  312.  
  313. void dk_intersection16(vector<vector<int>>& subsetsA, vector<vector<vector<int>>>& h_rA, vector<bitset<16>>& bitA, vector<vector<int>>& subsetsB, vector<vector<vector<int>>>& h_rB, vector<bitset<16>>& bitB, unordered_set<int>& result){
  314. int p,q;
  315. p = 0;
  316. q = 0;
  317. while(p < subsetsA.size() && q < subsetsB.size()){
  318.  
  319. if (*subsetsA[p].begin() > *(subsetsB[q].end()-1)){
  320. q += 1;
  321. cout << *subsetsA[p].begin() << ' ' <<*subsetsB[q].end() << endl;
  322.  
  323.  
  324. }
  325. else if(*(subsetsA[p].end()-1) < *subsetsB[q].begin()){
  326. p += 1;
  327.  
  328. }
  329. else{
  330. IntersectSmall16(bitA[p], h_rA[p], bitB[q], h_rB[q], result);
  331.  
  332. if (*(subsetsA[p].end()-1) < *(subsetsB[q].end()-1)){
  333. p += 1;
  334.  
  335. }
  336. else{
  337. q += 1;
  338. }
  339. }
  340. }
  341.  
  342.  
  343. }
  344.  
  345. void preprocessHash64(vector<int>& s, vector<vector<int>>& h_m, bitset<64>& bit){
  346. h_m.push_back(vector<int>());
  347. h_m.push_back(vector<int>());
  348. h_m.push_back(vector<int>());
  349. h_m.push_back(vector<int>());
  350.  
  351. unordered_map<int,int> h;
  352. vector<int>::const_iterator i = s.begin();
  353. int buf_h;
  354. while(i != s.end()){
  355. buf_h = *i%64;
  356. bit[buf_h] = true;
  357. h[*i] = buf_h;
  358. h_m[buf_h].push_back(*i);
  359. ++i;
  360. }
  361.  
  362.  
  363. }
  364.  
  365. void preprocessor64(vector<int>& s, vector<vector<int>>& subsets, vector<bitset<64>>& bitmasks, vector<vector<vector<int>>>& h_m){
  366. sort(s.begin(), s.end());
  367. split(s, subsets, 8);
  368.  
  369. for (int i = 0; i < subsets.size(); ++i){
  370. bitmasks.push_back(bitset<64>());
  371. }
  372.  
  373.  
  374. for (int i = 0; i < subsets.size(); ++i){
  375. h_m.push_back(vector<vector<int>>());
  376. }
  377. for (int i = 0; i < subsets.size(); i++){
  378. preprocessHash64(subsets[i], h_m[i], bitmasks[i]);
  379. }
  380. }
  381.  
  382. void IntersectSmall64(bitset<64>& bA, vector<vector<int>>&v_mA, bitset<64>& bB, vector<vector<int>>&v_mB, unordered_set<int>& res){
  383. bitset<64> bAnd = bA&bB;
  384. string ban = bAnd.to_string();
  385.  
  386. for(int i = ban.find_first_of('1'); i < bAnd.size(); i = ban.find_first_of('1', i+1)){
  387. set_intersection(v_mA[i].begin(), v_mA[i].end(), v_mB[i].begin(), v_mB[i].end(), std::inserter(res, res.begin()));
  388. }
  389.  
  390.  
  391. }
  392.  
  393. void dk_intersection64(vector<vector<int>>& subsetsA, vector<vector<vector<int>>>& h_rA, vector<bitset<64>>& bitA, vector<vector<int>>& subsetsB, vector<vector<vector<int>>>& h_rB, vector<bitset<64>>& bitB, unordered_set<int>& result){
  394. int p,q;
  395. p = 0;
  396. q = 0;
  397. while(p < subsetsA.size() && q < subsetsB.size()){
  398.  
  399. if (*subsetsA[p].begin() > *(subsetsB[q].end()-1)){
  400. q += 1;
  401. cout << *subsetsA[p].begin() << ' ' <<*subsetsB[q].end() << endl;
  402.  
  403.  
  404. }
  405. else if(*(subsetsA[p].end()-1) < *subsetsB[q].begin()){
  406. p += 1;
  407.  
  408. }
  409. else{
  410. IntersectSmall64(bitA[p], h_rA[p], bitB[q], h_rB[q], result);
  411.  
  412. if (*(subsetsA[p].end()-1) < *(subsetsB[q].end()-1)){
  413. p += 1;
  414.  
  415. }
  416. else{
  417. q += 1;
  418. }
  419. }
  420. }
  421.  
  422.  
  423. }
  424.  
  425.  
  426.  
  427.  
  428.  
  429.  
  430. // генератор
  431.  
  432. void lcg(vector<int>& r, int seed, int size, int a, int c, long m){
  433. if (size == 1){
  434. r.push_back((a*seed+c)%m);
  435. return;
  436. }
  437. for(int i = 0; i < size; ++i){
  438. r.push_back(0);
  439. }
  440. r[0] = seed;
  441. for(int i = 1; i < size; ++i){
  442. r[i] = (a*r[i-1]+c)%m;
  443. }
  444. r.erase(r.begin());
  445. }
  446.  
  447.  
  448.  
  449. void generate(int seed, long size, double a2b, double i2u, vector<int> & a, vector<int>& b){
  450. //hardcoded consts for lcg generator
  451. //a = 50001
  452. //c = 49999
  453. //m = 2500000000
  454. int inter = int(size*i2u);
  455. int a_size = int(a2b*size*(1-i2u)/(a2b+1))+inter;
  456. int b_size = int(size*(1-i2u)/(a2b+1))+inter;
  457. lcg(a, seed, a_size, 50001, 49999, 2500000000);
  458. lcg(b, a[a_size - inter],b_size, 50001, 49999, 2500000000);
  459. }
  460.  
  461.  
  462.  
  463.  
  464.  
  465.  
  466.  
  467.  
  468.  
  469. //тестер
  470.  
  471. int main(int argc, char* argv[]) {
  472. vector<int> a;
  473. vector<int> b;
  474.  
  475. /*
  476. 1000
  477. 2000
  478. 5000
  479. 10000
  480. 20000
  481. 50000
  482. 100000
  483. 200000
  484. 500000
  485. 1000000
  486. 2000000
  487. 5000000
  488. 10000000
  489.  
  490. */
  491. int i = 100000;
  492.  
  493. if (argc>1){
  494. i = stoi(argv[1]);
  495. }
  496.  
  497. generate(14, i, 1, 0.1, a, b);
  498.  
  499.  
  500. unordered_set<int> v_intersection;
  501. /*
  502. sort(a.begin(), a.end());
  503. sort(b.begin(), b.end());
  504.  
  505. BaezaYates_step(&a, &b, a.begin(), a.end(), b.begin(), b.end(), &v_intersection);
  506. */
  507.  
  508. unordered_set<int> res;
  509.  
  510.  
  511.  
  512.  
  513.  
  514.  
  515. double start_time = getCPUTime();
  516.  
  517. GallopingIntersection(a, b, res);
  518.  
  519. double end_time = getCPUTime();
  520.  
  521. cout << res.size() << endl;
  522. // ' ' << v_intersection.size() << endl;
  523.  
  524. cout << i << ' ' << a.size() << ", " << b.size() << ", " << end_time - start_time << ",\n";
  525.  
  526. }
  527.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement