Advertisement
yepp

merge sort gpu

Jan 17th, 2015
489
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.73 KB | None | 0 0
  /*
   * Merge kernel: each thread merges two adjacent halves of one run of `size`
   * consecutive elements, reading from `vfrom` and writing to `vto`.
   *
   *   vfrom - input buffer; for the output run to be sorted, each half of the
   *           run is assumed to already be in ascending order
   *   vto   - output buffer; receives the merged run at the same offsets
   *   size  - run length handled by one thread (each half is size/2 elements)
   *   from  - not used by the kernel body
   *   tc    - number of runs per block, used to compute the run offset
   *           (the visible caller passes the block's thread count)
   *
   * There is no bounds check: the launch must cover exactly the runs that
   * exist in the buffers.
   */
  __global__ void merdzs(int* vfrom, int* vto, int size, int from, int tc) {
      const int half = size / 2;

      // First element of the run owned by this thread.
      const int base = (blockIdx.x * tc + threadIdx.x) * size;

      int left = base;
      const int leftEnd = base + half;
      int right = leftEnd;
      const int rightEnd = right + half;
      int out = base;

      // Take the smaller head element; `<=` keeps equal keys from the left
      // half ahead of those from the right half.
      while (left < leftEnd && right < rightEnd) {
          vto[out++] = (vfrom[left] <= vfrom[right]) ? vfrom[left++]
                                                     : vfrom[right++];
      }

      // At most one of the halves still has elements; drain it.
      for (; left < leftEnd; ++left, ++out) {
          vto[out] = vfrom[left];
      }
      for (; right < rightEnd; ++right, ++out) {
          vto[out] = vfrom[right];
      }
  }
  31.  
  /*
   * Bottom-up merge sort driver. Launches one merdzs pass per run length
   * (2, 4, 8, ...), alternating between the two device buffers, then copies
   * the buffer written by the final pass into `result` on the host.
   *
   *   result        - host destination, at least `size` ints
   *   size          - element count; the run length (size/tc) and block count
   *                   (tc/1024) are computed with truncating division, so the
   *                   passes only tile the data exactly when size is a power
   *                   of two
   *   bufferValues  - device buffer holding the unsorted input on entry
   *   bufferValues2 - device scratch buffer of the same length
   *
   * Both device buffers are overwritten. CUDA errors are not reported (the
   * function returns void); NOTE(review): callers that need diagnostics should
   * query cudaGetLastError() after this returns.
   */
  void mergeSortGPU(int* result, int size, int* bufferValues, int* bufferValues2) {
      if (size <= 0) {
          return;  // nothing to sort or copy
      }

      const int maxThreads = 1024;  // threads-per-block cap used for every pass

      int* src = bufferValues;
      int* dst = bufferValues2;
      int pass = 0;

      // tc is the number of merges (one per thread) in this pass; it halves as
      // the run length doubles.
      for (int tc = size / 2; tc >= 1; tc >>= 1) {
          const int threads = tc > maxThreads ? maxThreads : tc;
          const int blocks = tc > maxThreads ? tc / maxThreads : 1;

          merdzs<<<blocks, threads>>>(src, dst, size / tc, pass % 2, threads);

          // The output of this pass is the input of the next one.
          int* swapTmp = src;
          src = dst;
          dst = swapTmp;
          ++pass;
      }

      cudaDeviceSynchronize();

      // After the final swap `src` is the buffer most recently written (or the
      // untouched input when no pass ran), so no separate parity computation
      // is needed to locate the result.
      cudaMemcpy(result, src, size * sizeof(int), cudaMemcpyDeviceToHost);
  }
  46.  
  /*
   * Sorts `size` ints from host array `values` on the GPU and writes the
   * result to host array `result`.
   *
   * Allocates two device buffers, uploads the input, runs mergeSortGPU (which
   * also downloads the result), and releases the buffers.
   *
   * Returns the wall-clock time spent inside mergeSortGPU, converted to
   * millisecs_t and narrowed to int. Allocation and the host-to-device upload
   * are outside the timed region. Returns -1 if device allocation or the
   * upload fails; `result` is left untouched in that case.
   */
  int GPUMergeSort(int* values, int* result, int size) {
      int* bufferValues = nullptr;
      int* bufferValues2 = nullptr;
      const size_t bytes = size * sizeof(int);

      const bool ready =
          cudaMalloc((void**)&bufferValues, bytes) == cudaSuccess &&
          cudaMalloc((void**)&bufferValues2, bytes) == cudaSuccess &&
          cudaMemcpy(bufferValues, values, bytes, cudaMemcpyHostToDevice) == cudaSuccess;
      if (!ready) {
          // cudaFree on a null pointer is a no-op, so this is safe whichever
          // step failed.
          cudaFree(bufferValues);
          cudaFree(bufferValues2);
          return -1;
      }

      std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
      mergeSortGPU(result, size, bufferValues, bufferValues2);
      std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();

      // Release device memory after the end timestamp so the cleanup does not
      // count towards the measured duration.
      cudaFree(bufferValues);
      cudaFree(bufferValues2);

      millisecs_t duration(std::chrono::duration_cast<millisecs_t>(end - start));
      return static_cast<int>(duration.count());
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement