Advertisement
homer512

CUDA producer-consumer pipe

Aug 21st, 2024
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.62 KB | Source Code | 0 0
  1. __device__ float produce(int iteration, unsigned idx);
  2.  
  3. __device__ void consume(int iteration, unsigned idx, float value);
  4.  
  5. __global__ void kernel(int iterations)
  6. {
  7.     /*
  8.      * Tuning parameters: Number of samples to buffer
  9.      * and number of producer vs. consumer threads
  10.      */
  11.     constexpr unsigned bufsize = 128;
  12.     const unsigned producer_count = 32;
  13.     const unsigned consumer_count = blockDim.x - producer_count;
  14.     /*
  15.      * Simple double-buffer setup to make a producer-consumer pipeline
  16.      */
  17.     __shared__ float buf[2][bufsize];
  18.     const bool is_producer = threadIdx.x < producer_count;
  19.     /*
  20.      * Fill first buffer. This can use all threads
  21.      */
  22.     for(unsigned i = threadIdx.x; i < bufsize; i += blockDim.x)
  23.         buf[0][i] = produce(0, i);
  24.     for(int i = 1; i < iterations; ++i) {
  25.         __syncthreads();
  26.         int cur_buf = i & 1;
  27.         int last_buf = (i - 1) & 1;
  28.         if(is_producer) {
  29.             // fill next buffer
  30.             for(unsigned j = threadIdx.x; j < bufsize; j += producer_count)
  31.                 buf[cur_buf][j] = produce(i, j);
  32.         }
  33.         else {
  34.             const unsigned consumer_idx = threadIdx.x - producer_count;
  35.             // consume last buffer
  36.             for(unsigned j = consumer_idx; j < bufsize; j += consumer_count)
  37.                 consume(i - 1, j, buf[last_buf][j]);
  38.         }
  39.     }
  40.     __syncthreads();
  41.     /*
  42.      * Consume last buffer. Can use all threads
  43.      */
  44.     int last_buf = (iterations - 1) & 1;
  45.     for(unsigned i = threadIdx.x; i < bufsize; i += blockDim.x)
  46.         consume(iterations - 1, i, buf[last_buf][i]);
  47. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement