Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Host (CPU) reference version of the sliding-window check.
//
// For every window `win` in [0, num_window) it walks forward from
// first_index[win] while the position is below `col`, comparing the key at
// first_index[win] against the key at `col` (both taken from row `row` of
// key_column, row stride MAX_LIMIT):
//   * difference <= t_window[win]: if the key at last_index[win] is non-zero,
//     print score[win] and slide the window one position; either way stop
//     processing this window.
//   * otherwise: slide the window one position and keep walking.
// "Sliding" means first_index[win] += 1 and
// last_index[win] = first_index[win] + window_sizes[win] - 1.
//
// Nothing happens when col == 0.
// Reads/writes the file-level arrays first_index, last_index, key_column,
// t_window and score, which are declared outside this function.
void CPU_function(int row, int col, int window_sizes[], int num_window)
{
    if (col == 0)
        return;

    for (int win = 0; win < num_window; ++win)
    {
        // The walk position starts at the window's current first index.
        int pos = first_index[win];
        while (pos < col)
        {
            const long int d = (key_column[row*MAX_LIMIT+first_index[win]])-(key_column[row*MAX_LIMIT+col]);
            const bool withinThreshold = (d <= t_window[win]);

            // Outside the threshold the window always slides; inside it, the
            // window slides only when its last slot holds a non-zero key.
            bool slide = true;
            if (withinThreshold)
            {
                slide = (key_column[row*MAX_LIMIT+last_index[win]] != 0);
                if (slide)
                    printf("\nScore=%d",score[win]);
            }

            if (slide)
            {
                first_index[win]++;
                last_index[win] = first_index[win] + window_sizes[win] - 1;
            }

            // A threshold hit ends the walk for this window.
            if (withinThreshold)
                break;

            ++pos;
        }
    }
}
// Thread organization.
// GPU_Function reads threadIdx.x as the window index and threadIdx.y as the
// position handled for that window, so x spans the windows and y spans the
// per-window work items.
// A block's shape is fixed at launch and cannot differ per window; the kernel's
// own bounds check on threadIdx.y leaves surplus threads idle.
// NOTE(review): num_threads therefore presumably has to cover the window that
// needs the most threads - confirm against the caller.
// CUDA caps a block at 1024 threads in total (x * y * z) on current GPUs, so
// num_window * num_threads must stay within that limit.
dim3 block(num_window, num_threads);
dim3 grid(1, 1);
/*
My question:
Usually num_window is a small value, say 3, and normally less than 30, and the number of threads I need varies for each num_window.
Example: when num_window[0]=3 (window of size=3), I need 45 threads;
when num_window[1]=5 (window of size=4), I need 35 threads.
How can I get such a variable number of threads for each num_window value?
*/
// GPU version of the window check.
//
// Thread mapping (only threadIdx is used, so every block would repeat the same
// work; the accompanying launch uses a single block):
//   threadIdx.x -> window index i, valid while i < *d_num_window
//   threadIdx.y -> start position j inside row `row`
//
// For each (i, j) the thread compares the key at j with the key at
// k = j + d_window_sizes[i], both in row `row` of d_key_column (row stride
// D_MAX_LIMIT). When 0 <= key[k] - key[j] <= d_t_window[i] it stores
// d_score[i] into d_update_score[j].
//
// Parameters:
//   row, col        - row to process; the kernel does nothing when col == 0.
//   d_key_column    - keys, indexed as row * D_MAX_LIMIT + position.
//   d_num_window    - pointer to the number of windows.
//   d_t_window      - per-window upper bound on the key difference.
//   d_score         - per-window value written on a match.
//   d_window_sizes  - per-window offset between the two compared positions.
//   D_MAX_LIMIT     - row stride / number of positions per row.
//   d_update_score  - output, indexed by j.
//   d_first_index, d_last_index, d_x, d_y - not used by this kernel; kept so
//                     existing launches keep compiling.
//
// NOTE(review): threads with the same j but different i may both write
// d_update_score[j]; which score survives is then unspecified. The intended
// merge rule (max, sum, first window wins, ...) is not visible here - confirm.
__global__ void GPU_Function(int row,int col,long int *d_key_column,int *d_num_window,int *d_first_index,int *d_last_index,int *d_t_window,int *d_score,int *d_window_sizes,int D_MAX_LIMIT,long int * d_update_score,int *d_x,int *d_y)
{
    if (col == 0)
        return;

    const long int i = threadIdx.x;
    const long int j = threadIdx.y;

    // Validate the window index BEFORE touching any per-window array; the
    // previous code read d_window_sizes[i] ahead of this check, which is an
    // out-of-bounds read for surplus threads in x.
    if (i >= *d_num_window)
        return;

    // The second position must still lie inside the row. The earlier guard
    // (j < D_MAX_LIMIT - size + 1) let k reach D_MAX_LIMIT, i.e. one element
    // past the end of the row.
    const long int k = j + d_window_sizes[i];
    if (k >= D_MAX_LIMIT)
        return;

    // 64-bit row offset so row * D_MAX_LIMIT cannot overflow int arithmetic.
    const long long rowBase = (long long)row * D_MAX_LIMIT;

    const long int d = d_key_column[rowBase + k] - d_key_column[rowBase + j];
    if (d >= 0 && d <= d_t_window[i])
        d_update_score[j] = d_score[i];
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement