Stybyk

kernely cuda

Dec 2nd, 2014
501
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 12.57 KB | None | 0 0
  1. // ***********************************************************************
  2. //
  3. // Demo program pro vyuku predmetu APPS (10/2021)
  4. // Petr Olivka, katedra informatiky, FEI, VSB-TU Ostrava
  5. //
  6. // Priklad pouziti CUDA technologie.
  7. // Prevod barevneho obrazku na odstiny sede barvy
  8. //
  9. // ***********************************************************************
  10.  
  11. #include <cuda_runtime.h>
  12. #include <stdio.h>
  13. #include <stdlib.h>
  14. #include <math.h>
  15.  
  16.  
  /**
   * Nearest-neighbour 2x upscale kernel.
   *
   * Each thread handles one input pixel (x, y) and replicates it into the
   * 2x2 block of output pixels whose top-left corner is (2x, 2y).
   *
   * @param input   source image, sizex * sizey pixels, row-major
   * @param output  destination image, indexed as (2 * sizex) * (2 * sizey)
   *                pixels, row-major
   * @param sizex   width of the input image in pixels
   * @param sizey   height of the input image in pixels
   *
   * Launch with a 2D grid that covers at least sizex x sizey threads;
   * surplus threads exit immediately.
   */
  __global__ void resizebig(uchar4 * input ,uchar4 * output ,int sizex, int sizey)
  {
      // Thread coordinates in the input image.
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      if ( y >= sizey ) return;

      int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= sizex ) return;

      const uchar4 pixel = input[(size_t)y * sizex + x];
      const size_t outWidth = (size_t)sizex * 2;

      // Fill the whole 2x2 block. The previous bounds (j = 1 only, i = -1..1)
      // left every even output row unwritten and wrote into neighbouring
      // pixels' columns.
      for (int j = 0; j < 2; j++)
      {
          for (int i = 0; i < 2; i++)
          {
              output[((size_t)y * 2 + j) * outWidth + ((size_t)x * 2 + i)] = pixel;
          }
      }
  }
  43.  
  44.  
  /**
   * Host wrapper: upscales an image to twice its width and height on the GPU.
   *
   * @param input   host buffer with sizex * sizey pixels
   * @param output  host buffer receiving (2 * sizex) * (2 * sizey) pixels
   * @param sizex   input width in pixels
   * @param sizey   input height in pixels
   *
   * On any CUDA failure the error is printed, the remaining steps are skipped
   * and all device memory allocated so far is released; `output` is then left
   * untouched or only partially written.
   */
  void run_resize_big(uchar4 *input ,uchar4* output ,int sizex ,int sizey){

      // Nothing to do for an empty image (and a zero-sized grid is invalid).
      if ( sizex <= 0 || sizey <= 0 ) return;

      cudaError_t erorlog;
      // Initialised so that the cleanup below is safe on every path.
      uchar4 * CUDAinput = NULL;
      uchar4 * CUDAoutput = NULL;

      // Sizes are computed in size_t to avoid int overflow on large images.
      const size_t inBytes = (size_t)sizex * sizey * sizeof( uchar4 );
      const size_t outBytes = ( (size_t)sizex * 2 ) * ( (size_t)sizey * 2 ) * sizeof( uchar4 );

      do {
          if ( ( erorlog = cudaMalloc( &CUDAinput, inBytes ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          if ( ( erorlog = cudaMalloc( &CUDAoutput, outBytes ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // Transfer the colour image to the graphics card.
          if ( ( erorlog = cudaMemcpy( CUDAinput, input, inBytes, cudaMemcpyHostToDevice ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // One thread per input pixel; the grid may be larger than the image.
          int block = 16;
          dim3 blocks( ( sizex + block - 1 ) / block, ( sizey + block - 1 ) / block );
          dim3 threads( block, block );

          resizebig<<< blocks,threads >>> (CUDAinput ,CUDAoutput,sizex ,sizey );

          // Launch-configuration errors.
          if ( ( erorlog = cudaGetLastError() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // Errors raised while the kernel was executing.
          if ( ( erorlog = cudaDeviceSynchronize() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // Transfer the result back from the graphics card.
          if ( ( erorlog = cudaMemcpy( output, CUDAoutput, outBytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }
      } while ( 0 );

      // cudaFree accepts a null pointer, so this is valid on every path.
      cudaFree( CUDAinput );
      cudaFree( CUDAoutput );

  }
  86.  
  87.  
  /**
   * Downscale kernel: each thread writes one output pixel.
   *
   * Output pixel (col, row) is copied from input element
   * row * 4 * sizex + col * 2, i.e. from position (2 * col, 2 * row) of an
   * input whose rows are 2 * sizex pixels long.
   *
   * @param input   source image; rows are read with a stride of 2 * sizex pixels
   * @param output  destination image, sizex * sizey pixels, row-major
   * @param sizex   width of the OUTPUT image in pixels
   * @param sizey   height of the OUTPUT image in pixels
   */
  __global__ void resizelow(uchar4 * input ,uchar4 * output ,int sizex, int sizey)
  {
      // Thread coordinates in the output image.
      const int row = blockDim.y * blockIdx.y + threadIdx.y;
      const int col = blockDim.x * blockIdx.x + threadIdx.x;

      // Threads outside the output image have nothing to do.
      if ( row >= sizey || col >= sizex ) return;

      const int srcIndex = (row * 4 * sizex) + col * 2;
      const int dstIndex = row * sizex + col;

      output[dstIndex] = input[srcIndex];
  }
  108.  
  109.  
  /**
   * Rotates an image by 90 degrees. Changing the index formula (see the
   * comment in the body) selects rotation to the left or to the right.
   *
   * @param input   source image, sizex * sizey pixels, row-major
   * @param output  destination image, written as sizey pixels per row and
   *                sizex rows
   * @param sizex   width of the input image in pixels
   * @param sizey   height of the input image in pixels
   */
  __global__ void kernel_rotace(uchar4 * input ,uchar4 * output ,int sizex, int sizey)
  {

      int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= sizex ) return;
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      if ( y >= sizey ) return;

      // Rotation to the left: input column x becomes output row (sizex - 1 - x).
      // The "- 1" keeps the index inside the buffer; with (sizex - x) the
      // column x == 0 was written past the end of the output.
      output[(size_t)(sizex - 1 - x) * sizey + y] = input[(size_t)y * sizex + x];

      // Rotation to the right would be:
      // output[(size_t)x * sizey + (sizey - 1 - y)] = input[(size_t)y * sizex + x];
  }
  142.  
  143.  
  144.  
  /**
   * Host wrapper: rotates an image by 90 degrees on the GPU.
   *
   * @param input   host buffer with sizex * sizey pixels
   * @param output  host buffer receiving sizex * sizey pixels
   * @param sizex   input width in pixels
   * @param sizey   input height in pixels
   *
   * On any CUDA failure the error is printed, the remaining steps are skipped
   * and all device memory allocated so far is released.
   */
  void run_rotace(uchar4 *input ,uchar4* output ,int sizex ,int sizey){

      // Nothing to do for an empty image (and a zero-sized grid is invalid).
      if ( sizex <= 0 || sizey <= 0 ) return;

      cudaError_t erorlog;
      // Initialised so that the cleanup below is safe on every path.
      uchar4 * CUDAinput = NULL;
      uchar4 * CUDAoutput = NULL;

      // Computed in size_t to avoid int overflow on large images.
      const size_t bytes = (size_t)sizex * sizey * sizeof( uchar4 );

      do {
          if ( ( erorlog = cudaMalloc( &CUDAinput, bytes ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          if ( ( erorlog = cudaMalloc( &CUDAoutput, bytes ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // Transfer the colour image to the graphics card.
          if ( ( erorlog = cudaMemcpy( CUDAinput, input, bytes, cudaMemcpyHostToDevice ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // One thread per input pixel; the grid may be larger than the image.
          int block = 16;
          dim3 blocks( ( sizex + block - 1 ) / block, ( sizey + block - 1 ) / block );
          dim3 threads( block, block );

          kernel_rotace <<< blocks,threads >>> (CUDAinput ,CUDAoutput,sizex ,sizey );

          // Launch-configuration errors.
          if ( ( erorlog = cudaGetLastError() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // Errors raised while the kernel was executing.
          if ( ( erorlog = cudaDeviceSynchronize() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // Transfer the result back from the graphics card.
          if ( ( erorlog = cudaMemcpy( output, CUDAoutput, bytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }
      } while ( 0 );

      // cudaFree accepts a null pointer, so this is valid on every path.
      cudaFree( CUDAinput );
      cudaFree( CUDAoutput );

  }
  186.  
  187.  
  188.  
  189.  
  /**
   * Host wrapper: shrinks an image to half its width and height on the GPU.
   *
   * @param input   host buffer with sizex * sizey pixels
   * @param output  host buffer receiving (sizex / 2) * (sizey / 2) pixels
   * @param sizex   input width in pixels
   * @param sizey   input height in pixels
   *
   * The resizelow kernel reads its input with a row stride of twice the output
   * width. For an odd sizex that differs from the real row length, so the
   * input is uploaded with cudaMemcpy2D into a tightly packed buffer of
   * (2 * (sizex / 2)) x (2 * (sizey / 2)) pixels; a trailing odd column or row
   * is dropped. For even dimensions this is the same as a plain copy.
   *
   * On any CUDA failure the error is printed, the remaining steps are skipped
   * and all device memory allocated so far is released.
   */
  void run_resize_low(uchar4 *input ,uchar4* output ,int sizex ,int sizey){

      const int outW = sizex / 2;
      const int outH = sizey / 2;

      // An image narrower or lower than two pixels has no output pixels.
      if ( outW <= 0 || outH <= 0 ) return;

      cudaError_t erorlog;
      // Initialised so that the cleanup below is safe on every path.
      uchar4 * CUDAinput = NULL;
      uchar4 * CUDAoutput = NULL;

      // Geometry of the packed device input, in bytes / rows.
      const size_t packedRowBytes = (size_t)outW * 2 * sizeof( uchar4 );
      const size_t packedRows = (size_t)outH * 2;
      const size_t hostRowBytes = (size_t)sizex * sizeof( uchar4 );
      const size_t outBytes = (size_t)outW * outH * sizeof( uchar4 );

      do {
          if ( ( erorlog = cudaMalloc( &CUDAinput, packedRowBytes * packedRows ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          if ( ( erorlog = cudaMalloc( &CUDAoutput, outBytes ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // Transfer the colour image to the graphics card, re-packing the rows
          // to the stride the kernel expects.
          erorlog = cudaMemcpy2D( CUDAinput, packedRowBytes, input, hostRowBytes,
                                  packedRowBytes, packedRows, cudaMemcpyHostToDevice );
          if ( erorlog != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // One thread per OUTPUT pixel.
          int block = 16;
          dim3 blocks( ( outW + block - 1 ) / block, ( outH + block - 1 ) / block );
          dim3 threads( block, block );

          resizelow <<< blocks,threads >>> (CUDAinput ,CUDAoutput,outW ,outH );

          // Launch-configuration errors.
          if ( ( erorlog = cudaGetLastError() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // Errors raised while the kernel was executing.
          if ( ( erorlog = cudaDeviceSynchronize() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // Transfer the result back from the graphics card.
          if ( ( erorlog = cudaMemcpy( output, CUDAoutput, outBytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }
      } while ( 0 );

      // cudaFree accepts a null pointer, so this is valid on every path.
      cudaFree( CUDAinput );
      cudaFree( CUDAoutput );

  }
  231.  
  232.  
  233.  
  234.  
  235.  
  236.  
  237.  
  238.  
  239.  
  240.  
  241.  
  242.  
  243.  
  244.  
  245.  
  246.  
  247.  
  248.  
  249.  
  250.  
  251.  
  252.  
  253.  
  254.  
  255.  
  256.  
  257.  
  /**
   * Demo kernel producing two derived images from one colour image.
   *
   * For every pixel it writes:
   *  - to bw_pic: a grey pixel whose x, y and z channels all hold
   *    x * 0.11 + y * 0.59 + z * 0.30 of the source pixel;
   *  - to MyO: the source pixel with x and z scaled by 0.75 and y by 0.1.
   * The w channel is copied unchanged into both outputs.
   * NOTE(review): the weights suggest x/y/z are blue/green/red - confirm
   * against the caller's pixel format.
   *
   * @param color_pic  source image, sizex * sizey pixels, row-major
   * @param bw_pic     grey-scale output, same layout
   * @param MyO        tinted output, same layout
   * @param sizex      image width in pixels
   * @param sizey      image height in pixels
   */
  __global__ void kernel_grayscale( uchar4 *color_pic, uchar4* bw_pic, uchar4* MyO ,int sizex, int sizey )
  {
      // Thread coordinates; threads outside the image do nothing.
      const int row = blockDim.y * blockIdx.y + threadIdx.y;
      const int col = blockDim.x * blockIdx.x + threadIdx.x;
      if ( row >= sizey || col >= sizex ) return;

      const int idx = row * sizex + col;
      const uchar4 src = color_pic[ idx ];

      // Grey value: weighted sum, truncated to an 8-bit channel.
      const unsigned char luma = src.x * 0.11 + src.y * 0.59 + src.z * 0.30;

      uchar4 gray = src;
      gray.z = luma;
      gray.y = luma;
      gray.x = luma;
      bw_pic[ idx ] = gray;

      // Tinted copy: per-channel attenuation of the original colour.
      uchar4 tinted = src;
      tinted.x = src.x * 0.75;
      tinted.y = src.y * 0.1;
      tinted.z = src.z * 0.75;
      MyO[ idx ] = tinted;
  }
  286.  
  287.  
  288.  
  289.  
  290.  
  291.  
  292.  
  293.  
  294.  
  295.  
  296.  
  297.  
  298.  
  299.  
  300.  
  301.  
  302.  
  303.  
  304.  
  305.  
  306.  
  307.  
  308.  
  309.  
  310.  
  311.  
  312.  
  313.  
  314.  
  315.  
  /**
   * Host wrapper for kernel_grayscale: produces a grey-scale and a tinted copy
   * of a colour image on the GPU.
   *
   * @param color_pic  host buffer with the source image, sizex * sizey pixels
   * @param bw_pic     host buffer receiving the grey-scale image
   * @param MyO        host buffer receiving the tinted image
   * @param sizex      image width in pixels
   * @param sizey      image height in pixels
   *
   * On any CUDA failure the error is printed, the remaining steps are skipped
   * and all device memory allocated so far is released.
   */
  void run_grayscale( uchar4 *color_pic, uchar4* bw_pic, uchar4* MyO, int sizex, int sizey )
  {
      // Nothing to do for an empty image (and a zero-sized grid is invalid).
      if ( sizex <= 0 || sizey <= 0 ) return;

      cudaError_t cerr;
      // Device buffers; initialised so that the cleanup below is always safe.
      uchar4 *cudaColorPic = NULL;
      uchar4 *cudaBWPic = NULL;
      uchar4 *CudaMyO = NULL;

      // Computed in size_t to avoid int overflow on large images.
      const size_t bytes = (size_t)sizex * sizey * sizeof( uchar4 );

      do {
          // Allocate memory on the graphics card.
          if ( ( cerr = cudaMalloc( &CudaMyO, bytes ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMalloc( &cudaColorPic, bytes ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMalloc( &cudaBWPic, bytes ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Transfer the colour image to the graphics card.
          if ( ( cerr = cudaMemcpy( cudaColorPic, color_pic, bytes, cudaMemcpyHostToDevice ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          int block = 16;
          dim3 blocks( ( sizex + block - 1 ) / block, ( sizey + block - 1 ) / block );
          dim3 threads( block, block );

          // Create the blocks of threads; the thread matrix may be larger than
          // the image itself.
          kernel_grayscale<<< blocks, threads >>>( cudaColorPic, cudaBWPic,CudaMyO, sizex, sizey );

          // Launch-configuration errors.
          if ( ( cerr = cudaGetLastError() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Errors raised while the kernel was executing.
          if ( ( cerr = cudaDeviceSynchronize() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Transfer both results back from the graphics card.
          if ( ( cerr = cudaMemcpy( bw_pic, cudaBWPic, bytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMemcpy( MyO, CudaMyO, bytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }
      } while ( 0 );

      // Release the memory allocated on the graphics card; cudaFree accepts a
      // null pointer, so this is valid on every path.
      cudaFree( cudaColorPic );
      cudaFree( cudaBWPic );
      cudaFree( CudaMyO);

  }
  375.  
  376.  
  377.  
  /**
   * Cross-fades two images horizontally.
   *
   * For a pixel in column x the weight of `first` is x / sizex and the weight
   * of `second` is 1 - x / sizex, so the result shows `second` at the left
   * edge and fades towards `first` on the right. The x, y and z channels are
   * blended; the w channel is copied from `first` so that the output never
   * contains uninitialised data (previously w was left unwritten).
   *
   * @param first   first source image, sizex * sizey pixels, row-major
   * @param second  second source image, same layout
   * @param final   destination image, same layout
   * @param sizex   image width in pixels
   * @param sizey   image height in pixels
   */
  __global__ void prolnuti( uchar4 *first,uchar4 *second,uchar4 *final, int sizex, int sizey )
  {
      // Thread coordinates; threads outside the image do nothing.
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= sizex ) return;
      if ( y >= sizey ) return;

      const size_t idx = (size_t)y * sizex + x;

      // Blend weights for this column.
      const float weightFirst = x / (float)sizex;
      const float weightSecond = 1 - weightFirst;

      const uchar4 a = first[idx];
      const uchar4 b = second[idx];

      uchar4 mixed;
      mixed.x = a.x * weightFirst + b.x * weightSecond;
      mixed.y = a.y * weightFirst + b.y * weightSecond;
      mixed.z = a.z * weightFirst + b.z * weightSecond;
      mixed.w = a.w;

      final[idx] = mixed;
  }
  396.  
  /**
   * Host wrapper for the prolnuti kernel: blends two equally sized images into
   * one on the GPU.
   *
   * @param original   host buffer with the first image, width * height pixels
   * @param original2  host buffer with the second image, same size
   * @param joined     host buffer receiving the blended image, same size
   * @param width      image width in pixels
   * @param height     image height in pixels
   *
   * On any CUDA failure the error is printed, the remaining steps are skipped
   * and all device memory allocated so far is released.
   */
  void joinImage( uchar4 *original, uchar4 *original2, uchar4 *joined, int width, int height )
  {
      // Nothing to do for an empty image (and a zero-sized grid is invalid).
      if ( width <= 0 || height <= 0 ) return;

      cudaError_t cerr;
      // Device buffers; initialised so that the cleanup below is always safe.
      uchar4 *cudaOriginal = NULL;
      uchar4 *cudaOriginal2 = NULL;
      uchar4 *cudajoined = NULL;

      // Computed in size_t to avoid int overflow on large images.
      const size_t bytes = (size_t)width * height * sizeof( uchar4 );

      do {
          // Allocate memory on the graphics card.
          if ( ( cerr = cudaMalloc( &cudaOriginal, bytes ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMalloc( &cudaOriginal2, bytes ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMalloc( &cudajoined, bytes ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Transfer both source images to the graphics card.
          if ( ( cerr = cudaMemcpy( cudaOriginal, original, bytes, cudaMemcpyHostToDevice ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMemcpy( cudaOriginal2, original2, bytes, cudaMemcpyHostToDevice ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          int block = 16;
          dim3 blocks( ( width + block - 1 ) / block, ( height + block - 1 ) / block );
          dim3 threads( block, block );

          // Create the blocks of threads; the thread matrix may be larger than
          // the image itself.
          prolnuti<<< blocks, threads >>>( cudaOriginal,cudaOriginal2, cudajoined, width, height );

          // Launch-configuration errors.
          if ( ( cerr = cudaGetLastError() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Errors raised while the kernel was executing.
          if ( ( cerr = cudaDeviceSynchronize() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Transfer the result back from the graphics card.
          if ( ( cerr = cudaMemcpy( joined, cudajoined, bytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }
      } while ( 0 );

      // Release the memory allocated on the graphics card; cudaFree accepts a
      // null pointer, so this is valid on every path.
      cudaFree( cudajoined );
      cudaFree( cudaOriginal );
      cudaFree( cudaOriginal2 );

  }
Add Comment
Please, Sign In to add comment