Advertisement
desdemona

cuda image filtering

May 30th, 2015
716
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 17.38 KB | None | 0 0
  1.  
  2. #include "cuda_runtime.h"
  3. #include "device_launch_parameters.h"
  4. #include "helper_cuda.h"
  5.  
  6. #include <stdlib.h>
  7. #include <stdio.h>
  8. #include <memory>
  9. #include <iostream>
  10. #include <cstdio>
  11. #include <cstdlib>
  12. #include <time.h>  
  13.  
  14. #include "bitmap_image.hpp"
  15. #include "csvparser.h"
  16.  
  17. #ifndef __CUDACC__  
  18.     #define __CUDACC__
  19. #endif
  20.  
  21. const int Filter_Size = 7;
  22. const int Filter_Total_Size = 49;
  23.  
  24. __device__ __constant__ float g_filter[Filter_Total_Size];
  25. __device__ __constant__ float g_factor[2];
  26.  
  27. cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);
  28.  
  29. __global__ void addKernel(int *c, const int *a, const int *b)
  30. {
  31.     int i = threadIdx.x;
  32.     c[i] = a[i] + b[i];
  33. }
  34.  
  35. __device__ int buffadr(unsigned int x, unsigned int y, unsigned int imageWidth, unsigned int imageHeight)
  36. {
  37.     if (x >= imageWidth || y >= imageHeight)
  38.     {
  39.         return -1;
  40.     }
  41.  
  42.     return y*imageWidth + x;
  43. }
  44.  
  45. __global__ void filterKernel(const unsigned char *inputColor, unsigned char *outputColor, unsigned int imageWidth, unsigned int imageHeight,
  46.     unsigned int blockPartWidth, unsigned int blockPartHeight, unsigned int threadPartWidth, unsigned int threadPartHeight)
  47. {
  48.     __shared__ int blockXstart, blockYstart;
  49.     __shared__ int blockXend, blockYend;
  50.  
  51.     if (threadIdx.x == 0 && threadIdx.y == 0)
  52.     {
  53.         //policz i zapisz do shared czym zajmuje sie dany blok
  54.         blockXstart = blockIdx.x * blockPartWidth;
  55.         blockYstart = blockIdx.y * blockPartHeight;
  56.         blockXend = blockXstart + blockPartWidth; //tego miejsca juz nie liczymy
  57.         blockYend = blockYstart + blockPartHeight; //to też już poza obliczeniami
  58.         if (blockXend > imageWidth)
  59.         {
  60.             blockXend = imageWidth;
  61.         }
  62.         if (blockYend > imageHeight)
  63.         {
  64.             blockYend = imageHeight;
  65.         }
  66.         //policzyliśmy czym zajmuje dany block i zapisalismy, ok.
  67.     }
  68.  
  69.     __syncthreads();
  70.     //teraz każdy wątek liczy sobie swój start i koniec
  71.     unsigned int threadXstart = (threadIdx.x * threadPartWidth) + blockXstart;
  72.     unsigned int threadYstart = (threadIdx.y * threadPartHeight) + blockYstart;
  73.     unsigned int threadXend = threadXstart + threadPartWidth;
  74.     unsigned int threadYend = threadYstart + threadPartHeight;
  75.     threadXend = (threadXend > blockXend) ? blockXend : threadXend;
  76.     threadYend = (threadYend > blockYend) ? blockYend : threadYend;
  77.  
  78.     int x_, y_, bufadr_;
  79.     int c;
  80.     for (int y = threadYstart; y < threadYend; y++)
  81.     {
  82.         for (int x = threadXstart; x < threadXend; x++)
  83.         {
  84.             bufadr_ = buffadr(x, y, imageWidth, imageHeight);
  85.             if (bufadr_ < 0 || bufadr_ >= imageWidth*imageHeight)
  86.             {
  87.                 continue;
  88.             }
  89.             c = 0;
  90.             for (int j = -3; j < 4; j++)
  91.             {
  92.                 for (int i = -3; i < 4; i++)
  93.                 {
  94.                     y_ = y + j;
  95.                     x_ = x + i;
  96.                     bufadr_ = buffadr(x_, y_, imageWidth, imageHeight);
  97.                     if (bufadr_ < 0 || bufadr_ >= imageWidth*imageHeight)
  98.                     {
  99.                         continue;
  100.                     }
  101.                     c += g_filter[((j + 3)*7) + i + 3] * (int)inputColor[bufadr_];
  102.                 }
  103.             }
  104.             c = g_factor[0] * c + g_factor[1];
  105.            
  106.             c = (c < 0) ? 0 : c;
  107.             c = (c > 255) ? 255 : c;
  108.             outputColor[buffadr(x, y, imageWidth, imageHeight)] = (unsigned char)c;
  109.         }
  110.     }
  111.    
  112. }
  113. void printHelp()
  114. {
  115.     std::cout << "HELP: \n";
  116.     std::cout << "exe filter_file_path image_file_path\n";
  117.     std::cout << "\n";
  118.     system("pause");
  119. }
  120.  
  121. void readDeviceAttributes(int devId, int &driverVersion, int &runtimeVersion, unsigned long long &totalGlobalMem,
  122.     int &multiProcessorCount, int &cudaCores, int &warpSize, unsigned int &totalConstantMem,
  123.     unsigned int &sharedMemPerBlock, int &maxThreadsPerBlock, int &maxThreadsPerMP)
  124. {
  125.     /* code from deviceQuery example*/
  126.     cudaSetDevice(devId);
  127.     cudaDeviceProp deviceProp;
  128.     cudaGetDeviceProperties(&deviceProp, devId);
  129.     printf("\nDevice %d: \"%s\"\n", devId, deviceProp.name);
  130.     cudaDriverGetVersion(&driverVersion);
  131.     cudaRuntimeGetVersion(&runtimeVersion);
  132.     printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion / 1000, (driverVersion % 100) / 10, runtimeVersion / 1000, (runtimeVersion % 100) / 10);
  133.     printf("  CUDA Capability Major/Minor version number:    %d.%d\n", deviceProp.major, deviceProp.minor);
  134.  
  135.     totalGlobalMem = (unsigned long long) deviceProp.totalGlobalMem;
  136.     printf("  Total amount of global memory: (%llu bytes)\n", totalGlobalMem);
  137.     printf("  (%2d) Multiprocessors, (%3d) CUDA Cores/MP:     %d CUDA Cores\n",
  138.         deviceProp.multiProcessorCount,
  139.         _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
  140.         _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
  141.  
  142.     printf("  Total amount of constant memory:               %lu bytes\n", deviceProp.totalConstMem);
  143.     totalConstantMem = deviceProp.totalConstMem;
  144.     printf("  Total amount of shared memory per block:       %lu bytes\n", deviceProp.sharedMemPerBlock);
  145.     sharedMemPerBlock = deviceProp.sharedMemPerBlock;
  146.  
  147.     maxThreadsPerBlock = deviceProp.maxThreadsPerBlock;
  148.     maxThreadsPerMP = deviceProp.maxThreadsPerMultiProcessor;
  149.     printf("\n");
  150. }
  151.  
  152. int main(int argc, char **argv)
  153. {
  154.     int devId = 0;
  155.     int warpSize, multiProcesorCount, cudaCores;
  156.     int runtimeVersion, driverVersion;
  157.     unsigned long long totalGlobalMem;
  158.     unsigned int sharedMemPerBlock, totalConstantMem;
  159.     int maxThreadsPerBlock, maxThreadsPerMP;
  160.  
  161.     cudaDeviceProp deviceProp;
  162.  
  163.     int status = EXIT_SUCCESS;
  164.     std::cout << argv[0] << " starting.. \n";
  165.     std::cout << "CUDA Image filtering - 03:40\n";
  166.  
  167.     unsigned char *g_r = 0; //graphics red
  168.     unsigned char *g_g = 0; //graphics green
  169.     unsigned char *g_b = 0; //graphics blue
  170.     unsigned char *g_or = 0; //graphics output red
  171.     unsigned char *g_og = 0; //graphics output green
  172.     unsigned char *g_ob = 0; //graphics output blue
  173.     cudaError_t cudaStatus;
  174.  
  175.     /* read cuda devices */
  176. #pragma region readDevices
  177.     int deviceCount = 0;
  178.     cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
  179.     if (error_id != cudaSuccess)
  180.     {
  181.         std::cout << "cudaGetDeviceCount returned " << (int)error_id << "->" << cudaGetErrorString(error_id) << "\n";
  182.         std::cout << "Result = FAIL\n";
  183.         exit(EXIT_FAILURE);
  184.     }
  185.     // This function call returns 0 if there are no CUDA capable devices.
  186.     if (deviceCount == 0)
  187.     {
  188.         std::cout << "There are no available device(s) that support CUDA\n";
  189.         std::cout << "Result = FAIL\n";
  190.         exit(EXIT_FAILURE);
  191.     }
  192.     else
  193.     {
  194.         std::cout << "Detected " << deviceCount << " CUDA Capable device(s)\n";
  195.         if (deviceCount > 1)
  196.         {
  197.             std::cout << "Application will run on device with id= " << devId <<"\n";
  198.         }
  199.     }
  200. #pragma endregion readDevices
  201.  
  202.     /* read input parameters */
  203. #pragma region readAndParseInput
  204.     if (argc < 3)
  205.     {
  206.         std::cout << "You passed " << argc << "arguments\n";
  207.         std::cout << "Not enough arguments passed to program\n";
  208.         printHelp();
  209.         std::cout << "Result = FAIL\n";
  210.         exit(EXIT_FAILURE);
  211.     }
  212.     float filter_value[49];
  213.     float factor[2];
  214.  
  215.     /* read filter file and load to constant mem */
  216.     CsvParser *csvparser = CsvParser_new(argv[1], NULL, 0);
  217.     CsvRow *row;
  218.     std::string num;
  219.     double tmp;
  220.     for (int i = 0; i < 8; i++)
  221.     {
  222.         row = CsvParser_getRow(csvparser);
  223.         if (row == NULL)
  224.         {
  225.             std::cout << "Malformed csv filter\n";
  226.             std::cout << "line:" << i << std::endl;
  227.             std::cout << CsvParser_getErrorMessage(csvparser) << std::endl;
  228.             CsvParser_destroy(csvparser);
  229.             printHelp();
  230.             std::cout << "Result = FAIL\n";
  231.             exit(EXIT_FAILURE);
  232.         }
  233.         char **rowFields = CsvParser_getFields(row);
  234.         if (i < 7)
  235.         {
  236.             for (int j = 0; j < 7; j++)
  237.             {
  238.                 num = std::string(rowFields[j]);
  239.                 tmp = ::strtod(num.c_str(), 0);
  240.                 filter_value[i * 7 + j] = (float)tmp;
  241.             }
  242.         }
  243.         else
  244.         {
  245.             num = std::string(rowFields[0]);
  246.             tmp = ::strtod(num.c_str(), 0);
  247.             factor[0] = (float)tmp;
  248.             num = std::string(rowFields[1]);
  249.             tmp = ::strtod(num.c_str(), 0);
  250.             factor[1] = (float)tmp;
  251.         }
  252.     }
  253.     CsvParser_destroy_row(row);
  254.     CsvParser_destroy(csvparser);
  255.  
  256.     std::cout << "Filter from file " << argv[1] << " read succesfully \n";
  257.  
  258.     /* read bmp from file to gpu global mem */
  259.  
  260.     std::string file_name(argv[2]);
  261.     bitmap_image image(file_name);
  262.     if (!image)
  263.     {
  264.         std::cout << "Failed to open image " << file_name.c_str() << "\n";
  265.         printHelp();
  266.         std::cout << "Result = FAIL\n";
  267.         exit(EXIT_FAILURE);
  268.     }
  269.  
  270.     const unsigned int imgWidth = image.width();
  271.     const unsigned int imgHeight = image.height();
  272.     std::cout << "Loaded image " << imgWidth << "X" << imgHeight << " from file " << file_name.c_str() << " \n";
  273.  
  274.     unsigned int imgSize = imgWidth * imgHeight;
  275.     unsigned int imgSizeBytes = sizeof(unsigned char)* 3 * imgSize;
  276.  
  277.     /* check if filer fits into constant mem, load */
  278.     /* it will not, constant memory is too small*/
  279.     int neededConstMem = sizeof(float)* Filter_Total_Size;
  280.     /* check if image fits into global memory, alocate output tables */
  281.     int neededGlobalMem = 2 * imgSizeBytes;
  282.     std::cout << "Application will need total of " << neededGlobalMem <<  " bytes in global memory \n";
  283.     std::cout << "Application will need total of " << neededConstMem << " bytes in constant memory \n";
  284. #pragma endregion readAndParseInput
  285.  
  286. #pragma region readDeviceProperties
  287.     readDeviceAttributes(devId, driverVersion, runtimeVersion, totalGlobalMem, multiProcesorCount, cudaCores,
  288.         warpSize, totalConstantMem, sharedMemPerBlock, maxThreadsPerBlock, maxThreadsPerMP);
  289.  
  290.     if ((unsigned int)neededConstMem > totalConstantMem || neededGlobalMem > totalGlobalMem)
  291.     {
  292.         std::cout << "Not enough memory. Try smaller image or get better device. \n";
  293.         printHelp();
  294.         std::cout << "Result = FAIL\n";
  295.         exit(EXIT_FAILURE);
  296.     }
  297. #pragma endregion readDeviceProperties
  298.  
  299. #pragma region allocateAndCopyMem
  300.     unsigned char *r = (unsigned char*)malloc(sizeof(unsigned char) * imgSize);
  301.     unsigned char *g = (unsigned char*)malloc(sizeof(unsigned char) * imgSize);
  302.     unsigned char *b = (unsigned char*)malloc(sizeof(unsigned char) * imgSize);
  303.  
  304.     unsigned char red, green, blue;
  305.     for (std::size_t i = 0; i < imgHeight; i++)
  306.     {
  307.         for (std::size_t j = 0; j < imgWidth; j++)
  308.         {
  309.             image.get_pixel((unsigned int)j, (unsigned int)i, red, green, blue);
  310.             r[(int)i * (int)imgWidth + (int)j] = red;
  311.             g[(int)i * (int)imgWidth + (int)j] = green;
  312.             b[(int)i * (int)imgWidth + (int)j] = blue;
  313.         }
  314.     }
  315.  
  316.     std::cout << "Allocated mem for rgb tables. " << imgSizeBytes << " bytes in total \n";
  317.  
  318.     cudaStatus = cudaSetDevice(devId);
  319.     cudaGetDeviceProperties(&deviceProp, devId);
  320.     if (cudaStatus != cudaSuccess) {
  321.         std::cout << "cudaSetDevice failed! Do you have a CUDA - capable GPU installed ?\n";
  322.         status = EXIT_FAILURE;
  323.         goto Error;
  324.     }
  325.  
  326.     // Allocate GPU buffers for six vectors (3 input, 3 output).
  327.     cudaStatus = cudaMalloc((void**)&g_r, sizeof(unsigned char)* imgSize);
  328.     if (cudaStatus != cudaSuccess) {
  329.         std::cout << "cudaMalloc failed!\n";
  330.         status = EXIT_FAILURE;
  331.         goto Error;
  332.     }
  333.     cudaStatus = cudaMalloc((void**)&g_g, sizeof(unsigned char)* imgSize);
  334.     if (cudaStatus != cudaSuccess) {
  335.         std::cout << "cudaMalloc failed!\n";
  336.         status = EXIT_FAILURE;
  337.         goto Error;
  338.     }
  339.     cudaStatus = cudaMalloc((void**)&g_b, sizeof(unsigned char)* imgSize);
  340.     if (cudaStatus != cudaSuccess) {
  341.         std::cout << "cudaMalloc failed!\n";
  342.         status = EXIT_FAILURE;
  343.         goto Error;
  344.     }
  345.     cudaStatus = cudaMalloc((void**)&g_or, sizeof(unsigned char)* imgSize);
  346.     if (cudaStatus != cudaSuccess) {
  347.         std::cout << "cudaMalloc failed!\n";
  348.         status = EXIT_FAILURE;
  349.         goto Error;
  350.     }
  351.     cudaStatus = cudaMalloc((void**)&g_og, sizeof(unsigned char)* imgSize);
  352.     if (cudaStatus != cudaSuccess) {
  353.         std::cout << "cudaMalloc failed!\n";
  354.         status = EXIT_FAILURE;
  355.         goto Error;
  356.     }
  357.     cudaStatus = cudaMalloc((void**)&g_ob, sizeof(unsigned char)* imgSize);
  358.     if (cudaStatus != cudaSuccess) {
  359.         std::cout << "cudaMalloc failed!\n";
  360.         status = EXIT_FAILURE;
  361.         goto Error;
  362.     }
  363.  
  364.     std::cout << "Space for input and output images mallocked.\n";
  365.  
  366.     // Copy input vectors from host memory to GPU buffers.
  367.     cudaStatus = cudaMemcpy(g_r, r, sizeof(unsigned char)* imgSize, cudaMemcpyHostToDevice);
  368.     if (cudaStatus != cudaSuccess) {
  369.         std::cout << "cudaMalloc failed!\n";
  370.         status = EXIT_FAILURE;
  371.         goto Error;
  372.     }
  373.     cudaStatus = cudaMemcpy(g_g, g, sizeof(unsigned char)* imgSize, cudaMemcpyHostToDevice);
  374.     if (cudaStatus != cudaSuccess) {
  375.         std::cout << "cudaMalloc failed!\n";
  376.         status = EXIT_FAILURE;
  377.         goto Error;
  378.     }
  379.     cudaStatus = cudaMemcpy(g_b, b, sizeof(unsigned char)* imgSize, cudaMemcpyHostToDevice);
  380.     if (cudaStatus != cudaSuccess) {
  381.         std::cout << "cudaMalloc failed!\n";
  382.         status = EXIT_FAILURE;
  383.         goto Error;
  384.     }
  385.     std::cout << "Input data coppied into global mem.\n";
  386.     cudaStatus = cudaMemcpyToSymbol(g_filter, filter_value, sizeof(float)* Filter_Total_Size, 0, cudaMemcpyHostToDevice);
  387.     if (cudaStatus != cudaSuccess) {
  388.         std::cout << "cudaMemcpyToSymbol failed!\n";
  389.         status = EXIT_FAILURE;
  390.         goto Error;
  391.     }
  392.     cudaMemcpyToSymbol(g_factor, factor, sizeof(float)*2,0,cudaMemcpyHostToDevice);
  393.     getLastCudaError("Factor copy failed");
  394.     checkCudaErrors(cudaDeviceSynchronize());
  395.  
  396.     std::cout << "Filter data coppied into constant mem.\n";
  397.  
  398. #pragma endregion allocateAndCopyMem
  399.  
  400. #pragma region divideImage
  401.     //lets just start with dividing image into blocks in 2D
  402.     //4x4
  403.  
  404.     int blocksX = 64;// 4;
  405.     int blocksY = 64;// 4;
  406.     int threadsX = 8;// 8;
  407.     int threadsY = 8;// 8;
  408.  
  409.     //zaokrąglone w górę te podziały
  410.     unsigned int blockPartX = (imgWidth / blocksX) + (imgWidth%blocksX != 0);
  411.     unsigned int blockPartY = (imgHeight / blocksY) + (imgHeight%blocksY != 0);
  412.     //to wyjdzie zawsze conajmniej 1, chyba że wymiar obrazu to 0, a to już patola
  413.  
  414.     unsigned int threadPartX = (blockPartX / threadsX) + (blockPartX%threadsX != 0);
  415.     unsigned int threadPartY = (blockPartY / threadsY) + (blockPartY%threadsY != 0);
  416.     //tu też zawsze conajmniej 1
  417.  
  418.     dim3 blocksPerGrid(blocksX, blocksY);
  419.     //and 8x8 threads per block
  420.     dim3 threadsPerBlock(threadsX, threadsY);
  421.  
  422.     std::cout << blocksX << "X" << blocksY << " blocks, each block procesing " << blockPartX << "X" << blockPartY << "pixels.\n";
  423.     std::cout << threadsX << "X" << threadsY << " threads per block, each procesing " << threadPartX << "X" << threadPartY << "pixels.\n";
  424. #pragma endregion divideImage
  425.  
  426. #pragma region runKernels
  427.     /* do magic - run kernel */
  428.     filterKernel <<<blocksPerGrid, threadsPerBlock >>>(g_r, g_or, imgWidth, imgHeight, blockPartX, blockPartY, threadPartX, threadPartY);
  429.     getLastCudaError("Kernel execution failed");
  430.     checkCudaErrors(cudaDeviceSynchronize());
  431.     std::cout << "Red.\n";
  432.  
  433.     filterKernel <<<blocksPerGrid, threadsPerBlock >>>(g_g, g_og, imgWidth, imgHeight, blockPartX, blockPartY, threadPartX, threadPartY);
  434.     getLastCudaError("Kernel execution failed");
  435.     checkCudaErrors(cudaDeviceSynchronize());
  436.     std::cout << "Green.\n";
  437.  
  438.     filterKernel <<<blocksPerGrid, threadsPerBlock >>>(g_b, g_ob, imgWidth, imgHeight, blockPartX, blockPartY, threadPartX, threadPartY);
  439.     getLastCudaError("Kernel execution failed");
  440.     checkCudaErrors(cudaDeviceSynchronize());
  441.     std::cout << "Blue.\n";
  442.     getLastCudaError("Kernel execution failed");
  443.     checkCudaErrors(cudaDeviceSynchronize());
  444.  
  445.     //sekwencyjnie, hehe.
  446.     // Check if kernel execution generated an error
  447.  
  448.     /* start clock */
  449.  
  450.     /*Keep in mind that there is some driver overhead the first time you call
  451.     a particular kernel in your program, so when doing timing studies, you
  452.     should warm up your kernels by calling them once before your timer
  453.     starts.*/
  454.  
  455.     /* end clock */
  456.  
  457. #pragma endregion runKernels
  458.  
  459. #pragma region copyResults
  460.     // Copy output vector from GPU buffer to host memory.
  461.     cudaStatus = cudaMemcpy(r, g_or, sizeof(unsigned char)* imgSize, cudaMemcpyDeviceToHost);
  462.     if (cudaStatus != cudaSuccess) {
  463.         std::cout << "cudaMemcpy failed!\n";
  464.         status = EXIT_FAILURE;
  465.         goto Error;
  466.     }
  467.     cudaStatus = cudaMemcpy(g, g_og, sizeof(unsigned char)* imgSize, cudaMemcpyDeviceToHost);
  468.     if (cudaStatus != cudaSuccess) {
  469.         std::cout << "cudaMemcpy failed!\n";
  470.         status = EXIT_FAILURE;
  471.         goto Error;
  472.     }
  473.     cudaStatus = cudaMemcpy(b, g_ob, sizeof(unsigned char)* imgSize, cudaMemcpyDeviceToHost);
  474.     if (cudaStatus != cudaSuccess) {
  475.         std::cout << "cudaMemcpy failed!\n";
  476.         status = EXIT_FAILURE;
  477.         goto Error;
  478.     }
  479. #pragma endregion copyResults
  480.  
  481.     /* assemble and save output image */
  482. #pragma region saveOutput
  483.     for (std::size_t i = 0; i < imgHeight; i++)
  484.     {
  485.         for (std::size_t j = 0; j < imgWidth; j++)
  486.         {
  487.             red = r[(int)i * (int)imgWidth + (int)j];
  488.             green = g[(int)i * (int)imgWidth + (int)j];
  489.             blue = b[(int)i * (int)imgWidth + (int)j];
  490.             image.set_pixel((unsigned int)j, (unsigned int)i, red, green, blue);
  491.         }
  492.     }
  493.     time_t timer = NULL;
  494.     time(&timer);
  495.     struct tm * timeinfo = localtime(&timer);
  496.     char matko[14];
  497.     strftime(matko, 14, "%y%m%d_%H%M%S", timeinfo);
  498.     std::cout << "Saving result image to " << file_name.substr(0, file_name.size() - 4) + "_output_" + std::string(matko) + ".bmp" << " file.\n";
  499.     image.save_image(file_name.substr(0, file_name.size() - 4) + "_output_" + std::string(matko) + ".bmp");
  500. #pragma endregion saveOutput
  501.  
  502.  
  503.     // cudaDeviceReset must be called before exiting in order for profiling and
  504.     // tracing tools such as Nsight and Visual Profiler to show complete traces.
  505.     cudaStatus = cudaDeviceReset();
  506.     if (cudaStatus != cudaSuccess) {
  507.         fprintf(stderr, "cudaDeviceReset failed!");
  508.         return 1;
  509.     }
  510.  
  511. /* free every mallocked space and exit */
  512. #pragma region freeAndExit
  513. Error:
  514.     cudaFree(g_r);
  515.     cudaFree(g_g);
  516.     cudaFree(g_b);
  517.     cudaFree(g_or);
  518.     cudaFree(g_og);
  519.     cudaFree(g_ob);
  520.     //cudaFree(g_filter);
  521.     free(r);
  522.     free(g);
  523.     free(b);
  524.     system("pause");
  525.     exit(status);
  526. #pragma endregion freeAndExit
  527. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement