Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #define SIZE 100
- #define BLOCKSIZE 100
// Reports a failed CUDA runtime call and terminates the process.
//
// status : value returned by the CUDA runtime call being checked.
// file   : source file of the call site (supplied by HANDLE_ERROR).
// line   : source line of the call site (supplied by HANDLE_ERROR).
//
// Returns normally only when status == cudaSuccess. For any other value it
// prints the numeric error code and the cudaGetErrorString() text to stderr
// and exits with status -1.
static void handleCudaError(cudaError_t status, const char *file, int line)
{
    if (status == cudaSuccess)
        return;

    fprintf(stderr, "ERROR: %s:%d: CUDA error %d: %s\n",
            file, line, (int)status, cudaGetErrorString(status));
    exit(-1);
}

// HANDLE_ERROR(call) checks the cudaError_t produced by `call`.
// It is a macro rather than a function so that __FILE__ and __LINE__ expand
// at the call site; inside a function body they would always name the
// checker itself instead of the statement that failed.
#define HANDLE_ERROR(call) handleCudaError((call), __FILE__, __LINE__)
// Device-side helper: returns the integer product of its two arguments.
__device__ int pomnozi(int a, int b)
{
    const int product = a * b;
    return product;
}
// Matrix-vector product kernel: one thread produces one output element,
//   d_c[row] = sum over i of d_a[row * SIZE + i] * d_b[i].
//
// d_a : matrix read as SIZE rows of SIZE ints (index row * SIZE + i).
// d_b : vector of SIZE ints.
// d_c : output; element `row` is written for every row < SIZE, so it must
//       hold at least SIZE ints.
//
// Launch layout: 1D grid of 1D blocks. Any configuration with at least SIZE
// threads in total covers every row; threads whose global index is >= SIZE
// do nothing.
__global__ void compute(int *d_a, int* d_b, int* d_c)
{
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= SIZE)
        return;  // surplus thread: no row to compute

    // Accumulate locally and store once instead of doing a read-modify-write
    // on d_c[row] in every iteration.
    int sum = 0;
    for (int i = 0; i < SIZE; i++) {
        sum += pomnozi(d_a[row * SIZE + i], d_b[i]);
    }
    d_c[row] = sum;
}
// Host wrapper: runs the `compute` kernel on the GPU and returns its output.
//
// h_a : host input, SIZE * SIZE ints are copied to the device.
// h_b : host input, SIZE ints are copied to the device.
// h_c : host output, receives BLOCKSIZE ints copied back from the device.
//
// Every CUDA runtime call, including the kernel launch and the subsequent
// synchronization, is checked through HANDLE_ERROR, which terminates the
// process on failure.
__host__ void outer_compute(int* h_a, int* h_b, int* h_c)
{
    int *d_a, *d_b, *d_c;

    HANDLE_ERROR(cudaMalloc((void **)&d_a, SIZE * SIZE * sizeof(int)));
    HANDLE_ERROR(cudaMalloc((void **)&d_b, SIZE * sizeof(int)));
    HANDLE_ERROR(cudaMalloc((void **)&d_c, BLOCKSIZE * sizeof(int)));

    // Copy the input arrays from the CPU to the GPU.
    HANDLE_ERROR(cudaMemcpy(d_a, h_a, SIZE * SIZE * sizeof(int), cudaMemcpyHostToDevice));
    HANDLE_ERROR(cudaMemcpy(d_b, h_b, SIZE * sizeof(int), cudaMemcpyHostToDevice));

    compute<<<1, BLOCKSIZE>>>(d_a, d_b, d_c);
    // A kernel launch returns no status itself; an invalid launch
    // configuration is only visible through cudaGetLastError().
    HANDLE_ERROR(cudaGetLastError());

    // Synchronize (surfacing any error raised while the kernel ran), then
    // copy the result array from the GPU to the CPU.
    HANDLE_ERROR(cudaDeviceSynchronize());
    HANDLE_ERROR(cudaMemcpy(h_c, d_c, BLOCKSIZE * sizeof(int), cudaMemcpyDeviceToHost));

    HANDLE_ERROR(cudaFree(d_a));
    HANDLE_ERROR(cudaFree(d_b));
    HANDLE_ERROR(cudaFree(d_c));
}
// Program entry point: builds a SIZE x SIZE matrix and a SIZE-element
// vector, computes their product on the GPU via outer_compute() and again
// sequentially on the CPU, prints both result vectors, and compares them.
//
// Returns 0 when every GPU element equals its CPU counterpart, 1 otherwise.
// Command-line arguments are not used.
int main(int argc, char **argv)
{
    int i, j;
    int mismatches = 0;
    // c_array receives BLOCKSIZE ints from outer_compute(); the loops below
    // read its first SIZE elements, so BLOCKSIZE must be at least SIZE.
    int a[SIZE * SIZE], b[SIZE], c_array[BLOCKSIZE], expected[SIZE];

    (void)argc;
    (void)argv;

    // Initialization: b[i] = i, and a holds its own flat index.
    for (i = 0; i < SIZE; ++i) {
        b[i] = i;
        for (j = 0; j < SIZE; ++j) {
            a[i * SIZE + j] = i * SIZE + j;
        }
    }

    // Run the CUDA version.
    outer_compute(a, b, c_array);

    // Print the results produced by the GPU threads.
    for (i = 0; i < SIZE; i++) {
        printf("Rezultat paralelni %d \n", c_array[i]);
    }

    // Sequential reference computation, kept in its own array so the GPU
    // results remain available for the comparison below.
    for (i = 0; i < SIZE; i++) {
        expected[i] = 0;
        for (j = 0; j < SIZE; ++j)
            expected[i] += a[i * SIZE + j] * b[j];
    }
    for (i = 0; i < SIZE; i++) {
        printf("Rezultat sekvencijalni %d \n", expected[i]);
    }

    // Verify the GPU output against the CPU reference; integer arithmetic
    // is used throughout, so the values must match exactly.
    for (i = 0; i < SIZE; i++) {
        if (c_array[i] != expected[i]) {
            fprintf(stderr, "Mismatch at index %d: parallel %d, sequential %d\n",
                    i, c_array[i], expected[i]);
            ++mismatches;
        }
    }

    return mismatches == 0 ? 0 : 1;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement