Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include<stdio.h>
- #include<stdlib.h>
// Computes C = A * B for square m x m matrices of long int stored row-major.
//
// dA, dB : input matrices, m*m elements each (only read here)
// dC     : output matrix, m*m elements (every element is overwritten)
// m      : number of rows and of columns of all three matrices
//
// Launch layout: 2-D grid of 2-D blocks. The x dimension selects rows and the
// y dimension selects columns. Each thread starts at its global (x,y) index
// and advances by the total number of launched threads in that dimension, so
// any grid/block shape is valid: surplus threads do nothing when the grid is
// larger than the matrix, and each thread handles several elements when the
// grid is smaller. No shared memory is used.
__global__ void matrix_mult(long int *dA,long int *dB,long int *dC,long int m)
{
    // Widen before multiplying so the index arithmetic is not done in
    // 32-bit unsigned int.
    const long int row_step=(long int)blockDim.x*gridDim.x;
    const long int col_step=(long int)blockDim.y*gridDim.y;
    long int i,j,k;
    for(i=(long int)blockIdx.x*blockDim.x+threadIdx.x;i<m;i+=row_step)
    {
        for(j=(long int)blockIdx.y*blockDim.y+threadIdx.y;j<m;j+=col_step)
        {
            // Dot product of row i of A with column j of B.
            long int product=0;
            for(k=0;k<m;k++)
            {
                product+=dA[i*m+k]*dB[k*m+j];
            }
            dC[i*m+j]=product;
        }
    }
}
// Reads an x-by-y matrix of whitespace-separated integers from fp into
// matrix, stored row-major (element (i,j) lives at matrix[i*y+j]).
//
// fp     : open stream positioned at the first element
// matrix : caller-allocated buffer with room for x*y long ints
// x      : number of rows
// y      : number of columns
//
// If the stream runs out or contains a non-numeric token, a message is
// written to stderr, every element not yet read is set to 0, and the
// function returns, so the buffer never holds uninitialised values.
void read_matrix_from_file(FILE *fp,long int *matrix,long int x,long int y)
{
    long int i,j,idx;
    for(i=0;i<x;i++)
    {
        for(j=0;j<y;j++)
        {
            // Row stride is the column count y, not the row count x.
            if(fscanf(fp,"%ld",&matrix[i*y+j])!=1)
            {
                fprintf(stderr,"\nError in reading matrix element (%ld,%ld)\n",i,j);
                for(idx=i*y+j;idx<x*y;idx++)
                {
                    matrix[idx]=0;
                }
                return;
            }
        }
    }
}
// Writes an x-by-y row-major matrix to fp, one row per line, with a single
// space after every element (including the last one of each row).
//
// fp     : open, writable stream
// matrix : buffer holding x*y long ints; element (i,j) is matrix[i*y+j]
// x      : number of rows
// y      : number of columns
void print_matrix_file(FILE *fp,long int *matrix,long int x,long int y)
{
    long int i,j;
    for(i=0;i<x;i++)
    {
        for(j=0;j<y;j++)
        {
            // Row stride is the column count y, not the row count x.
            fprintf(fp,"%ld ",matrix[i*y+j]);
        }
        fprintf(fp,"\n");
    }
}
// Prints an x-by-y row-major matrix to standard output, one row per line,
// with a single space after every element.
//
// matrix : buffer holding x*y long ints; element (i,j) is matrix[i*y+j]
// x      : number of rows
// y      : number of columns
void print_matrix(long int *matrix,long int x,long int y)
{
    long int i,j;
    for(i=0;i<x;i++)
    {
        for(j=0;j<y;j++)
        {
            // Row stride is the column count y, not the row count x.
            printf("%ld ",matrix[i*y+j]);
        }
        printf("\n");
    }
}
// Aborts the program with a diagnostic when a CUDA runtime call has failed.
// what is a short description of the step that was attempted.
static void check_cuda(cudaError_t status,const char *what)
{
    if(status!=cudaSuccess)
    {
        fprintf(stderr,"\n%s: %s\n",what,cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Multiplies two integer matrices on the GPU and writes the product to a file.
//
// Usage: ./a.out matrixfile1 m n matrixfile2 o p outputfile.txt
//   matrixfile1, m, n : text file holding matrix A and its rows/columns
//   matrixfile2, o, p : text file holding matrix B and its rows/columns
//   outputfile.txt    : file that receives C = A * B
//
// The kernel is given only the single dimension m and uses it as the size of
// all three matrices, so the program accepts square matrices of identical
// size only (m == n == o == p) and rejects anything else.
//
// Returns 0 on success; exits with EXIT_FAILURE on bad arguments, file or
// memory errors, or any CUDA failure.
int main(int argc,char *argv[])
{
    FILE *fp1=NULL,*fp2=NULL,*fp3=NULL;
    long int m,n,o,p;
    long int blocks;
    size_t bytes;
    float elapsedTime=0.0f;
    long int *dA=NULL,*dB=NULL,*dC=NULL;
    long int *matrixA=NULL,*matrixB=NULL,*matrixC=NULL;
    cudaEvent_t start,stop;//To compute the elapsed time

    if(argc!=8)
    {
        fprintf(stderr,"\n(8 Parameters)./a.out matrixfile1 m n matrixfile2 o p outputfile.txt");
        exit(EXIT_FAILURE);
    }

    // Validate the dimensions before touching any file, so a bad command
    // line does not truncate an existing output file.
    m=atol(argv[2]);
    n=atol(argv[3]);
    o=atol(argv[5]);
    p=atol(argv[6]);
    if(m<=0||n<=0||o<=0||p<=0)
    {
        fprintf(stderr,"\nMatrix dimensions must be positive integers\n");
        exit(EXIT_FAILURE);
    }
    if(m!=n||n!=o||o!=p)
    {
        fprintf(stderr,"\nOnly square matrices of equal size are supported (m = n = o = p)\n");
        exit(EXIT_FAILURE);
    }
    // Guard the byte-count computation against size_t overflow.
    if((size_t)m>((size_t)-1/sizeof(long int))/(size_t)m)
    {
        fprintf(stderr,"\nMatrix dimension is too large\n");
        exit(EXIT_FAILURE);
    }
    bytes=(size_t)m*(size_t)m*sizeof(long int);

    // The inputs are only read, so plain read mode is enough.
    fp1=fopen(argv[1],"r");
    fp2=fopen(argv[4],"r");
    fp3=fopen(argv[7],"w+");
    if(fp1==NULL||fp2==NULL||fp3==NULL)
    {
        fprintf(stderr,"\nError in opening the file");
        exit(EXIT_FAILURE);
    }

    matrixA=(long int *)malloc(bytes);
    matrixB=(long int *)malloc(bytes);
    matrixC=(long int *)malloc(bytes);
    if(matrixA==NULL||matrixB==NULL||matrixC==NULL)
    {
        fprintf(stderr,"Error Memory allocation for matrix in host\n");
        exit(EXIT_FAILURE);
    }

    // Reading the matrices from file; the inputs are not needed afterwards.
    read_matrix_from_file(fp1,matrixA,m,n);
    read_matrix_from_file(fp2,matrixB,o,p);
    fclose(fp1);
    fclose(fp2);

    // Device buffers for A, B and C.
    check_cuda(cudaMalloc((void**)&dA,bytes),"Error in allocation");
    check_cuda(cudaMalloc((void**)&dB,bytes),"Error in allocation");
    check_cuda(cudaMalloc((void**)&dC,bytes),"Error in allocation");

    // Copying matrixA and matrixB to GPU memory from CPU.
    check_cuda(cudaMemcpy(dA,matrixA,bytes,cudaMemcpyHostToDevice),
               "Error in copying data from host to device");
    check_cuda(cudaMemcpy(dB,matrixB,bytes,cudaMemcpyHostToDevice),
               "Error in copying data from host to device");

    // 10x10 threads per block; the grid is sized from m (rounded up) instead
    // of being fixed, and capped at 65535 blocks per dimension, the limit
    // for the y dimension of a grid.
    blocks=(m+9)/10;
    if(blocks>65535)
    {
        blocks=65535;
    }
    dim3 dimBlock(10,10);
    dim3 dimGrid((unsigned int)blocks,(unsigned int)blocks);

    check_cuda(cudaEventCreate(&start),"Error in creating start event");
    check_cuda(cudaEventCreate(&stop),"Error in creating stop event");
    check_cuda(cudaEventRecord(start,0),"Error in recording start event");
    matrix_mult<<<dimGrid,dimBlock>>>(dA,dB,dC,m);
    // A kernel launch returns nothing; a bad launch configuration is only
    // visible through cudaGetLastError().
    check_cuda(cudaGetLastError(),"Error in launching kernel");
    check_cuda(cudaEventRecord(stop,0),"Error in recording stop event");
    // Blocks until the stop event has actually been recorded.
    check_cuda(cudaEventSynchronize(stop),"Error in executing kernel");
    check_cuda(cudaEventElapsedTime(&elapsedTime,start,stop),"Error in reading elapsed time");
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    // cudaEventElapsedTime reports milliseconds; print seconds.
    printf("\n\nElapsed Time for computation on GPU(Seconds)=%lf\n",(double)(elapsedTime/1000));

    check_cuda(cudaMemcpy(matrixC,dC,bytes,cudaMemcpyDeviceToHost),
               "Error in copying data from device to host");
    cudaFree(dA);
    cudaFree(dB);
    cudaFree(dC);

    printf("\n\nThe Product matrix is:(C=A*B):\n");
    //print_matrix(matrixC,m,p);
    print_matrix_file(fp3,matrixC,m,p);
    // fclose flushes the buffered output; a failure here means the result
    // file is incomplete.
    if(fclose(fp3)!=0)
    {
        fprintf(stderr,"\nError in writing the output file\n");
        exit(EXIT_FAILURE);
    }

    free(matrixA);
    free(matrixB);
    free(matrixC);
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement