Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
Matrix multiplication using dynamically allocated matrices, using the GPU
matrixA m*n
matrixB o*p
Method
1. Allocate memory for the matrices a, b, and c in CUDA
2. Copy a and b to CUDA memory
3. Set up the launch parameters
4. Call the kernel with the parameters and thread configuration
5. Define the kernel
6. Copy the result back to CPU memory
7. Write the result to a file
*/
- #include<stdio.h>
- #include<stdlib.h>
- #include<time.h>
//Allocates an m-by-n matrix of long int as a table of m row pointers,
//each pointing to its own separately allocated row of n elements.
//  m : number of rows (must not be negative)
//  n : number of columns (must not be negative)
//Returns the row-pointer table; the element values are uninitialized.
//The caller releases it by freeing every row and then the table itself.
//On an invalid dimension or an allocation failure a message is written to
//stderr and the program terminates with EXIT_FAILURE.
long int ** alloc_matrix(long int m,long int n)
{
    const size_t max_size=(size_t)-1;
    long int **matrix1=NULL;
    long int i;
    size_t table_bytes,row_bytes;
    //Reject negative dimensions and byte counts that would wrap around size_t
    if(m<0||n<0||(size_t)m>max_size/sizeof(long int *)||(size_t)n>max_size/sizeof(long int))
    {
        fprintf(stderr,"\nError in allocating memory\n");
        exit(EXIT_FAILURE);
    }
    table_bytes=(size_t)m*sizeof(long int *);
    row_bytes=(size_t)n*sizeof(long int);
    //malloc(0) may legally return NULL, so always request at least one byte:
    //that way a NULL result can only mean a real allocation failure
    matrix1=(long int **)malloc(table_bytes?table_bytes:1);
    if(matrix1==NULL)
    {
        fprintf(stderr,"\nError in allocating memory\n");
        exit(EXIT_FAILURE);
    }
    for(i=0;i<m;i++)
    {
        matrix1[i]=(long int *)malloc(row_bytes?row_bytes:1);
        if(matrix1[i]==NULL)
        {
            fprintf(stderr,"\nError in allocating memory\n");
            exit(EXIT_FAILURE);
        }
    }
    return (matrix1);
}//End of matrix allocation function
//Reads an x-by-y matrix of whitespace-separated integers from a text stream.
//  fp     : stream opened for reading, positioned at the first element
//  matrix : table of x row pointers, each row holding at least y elements
//  x, y   : number of rows and columns to read
//Elements are consumed in row-major order. If an element cannot be parsed
//(premature end of file or non-numeric text) a message is written to stderr
//and the program terminates with EXIT_FAILURE, so that the caller never
//continues with partially uninitialized matrix contents.
void read_matrix(FILE *fp,long int **matrix,long int x,long int y)
{
    long int i,j;
    for(i=0;i<x;i++)
    {
        for(j=0;j<y;j++)
        {
            //fscanf returns the number of converted items; anything other than 1
            //means this element was not stored
            if(fscanf(fp,"%ld",&matrix[i][j])!=1)
            {
                fprintf(stderr,"\nError in reading matrix element (%ld,%ld)\n",i,j);
                exit(EXIT_FAILURE);
            }
        }
    }
}//End of read_matrix function
//Writes an x-by-y matrix to standard output.
//Each element is followed by a tab and each row is terminated by a newline.
//  matrix : table of x row pointers, each row holding at least y elements
//  x, y   : number of rows and columns to print
void print_matrix(long int **matrix,long int x,long int y)
{
    long int row,col;
    for(row=0;row!=x&&row<x;++row)
    {
        col=0;
        while(col<y)
        {
            printf("%ld\t",matrix[row][col]);
            ++col;
        }
        fputs("\n",stdout);
    }
}//End of console print function
//Writes an x-by-y matrix to the given stream.
//Each element is followed by a single space and each row ends with a newline.
//  fp     : stream opened for writing
//  matrix : table of x row pointers, each row holding at least y elements
//  x, y   : number of rows and columns to write
void file_print(FILE *fp,long int **matrix,long int x,long int y)
{
    long int row=0;
    while(row<x)
    {
        long int col;
        for(col=0;col<y;++col)
        {
            fprintf(fp,"%ld ",matrix[row][col]);
        }
        fputs("\n",fp);
        ++row;
    }
}//End of file print function
- //Multiplies the two matrices and produce the result
- /*void matrix_multiply(long int **matrix1,long int **matrix2,long int **matrixR,long int m,long int n,long int p)
- {
- long int i,j,k;
- for(i=0;i<m;i++)
- {
- for(j=0;j<p;j++)
- {
- matrixR[i][j]=0;
- for(k=0;k<n;k++)
- {
- matrixR[i][j]+=matrix1[i][k]*matrix2[k][j];
- }
- }
- }
- }//End of matrix multiplication function*/
//CUDA kernel for square matrix multiplication: C = A * B.
//  DmatrixA, DmatrixB : device pointers to row-major dim*dim input matrices
//  DmatrixC           : device pointer to the row-major dim*dim output matrix
//  Dm                 : device pointer to a single value, the dimension dim
//Launch layout: a 2D launch in which the global x index selects the output
//row and the global y index selects the output column. Both a grid of
//single-thread blocks (one block per element) and a grid of multi-thread
//blocks are accepted; threads that fall outside the dim*dim output do nothing.
//No shared memory is used.
__global__ void matrix_parallel_mult(int long *DmatrixA, int long *DmatrixB,int long *DmatrixC,int long *Dm)
{
    //Read the dimension once instead of dereferencing global memory per iteration
    const long int dim=*Dm;
    //Global coordinates: combine block and thread indices so that the result
    //does not depend on how the work is split between grid and block
    const long int i=(long int)blockIdx.x*blockDim.x+threadIdx.x;
    const long int j=(long int)blockIdx.y*blockDim.y+threadIdx.y;
    long int k;
    long int product=0;
    //Guard against launches that cover more threads than output elements
    if(i>=dim||j>=dim)
    {
        return;
    }
    for(k=0;k<dim;k++)
    {
        product=product+DmatrixA[i*dim+k]*DmatrixB[k*dim+j];
    }
    DmatrixC[i*dim+j]=product;
}
//Reports a failed CUDA runtime call on stderr and terminates the program
static void cuda_check(cudaError_t status,const char *what)
{
    if(status!=cudaSuccess)
    {
        fprintf(stderr,"\n%s ::%s\n",what,cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}
//Copies a rows*cols row-pointer matrix into a new zero-filled, contiguous, row-major dim*dim buffer
static long int *pack_padded(long int **matrix,long int rows,long int cols,long int dim)
{
    long int i,j;
    long int *flat=(long int *)calloc((size_t)dim*(size_t)dim,sizeof(long int));
    if(flat==NULL)
    {
        fprintf(stderr,"\nError in allocating memory\n");
        exit(EXIT_FAILURE);
    }
    for(i=0;i<rows;i++)
    {
        for(j=0;j<cols;j++)
        {
            flat[(size_t)i*(size_t)dim+(size_t)j]=matrix[i][j];
        }
    }
    return flat;
}
//Frees every row of a row-pointer matrix and then the pointer table itself
static void release_matrix(long int **matrix,long int rows)
{
    long int i;
    if(matrix==NULL)
    {
        return;
    }
    for(i=0;i<rows;i++)
    {
        free(matrix[i]);
    }
    free(matrix);
}
//Main Function
//Usage: ./executable m n o p Output_file Matrix1_file Matrix2_file
//  m n : dimensions of matrixA, read from Matrix1_file
//  o p : dimensions of matrixB, read from Matrix2_file (n must equal o)
//Multiplies matrixA by matrixB on the GPU, prints both inputs and the product
//to the console and writes the product to Output_file.
//Returns 0 on success; every failure terminates with EXIT_FAILURE.
int main(int argc,char *argv[])
{
    //Largest grid extent used here; the y dimension of a CUDA grid is limited to 65535 blocks
    const long int max_grid_dim=65535;
    long int m,n,o,p,dim,i,j;
    long int **matrixA=NULL,**matrixB=NULL,**matrixC=NULL;
    long int *hostA=NULL,*hostB=NULL,*hostC=NULL;
    long int *DmatrixA=NULL,*DmatrixB=NULL,*DmatrixC=NULL,*Dm=NULL;
    size_t cells,bytes;
    FILE *fp,*fp1,*fp2;
    if(argc!=8)
    {
        printf("\nThe format is:./executable Dim._of_A( m n) Dim._of_B(o p) Output_file.txt Martix1.txt Matrix2.txt\n");
        exit(EXIT_FAILURE);
    }
    m=atol(argv[1]);
    n=atol(argv[2]);
    o=atol(argv[3]);
    p=atol(argv[4]);
    //Validate everything before touching any file so that a bad invocation
    //does not truncate an existing output file
    if(m<=0||n<=0||o<=0||p<=0)
    {
        fprintf(stderr,"\nMatrix dimensions must be positive integers\n");
        exit(EXIT_FAILURE);
    }
    if(n!=o)
    {
        fprintf(stderr,"\nNo. of columns in matrixA and number of rows in matrixB are not equal\n");
        exit(EXIT_FAILURE);
    }
    //The kernel addresses all three matrices with one shared dimension, so the
    //operands are embedded in zero-padded square matrices of side dim; the
    //zero padding does not change the top-left m*p part of the product
    dim=m;
    if(n>dim){dim=n;}
    if(p>dim){dim=p;}
    if(dim>max_grid_dim)
    {
        fprintf(stderr,"\nMatrix dimension %ld exceeds the supported maximum of %ld\n",dim,max_grid_dim);
        exit(EXIT_FAILURE);
    }
    cells=(size_t)dim*(size_t)dim;
    if(cells>((size_t)-1)/sizeof(long int))
    {
        fprintf(stderr,"\nError in allocating memory\n");
        exit(EXIT_FAILURE);
    }
    bytes=cells*sizeof(long int);
    //Inputs are only read, so plain read mode is enough; open them before the
    //output so a missing input does not leave behind an empty result file
    fp1=fopen(argv[6],"r");
    fp2=fopen(argv[7],"r");
    if(fp1==NULL||fp2==NULL)
    {
        fprintf(stderr,"\nError in opening the file:\n");
        exit(EXIT_FAILURE);
    }
    fp=fopen(argv[5],"w+");
    if(fp==NULL)
    {
        fprintf(stderr,"\nError in opening the file:\n");
        exit(EXIT_FAILURE);
    }
    matrixA=alloc_matrix(m,n);
    matrixB=alloc_matrix(o,p);
    matrixC=alloc_matrix(m,p);//Allocating space for the product matrix
    read_matrix(fp1,matrixA,m,n);//Read the matrixA
    read_matrix(fp2,matrixB,o,p);//Read the matrixB
    print_matrix(matrixA,m,n);
    print_matrix(matrixB,o,p);
    //The host matrices are tables of separately allocated rows, which cannot be
    //transferred with a single cudaMemcpy; build contiguous copies first
    hostA=pack_padded(matrixA,m,n,dim);
    hostB=pack_padded(matrixB,o,p,dim);
    hostC=(long int *)malloc(bytes);
    if(hostC==NULL)
    {
        fprintf(stderr,"\nError in allocating memory\n");
        exit(EXIT_FAILURE);
    }
    //Allocating space in gpu
    cuda_check(cudaMalloc((void **)&DmatrixA,bytes),"MatrixA");
    cuda_check(cudaMalloc((void **)&DmatrixB,bytes),"MatrixB");
    cuda_check(cudaMalloc((void **)&DmatrixC,bytes),"MatrixC");
    cuda_check(cudaMalloc((void **)&Dm,sizeof(long int)),"m");
    //Copying essential data needed during kernel invocation
    cuda_check(cudaMemcpy(DmatrixA,hostA,bytes,cudaMemcpyHostToDevice),"MatrixA");
    cuda_check(cudaMemcpy(DmatrixB,hostB,bytes,cudaMemcpyHostToDevice),"MatrixB");
    cuda_check(cudaMemcpy(Dm,&dim,sizeof(long int),cudaMemcpyHostToDevice),"m");
    //Setting block and grid configuration: one single-thread block per output
    //element, so that block x selects the row and block y selects the column
    dim3 Grid((unsigned int)dim,(unsigned int)dim);
    dim3 Block(1,1);
    //Launching kernel with appropriate parameters
    matrix_parallel_mult<<<Grid,Block>>>(DmatrixA,DmatrixB,DmatrixC,Dm);
    //A launch returns no status itself: fetch configuration errors explicitly,
    //then wait for completion so that execution errors are reported here
    cuda_check(cudaGetLastError(),"Kernel launch");
    cuda_check(cudaDeviceSynchronize(),"Kernel execution");
    //Copying back the result
    cuda_check(cudaMemcpy(hostC,DmatrixC,bytes,cudaMemcpyDeviceToHost),"MatrixC");
    //Extract the m*p product from the padded dim*dim result
    for(i=0;i<m;i++)
    {
        for(j=0;j<p;j++)
        {
            matrixC[i][j]=hostC[(size_t)i*(size_t)dim+(size_t)j];
        }
    }
    printf("\nThe productmatrix is:\n");//Print the result of multiplication to the console
    print_matrix(matrixC,m,p);
    file_print(fp,matrixC,m,p);//Write the product to the result file given in argv[5]
    //Deallocation in CPU
    release_matrix(matrixA,m);
    release_matrix(matrixB,o);
    release_matrix(matrixC,m);
    free(hostA);
    free(hostB);
    free(hostC);
    //Deallocation in GPU
    cuda_check(cudaFree(DmatrixA),"MatrixA");
    cuda_check(cudaFree(DmatrixB),"MatrixB");
    cuda_check(cudaFree(DmatrixC),"MatrixC");
    cuda_check(cudaFree(Dm),"m");
    //Closing all files
    fclose(fp1);
    fclose(fp2);
    //Buffered output is flushed on close, so a failure here means the result file is incomplete
    if(fclose(fp)!=0)
    {
        fprintf(stderr,"\nError in writing the result file\n");
        return EXIT_FAILURE;
    }
    return 0;
}//End of main function
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement