Advertisement
pai

Matrix_Multiplication_Using_UDA_With_Dynamically_Matrix

pai
Jul 8th, 2011
429
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 5.89 KB | None | 0 0
  1. /*
  2.     Matrix multiplication using dynamically allocated matrices - using the GPU
  3.  
  4.     matrixA m*n
  5.     matrixB o*p
  6.  
  7.     Method
  8.         1. Allocate memory for the matrices a, b, and c in CUDA
  9.         2. Copy a and b to CUDA memory
  10.         3. Set up the launch parameters
  11.         4. Call the kernel with the parameters and thread configuration
  12.         5. Define the kernel
  13.         6. Copy back the result to cpu memory
  14.         7. Write the result to file
  15.        
  16.  
  17. */
  18.  
  19. #include<stdio.h>
  20. #include<stdlib.h>
  21. #include<time.h>
  /*
   * Allocate an m x n matrix of long int.
   *
   * The matrix is a table of m row pointers; every row is its own heap
   * allocation holding n elements, so element (i, j) is matrix[i][j] and the
   * rows are NOT contiguous with each other.
   *
   * Parameters:
   *   m - number of rows    (must not be negative)
   *   n - number of columns (must not be negative)
   *
   * Returns the row-pointer table. Every element starts at zero. To release
   * the matrix, free each of the m rows and then the table itself.
   *
   * On a negative dimension or an allocation failure an error is printed to
   * stderr and the process exits with EXIT_FAILURE.
   */
  long int ** alloc_matrix(long int m,long int n)
  {
      long int **matrix1=NULL;
      long int i;
      size_t row_count,col_count;

      if(m<0||n<0)
      {
          fprintf(stderr,"\nError in allocating memory: negative dimension");
          exit(EXIT_FAILURE);
      }

      /* Request at least one element so that a NULL result always means a
         real allocation failure (a zero-size request may legally return NULL). */
      row_count=(m>0)?(size_t)m:1;
      col_count=(n>0)?(size_t)n:1;

      /* calloc checks the count*size multiplication for overflow and
         zero-fills the storage. */
      matrix1=(long int **)calloc(row_count,sizeof(long int *));
      if(matrix1==NULL)
      {
          fprintf(stderr,"\nError in allocating memory");
          exit(EXIT_FAILURE);
      }

      for(i=0;i<m;i++)
      {
          matrix1[i]=(long int *)calloc(col_count,sizeof(long int));
          if(matrix1[i]==NULL)
          {
              fprintf(stderr,"\nError in allocating memory");
              exit(EXIT_FAILURE);
          }
      }
      return (matrix1);
  }//End of matrix allocation function
  47.  
  /*
   * Read an x-by-y matrix of whitespace-separated integers from a stream.
   *
   * Parameters:
   *   fp     - stream open for reading, positioned at the first element
   *   matrix - table of x row pointers, each row with room for y elements
   *   x      - number of rows to read
   *   y      - number of columns to read
   *
   * Elements are consumed in row order: all of row 0, then row 1, and so on.
   * If an element cannot be parsed (malformed text or premature end of
   * input) an error naming the element is printed to stderr and the process
   * exits with EXIT_FAILURE, instead of continuing with an uninitialised
   * value.
   */
  void read_matrix(FILE *fp,long int **matrix,long int x,long int y)
  {
      long int i,j;

      for(i=0;i<x;i++)
      {
          for(j=0;j<y;j++)
          {
              /* fscanf returns the number of converted items; anything
                 other than 1 means this element was not read. */
              if(fscanf(fp,"%ld",&matrix[i][j])!=1)
              {
                  fprintf(stderr,"\nError in reading matrix element (%ld,%ld)\n",i,j);
                  exit(EXIT_FAILURE);
              }
          }
      }
  }//End of read_matrix function
  65.  
  /*
   * Write an x-by-y matrix to standard output.
   *
   * Each element is printed with "%ld" followed by a tab; a newline ends
   * every row. Nothing is printed when x is not positive.
   */
  void print_matrix(long int **matrix,long int x,long int y)
  {
      long int row=0;

      while(row<x)
      {
          long int col=0;

          while(col<y)
          {
              printf("%ld\t",matrix[row][col]);
              ++col;
          }
          printf("\n");
          ++row;
      }
  }//End of print function
  79.  
  80.  
  /*
   * Write an x-by-y matrix to the stream fp.
   *
   * Each element is written with "%ld" followed by a single space; a newline
   * ends every row. Nothing is written when x is not positive.
   */
  void file_print(FILE *fp,long int **matrix,long int x,long int y)
  {
      long int row=0;

      while(row<x)
      {
          long int col=0;

          while(col<y)
          {
              fprintf(fp,"%ld ",matrix[row][col]);
              ++col;
          }
          fprintf(fp,"\n");
          ++row;
      }
  }//End of file print function
  94.  
  95. //Multiplies the two matrices and produce the result
  96. /*void matrix_multiply(long int **matrix1,long int **matrix2,long int **matrixR,long int m,long int n,long int p)
  97. {
  98.     long int i,j,k;
  99.    
  100.     for(i=0;i<m;i++)
  101.     {
  102.         for(j=0;j<p;j++)
  103.         {
  104.             matrixR[i][j]=0;
  105.             for(k=0;k<n;k++)
  106.             {
  107.                 matrixR[i][j]+=matrix1[i][k]*matrix2[k][j];
  108.             }
  109.         }
  110.     }
  111.    
  112.  
  113. }//End of matrix multiplication function*/
  114.  
  /*
   * CUDA kernel: C = A * B for square matrices of long int.
   *
   * Parameters (all device pointers):
   *   DmatrixA - left operand,  dim*dim elements, element (i,k) at [i*dim+k]
   *   DmatrixB - right operand, dim*dim elements, element (k,j) at [k*dim+j]
   *   DmatrixC - result,        dim*dim elements, element (i,j) at [i*dim+j]
   *   Dm       - pointer to the single value dim (rows == columns)
   *
   * Launch layout: a 2-D launch in which the x direction covers rows and
   * the y direction covers columns. Each thread produces at most one output
   * element, chosen by its global (block and thread) coordinates, so any
   * grid/block split works as long as gridDim*blockDim is at least dim in
   * both x and y. Threads falling outside the matrix do nothing.
   * No shared memory is used.
   *
   * Precondition: only the one dimension *Dm is known to the kernel, so all
   * three matrices must be dim x dim.
   */
  __global__ void matrix_parallel_mult(int long *DmatrixA, int long *DmatrixB,int long *DmatrixC,int long *Dm)
  {
      const long int dim=*Dm;
      /* Global coordinates: using the block index alone would make every
         thread of a multi-thread block compute the same element. */
      const long int i=(long int)blockIdx.x*blockDim.x+threadIdx.x;
      const long int j=(long int)blockIdx.y*blockDim.y+threadIdx.y;
      long int k;
      long int product=0;

      /* Guard the tail: the launch may cover more threads than elements. */
      if(i>=dim||j>=dim)
      {
          return;
      }

      for(k=0;k<dim;k++)
      {
          product=product+DmatrixA[i*dim+k]*DmatrixB[k*dim+j];
      }
      DmatrixC[i*dim+j]=product;
  }
  /* Report a failed CUDA runtime call on stderr and exit with a failure status. */
  static void check_cuda(cudaError_t status,const char *what)
  {
      if(status!=cudaSuccess)
      {
          fprintf(stderr,"\n%s ::%s\n",what,cudaGetErrorString(status));
          exit(EXIT_FAILURE);
      }
  }

  /* Release a row-pointer matrix: each of its rows, then the pointer table. */
  static void free_matrix(long int **matrix,long int rows)
  {
      long int i;

      if(matrix==NULL)
      {
          return;
      }
      for(i=0;i<rows;i++)
      {
          free(matrix[i]);
      }
      free(matrix);
  }

  /*
   * Main Function
   *
   * Usage: ./executable m n o p Output_file.txt Matrix1.txt Matrix2.txt
   *
   * Reads an m x n matrix A and an o x p matrix B from the two input files,
   * multiplies them on the GPU, prints A, B and the product to the console
   * and writes the product to the output file.
   *
   * The kernel only receives one dimension, so the operands are embedded in
   * zero-filled dim x dim device buffers, dim = max(m, n, p). The padding
   * contributes only zero terms, which leaves the top-left m x p corner of
   * the device result equal to A*B.
   *
   * Host matrices are tables of separately allocated rows, so data is moved
   * to and from the device one row at a time.
   *
   * Returns 0 on success; every error path prints a message and exits with
   * EXIT_FAILURE.
   */
  int main(int argc,char *argv[])
  {
      /* Largest y extent a launch grid may have. */
      const long int max_grid_extent=65535;
      long int m,n,o,p,dim,i;
      long int **matrixA=NULL,**matrixB=NULL,**matrixC=NULL;
      long int *DmatrixA=NULL,*DmatrixB=NULL,*DmatrixC=NULL,*Dm=NULL;
      size_t padded_bytes;
      FILE *fp,*fp1,*fp2;

      if(argc!=8)
      {
          printf("\nThe format is:./executable  Dim._of_A( m n)   Dim._of_B(o p)  Output_file.txt Martix1.txt Matrix2.txt\n");
          exit(EXIT_FAILURE);
      }
      m=atol(argv[1]);
      n=atol(argv[2]);
      o=atol(argv[3]);
      p=atol(argv[4]);

      /* Validate everything before touching files or allocating, so a bad
         command line never truncates an existing output file. */
      if(m<=0||n<=0||o<=0||p<=0)
      {
          fprintf(stderr,"\nMatrix dimensions must be positive integers\n");
          exit(EXIT_FAILURE);
      }
      if(n!=o)
      {
          printf("\nNo. of columns in matrixA and number of rows in matrixB are not equal");
          exit(EXIT_FAILURE);
      }
      dim=m;
      if(n>dim){dim=n;}
      if(p>dim){dim=p;}
      if(dim>max_grid_extent)
      {
          fprintf(stderr,"\nMatrix dimension %ld exceeds the supported maximum of %ld\n",dim,max_grid_extent);
          exit(EXIT_FAILURE);
      }

      /* The inputs are only read, so plain read mode is sufficient. */
      fp1=fopen(argv[6],"r");
      fp2=fopen(argv[7],"r");
      fp=fopen(argv[5],"w+");
      if(fp==NULL||fp1==NULL||fp2==NULL)
      {
          printf("\nError in opening the file:");
          exit(EXIT_FAILURE);
      }

      matrixA=alloc_matrix(m,n);
      matrixB=alloc_matrix(o,p);
      matrixC=alloc_matrix(m,p);//Allocating space for the product matrix

      read_matrix(fp1,matrixA,m,n);// Read the matrixA
      read_matrix(fp2,matrixB,o,p);//Read the matrixB

      print_matrix(matrixA,m,n);
      print_matrix(matrixB,o,p);

      //Allocating space in gpu (square, zero-padded buffers)
      padded_bytes=(size_t)dim*(size_t)dim*sizeof(long int);
      check_cuda(cudaMalloc((void **)&DmatrixA,padded_bytes),"MatrixA");
      check_cuda(cudaMalloc((void **)&DmatrixB,padded_bytes),"MatrixB");
      check_cuda(cudaMalloc((void **)&DmatrixC,padded_bytes),"MatrixC");
      check_cuda(cudaMalloc((void **)&Dm,sizeof(long int)),"m");

      /* All-zero bytes are the integer value 0, so a byte-wise memset
         produces the zero padding. */
      check_cuda(cudaMemset(DmatrixA,0,padded_bytes),"MatrixA");
      check_cuda(cudaMemset(DmatrixB,0,padded_bytes),"MatrixB");

      //Copying essential data needed during Kernal invocation
      /* Row i of a host matrix goes to row i of the padded device matrix,
         whose rows are dim elements apart. */
      for(i=0;i<m;i++)
      {
          check_cuda(cudaMemcpy(DmatrixA+(size_t)i*dim,matrixA[i],(size_t)n*sizeof(long int),cudaMemcpyHostToDevice),"MatrixA");
      }
      for(i=0;i<o;i++)
      {
          check_cuda(cudaMemcpy(DmatrixB+(size_t)i*dim,matrixB[i],(size_t)p*sizeof(long int),cudaMemcpyHostToDevice),"MatrixB");
      }
      check_cuda(cudaMemcpy(Dm,&dim,sizeof(long int),cudaMemcpyHostToDevice),"m");

      //Setting block and grid configuration
      /* One single-thread block per output element: block x selects the
         row and block y the column, matching the kernel's use of blockIdx. */
      dim3 Grid((unsigned int)dim,(unsigned int)dim);
      dim3 Block(1,1);

      //Launching Kernal with appropriate parameters
      matrix_parallel_mult<<<Grid,Block>>>(DmatrixA,DmatrixB,DmatrixC,Dm);
      /* A launch returns no status: configuration errors are reported by
         cudaGetLastError, execution errors by the next synchronising call. */
      check_cuda(cudaGetLastError(),"Kernel launch");
      check_cuda(cudaDeviceSynchronize(),"Kernel execution");

      //Copying Back the Result
      /* Only the top-left m x p corner of the padded result is meaningful. */
      for(i=0;i<m;i++)
      {
          check_cuda(cudaMemcpy(matrixC[i],DmatrixC+(size_t)i*dim,(size_t)p*sizeof(long int),cudaMemcpyDeviceToHost),"MatrixC");
      }

      printf("\nThe productmatrix is:\n");//Print the result of multiplication to the console
      print_matrix(matrixC,m,p);

      file_print(fp,matrixC,m,p);//Write the product to the file specified in argv[5]  Result file

      //Deallocation in CPU
      free_matrix(matrixA,m);
      free_matrix(matrixB,o);
      free_matrix(matrixC,m);
      //Deallocation in GPU
      cudaFree(DmatrixA);
      cudaFree(DmatrixB);
      cudaFree(DmatrixC);
      cudaFree(Dm);
      //Closing all files
      fclose(fp);
      fclose(fp1);
      fclose(fp2);
      return 0;
  }//End of main function
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement