Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <string.h>
- #include <stdlib.h>
- #include <sys/time.h>
- #include <time.h>
- #include <math.h>
- #include "evaluate.h"
- #include <omp.h>
- void initialize(double** A, double** B, double** C, int* n);
- void* tune_matrix_multiply();
- void matrix_multiply(double* A, double* B, double* C, int n, void* tunning_params);
- void getUserID(char userID[20],char userName[30]);
- int main(int argc, char *argv[]){
- srand(time(NULL));
- void* tunning_params = tune_matrix_multiply();
- int n;
- double *A, *B, *C, tolerance = pow(10,-7);
- char userID[20];
- char userName[30];
- getUserID(userID,userName);
- if (debug_run(argc, argv,userID,userName)) { /* This is debug run for students to test their code. Do not edit this. */
- initialize(&A, &B, &C, &n);
- double etOpenMPst, etOpenMPsp;
- etOpenMPst = omp_get_wtime();
- matrix_multiply(A, B, C, n, tunning_params); /* Multiply B and C to store the results into A <- B * C */
- etOpenMPsp = omp_get_wtime();
- double total_flops = myCalcFlops(etOpenMPst, etOpenMPsp, n);
- printf("Flops : %lf\n",total_flops);
- int check = check_Correctness(A, B, C, n, tolerance);
- if(check) printf("Correct : true \n");
- else printf("Correct : false \n");
- free(A);
- free(B);
- free(C);
- } else{
- /* This is an evaluation run
- * and it will be used for evaluation of student answers
- * so dont wory about the code in the else block.
- */
- int sizes[4];
- find_matrix_sizes(argc, argv, sizes);
- double limit = 100.0, flops[4]= {0.0,0.0,0.0,0.0};
- int s, correctness[4] = {0,0,0,0};
- for(s=0;s<4;s++){
- int n0=sizes[s];
- //allocate memory of A, B, C
- A = (double *) malloc(n0*n0*sizeof(double));
- B = (double *) malloc(n0*n0*sizeof(double));
- C = (double *) malloc(n0*n0*sizeof(double));
- int j, i, k;
- for(j=0;j<5;j++){
- // Randomly initializing elements B & C from 0-limit
- /* B is a Lower Triangular Matrix and C is a n/16 banded Matrix */
- for(i=0;i<n0;i++){
- for(k=0;k<n0;k++){
- *(A + n0*i + k) = 0.0;
- if (k>i) *(B + n0*i + k) = 0.0;
- else *(B + n0*i + k) = (double)rand()/(double)(RAND_MAX/limit);
- if (k>i+n0/16-1 || i>k+n0/16-1) *(C + n0*i + k) = 0.0;
- else *(C + n0*i + k) = (double)rand()/(double)(RAND_MAX/limit);
- }
- }
- if(j==0){
- double etOpenMPst, etOpenMPsp;
- etOpenMPst = omp_get_wtime();
- matrix_multiply(A, B, C, n0, tunning_params);
- etOpenMPsp = omp_get_wtime();
- }
- else{
- double etOpenMPst, etOpenMPsp;
- etOpenMPst = omp_get_wtime();
- matrix_multiply(A, B, C, n0, tunning_params);
- etOpenMPsp = omp_get_wtime();
- flops[s] += myCalcFlops(etOpenMPst,etOpenMPsp, n0);
- int check = check_Correctness(A, B, C, n0, tolerance);
- if(check==1){
- correctness[s]++;
- }
- }
- }
- flops[s] = flops[s]/4.0;
- free(A);
- free(B);
- free(C);
- }
- double overall_flop_score = (flops[0] + 2 * flops[1] + 3 * flops[2] + 4 * flops[3])/10;
- int q,overall_correctness = 1;
- for(q=0;q<4;q++){
- if(correctness[q] != 4){
- overall_correctness = 0;
- }
- }
- printf("UserName:%s\tuserID:%s\n",userName,userID);
- printf("Size\tcorrectness\tflops\n");
- printf("small \t\t%d\t%lf\n",correctness[0],flops[0]);
- printf("medium \t\t%d\t%lf\n",correctness[1],flops[1]);
- printf("large \t\t%d\t%lf\n",correctness[2],flops[2]);
- printf("extralarge \t%d\t%lf\n",correctness[3],flops[3]);
- printf("\n");
- printf("Score = %lf\n",overall_correctness * overall_flop_score);
- FILE* outputfile=fopen("output.csv","w");
- fprintf(outputfile, "\"%s\",\"%s\",%d,%lf,%d,%lf,%d,%lf,%d,%lf,%lf",userID,userName,correctness[0],flops[0],correctness[1],flops[1],correctness[2],flops[2],correctness[3],flops[3],overall_correctness * overall_flop_score);
- fclose(outputfile);
- FILE* outputfile2=fopen("output2.csv","w");
- fprintf(outputfile, "\"%s\",%d,%lf,%d,%lf,%d,%lf,%d,%lf,%lf",userName,correctness[0],flops[0],correctness[1],flops[1],correctness[2],flops[2],correctness[3],flops[3],overall_correctness * overall_flop_score);
- fclose(outputfile2);
- }
- return 0;
- }
- //Add only code in the functions
- void getUserID(char userID[20],char userName[30]){
- //Enter your moodle userID here i.e. your entry number
- strcpy(userID,"jca142468");
- //Enter your name here
- strcpy(userName,"Rohit Katariya");
- }
- void initialize(double** A, double** B, double** C, int* n){
- /*
- Initialize matrix A , B & C where each of them is a square matrix of size n*n
- Try 4 sizes of matrix:
- a) small - should fit in the L1 cache
- b) medium - should fit in the L2 cache
- b) large - should fit in the L3 cache
- b) extra-large - should fit in the memory
- */
- *n=1024;
- *A = (double *) malloc(*n**n*sizeof(double));
- *B = (double *) malloc(*n**n*sizeof(double));
- *C = (double *) malloc(*n**n*sizeof(double));
- int i, k;
- for(i=0;i<*n;i++){
- for(k=0;k<*n;k++){
- *(*A + *n*i + k) = 0.0;
- if(k>i) *(*B + *n*i + k) = 0.0;
- else *(*B + *n*i + k) = (double)rand()/(double)(RAND_MAX/100.0);
- if(k>i+*n/16-1 || i>k+*n/16-1) *(*C + *n*i + k) = 0.0;
- else *(*C + *n*i + k) = (double)rand()/(double)(RAND_MAX/100.0);
- }
- }
- }
- #include <assert.h>
- void* tune_matrix_multiply(){
- /* Write code to test various methods to optimize the matrix multiplication
- and store these parameters in a data structure(for ex, array of values ). This
- data structure can be returned by this function and will be passed to the
- matrix multiply function so that it can efficiently multiply the matrices.
- */
- }
- #define MAXT(A,B) ({ __typeof__ (A) a = (A); __typeof__ (B) b = (B); a > b ? a : b; })
- #define MINT(A,B) ({ __typeof__ (A) a = (A); __typeof__ (B) b = (B); a > b ? b : a; })
- #define maX(A,B) ({ int a = (A); int b = (B); a > b ? a : b; })
- #define miN(A,B) ({ int a = (A); int b = (B); a > b ? b : a; })
- #define MAX(A,B) ({ ((A) > (B)) ? A : B; })
- #define MIN(A,B) ({ ((A) > (B)) ? B : A; })
- void matrix_multiply(double* A, double* B, double* C, int n, void* tunning_params){
- // Multiply matrix B & C and store the results in A
- int i,j,k,procsize,threadsize;
- // double *pA, *pB, *pC;
- // int chunksize;
- // chunksize = (n==256)?2:1;
- // omp_set_num_threads(2);
- // Change omp_proc_bind
- procsize = omp_get_num_procs();
- threadsize = omp_get_max_threads();
- if(n&32) {
- // #pragma omp for collapse(2) schedule(static, 2)
- // for(i=0;i<n;i++){
- // for(j=0;j<n;j++){
- // A[n*i+j] = 0;
- // int lim1 = MAX(0,j-n/16+1);
- // int lim2 = MIN(n, j+n/16);
- // register double temp=0;
- // for(k=lim1;k<lim2;k++){
- // temp += B[n*i+k]*C[n*k+j];
- // }
- // A[n*i+j] = temp;
- // }
- // }
- for(i = 0 ; i < n ; i++){
- int l, p ;
- for(j = 0 , l = MIN(i+2,n) , p = i<<5 ; j < i + 1 ; j++){
- A[p+j] += B[p + j] * C[(j<<5) + j];
- if(i && j < i) A[p + j] += B[p + j + 1] * C[((j+1)<<5) + j];
- if(j + 1 && j + 1 < l) A[p + j + 1] += B[p + j]*C[(j<<5) + j + 1];
- }
- }
- return;
- // for(i=0;i<n;i++){
- // double *pA = A + (n*i);
- // for(k=0;k< (i+1);k++){
- // double *pC = C + n*k;
- // int lim2 = MIN(n, k+(n>>4));
- // int lim1 = MAX(0,k-(n>>4)+1);
- // // register double temp = B[n*i+k];
- // for(j=lim1;j<lim2;j++){
- // *(pA+j) += B[n*i+k]**(pC+j);
- // }
- // }
- // }
- // return;
- }
- if(n&64) {
- int l , q , p;
- for(i = 0 ; i < n ; i++){
- for(j = 0 , l = MIN(i+2,n) ,q = MIN(i+3,n) , p = i<<6 ; j < i + 1 ; j++){
- A[p+j] += B[p + j] * C[(j<<6) + j];
- if(i && j < i) A[p + j] += B[p + j + 1] * C[((j+1)<<6) + j];
- if(i-1 && j < i-1) A[p + j] += B[p + j + 2] * C[((j+2)<<6) + j];
- if(i-2 && j < i-2) A[p + j] += B[p + j + 3] * C[((j+3)<<6) + j];
- if(j + 1 && j + 1 < l) A[p + j + 1] += B[p + j]*C[(j<<6) + j + 1];
- if(j + 2 && j + 2 < q) A[p + j + 2] += B[p + j]*C[(j<<6) + j + 2];
- if(j + 3 && j + 3 < MIN(i+4,n)) A[p + j + 3] += B[p + j]*C[(j<<6) + j + 3];
- }
- }
- return;
- // #pragma omp parallel for num_threads(2) default(none) private(k,j) shared(n,A,B,C)
- // for(i=0;i<n;i++){
- // double *pA = A + (n*i);
- // for(k=0;k< (i+1);k++){
- // double *pC = C + n*k;
- // int lim2 = MIN(n, k+(n>>4));
- // int lim1 = MAX(0,k-(n>>4)+1);
- // // register double temp = B[n*i+k];
- // for(j=lim1;j<lim2;j++){
- // *(pA+j) += B[n*i+k]**(pC+j);
- // }
- // }
- // }
- // return;
- }
- // #pragma omp for schedule(guided)
- // for(i=0;i<n;i++){
- // for(j=i+1;j<n;j++){
- // double temp=B[n*i+j];
- // B[n*i+j] = B[n*j+i];
- // B[n*j+i] = temp;
- // }
- // }
- if(n&256){
- // #pragma omp parallel default(none) private(i,k,j) shared(n,A,B,C)
- // #pragma omp for schedule(dynamic, 4)
- #pragma omp parallel for default(none) schedule(dynamic,4) private(k,j) shared(n,A,B,C) num_threads(7)
- for(i=n-1 ; i>=0 ; i--){
- double *pA = A+(n*i);
- for(k=0;k<(i+1);k++){
- double *pC = C+n*k;
- // double *pB = B+n*i+k
- int lim2 = MIN(n, k+(n>>4));
- int lim1 = MAX(0,k-(n>>4)+1);
- register double temp = B[n*i+k];
- for(j=lim1;j<lim2;j++){
- *(pA+j) += temp**(pC+j);
- }
- }
- }
- return;
- }
- // double *pA = A+(n*i);
- // double *pB = B+(n*i);
- #pragma omp parallel for default(none) schedule(dynamic) private(i,k,j) shared(n,A,B,C) num_threads(procsize+1)
- for(i=0; i<n; i++){
- double *pA = A+(n*i);
- for(k=0;k<(i+1);k++){
- double *pC = C+n*k;
- int lim2 = MIN(n, k+(n>>4));
- int lim1 = MAX(0, k-(n>>4)+1);
- register double temp=B[n*i+k];
- for(j=lim1;j<lim2;j++){
- *(pA+j) += temp**(pC+j);
- }
- }
- // int ii = i+n/2;
- // pA = A + (n*(ii));
- // for(k=0;k<(ii+1);k++){
- // double *pC = C+n*k;
- // int lim2 = MIN(n, k+(n>>4));
- // int lim1 = MAX(0, k-(n>>4)+1);
- // register double temp=B[n*ii+k];
- // for(j=lim1;j<lim2;j++){
- // *(pA+j) += temp**(pC+j);
- // }
- // }
- }
- return;
- // Basic Parallel
- // int i=0,j=0,k=0;
- // // int temp=0;
- // // printf("Number of procs %d\n",omp_get_num_procs());
- // // omp_set_num_threads(omp_get_num_procs());
- // #pragma omp parallel for private(j)
- // for(i=0;i<n;i++){
- // for(k=0;k<n;k++){
- // for(j=0;j<n;j++){
- // A[n*i+j] += B[n*i+k]*C[n*k+j];
- // }
- // }
- // }
- /*
- Version 2 - 1.65 Gflops
- int i=0,j=0,k=0;
- int chunk=10;
- // int temp=0;
- // printf("Number of procs %d\n",omp_get_num_procs());
- // omp_set_num_threads(omp_get_num_procs());
- #pragma omp parallel default(none) shared(A,B,C,n,chunk) private(i,j,k)
- {
- #pragma omp for collapse(2)
- for(i=0;i<n;i++){
- for(j=0;j<n;j++){
- // A[n*i+j] = 0;
- double temp=0;
- for(k=0;k<n;k++){
- temp += B[n*i+k]*C[n*k+j];
- }
- A[n*i+j]=temp;
- }
- }
- }
- */
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement