Assignment 6 Parallel FFT

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <mpi.h>
#include "evaluate.h"

void fftCompute(int size, int rank, int NPerProc, float *inputFFTPerProc, float *outputFFTPerProc,  float *tempBuffer);

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int evalFlag = atoi(argv[4]);
    if(evalFlag == 1){

        int N = atoi(argv[1]);
        int NPerProc = N/size;
        float *inputFFTPerProc = malloc(NPerProc*sizeof(float));
        float *outputFFTPerProc = malloc(2*NPerProc*sizeof(float));
        float *tempBuffer = malloc(2*NPerProc*sizeof(float));   // Only use this space for temporary Storage

        // Read part of the input file corresponding to the rank of the processor
        myInit(argc, argv, N, NPerProc, rank, inputFFTPerProc);

        fftCompute(size, rank, NPerProc, inputFFTPerProc, outputFFTPerProc, tempBuffer);

        double tolerance = pow(10.0,-5);
        int check = checkCorrectness(rank, N, NPerProc, outputFFTPerProc, tolerance);

        if(rank != 0)
            MPI_Send(&check,1,MPI_INT,0,0,MPI_COMM_WORLD);

        // Check overall output of the FFT computation
        if(rank == 0){
            int proc, correctCount=0;
            for(proc=1;proc<size;proc++){
                int check1;
                MPI_Recv(&check1, 1, MPI_INT, proc, 0, MPI_COMM_WORLD,MPI_STATUS_IGNORE);
                correctCount = correctCount+check1;
            }
            correctCount = correctCount+check;
            if (correctCount == size) printf("%s\n","Correct");
            else printf("%s\n","Incorrect");
        }
    }
    else{
        //////////////////////////////////////////////////////////////////////
        /////////  Optional code for defining your own data and test /////////
        // NOTE : You will to unable to use checkCorrectness(..) function to
        //        check the correctness of your implementation

        printf("Run Part\n");
        //////////////////////////////////////////////////////////////////////
    }
    MPI_Finalize();
    return 0;
}

typedef struct complex {
    float a,b;
}complex;

complex compa(complex a, complex b){
    complex ans;
    ans.a = a.a + b.a;
    ans.b = a.b + b.b;
    return ans;
}

complex comps(complex a, complex b){
    complex ans;
    ans.a = a.a - b.a;
    ans.b = a.b - b.b;
    return ans;
}

complex compm(complex c, complex d){
    complex ans;
    ans.a = c.a*d.a - c.b*d.b;
    ans.b = c.a*d.b + c.b*d.a;
    return ans;
}


complex nthroot(int k, int n){
    complex ans;
    float arg = (-2.0 * (M_PI) * k )/ (1.0*n);
    ans.a = cos(arg);
    ans.b = sin(arg);
    return ans;
}

void dftrecur(complex *a, complex* b, int n){
    if(n==1){return;}
    complex *even;
    complex *odd;
    even = b;
    odd = b + n/2;

    int it;
    for(it=0;it<(n/2);it++){
        even[it] = a[2*it];
        odd[it] = a[2*it+1];
    }

    memcpy(a,even, n/2 * sizeof(complex));
    memcpy(a+n/2, odd, n/2 * sizeof(complex));
    dftrecur(a,b,n/2);
    dftrecur(a + n/2,b + n/2,n/2);

    for(it=0;it<n;it++){
        complex omega = nthroot(it%(n/2),n);
        if(it<n/2){
            complex temp1 = compm(omega, a[it+(n/2)]);
            b[it] = compa(a[it],temp1);
        }
        else{
            complex temp1 = compm(omega, a[it]);
            b[it] = comps(a[it-n/2],temp1);
        }
    }

    memcpy(a,b,n*sizeof(complex));
}

void ordering(int* arr , int n){
    if(n<=2){return;}
    int even[n/2],odd[n/2];

    int i=0;
    for(i=0;i<n/2;i++){
        even[i] = arr[2*i];
        odd[i] = arr[2*i+1];
    }

    memcpy(arr,even, n/2 * sizeof(int));
    memcpy(arr+n/2, odd, n/2 * sizeof(int));

    ordering(arr,n/2);
    ordering(arr+n/2,n/2);
    return;
}

void reorder(int n, float* inputFFTPerProc, float* outputPerProc, float* tempBuffer){
    // printf("Size%d\n",n);
    if(n==2){
        outputPerProc[0] = inputFFTPerProc[0];
        outputPerProc[1] = inputFFTPerProc[1];
        return;
    }
    if(n==1){
        outputPerProc[0] = inputFFTPerProc[0];
        return;
    }
    memcpy(tempBuffer, inputFFTPerProc, n*sizeof(float));
    int i;
    for(i=0;i<n/2;i++){
        inputFFTPerProc[i] = tempBuffer[2*i];
        inputFFTPerProc[i+n/2] = tempBuffer[2*i+1];
    }
    reorder(n/2,inputFFTPerProc, outputPerProc, tempBuffer);
    reorder(n/2,inputFFTPerProc+n/2, outputPerProc+n/2, tempBuffer);
}

void fftCompute(int size, int rank, int NPerProc, float *inputFFTPerProc, float *outputPerProc,  float *tempBuffer){

// Please add your code for FFT Computation within the provided section //
// Store your output in the outputArray with even index for real and odd index for
// imaginary part
////////////////////////////////////////////////////////////////////////////////////////////


    // Code for parallel Execution //
    int it, j;
    int arr[size];
    for(it=0;it<size;it++){
        arr[it] = it;
    }
    ordering(arr,size);
    int numiter = NPerProc/size;

    // Dividing the data in order of processor;
    for(it=0;it<numiter;it++){
        for(j=0;j<size;j++){
            int p = numiter*arr[j] + it;
            outputPerProc[2*p] = inputFFTPerProc[it*size + j];
            outputPerProc[2*p+1] = 0.0;
        }
    }

    MPI_Alltoall(MPI_IN_PLACE, 2*(NPerProc/size), MPI_FLOAT, outputPerProc, 2*NPerProc/size, MPI_FLOAT, MPI_COMM_WORLD);
    //Solve serially for the data on same processor
    dftrecur(outputPerProc, tempBuffer, NPerProc);

    //Now combine the output in log(size) steps like the binary tree
    int numproc=1;
    MPI_Status stat;
    complex omega;
    complex temp1, temp2;
    while(numproc!=size){
        int Nprime = numproc*2*NPerProc;
        //Two cases for the left and right child
        if(rank%(2*numproc) >=numproc){
            MPI_Recv(tempBuffer, 2*NPerProc, MPI_FLOAT, rank-numproc, 0, MPI_COMM_WORLD, &stat);
            MPI_Send(outputPerProc, 2*NPerProc, MPI_FLOAT, rank-numproc, 0, MPI_COMM_WORLD);
            for(it=0;it<NPerProc;it++){
                omega = nthroot(((rank-numproc)%numproc)*NPerProc+it,  Nprime);
                temp2 = (complex){outputPerProc[2*it], outputPerProc[2*it+1]};
                temp1 = (complex){tempBuffer[2*it], tempBuffer[2*it+1]};
                temp2 = compm(omega, temp2);
                temp1 = comps(temp1,temp2);
                outputPerProc[2*it] = temp1.a;
                outputPerProc[2*it+1] = temp1.b;
            }
        }
        else{
            MPI_Send(outputPerProc, 2*NPerProc, MPI_FLOAT, rank+numproc, 0, MPI_COMM_WORLD);
            MPI_Recv(tempBuffer, 2*NPerProc, MPI_FLOAT, rank+numproc, 0, MPI_COMM_WORLD, &stat);
            for(it=0;it<NPerProc;it++){
                omega = nthroot((rank%numproc)*NPerProc+it, Nprime);
                temp1 = (complex){outputPerProc[2*it], outputPerProc[2*it+1]};
                temp2 = (complex){tempBuffer[2*it], tempBuffer[2*it+1]};
                temp2 = compm(omega, temp2);
                temp1 = compa(temp1,temp2);
                outputPerProc[2*it] = temp1.a;
                outputPerProc[2*it+1] = temp1.b;
            }
        }
        numproc*=2;
    }


    ///////// Serial Code ////////////
    // float *compOutput;
    // float *compOutput2;
    // if(rank==0){
    //      compOutput = (float *)malloc(2*NPerProc*size*sizeof(float));
    //  compOutput2 = (float *)malloc(2*NPerProc*size*sizeof(float));
    // }
    // MPI_Gather(inputFFTPerProc, NPerProc, MPI_FLOAT, compOutput2, NPerProc, MPI_FLOAT,0, MPI_COMM_WORLD);
    // int i;
    // int n = NPerProc*size;
    // if(rank==0){
    //     for(i=0;i<n;i++){
    //         compOutput[2*i] = compOutput2[i];
    //         compOutput[2*i+1] = 0.0;
    //     }
    // }
    // if(rank==0){
    //     dftrecur(compOutput, compOutput2, n);
    // }
    // MPI_Scatter(compOutput,2*NPerProc, MPI_FLOAT, outputPerProc, 2*NPerProc, MPI_FLOAT, 0, MPI_COMM_WORLD);
    return;


////////////////////////////////////////////////////////////////////////////////////////////
}