Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- void axpy (int n, float alpha, const float *x,int incx, float *y, int incy)
- {
- }
- void hello(char *a, int *b)
- {
- a[threadIdx.x] += b[threadIdx.x];
- }
- int block_size = 4;
- int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
- 8DnDBBNu
- int threadsPerBlock = 256;
- int blocksPerGrid =(N + threadsPerBlock - 1) / threadsPerBlock;
- // sparse matrix vector product: d_Ax = A * d_x
- cusparseScsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, N, N, nz, &alpha, descr, d_val, d_row, d_col, d_x, &beta, d_Ax); // PODMIEN FUNCKJE (ZADANIE-II)
- // vectorMatVec<<<blocksPerGrid, threadsPerBlock>>>(d_Ax, d_x, d_row, d_col, d_val, N);
- //azpy: d_r = d_r + alpham1 * d_Ax
- //cublasSaxpy(cublasHandle, N, &alpham1, d_Ax, 1, d_r, 1); // PODMIEN FUNCKJE (ZADANIE-I)
- vectorAxpy<<<blocksPerGrid, threadsPerBlock>>>(d_r,d_Ax,alpham1,N);
- //dot: r1 = d_r * d_r
- cublasStatus = cublasSdot(cublasHandle, N, d_r, 1, d_r, 1, &r1); // PODMIEN FUNCKJE (ZADANIE-III)
- k = 1;
- printf("My functions\n"
- while (r1 > tol*tol && k <= max_iter)
- {
- if (k > 1)
- {
- b = r1 / r0;
- //scal: d_p = b * d_p
- //cublasStatus = cublasSscal(cublasHandle, N, &b, d_p, 1); // PODMIEN FUNCKJE (ZADANIE-I)
- vectorScal<<<blocksPerGrid, threadsPerBlock>>>(d_p, b, N);
- //axpy: d_p = d_p + alpha * d_r
- cublasStatus =vectorAxpy<<<blocksPerGrid, threadsPerBlock>>>(d_p,d_r, alpham1,N); // PODMIEN FUNCKJE (ZADANIE-I)
- // vectorAxpy<<<blocksPerGrid, threadsPerBlock>>>(d_p,d_r, alpha,N);
- }
- else
- {
- //cpy: d_p = d_r
- //cublasStatus = cublasScopy(cublasHandle, N, d_r, 1, d_p, 1); // PODMIEN FUNCKJE (ZADANIE-I)
- vectorCopy<<<blocksPerGrid, threadsPerBlock>>>(d_p,d_r, N);
- }
- //sparse matrix-vector product: d_Ax = A * d_p
- cusparseScsrmv(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, N, nz, &alpha, descr, d_val, d_row, d_col, d_p, &beta, d_Ax); // PODMIEN FUNCKJE (ZADANIE-II)
- cublasStatus = cublasSdot(cublasHandle, N, d_p, 1, d_Ax, 1, &dot); // PODMIEN FUNCKJE (ZADANIE-III)
- a = r1 / dot;
- //axpy: d_x = d_x + a*d_p
- // cublasStatus = vectorAxpy<<<blocksPerGrid, threadsPerBlock>>>(d_x,d_p,a,N); // PODMIEN FUNCKJE (ZADANIE-I)
- vectorAxpy<<<blocksPerGrid, threadsPerBlock>>>(d_x,d_p,a,N);
- na = -a;
- //axpy: d_r = d_r + na * d_Ax
- //cublasStatus = vectorAxpy<<<blocksPerGrid, threadsPerBlock>>>(d_r,d_Ax, na ,N); // PODMIEN FUNCKJE (ZADANIE-I)
- vectorAxpy<<<blocksPerGrid, threadsPerBlock>>>(d_r,d_Ax, na ,N);
- r0 = r1;
- //dot: r1 = d_r * d_r
- cublasStatus = cublasSdot(cublasHandle, N, d_r, 1, d_r, 1, &r1); // PODMIEN FUNCKJE (ZADANIE-III)
- cudaThreadSynchronize();
- printf("iteration = %3d, residual = %e\n", k, sqrt(r1));
- k++;
- }
- cudaMemcpy(x, d_x, N*sizeof(float), cudaMemcpyDeviceToHost);
- float rsum, diff, err = 0.0;
- for (int i = 0; i < N; i++)
- {
- rsum = 0.0;
- for (int j = I[i]; j < I[i+1]; j++)
- {
- rsum += val[j]*x[J[j]];
- }
- diff = fabs(rsum - rhs[i]);
- if (diff > err)
- {
- err = diff;
- }
- }
- cusparseDestroy(cusparseHandle);
- cublasDestroy(cublasHandle);
- free(I);
- free(J);
- free(val);
- free(x);
- free(rhs);
- cudaFree(d_col);
- cudaFree(d_row);
- cudaFree(d_val);
- cudaFree(d_x);
- cudaFree(d_r);
- cudaFree(d_p);
- cudaFree(d_Ax);
- cudaDeviceReset();
- printf("Test Summary: Error amount = %e\n", err);
- //exit((k <= max_iter) ? 0 : 1);
- }
- int main(int argc, char **argv)
- {
- //int N = 1e6;//1 << 20;
- //int N = 256 * (1<<10) -10 ; //1e6;//1 << 20;
- int N = 1e5;
- int M = N;
- cgs_basic(argc, argv, N, M);
- cgs_TODO(argc, argv, N, M);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement