P?GESV
使用部分选主元的LU分解算法求解线性方程组Ax=B,其中A是N*N的分布式子矩阵,B是具有NRHS个向量的右端项矩阵。
接口定义
C Interface:
void psgesv_(const int *n, const int *nrhs, float *a, const int *ia, const int *ja, const int *desca, int *ipiv, float *b, const int *ib, const int *jb, const int *descb, int *info);
void pdgesv_(const int *n, const int *nrhs, double *a, const int *ia, const int *ja, const int *desca, int *ipiv, double *b, const int *ib, const int *jb, const int *descb, int *info);
void pcgesv_(const int *n, const int *nrhs, float _Complex *a, const int *ia, const int *ja, const int *desca, int *ipiv, float _Complex *b, const int *ib, const int *jb, const int *descb, int *info);
void pzgesv_(const int *n, const int *nrhs, double _Complex *a, const int *ia, const int *ja, const int *desca, int *ipiv, double _Complex *b, const int *ib, const int *jb, const int *descb, int *info);
Fortran Interface:
PSGESV(n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
PDGESV(n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
PCGESV(n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
PZGESV(n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
参数
参数 |
类型 |
范围 |
说明 |
输入/输出 |
---|---|---|---|---|
n |
整型 |
全局 |
矩阵的行数和列数。 |
输入 |
nrhs |
整型 |
全局 |
右侧的数量,即分布式子矩阵子(B)和X的列数。 |
输入 |
a |
|
本地 |
|
输入,输出 |
ia |
整型 |
全局 |
子矩阵A在全局矩阵中的行索引。 |
输入 |
ja |
整型 |
全局 |
子矩阵A在全局矩阵中的列索引。 |
输入 |
desca |
整型数组 |
本地,全局 |
分布式矩阵A的矩阵描述符。 |
输入 |
ipiv |
整型 |
本地 |
包含了主元及交换信息。 |
输出 |
b |
|
本地 |
|
输入,输出 |
ib |
整型 |
全局 |
子矩阵B在全局矩阵中的行索引。 |
输入 |
jb |
整型 |
全局 |
子矩阵B在全局矩阵中的列索引。 |
输入 |
descb |
整型数组 |
本地,全局 |
分布式矩阵B的矩阵描述符。 |
输入 |
info |
整型 |
全局 |
|
输出 |
依赖
#include <kscalapack.h>
示例
int izero=0; int ione=1; int myrank_mpi, nprocs_mpi; MPI_Init( &argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); int n = 8; // (Global) Matrix size int nprow = 2; // Number of row procs int npcol = 2; // Number of column procs int nb = 4; // (Global) Block size char uplo='L'; // Matrix is lower triangular char layout='R'; // Block cyclic, Row major processor mapping int nrhs = 1; printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); if(argc > 1) { n = atoi(argv[1]); } if(argc > 2) { nb = atoi(argv[2]); } if(argc > 3) { nprow = atoi(argv[3]); } if(argc > 4) { npcol = atoi(argv[4]); } assert(nprow * npcol == nprocs_mpi); // Initialize BLACS int iam, nprocs; int zero = 0; int ictxt, myrow, mycol; blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size blacs_get_(&zero, &zero, &ictxt ); // -> Create context blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) // Compute the size of the local matrices int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A int mpB = numroc_( &n, &nb, &myrow, &izero, &nprow ); ofstream f1; string filename = to_string(myrank_mpi)+"Abegin.dat"; f1.open(filename); double *A; A = (double *)calloc(mpA*nqA,sizeof(double)) ; if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } int k = 0; for (int j = 0; j < nqA; j++) { // local col int l_j = j / nb; // which block int x_j = j % nb; // where within that block int J = (l_j * npcol + mycol) * nb + x_j; // global col for (int i = 0; i < mpA; i++) { // local row int l_i = i / nb; // which block int x_i = i % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row assert(I < n); assert(J < n); if(I == J) { A[k] = 2*n + 1.5 + (rand())%10; } else { A[k] = i + j + rand()% 10; } //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); f1 <<I << " "<<J << " " << A[k]<<endl; k++; } } f1.close(); //creat descriptor int descA[9]; int info=0; int ipiv[10] = {0}; int lddB = mpB > 1 ? mpB : 1; descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddB, &info); if(info != 0) { printf("Error in descinit, info = %d\n", info); } filename = to_string(myrank_mpi)+"Bbegin.dat"; f1.open(filename); double *B; B = (double *)calloc(mpA,sizeof(double)) ; if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } k = 0; for (int j = 0; j < mpB; j++) { // local col int l_i = j / nb; // which block int x_i = j % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row B[j] = j + 1.5 + (rand())%10; f1 <<I << " " << B[j]<<endl; } f1.close(); int descB[9]; int nbrhs=1; descinit_( descB, &n, &nrhs, &nb, &nbrhs, &izero, &izero, &ictxt, &lddB, &info); // nbrhs need to be revised when nrhs!=1 //run pdpotrf_ and time double MPIt1 = MPI_Wtime(); printf("[%dx%d] Starting \n", myrow, mycol); pdgesv_(&n, &nrhs, A, &ione, &ione, descA, ipiv, B, &ione, &ione, descB, &info); if (info != 0) { printf("Error in caculate, info = %d\n", info); } filename = to_string(myrank_mpi)+"Bend.dat"; f1.open(filename); for (int j = 0; j < mpB; j++) { int l_i = j / nb; // which block int x_i = j % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row f1 <<I<< " " << B[j]<<endl; } f1.close(); double MPIt2 = MPI_Wtime(); printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); filename = to_string(myrank_mpi)+"end.dat"; f1.open(filename); k = 0; for (int j = 0; j < nqA; j++) { // local col int l_j = j / nb; // which block int x_j = j % nb; // where within that block int J = (l_j * npcol + mycol) * nb + x_j; // global col for (int i = 0; i < mpA; i++) { // local row int l_i = i / nb; // which block int x_i = i % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row assert(I < n); assert(J < n); f1 <<I << " "<<J << " " << A[k]<<endl; k++; } } f1.close(); free(A); /* origin A: [[20.500000 4.000000 11.000000 3.000000 3.000000 4.000000 11.000000 3.000000] [ 7.000000 22.500000 4.000000 13.000000 7.000000 7.000000 4.000000 13.000000] [ 9.000000 9.000000 19.500000 8.000000 9.000000 9.000000 6.000000 8.000000] [ 8.000000 6.000000 12.000000 23.500000 8.000000 6.000000 12.000000 12.000000] [ 3.000000 4.000000 11.000000 3.000000 20.500000 4.000000 11.000000 3.000000] [ 7.000000 7.000000 4.000000 13.000000 7.000000 22.500000 4.000000 13.000000] [ 9.000000 9.000000 6.000000 8.000000 9.000000 9.000000 19.500000 8.000000] [ 8.000000 6.000000 12.000000 12.000000 8.000000 6.000000 12.000000 23.500000]] origin B: [[ 1.500000] [ 8.500000] [ 5.500000] [10.500000] [ 1.500000] [ 8.500000] [ 5.500000] [10.500000]] X: [[-0.073846] [ 0.069101] [ 0.047280] [ 0.273735] [-0.073846] [ 0.069101] [ 0.047280] [ 0.273735]] */