PPOTRI
计算经过Cholesky分解后的矩阵的逆矩阵。
接口定义
C Interface:
void pspotri_(const char *uplo, const int *n, float *a, const int *ia, const int *ja, const int *desca, int *info)
void pdpotri_(const char *uplo, const int *n, double *a, const int *ia, const int *ja, const int *desca, int *info)
void pcpotri_(const char *uplo, const int *n, float _Complex *a, const int *ia, const int *ja, const int *desca, int *info)
void pzpotri_(const char *uplo, const int *n, double _Complex *a, const int *ia, const int *ja, const int *desca, int *info)
Fortran Interface:
PSPOTRI(uplo, n, a, ia, ja, desca, info)
PDPOTRI(uplo, n, a, ia, ja, desca, info)
PCPOTRI(uplo, n, a, ia, ja, desca, info)
PZPOTRI(uplo, n, a, ia, ja, desca, info)
参数
参数 |
类型 |
范围 |
说明 |
输入/输出 |
---|---|---|---|---|
uplo |
字符 |
全局 |
U为上三角矩阵,L为下三角矩阵。 |
输入 |
n |
整型 |
全局 |
要操作的列数,比如子矩阵的列数。 |
输入 |
a |
|
本地 |
是全局实对称或Hermitiand的本地部分,在该接口中,为进行Cholesky分解后的矩阵。
|
输入,输出 |
ia |
整型 |
全局 |
子矩阵在全局矩阵中的行索引。 |
输入 |
ja |
整型 |
全局 |
子矩阵在全局矩阵中的列索引。 |
输入 |
desca |
整型数组 |
本地,全局 |
分布式矩阵A的矩阵描述符。 |
输入 |
info |
整型 |
全局 |
执行结果:
|
输出 |
依赖
#include <kscalapack.h>
示例
int izero=0; int ione=1; int myid, nprocs_mpi; MPI_Init( &argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myid); MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); int n = 8; // (Global) Matrix size int nprow = 2; // Number of row procs int npcol = 2; // Number of column procs int nb = 4; // (Global) Block size char uplo='L'; // Matrix is lower triangular char layout='R'; // Block cyclic, Row major processor mapping printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); if(argc > 1) { n = atoi(argv[1]); } if(argc > 2) { nb = atoi(argv[2]); } if(argc > 3) { nprow = atoi(argv[3]); } if(argc > 4) { npcol = atoi(argv[4]); } assert(nprow * npcol == nprocs_mpi); // Initialize BLACS int iam, nprocs; int zero = 0; int ictxt, myrow, mycol; blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size blacs_get_(&zero, &zero, &ictxt ); // -> Create context blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) ofstream f1; string filename; // Compute the size of the local matrices int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A //allocate and fill the matrices A and B double *A; A = (double *)calloc(mpA*nqA,sizeof(double)) ; if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } int k = 0; for (int j = 0; j < nqA; j++) { // local col int l_j = j / nb; // which block int x_j = j % nb; // where within that block int J = (l_j * npcol + mycol) * nb + x_j; // global col for (int i = 0; i < mpA; i++) { // local row int l_i = i / nb; // which block int x_i = i % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row assert(I < n); assert(J < n); if(I == J) { A[k] = nb * nb; } else { A[k] = I + J; } //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); k++; } } //creat descriptor int descA[9]; int info=0; int lddA = mpA > 1 ? mpA : 1; descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); if(info != 0) { printf("Error in descinit, info = %d\n", info); } pdpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); filename = to_string(myid)+"begin.dat"; f1.open(filename); k = 0; for (int j = 0; j < nqA; j++) { // local col int l_j = j / nb; // which block int x_j = j % nb; // where within that block int J = (l_j * npcol + mycol) * nb + x_j; // global col for (int i = 0; i < mpA; i++) { // local row int l_i = i / nb; // which block int x_i = i % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row assert(I < n); assert(J < n); f1 <<I << " "<<J << " " << A[k]<<endl; k++; } } f1.close(); //run pdpotri_ and time double MPIt1 = MPI_Wtime(); printf("[%dx%d] Starting \n", myrow, mycol); pdpotri_(&uplo, &n, A, &ione, &ione, descA, &info); if (info != 0) { printf("Error in caculate, info = %d\n", info); } double MPIt2 = MPI_Wtime(); printf("[%dx%d] Finishing \n", myrow, mycol); filename = to_string(myid)+"end.dat"; f1.open(filename); k = 0; for (int j = 0; j < nqA; j++) { // local col int l_j = j / nb; // which block int x_j = j % nb; // where within that block int J = (l_j * npcol + mycol) * nb + x_j; // global col for (int i = 0; i < mpA; i++) { // local row int l_i = i / nb; // which block int x_i = i % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row assert(I < n); assert(J < n); f1 <<I << " "<<J << " " << A[k]<<endl; k++; } } f1.close(); free(A); //exit and finanlize blacs_gridexit_(&ictxt); MPI_Finalize(); return 0; /* 输入矩阵A [[ 4.000000 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000] [ 0.250000 3.992180 3.000000 4.000000 5.000000 6.000000 7.000000 8.000000] [ 0.500000 0.720158 3.902740 5.000000 6.000000 7.000000 8.000000 9.000000] [ 0.750000 0.954992 1.008840 3.675290 7.000000 8.000000 9.000000 10.000000] [ 1.000000 1.189830 1.189710 1.064810 3.321910 9.000000 10.000000 11.000000] [ 1.250000 1.424660 1.370580 1.175220 0.955149 2.869830 11.000000 12.000000] [ 1.500000 1.659490 1.551450 1.285620 0.996646 0.756689 2.317410 13.000000] [ 1.750000 1.894330 1.732320 1.396030 1.038140 0.734271 0.500013 1.591320]] 输出矩阵A [[ 8.797610e-02 1.000000e+00 2.000000e+00 3.000000e+00 4.000000e+00 5.000000e+00 6.000000e+00 7.000000e+00] [ 2.352190e-02 9.302160e-02 3.000000e+00 4.000000e+00 5.000000e+00 6.000000e+00 7.000000e+00 8.000000e+00] [ 2.091620e-02 1.902120e-02 9.982790e-02 5.000000e+00 6.000000e+00 7.000000e+00 8.000000e+00 9.000000e+00] [ 1.726830e-02 1.542070e-02 1.295720e-02 1.095080e-01 7.000000e+00 8.000000e+00 9.000000e+00 1.000000e+01] [ 1.179650e-02 1.001990e-02 7.651130e-03 4.334850e-03 1.243600e-01 9.000000e+00 1.000000e+01 1.100000e+01] [ 2.676710e-03 1.018570e-03 -1.192280e-03 -4.287470e-03 -8.930260e-03 1.499980e-01 1.100000e+01 1.200000e+01] [-1.556280e-02 -1.698410e-02 -1.887910e-02 -2.153210e-02 -2.551170e-02 -3.214420e-02 2.045910e-01 1.300000e+01] [-7.028140e-02 -7.099200e-02 -7.193950e-02 -7.326610e-02 -7.525580e-02 -7.857210e-02 -8.520470e-02 3.948980e-01]] */