PPOTRI

计算经过Cholesky分解后的矩阵的逆矩阵。

接口定义

C Interface：

void pspotri_(const char *uplo, const int *n, float *a, const int *ia, const int *ja, const int *desca, int *info)

void pdpotri_(const char *uplo, const int *n, double *a, const int *ia, const int *ja, const int *desca, int *info)

void pcpotri_(const char *uplo, const int *n, float _Complex *a, const int *ia, const int *ja, const int *desca, int *info)

void pzpotri_(const char *uplo, const int *n, double _Complex *a, const int *ia, const int *ja, const int *desca, int *info)

Fortran Interface：

PSPOTRI(uplo, n, a, ia, ja, desca, info)

PDPOTRI(uplo, n, a, ia, ja, desca, info)

PCPOTRI(uplo, n, a, ia, ja, desca, info)

PZPOTRI(uplo, n, a, ia, ja, desca, info)

参数

参数	类型	范围	说明	输入/输出
uplo	字符	全局	U为上三角矩阵，L为下三角矩阵。	输入
n	整型	全局	要操作的列数，比如子矩阵的列数。	输入
a	在pspotrf中为单精度浮点型数组。在pdpotrf中为双精度浮点型数组。在pcpotrf中为单精度复数型数组。在pzpotrf中为双精度复数型数组。	本地	是全局实对称或Hermitian的本地部分，在该接口中，为进行Cholesky分解后的矩阵。当uplo = 'U'，仅存放当uplo = 'L'，调用后根据在全局矩阵中的位置存放对角线以及对角线以下的部分保存下三角矩阵L。	输入，输出
ia	整型	全局	子矩阵在全局矩阵中的行索引。	输入
ja	整型	全局	子矩阵在全局矩阵中的列索引。	输入
desca	整型数组	本地，全局	分布式矩阵A的矩阵描述符。	输入
info	整型	全局	执行结果：等于0：退出成功。小于0：第-info个参数值不合法。大于0：A中info大小的主子式非正定，无法完成分解。	输出

依赖

#include <kscalapack.h>

示例

    int izero=0;
    int ione=1;
    int myid, nprocs_mpi;
    MPI_Init( &argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
 
    int n = 8;       // (Global) Matrix size
    int nprow = 2;   // Number of row procs
    int npcol = 2;   // Number of column procs
    int nb = 4;      // (Global) Block size
    char uplo='L';   // Matrix is lower triangular
    char layout='R'; // Block cyclic, Row major processor mapping
    
    printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n");
 
    if(argc > 1) {
        n = atoi(argv[1]);
    }
    if(argc > 2) {
        nb = atoi(argv[2]);
    }
    if(argc > 3) {
        nprow = atoi(argv[3]);
    }
    if(argc > 4) {
        npcol = atoi(argv[4]);
    }
 
    assert(nprow * npcol == nprocs_mpi);
 
    // Initialize BLACS
    int iam, nprocs;
    int zero = 0;
    int ictxt, myrow, mycol;
    blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size
    blacs_get_(&zero, &zero, &ictxt ); // -> Create context
    blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid
    blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col)
 
    ofstream f1;
    string filename;
    // Compute the size of the local matrices
    int mpA    = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A
    int nqA    = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A
   
    //allocate and fill the matrices A and B 
    double *A;
    A = (double *)calloc(mpA*nqA,sizeof(double)) ;
    if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
    int k = 0;
    for (int j = 0; j < nqA; j++) { // local col
        int l_j = j / nb; // which block
        int x_j = j % nb; // where within that block
        int J   = (l_j * npcol + mycol) * nb + x_j; // global col
        for (int i = 0; i < mpA; i++) { // local row
            int l_i = i / nb; // which block
            int x_i = i % nb; // where within that block
            int I   = (l_i * nprow + myrow) * nb + x_i; // global row
            assert(I < n);
            assert(J < n);
            if(I == J) {
                A[k] = nb * nb;
            } else {
                A[k] = I +  J;
            }
            //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]);
            k++;
        }
    }
    //create descriptor
    int descA[9];
    int info=0;
    int lddA = mpA > 1 ? mpA : 1;
    descinit_( descA,  &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info);
    if(info != 0) {
        printf("Error in descinit, info = %d\n", info);
    }
    
    pdpotrf_(&uplo, &n, A, &ione, &ione, descA, &info);
    
    filename = to_string(myid)+"begin.dat";
    f1.open(filename);
    k = 0;
    for (int j = 0; j < nqA; j++) { // local col
        int l_j = j / nb; // which block
        int x_j = j % nb; // where within that block
        int J   = (l_j * npcol + mycol) * nb + x_j; // global col
        for (int i = 0; i < mpA; i++) { // local row
            int l_i = i / nb; // which block
            int x_i = i % nb; // where within that block
            int I   = (l_i * nprow + myrow) * nb + x_i; // global row
            assert(I < n);
            assert(J < n);
            f1 <<I << " "<<J << " " << A[k]<<endl;
            k++;
        }
    }
    f1.close();
    //run pdpotri_ and time
    double MPIt1 = MPI_Wtime();
    printf("[%dx%d] Starting \n", myrow, mycol);
    pdpotri_(&uplo, &n, A, &ione, &ione, descA, &info);
    if (info != 0) {
        printf("Error in calculate, info = %d\n", info);
    }
    double MPIt2 = MPI_Wtime();
    printf("[%dx%d] Finishing \n", myrow, mycol);
    filename = to_string(myid)+"end.dat";
    f1.open(filename);
    k = 0;
    for (int j = 0; j < nqA; j++) { // local col
        int l_j = j / nb; // which block
        int x_j = j % nb; // where within that block
        int J   = (l_j * npcol + mycol) * nb + x_j; // global col
        for (int i = 0; i < mpA; i++) { // local row
            int l_i = i / nb; // which block
            int x_i = i % nb; // where within that block
            int I   = (l_i * nprow + myrow) * nb + x_i; // global row
            assert(I < n);
            assert(J < n);
            f1 <<I << " "<<J << " " << A[k]<<endl;
            k++;
        }
    }
    f1.close();
    free(A);
    //exit and finanlize
    blacs_gridexit_(&ictxt);
    MPI_Finalize();
    return 0;
/* 
输入矩阵A
[[ 4.000000  1.000000  2.000000  3.000000  4.000000  5.000000  6.000000  7.000000]
 [ 0.250000  3.992180  3.000000  4.000000  5.000000  6.000000  7.000000  8.000000]
 [ 0.500000  0.720158  3.902740  5.000000  6.000000  7.000000  8.000000  9.000000]
 [ 0.750000  0.954992  1.008840  3.675290  7.000000  8.000000  9.000000 10.000000]
 [ 1.000000  1.189830  1.189710  1.064810  3.321910  9.000000 10.000000 11.000000]
 [ 1.250000  1.424660  1.370580  1.175220  0.955149  2.869830 11.000000 12.000000]
 [ 1.500000  1.659490  1.551450  1.285620  0.996646  0.756689  2.317410 13.000000]
 [ 1.750000  1.894330  1.732320  1.396030  1.038140  0.734271  0.500013  1.591320]]

输出矩阵A
[[ 8.797610e-02  1.000000e+00  2.000000e+00  3.000000e+00  4.000000e+00  5.000000e+00  6.000000e+00  7.000000e+00]
 [ 2.352190e-02  9.302160e-02  3.000000e+00  4.000000e+00  5.000000e+00  6.000000e+00  7.000000e+00  8.000000e+00]
 [ 2.091620e-02  1.902120e-02  9.982790e-02  5.000000e+00  6.000000e+00  7.000000e+00  8.000000e+00  9.000000e+00]
 [ 1.726830e-02  1.542070e-02  1.295720e-02  1.095080e-01  7.000000e+00  8.000000e+00  9.000000e+00  1.000000e+01]
 [ 1.179650e-02  1.001990e-02  7.651130e-03  4.334850e-03  1.243600e-01  9.000000e+00  1.000000e+01  1.100000e+01]
 [ 2.676710e-03  1.018570e-03 -1.192280e-03 -4.287470e-03 -8.930260e-03  1.499980e-01  1.100000e+01  1.200000e+01]
 [-1.556280e-02 -1.698410e-02 -1.887910e-02 -2.153210e-02 -2.551170e-02 -3.214420e-02  2.045910e-01  1.300000e+01]
 [-7.028140e-02 -7.099200e-02 -7.193950e-02 -7.326610e-02 -7.525580e-02 -7.857210e-02 -8.520470e-02  3.948980e-01]]
*/

父主题： 矩阵求逆函数