鲲鹏社区首页
中文
注册
我要评分
文档获取效率
文档正确性
内容完整性
文档易理解
在线提单
论坛求助

PPOTRI

计算经过Cholesky分解后的矩阵的逆矩阵。

接口定义

C Interface:

void pspotri_(const char *uplo, const int *n, float *a, const int *ia, const int *ja, const int *desca, int *info)

void pdpotri_(const char *uplo, const int *n, double *a, const int *ia, const int *ja, const int *desca, int *info)

void pcpotri_(const char *uplo, const int *n, float _Complex *a, const int *ia, const int *ja, const int *desca, int *info)

void pzpotri_(const char *uplo, const int *n, double _Complex *a, const int *ia, const int *ja, const int *desca, int *info)

Fortran Interface:

PSPOTRI(uplo, n, a, ia, ja, desca, info)

PDPOTRI(uplo, n, a, ia, ja, desca, info)

PCPOTRI(uplo, n, a, ia, ja, desca, info)

PZPOTRI(uplo, n, a, ia, ja, desca, info)

参数

参数

类型

范围

说明

输入/输出

uplo

字符

全局

U为上三角矩阵,L为下三角矩阵。

输入

n

整型

全局

要操作的列数,比如子矩阵的列数。

输入

a

  • 在pspotrf中为单精度浮点型数组。
  • 在pdpotrf中为双精度浮点型数组。
  • 在pcpotrf中为单精度复数型数组。
  • 在pzpotrf中为双精度复数型数组。

本地

是全局实对称或Hermitiand的本地部分,在该接口中,为进行Cholesky分解后的矩阵。

  • 当uplo = 'U',仅存放
  • 当uplo = 'L',调用后根据在全局矩阵中的位置存放对角线以及对角线以下的部分保存下三角矩阵L。

输入,输出

ia

整型

全局

子矩阵在全局矩阵中的行索引。

输入

ja

整型

全局

子矩阵在全局矩阵中的列索引。

输入

desca

整型数组

本地,全局

分布式矩阵A的矩阵描述符。

输入

info

整型

全局

执行结果:

  • 等于0:退出成功。
  • 小于0:第-info个参数值不合法。
  • 大于0:A中info大小的主子式非正定,无法完成分解。

输出

依赖

#include <kscalapack.h>

示例

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
    int izero=0;
    int ione=1;
    int myid, nprocs_mpi;
    MPI_Init( &argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
 
    int n = 8;       // (Global) Matrix size
    int nprow = 2;   // Number of row procs
    int npcol = 2;   // Number of column procs
    int nb = 4;      // (Global) Block size
    char uplo='L';   // Matrix is lower triangular
    char layout='R'; // Block cyclic, Row major processor mapping
    
    printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n");
 
    if(argc > 1) {
        n = atoi(argv[1]);
    }
    if(argc > 2) {
        nb = atoi(argv[2]);
    }
    if(argc > 3) {
        nprow = atoi(argv[3]);
    }
    if(argc > 4) {
        npcol = atoi(argv[4]);
    }
 
    assert(nprow * npcol == nprocs_mpi);
 
    // Initialize BLACS
    int iam, nprocs;
    int zero = 0;
    int ictxt, myrow, mycol;
    blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size
    blacs_get_(&zero, &zero, &ictxt ); // -> Create context
    blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid
    blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col)
 
    ofstream f1;
    string filename;
    // Compute the size of the local matrices
    int mpA    = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A
    int nqA    = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A
   
    //allocate and fill the matrices A and B 
    double *A;
    A = (double *)calloc(mpA*nqA,sizeof(double)) ;
    if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
    int k = 0;
    for (int j = 0; j < nqA; j++) { // local col
        int l_j = j / nb; // which block
        int x_j = j % nb; // where within that block
        int J   = (l_j * npcol + mycol) * nb + x_j; // global col
        for (int i = 0; i < mpA; i++) { // local row
            int l_i = i / nb; // which block
            int x_i = i % nb; // where within that block
            int I   = (l_i * nprow + myrow) * nb + x_i; // global row
            assert(I < n);
            assert(J < n);
            if(I == J) {
                A[k] = nb * nb;
            } else {
                A[k] = I +  J;
            }
            //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]);
            k++;
        }
    }
    //creat descriptor
    int descA[9];
    int info=0;
    int lddA = mpA > 1 ? mpA : 1;
    descinit_( descA,  &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info);
    if(info != 0) {
        printf("Error in descinit, info = %d\n", info);
    }
    
    pdpotrf_(&uplo, &n, A, &ione, &ione, descA, &info);
    
    filename = to_string(myid)+"begin.dat";
    f1.open(filename);
    k = 0;
    for (int j = 0; j < nqA; j++) { // local col
        int l_j = j / nb; // which block
        int x_j = j % nb; // where within that block
        int J   = (l_j * npcol + mycol) * nb + x_j; // global col
        for (int i = 0; i < mpA; i++) { // local row
            int l_i = i / nb; // which block
            int x_i = i % nb; // where within that block
            int I   = (l_i * nprow + myrow) * nb + x_i; // global row
            assert(I < n);
            assert(J < n);
            f1 <<I << " "<<J << " " << A[k]<<endl;
            k++;
        }
    }
    f1.close();
    //run pdpotri_ and time
    double MPIt1 = MPI_Wtime();
    printf("[%dx%d] Starting \n", myrow, mycol);
    pdpotri_(&uplo, &n, A, &ione, &ione, descA, &info);
    if (info != 0) {
        printf("Error in calculate, info = %d\n", info);
    }
    double MPIt2 = MPI_Wtime();
    printf("[%dx%d] Finishing \n", myrow, mycol);
    filename = to_string(myid)+"end.dat";
    f1.open(filename);
    k = 0;
    for (int j = 0; j < nqA; j++) { // local col
        int l_j = j / nb; // which block
        int x_j = j % nb; // where within that block
        int J   = (l_j * npcol + mycol) * nb + x_j; // global col
        for (int i = 0; i < mpA; i++) { // local row
            int l_i = i / nb; // which block
            int x_i = i % nb; // where within that block
            int I   = (l_i * nprow + myrow) * nb + x_i; // global row
            assert(I < n);
            assert(J < n);
            f1 <<I << " "<<J << " " << A[k]<<endl;
            k++;
        }
    }
    f1.close();
    free(A);
    //exit and finanlize
    blacs_gridexit_(&ictxt);
    MPI_Finalize();
    return 0;
/* 
输入矩阵A
[[ 4.000000  1.000000  2.000000  3.000000  4.000000  5.000000  6.000000  7.000000]
 [ 0.250000  3.992180  3.000000  4.000000  5.000000  6.000000  7.000000  8.000000]
 [ 0.500000  0.720158  3.902740  5.000000  6.000000  7.000000  8.000000  9.000000]
 [ 0.750000  0.954992  1.008840  3.675290  7.000000  8.000000  9.000000 10.000000]
 [ 1.000000  1.189830  1.189710  1.064810  3.321910  9.000000 10.000000 11.000000]
 [ 1.250000  1.424660  1.370580  1.175220  0.955149  2.869830 11.000000 12.000000]
 [ 1.500000  1.659490  1.551450  1.285620  0.996646  0.756689  2.317410 13.000000]
 [ 1.750000  1.894330  1.732320  1.396030  1.038140  0.734271  0.500013  1.591320]]

输出矩阵A
[[ 8.797610e-02  1.000000e+00  2.000000e+00  3.000000e+00  4.000000e+00  5.000000e+00  6.000000e+00  7.000000e+00]
 [ 2.352190e-02  9.302160e-02  3.000000e+00  4.000000e+00  5.000000e+00  6.000000e+00  7.000000e+00  8.000000e+00]
 [ 2.091620e-02  1.902120e-02  9.982790e-02  5.000000e+00  6.000000e+00  7.000000e+00  8.000000e+00  9.000000e+00]
 [ 1.726830e-02  1.542070e-02  1.295720e-02  1.095080e-01  7.000000e+00  8.000000e+00  9.000000e+00  1.000000e+01]
 [ 1.179650e-02  1.001990e-02  7.651130e-03  4.334850e-03  1.243600e-01  9.000000e+00  1.000000e+01  1.100000e+01]
 [ 2.676710e-03  1.018570e-03 -1.192280e-03 -4.287470e-03 -8.930260e-03  1.499980e-01  1.100000e+01  1.200000e+01]
 [-1.556280e-02 -1.698410e-02 -1.887910e-02 -2.153210e-02 -2.551170e-02 -3.214420e-02  2.045910e-01  1.300000e+01]
 [-7.028140e-02 -7.099200e-02 -7.193950e-02 -7.326610e-02 -7.525580e-02 -7.857210e-02 -8.520470e-02  3.948980e-01]]
*/