PPOTRI
计算经过Cholesky分解后的矩阵的逆矩阵。
接口定义
C Interface:
void pspotri_(const char *uplo, const int *n, float *a, const int *ia, const int *ja, const int *desca, int *info)
void pdpotri_(const char *uplo, const int *n, double *a, const int *ia, const int *ja, const int *desca, int *info)
void pcpotri_(const char *uplo, const int *n, float _Complex *a, const int *ia, const int *ja, const int *desca, int *info)
void pzpotri_(const char *uplo, const int *n, double _Complex *a, const int *ia, const int *ja, const int *desca, int *info)
Fortran Interface:
PSPOTRI(uplo, n, a, ia, ja, desca, info)
PDPOTRI(uplo, n, a, ia, ja, desca, info)
PCPOTRI(uplo, n, a, ia, ja, desca, info)
PZPOTRI(uplo, n, a, ia, ja, desca, info)
参数
参数 |
类型 |
范围 |
说明 |
输入/输出 |
---|---|---|---|---|
uplo |
字符 |
全局 |
U为上三角矩阵,L为下三角矩阵。 |
输入 |
n |
整型 |
全局 |
要操作的列数,比如子矩阵的列数。 |
输入 |
a |
|
本地 |
是全局实对称或Hermitiand的本地部分,在该接口中,为进行Cholesky分解后的矩阵。
|
输入,输出 |
ia |
整型 |
全局 |
子矩阵在全局矩阵中的行索引。 |
输入 |
ja |
整型 |
全局 |
子矩阵在全局矩阵中的列索引。 |
输入 |
desca |
整型数组 |
本地,全局 |
分布式矩阵A的矩阵描述符。 |
输入 |
info |
整型 |
全局 |
执行结果:
|
输出 |
依赖
#include <kscalapack.h>
示例
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 | int izero=0; int ione=1; int myid, nprocs_mpi; MPI_Init( &argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myid); MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); int n = 8; // (Global) Matrix size int nprow = 2; // Number of row procs int npcol = 2; // Number of column procs int nb = 4; // (Global) Block size char uplo='L'; // Matrix is lower triangular char layout='R'; // Block cyclic, Row major processor mapping printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); if(argc > 1) { n = atoi(argv[1]); } if(argc > 2) { nb = atoi(argv[2]); } if(argc > 3) { nprow = atoi(argv[3]); } if(argc > 4) { npcol = atoi(argv[4]); } assert(nprow * npcol == nprocs_mpi); // Initialize BLACS int iam, nprocs; int zero = 0; int ictxt, myrow, mycol; blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size blacs_get_(&zero, &zero, &ictxt ); // -> Create context blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) ofstream f1; string filename; // Compute the size of the local matrices int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A //allocate and fill the matrices A and B double *A; A = (double *)calloc(mpA*nqA,sizeof(double)) ; if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } int k = 0; for (int j = 0; j < nqA; j++) { // local col int l_j = j / nb; // which block int x_j = j % nb; // where within that block int J = (l_j * npcol + mycol) * nb + x_j; // global col for (int i = 0; i < mpA; i++) { // local row int l_i = i / nb; // which block int x_i = i % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row assert(I < n); assert(J < n); if(I == J) { A[k] = nb * nb; } else { A[k] = I + J; } //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); k++; } } //creat descriptor int descA[9]; int info=0; int lddA = mpA > 1 ? mpA : 1; descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); if(info != 0) { printf("Error in descinit, info = %d\n", info); } pdpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); filename = to_string(myid)+"begin.dat"; f1.open(filename); k = 0; for (int j = 0; j < nqA; j++) { // local col int l_j = j / nb; // which block int x_j = j % nb; // where within that block int J = (l_j * npcol + mycol) * nb + x_j; // global col for (int i = 0; i < mpA; i++) { // local row int l_i = i / nb; // which block int x_i = i % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row assert(I < n); assert(J < n); f1 <<I << " "<<J << " " << A[k]<<endl; k++; } } f1.close(); //run pdpotri_ and time double MPIt1 = MPI_Wtime(); printf("[%dx%d] Starting \n", myrow, mycol); pdpotri_(&uplo, &n, A, &ione, &ione, descA, &info); if (info != 0) { printf("Error in calculate, info = %d\n", info); } double MPIt2 = MPI_Wtime(); printf("[%dx%d] Finishing \n", myrow, mycol); filename = to_string(myid)+"end.dat"; f1.open(filename); k = 0; for (int j = 0; j < nqA; j++) { // local col int l_j = j / nb; // which block int x_j = j % nb; // where within that block int J = (l_j * npcol + mycol) * nb + x_j; // global col for (int i = 0; i < mpA; i++) { // local row int l_i = i / nb; // which block int x_i = i % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row assert(I < n); assert(J < n); f1 <<I << " "<<J << " " << A[k]<<endl; k++; } } f1.close(); free(A); //exit and finanlize blacs_gridexit_(&ictxt); MPI_Finalize(); return 0; /* 输入矩阵A [[ 4.000000 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000] [ 0.250000 3.992180 3.000000 4.000000 5.000000 6.000000 7.000000 8.000000] [ 0.500000 0.720158 3.902740 5.000000 6.000000 7.000000 8.000000 9.000000] [ 0.750000 0.954992 1.008840 3.675290 7.000000 8.000000 9.000000 10.000000] [ 1.000000 1.189830 1.189710 1.064810 3.321910 9.000000 10.000000 11.000000] [ 1.250000 1.424660 1.370580 1.175220 0.955149 2.869830 11.000000 12.000000] [ 1.500000 1.659490 1.551450 1.285620 0.996646 0.756689 2.317410 13.000000] [ 1.750000 1.894330 1.732320 1.396030 1.038140 0.734271 0.500013 1.591320]] 输出矩阵A [[ 8.797610e-02 1.000000e+00 2.000000e+00 3.000000e+00 4.000000e+00 5.000000e+00 6.000000e+00 7.000000e+00] [ 2.352190e-02 9.302160e-02 3.000000e+00 4.000000e+00 5.000000e+00 6.000000e+00 7.000000e+00 8.000000e+00] [ 2.091620e-02 1.902120e-02 9.982790e-02 5.000000e+00 6.000000e+00 7.000000e+00 8.000000e+00 9.000000e+00] [ 1.726830e-02 1.542070e-02 1.295720e-02 1.095080e-01 7.000000e+00 8.000000e+00 9.000000e+00 1.000000e+01] [ 1.179650e-02 1.001990e-02 7.651130e-03 4.334850e-03 1.243600e-01 9.000000e+00 1.000000e+01 1.100000e+01] [ 2.676710e-03 1.018570e-03 -1.192280e-03 -4.287470e-03 -8.930260e-03 1.499980e-01 1.100000e+01 1.200000e+01] [-1.556280e-02 -1.698410e-02 -1.887910e-02 -2.153210e-02 -2.551170e-02 -3.214420e-02 2.045910e-01 1.300000e+01] [-7.028140e-02 -7.099200e-02 -7.193950e-02 -7.326610e-02 -7.525580e-02 -7.857210e-02 -8.520470e-02 3.948980e-01]] */ |