PGETRI
计算分布式矩阵A的逆矩阵,其中A是LU分解后的LU矩阵。
接口定义
C Interface:
void psgetri_(const int *n, float *a, const int *ia, const int *ja, const int *desca, const int *ipiv, float *work, const int *lwork, int *iwork, const int *liwork, int *info);
void pdgetri_(const int *n, double *a, const int *ia, const int *ja, const int *desca, const int *ipiv, double *work, const int *lwork, int *iwork, const int *liwork, int *info);
void pcgetri_(const int *n, float _Complex *a, const int *ia, const int *ja, const int *desca,const int *ipiv, float _Complex *work, const int *lwork, int *iwork, const int *liwork, int *info);
void pzgetri_(const int *n, double _Complex *a, const int *ia, const int *ja, const int *desca, const int *ipiv, double _Complex *work, const int *lwork, int *iwork, const int *liwork, int *info);
Fortran Interface:
PSGETRI (n, a, ia, ja, desca, ipiv, work, lwork, iwork, liwork, info)
PDGETRI (n, a, ia, ja, desca, ipiv, work, lwork, iwork, liwork, info)
PCGETRI (n, a, ia, ja, desca, ipiv, work, lwork, iwork, liwork, info)
PZGETRI (n, a, ia, ja, desca, ipiv, work, lwork, iwork, liwork, info)
参数
参数 |
类型 |
范围 |
说明 |
输入/输出 |
|---|---|---|---|---|
n |
整型 |
全局 |
要操作的行数与列数,全局矩阵的行数。 |
输入 |
a |
|
本地 |
|
输入,输出 |
ia |
整型 |
全局 |
子矩阵在全局矩阵中的行索引。 |
输入 |
ja |
整型 |
全局 |
子矩阵在全局矩阵中的列索引。 |
输入 |
desca |
整型数组 |
本地,全局 |
分布式矩阵A的矩阵描述符。 |
输入 |
ipiv |
整型 |
本地 |
包含了主元信息。 |
输出 |
work |
|
本地 |
|
输入,输出 |
lwork |
整型 |
本地 |
数组work的空间大小,lwork≥LOCr(n+mod(ia-1,mb))*nb |
输入 |
iwork |
整型数组 |
本地 |
用于物理转置pivot的辅助数组 |
输入,输出 |
liwork |
整型 |
本地/全局 |
iwork数组空间的大小 |
输入 |
info |
整型 |
全局 |
执行结果:
|
输出 |
依赖
#include <kscalapack.h>
示例
C Interface:
int izero=0;
int ione=1;
int myid, nprocs_mpi;
MPI_Init( &argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
int n = 8; // (Global) Matrix size
int nprow = 2; // Number of row procs
int npcol = 2; // Number of column procs
int nb = 4; // (Global) Block size
char uplo='L'; // Matrix is lower triangular
char layout='R'; // Block cyclic, Row major processor mapping
if(argc > 1) {
n = atoi(argv[1]);
}
if(argc > 2) {
nb = atoi(argv[2]);
}
if(argc > 3) {
nprow = atoi(argv[3]);
}
if(argc > 4) {
npcol = atoi(argv[4]);
}
assert(nprow * npcol == nprocs_mpi);
// Initialize BLACS
int iam, nprocs;
int zero = 0;
int ictxt, myrow, mycol;
blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size
blacs_get_(&zero, &zero, &ictxt ); // -> Create context
blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid
blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col)
// Compute the size of the local matrices
int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A
int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A
ofstream f1;
string filename;
double *A;
A = (double *)calloc(mpA*nqA,sizeof(double)) ;
if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
int k = 0;
for (int j = 0; j < nqA; j++) { // local col
int l_j = j / nb; // which block
int x_j = j % nb; // where within that block
int J = (l_j * npcol + mycol) * nb + x_j; // global col
for (int i = 0; i < mpA; i++) { // local row
int l_i = i / nb; // which block
int x_i = i % nb; // where within that block
int I = (l_i * nprow + myrow) * nb + x_i; // global row
assert(I < n);
assert(J < n);
if(I == J) {
A[k] = i + j + 1.5 + (rand())%10;
} else {
A[k] = n + rand()% 10;
}
//printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]);
k++;
}
}
if (myid == 0)
printf("*****************begin test**********************\n");
//creat descriptor
int descA[9];
int info=0;
int ipiv[10] = {0};
int lddA = mpA > 1 ? mpA : 1;
descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info);
if(info != 0) {
printf("Error in descinit, info = %d\n", info);
} else {
if (myid == 0)
printf("Finish descinit !\n");
}
pdgetrf_(&n, &n, A, &ione, &ione, descA, ipiv, &info);
int lwork = 20, liwork = 20; //
double * work;
int* iwork;
work = (double *)calloc(lwork, sizeof(double));
iwork = (int *)calloc(liwork, sizeof(int));
if (myid == 0)
printf("Finish preparing\n");
filename = to_string(myid)+"begin.dat";
f1.open(filename);
k = 0;
for (int j = 0; j < nqA; j++) { // local col
int l_j = j / nb; // which block
int x_j = j % nb; // where within that block
int J = (l_j * npcol + mycol) * nb + x_j; // global col
for (int i = 0; i < mpA; i++) { // local row
int l_i = i / nb; // which block
int x_i = i % nb; // where within that block
int I = (l_i * nprow + myrow) * nb + x_i; // global row
assert(I < n);
assert(J < n);
f1 <<I << " "<<J << " " << A[k]<<endl;
k++;
}
}
f1.close();
double MPIt1 = MPI_Wtime();
printf("[%dx%d] Starting pdgetri \n", myrow, mycol);
pdgetri_(&n, A, &ione, &ione, descA, ipiv, work, &lwork, iwork, &liwork, &info);
if (info != 0) {
printf("Error in caculate, info = %d\n", info);
}
double MPIt2 = MPI_Wtime();
printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1);
filename = to_string(myid)+"end.dat";
f1.open(filename);
k = 0;
for (int j = 0; j < nqA; j++) { // local col
int l_j = j / nb; // which block
int x_j = j % nb; // where within that block
int J = (l_j * npcol + mycol) * nb + x_j; // global col
for (int i = 0; i < mpA; i++) { // local row
int l_i = i / nb; // which block
int x_i = i % nb; // where within that block
int I = (l_i * nprow + myrow) * nb + x_i; // global row
assert(I < n);
assert(J < n);
f1 <<I << " "<<J << " " << A[k]<<endl;
k++;
}
}
f1.close();
free(A);
//exit and finanlize
blacs_gridexit_(&ictxt);
MPI_Finalize();
return 0;
/*输入矩阵A
[[ 1.500000e+01 1.400000e+01 7.500000e+00 1.100000e+01 1.500000e+01
1.400000e+01 1.000000e+01 1.100000e+01]
[ 3.000000e-01 6.800000e+00 1.475000e+01 4.700000e+00 6.500000e+00
6.800000e+00 1.400000e+01 4.700000e+00]
[ 8.666670e-01 -3.137250e-01 1.312750e+01 5.441180e+00 2.039220e+00
-1.223420e-16 1.072550e+01 5.941180e+00]
[ 9.333330e-01 -9.803920e-03 1.633680e-01 5.890500e+00 -2.694170e-01
-4.500000e+00 -1.948280e+00 5.808810e+00]
[ 7.333330e-01 1.078430e-01 7.548540e-01 -7.946400e-01 -8.954380e+00
-3.575880e+00 -1.487500e+00 -4.423370e-01]
[ 9.333330e-01 -6.715690e-01 9.069270e-01 8.411780e-01 -3.062640e-01
7.190140e+00 5.246760e-01 -5.202240e-01]
[ 1.000000e+00 0.000000e+00 1.904410e-01 -1.759140e-01 4.866260e-02
-8.589560e-02 -4.767850e+00 -1.327490e-01]
[ 8.666670e-01 -3.137250e-01 1.000000e+00 8.488250e-02 -2.553930e-03
5.185420e-02 -2.818240e-02 -9.709620e-01]]
*/
/*输出矩阵A
[[-0.130400 -0.056865 0.030892 0.076706 0.023446 -0.056865 0.030892
0.076706]
[ 0.043322 -0.078041 0.013146 -0.076609 0.043322 0.144181 0.013146
-0.076609]
[ 0.004289 -0.019433 -0.208930 0.028675 0.004289 -0.019433 0.191070
0.028675]
[ 0.014375 0.050912 -0.029025 -1.029910 0.014375 0.050912 -0.029025
0.970093]
[ 0.023446 -0.056865 0.030892 0.076706 -0.130400 -0.056865 0.030892
0.076706]
[ 0.043322 0.144181 0.013146 -0.076609 0.043322 -0.078041 0.013146
-0.076609]
[ 0.004289 -0.019433 0.191070 0.028675 0.004289 -0.019433 -0.208930
0.028675]
[ 0.014375 0.050912 -0.029025 0.970093 0.014375 0.050912 -0.029025
-1.029910]]
*/