P?(SY/HE)EVD
计算实数对称矩阵/复数厄米特矩阵A的所有特征值和特征向量,其中A是分布式矩阵。
接口定义
C Interface:
void pssyevd_(const char *jobz, const char *uplo, const int *n, float *a, const int *ia, const int *ja, const int *desca, float *w, float *z, const int *iz, const int *jz, const int *descz, float *work, const int *lwork, int *iwork, const int *liwork, int *info);
void pdsyevd_(const char *jobz, const char *uplo, const int *n, double *a, const int *ia, const int *ja, const int *desca, double *w, double *z, const int *iz, const int *jz, const int *descz, double *work, const int *lwork, int *iwork, const int *liwork, int *info);
void pcheevd_(const char *jobz, const char *uplo, const int *n, float _Complex *a, const int *ia, const int *ja, const int *desca, float *w, float _Complex *z, const int *iz, const int *jz, const int *descz, float _Complex *work, const int *lwork, float *rwork, const int *lrwork, int *iwork, const int *liwork, int *info);
void pzheevd_(const char *jobz, const char *uplo, const int *n, double _Complex *a, const int *ia, const int *ja, const int *desca, double *w, double _Complex *z, const int *iz, const int *jz, const int *descz, double _Complex *work, const int *lwork, double *rwork, const int *lrwork, int *iwork, const int *liwork, int *info);
Fortran Interface:
PSSYEVD(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, iwork, liwork, info)
PDSYEVD(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, iwork, liwork, info)
PCHEEVD(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, info)
PZHEEVD(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, info)
参数
参数 |
类型 |
范围 |
说明 |
输入/输出 |
---|---|---|---|---|
jobz |
字符型 |
全局 |
指定任务种类。
|
输入 |
uplo |
字符型 |
全局 |
指定矩阵类型。
|
输入 |
n |
整型 |
全局 |
矩阵的行数和列数。 |
输入 |
a |
|
本地 |
调用前保存待分解的分布式矩阵A的本地部分。
|
输入,输出 |
ia |
整型 |
全局 |
子矩阵A在全局矩阵中的行索引。 |
输入 |
ja |
整型 |
全局 |
子矩阵A在全局矩阵中的列索引。 |
输入 |
desca |
整型数组 |
本地,全局 |
分布式矩阵A的矩阵描述符。 |
输入 |
w |
|
全局 |
如果info=0,特征值升序排列。 |
输出 |
z |
|
本地 |
存放特征向量的矩阵。全局大小N*N,本地大小LDD_z * LOCc(jz+n-1)。
|
输出 |
iz |
整型 |
全局 |
子矩阵Z在全局矩阵中的行索引 |
输入 |
jz |
整型 |
全局 |
子矩阵Z在全局矩阵中的列索引。 |
输入 |
descz |
整型数组 |
本地,全局 |
分布式矩阵Z的矩阵描述符。 |
输入 |
work |
|
本地 |
Info=0时,work(0)返回最优lwork大小。 |
输出 |
lwork |
整型 |
本地/全局 |
work数组需要的空间大小。 |
输入 |
rwork(复数特有) |
|
本地 |
矩阵需要的空间大小。
|
输出 |
lrwork(复数特有) |
整型 |
本地/全局 |
rwork数组需要的空间大小。 |
输入 |
iwork |
整型数组 |
本地 |
Info=0时,iwork(0)返回最优liwork大小。 |
输出 |
liwork |
整型 |
本地/全局 |
liwork数组的维度,liwork=7*n+8*npcol+2。 |
输入 |
info |
整型 |
全局 |
|
输出 |
依赖
#include <kscalapack.h>
示例
int main(){ int izero=0; int ione=1; int myrank_mpi, nprocs_mpi; MPI_Init( &argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); int n = 8; // (Global) Matrix size int nprow = 2; // Number of row procs int npcol = 2; // Number of column procs int nb = 4; // (Global) Block size char uplo='L'; // Matrix is lower triangular char jobs='V'; // Block cyclic, Row major processor mapping double tau[8]; char layout='R'; double w[8]; int iz; int jz; int descz; int lwork = 160; double work[lwork]; int liwork = 90; int iwork[liwork]; if(argc > 1) { n = atoi(argv[1]); } if(argc > 2) { nb = atoi(argv[2]); } if(argc > 3) { nprow = atoi(argv[3]); } if(argc > 4) { npcol = atoi(argv[4]); } assert(nprow * npcol == nprocs_mpi); // Initialize BLACS int iam, nprocs; int zero = 0; int ictxt, myrow, mycol; blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size blacs_get_(&zero, &zero, &ictxt ); // -> Create context blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) // Compute the size of the local matrices int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A int mpZ = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local Z int nqZ = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local Z ofstream f1; string filename = to_string(myrank_mpi)+"A.dat"; f1.open(filename); double *A; A = (double *)calloc(mpA*nqA,sizeof(double)); double *Z; Z = (double *)calloc(mpZ*nqZ,sizeof(double)); if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } int k = 0; double globalA[n][n]; for(int j = 0; j < n; j++) { for (int i = 0; i <= j; i++){ if (i == j) globalA[i][j]= 3 * n + (rand())%10; else { globalA[i][j] = n + (rand())%10; globalA[j][i] = n + (rand())%10; } if (myrank_mpi == 0) f1 <<i << " "<<j << " " << globalA[i][j]<<endl; } } f1.close(); for (int j = 0; j < nqA; j++) { // local col int l_j = j / nb; // which block int x_j = j % nb; // where within that block int J = (l_j * npcol + mycol) * nb + x_j; // global col for (int i = 0; i <= j; i++) { // local row int l_i = i / nb; // which block int x_i = i % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row assert(I < n); assert(J < n); A[k] = globalA[I][J]; // printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); k++; } } int descA[9]; int info=0; int ipiv[10] = {0}; int lddA = mpA > 1 ? mpA : 1; descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); int descZ[9]; descinit_( descZ, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); if(info != 0) { printf("Error in descinit, info = %d\n", info); } double MPIt1 = MPI_Wtime(); printf("[%dx%d] Starting \n", myrow, mycol); pdsyevd_(&jobs, &uplo, &n, A, &ione, &ione, descA, w, Z, &ione, &ione, descZ, work, &lwork, iwork, &liwork, &info); if (info != 0) { printf("Error in caculate, info = %d\n", info); } double MPIt2 = MPI_Wtime(); filename = to_string(myrank_mpi)+"Z.dat"; f1.open(filename); k = 0; for (int j = 0; j < nqZ; j++) { // local col int l_j = j / nb; // which block int x_j = j % nb; // where within that block int J = (l_j * npcol + mycol) * nb + x_j; // global col for (int i = 0; i < mpZ; i++) { // local row int l_i = i / nb; // which block int x_i = i % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row assert(I < n); assert(J < n); f1 <<I << " "<<J << " " << Z[k]<<endl; k++; } } f1.close(); for (int i = 0; i < n; i++) { printf("%lf ",w[i]); } printf("\n[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); free(A); blacs_gridexit_(&ictxt); MPI_Finalize(); return 0; } /* Origin A: [[27. 0. 0. 0. 0. 0. 0. 0.] [14. 29. 0. 0. 0. 0. 0. 0.] [11. 14. 33. 0. 0. 0. 0. 0.] [ 9. 15. 17. 30. 0. 0. 0. 0.] [ 8. 10. 9. 15. 26. 0. 0. 0.] [ 8. 11. 13. 10. 16. 31. 0. 0.] [11. 9. 17. 9. 12. 16. 29. 0.] [ 8. 14. 8. 11. 8. 9. 13. 31.]] w://特征值 [-35.145752 -11.559735 -6.089042 -0.555364 16.500808 23.793150 25.601339 102.454598] Z://特征向量 [[ 3.808840e-01 -3.560630e-01 -1.508930e-01 -4.958660e-02 -2.479740e-01 5.474460e-01 3.507160e-01 -4.676850e-01] [ 1.215670e-01 -9.875040e-02 3.955830e-01 3.017760e-02 3.132180e-01 1.838380e-01 -7.044540e-01 -4.357930e-01] [-5.219660e-01 3.517350e-01 4.716980e-01 3.487300e-01 -2.010880e-01 2.340930e-01 2.884750e-01 -2.850760e-01] [-1.710940e-01 4.669610e-01 -7.019050e-01 1.670770e-01 1.194220e-01 3.689920e-01 -2.620540e-01 -1.140160e-01] [ 3.644890e-01 4.411160e-01 -5.683900e-02 -3.502880e-02 -3.815800e-01 -5.387890e-01 -4.586880e-02 -4.796940e-01] [ 8.826880e-04 -3.503620e-02 -1.090290e-01 1.568230e-01 7.541030e-01 -2.801260e-01 4.334450e-01 -3.567550e-01] [-5.430560e-01 -5.514490e-01 -3.000940e-01 1.117970e-01 -2.539910e-01 -3.196940e-01 -1.823870e-01 -3.137810e-01] [-3.326610e-01 1.476200e-01 1.999970e-02 -8.993240e-01 8.318180e-02 6.770530e-02 7.489810e-02 -2.030950e-01]] */
如果调用该接口过程中出现警告"KML INFORM: GEQRF performance in eigensolver is suboptimal due values close to under/overflow",可能会导致性能下降,建议设置KML_FAST_EIGENSOLVER=0到环境变量。