P?(SY/HE)EVD
Interface Definition
C interface:
void pssyevd_(const char *jobz, const char *uplo, const int *n, float *a, const int *ia, const int *ja, const int *desca, float *w, float *z, const int *iz, const int *jz, const int *descz, float *work, const int *lwork, int *iwork, const int *liwork, int *info);
void pdsyevd_(const char *jobz, const char *uplo, const int *n, double *a, const int *ia, const int *ja, const int *desca, double *w, double *z, const int *iz, const int *jz, const int *descz, double *work, const int *lwork, int *iwork, const int *liwork, int *info);
void pcheevd_(const char *jobz, const char *uplo, const int *n, float _Complex *a, const int *ia, const int *ja, const int *desca, float *w, float _Complex *z, const int *iz, const int *jz, const int *descz, float _Complex *work, const int *lwork, float *rwork, const int *lrwork, int *iwork, const int *liwork, int *info);
void pzheevd_(const char *jobz, const char *uplo, const int *n, double _Complex *a, const int *ia, const int *ja, const int *desca, double *w, double _Complex *z, const int *iz, const int *jz, const int *descz, double _Complex *work, const int *lwork, double *rwork, const int *lrwork, int *iwork, const int *liwork, int *info);
Fortran interface:
PSSYEVD(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, iwork, liwork, info)
PDSYEVD(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, iwork, liwork, info)
PCHEEVD(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, info)
PZHEEVD(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, info)
Parameters
Parameter |
Type |
Scope |
Description |
Input/Output |
|---|---|---|---|---|
jobz |
Character |
Global |
Specifies the job type.
|
Input |
uplo |
Character |
Global |
Specifies the matrix type.
|
Input |
n |
Integer |
Global |
Number of rows and columns in a matrix. |
Input |
a |
|
Local |
Stores the local part of distributed matrix A to be factorized before this function is called.
|
Input, output |
ia |
Integer |
Global |
Row indices of submatrix A in the global matrix. |
Input |
ja |
Integer |
Global |
Column indices of submatrix A in the global matrix. |
Input |
desca |
Integer array |
Local and global |
Descriptor of distributed matrix A. |
Input |
w |
|
Global |
If info=0, eigenvalues are sorted in ascending order. |
Output |
z |
|
Local |
A matrix that stores eigenvectors. The global size is N*N, and the local size is LDD_z * LOCc(jz+n-1).
|
Output |
iz |
Integer |
Global |
Row indices of submatrix Z in the global matrix. |
Input |
jz |
Integer |
Global |
Column indices of submatrix Z in the global matrix. |
Input |
descz |
Integer array |
Local and global |
Descriptor of distributed matrix Z. |
Input |
work |
|
Local |
If info = 0, work(0) returns the optimal lwork size. |
Output |
lwork |
Integer |
Local/Global |
Size of the space required by the work array. |
Input |
rwork (only available for the complex type) |
|
Local |
Size of the space required by the matrix.
|
Output |
lrwork (only available for the complex type) |
Integer |
Local/Global |
Size of the space required by the rwork array. |
Input |
iwork |
Integer array |
Local |
If info=0, iwork(0) returns the optimal liwork size. |
Output |
liwork |
Integer |
Local/Global |
Dimension of the liwork array, liwork=7*n+8*npcol+2. |
Input |
info |
Integer |
Global |
|
Output |
Dependency
#include <kscalapack.h>
Example
int main(){
int izero=0;
int ione=1;
int myrank_mpi, nprocs_mpi;
MPI_Init( &argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
int n = 8; // (Global) Matrix size
int nprow = 2; // Number of row procs
int npcol = 2; // Number of column procs
int nb = 4; // (Global) Block size
char uplo='L'; // Matrix is lower triangular
char jobs='V'; // Block cyclic, Row major processor mapping
double tau[8];
char layout='R';
double w[8];
int iz;
int jz;
int descz;
int lwork = 160;
double work[lwork];
int liwork = 90;
int iwork[liwork];
if(argc > 1) {
n = atoi(argv[1]);
}
if(argc > 2) {
nb = atoi(argv[2]);
}
if(argc > 3) {
nprow = atoi(argv[3]);
}
if(argc > 4) {
npcol = atoi(argv[4]);
}
assert(nprow * npcol == nprocs_mpi);
// Initialize BLACS
int iam, nprocs;
int zero = 0;
int ictxt, myrow, mycol;
blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size
blacs_get_(&zero, &zero, &ictxt ); // -> Create context
blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid
blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col)
// Compute the size of the local matrices
int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A
int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A
int mpZ = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local Z
int nqZ = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local Z
ofstream f1;
string filename = to_string(myrank_mpi)+"A.dat";
f1.open(filename);
double *A;
A = (double *)calloc(mpA*nqA,sizeof(double));
double *Z;
Z = (double *)calloc(mpZ*nqZ,sizeof(double));
if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
int k = 0;
double globalA[n][n];
for(int j = 0; j < n; j++) {
for (int i = 0; i <= j; i++){
if (i == j) globalA[i][j]= 3 * n + (rand())%10;
else {
globalA[i][j] = n + (rand())%10;
globalA[j][i] = n + (rand())%10;
}
if (myrank_mpi == 0)
f1 <<i << " "<<j << " " << globalA[i][j]<<endl;
}
}
f1.close();
for (int j = 0; j < nqA; j++) { // local col
int l_j = j / nb; // which block
int x_j = j % nb; // where within that block
int J = (l_j * npcol + mycol) * nb + x_j; // global col
for (int i = 0; i <= j; i++) { // local row
int l_i = i / nb; // which block
int x_i = i % nb; // where within that block
int I = (l_i * nprow + myrow) * nb + x_i; // global row
assert(I < n);
assert(J < n);
A[k] = globalA[I][J];
// printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]);
k++;
}
}
int descA[9];
int info=0;
int ipiv[10] = {0};
int lddA = mpA > 1 ? mpA : 1;
descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info);
int descZ[9];
descinit_( descZ, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info);
if(info != 0) {
printf("Error in descinit, info = %d\n", info);
}
double MPIt1 = MPI_Wtime();
printf("[%dx%d] Starting \n", myrow, mycol);
pdsyevd_(&jobs, &uplo, &n, A, &ione, &ione, descA, w, Z, &ione, &ione, descZ, work, &lwork, iwork, &liwork, &info);
if (info != 0) {
printf("Error in caculate, info = %d\n", info);
}
double MPIt2 = MPI_Wtime();
filename = to_string(myrank_mpi)+"Z.dat";
f1.open(filename);
k = 0;
for (int j = 0; j < nqZ; j++) { // local col
int l_j = j / nb; // which block
int x_j = j % nb; // where within that block
int J = (l_j * npcol + mycol) * nb + x_j; // global col
for (int i = 0; i < mpZ; i++) { // local row
int l_i = i / nb; // which block
int x_i = i % nb; // where within that block
int I = (l_i * nprow + myrow) * nb + x_i; // global row
assert(I < n);
assert(J < n);
f1 <<I << " "<<J << " " << Z[k]<<endl;
k++;
}
}
f1.close();
for (int i = 0; i < n; i++) {
printf("%lf ",w[i]);
}
printf("\n[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1);
free(A);
blacs_gridexit_(&ictxt);
MPI_Finalize();
return 0;
}
/*
Origin A:
[[27. 0. 0. 0. 0. 0. 0. 0.]
[14. 29. 0. 0. 0. 0. 0. 0.]
[11. 14. 33. 0. 0. 0. 0. 0.]
[ 9. 15. 17. 30. 0. 0. 0. 0.]
[ 8. 10. 9. 15. 26. 0. 0. 0.]
[ 8. 11. 13. 10. 16. 31. 0. 0.]
[11. 9. 17. 9. 12. 16. 29. 0.]
[ 8. 14. 8. 11. 8. 9. 13. 31.]]
w:// eigenvalues
[-35.145752 -11.559735 -6.089042 -0.555364 16.500808 23.793150 25.601339 102.454598]
Z:// eigenvectors
[[ 3.808840e-01 -3.560630e-01 -1.508930e-01 -4.958660e-02 -2.479740e-01
5.474460e-01 3.507160e-01 -4.676850e-01]
[ 1.215670e-01 -9.875040e-02 3.955830e-01 3.017760e-02 3.132180e-01
1.838380e-01 -7.044540e-01 -4.357930e-01]
[-5.219660e-01 3.517350e-01 4.716980e-01 3.487300e-01 -2.010880e-01
2.340930e-01 2.884750e-01 -2.850760e-01]
[-1.710940e-01 4.669610e-01 -7.019050e-01 1.670770e-01 1.194220e-01
3.689920e-01 -2.620540e-01 -1.140160e-01]
[ 3.644890e-01 4.411160e-01 -5.683900e-02 -3.502880e-02 -3.815800e-01
-5.387890e-01 -4.586880e-02 -4.796940e-01]
[ 8.826880e-04 -3.503620e-02 -1.090290e-01 1.568230e-01 7.541030e-01
-2.801260e-01 4.334450e-01 -3.567550e-01]
[-5.430560e-01 -5.514490e-01 -3.000940e-01 1.117970e-01 -2.539910e-01
-3.196940e-01 -1.823870e-01 -3.137810e-01]
[-3.326610e-01 1.476200e-01 1.999970e-02 -8.993240e-01 8.318180e-02
6.770530e-02 7.489810e-02 -2.030950e-01]]
*/
If the warning "KML INFORM: GEQRF performance in eigensolver is suboptimal due values close to under/overflow" is displayed during the invoking of this interface, the performance may deteriorate. You are advised to add KML_FAST_EIGENSOLVER=0 to environment variables.