P?GEQRF
计算矩阵的QR分解,即。
接口定义
C Interface:
void psgeqrf_(const int *m, const int *n, float*a, const int *ia, const int *ja, const int *desca, float*tau, float*work, int *lwork, int *info)
void pdgeqrf_(const int *m, const int *n, double *a, const int *ia, const int *ja, const int *desca, double *tau, double *work, int *lwork, int *info)
void pcgeqrf_(const int *m, const int *n, float _Complex *a, const int *ia, const int *ja, const int *desca, float _Complex *tau, float _Complex *work, int *lwork, int *info)
void pzgeqrf_(const int *m, const int *n, double _Complex *a, const int *ia, const int *ja, const int *desca, double _Complex *tau, double _Complex *work, int *lwork, int *info)
Fortran Interface:
PSGEQRF(m, n, a, ia, ja, desca, tau, work, lwork, info)
PDGEQRF(m, n, a, ia, ja, desca, tau, work, lwork, info)
PCGEQRF(m, n, a, ia, ja, desca, tau, work, lwork, info)
PZGEQRF(m, n, a, ia, ja, desca, tau, work, lwork, info)
参数
参数 |
类型 |
范围 |
说明 |
输入/输出 |
---|---|---|---|---|
m |
整型 |
全局 |
要操作的行数,比如子矩阵的行数。 |
输入 |
n |
整型 |
全局 |
要操作的列数,比如子矩阵的列数。 |
输入 |
a |
|
本地 |
|
输入,输出 |
ia |
整型 |
全局 |
子矩阵在全局矩阵中的行索引。 |
输入 |
ja |
整型 |
全局 |
子矩阵在全局矩阵中的列索引。 |
输入 |
desca |
整型数组 |
本地,全局 |
分布式矩阵A的矩阵描述符。 |
输入 |
tau |
|
本地 |
初等反射的系数,长度为min(m,n)。 |
输出 |
work |
|
本地 |
临时存储空间,退出时返回最小和最佳的lwork。 |
输出 |
lwork |
整型 |
本地,全局 |
Work数组的维度。 |
输入 |
info |
整型 |
全局 |
执行结果:
|
输出 |
依赖
#include <kscalapack.h>
示例
int izero=0; int ione=1; int myrank_mpi, nprocs_mpi; MPI_Init( &argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); int n = 8; // (Global) Matrix size int nprow = 2; // Number of row procs int npcol = 2; // Number of column procs int nb = 4; // (Global) Block size char uplo='L'; // Matrix is lower triangular char layout='R'; // Block cyclic, Row major processor mapping double tau[8]; int lwork = 48; double work[48]; printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); if(argc > 1) { n = atoi(argv[1]); } if(argc > 2) { nb = atoi(argv[2]); } if(argc > 3) { nprow = atoi(argv[3]); } if(argc > 4) { npcol = atoi(argv[4]); } assert(nprow * npcol == nprocs_mpi); // Initialize BLACS int iam, nprocs; int zero = 0; int ictxt, myrow, mycol; blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size blacs_get_(&zero, &zero, &ictxt ); // -> Create context blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) // Compute the size of the local matrices int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A double *A; A = (double *)calloc(mpA*nqA,sizeof(double)) ; if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } int k = 0; for (int j = 0; j < nqA; j++) { // local col int l_j = j / nb; // which block int x_j = j % nb; // where within that block int J = (l_j * npcol + mycol) * nb + x_j; // global col for (int i = 0; i < mpA; i++) { // local row int l_i = i / nb; // which block int x_i = i % nb; // where within that block int I = (l_i * nprow + myrow) * nb + x_i; // global row assert(I < n); assert(J < n); if(I == J) { A[k] = i + j + 1.5 + (rand())%10; } else { A[k] = n + rand()% 10; } //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); k++; } } int descA[9]; int info=0; int ipiv[10] = {0}; int lddA = mpA > 1 ? mpA : 1; descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); if(info != 0) { printf("Error in descinit, info = %d\n", info); } double MPIt1 = MPI_Wtime(); printf("[%dx%d] Starting \n", myrow, mycol); pdgeqrf_(&n, &n, A, &ione, &ione, descA, tau, work, &lwork, &info); if (info != 0) { printf("Error in caculate, info = %d\n", info); } double MPIt2 = MPI_Wtime(); printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); free(A); blacs_gridexit_(&ictxt); MPI_Finalize(); return 0;
/* * Origin A: /* 4.50000 11.0000 17.0000 8.00000 11.0000 11.0000 17.0000 8.00000 14.0000 8.50000 9.00000 17.0000 14.0000 13.0000 9.00000 17.0000 15.0000 14.0000 7.50000 11.0000 15.0000 14.0000 10.0000 11.0000 13.0000 10.0000 15.0000 13.5000 13.0000 10.0000 15.0000 14.0000 11.0000 11.0000 17.0000 8.00000 4.50000 11.0000 17.0000 8.00000 14.0000 13.0000 9.00000 17.0000 14.0000 8.50000 9.00000 17.0000 15.0000 14.0000 10.0000 11.0000 15.0000 14.0000 7.50000 11.0000 13.0000 10.0000 15.0000 14.0000 13.0000 10.0000 15.0000 13.5000 */ // After PDGEQRF: /* -36.3490 -31.6790 -32.1329 -35.4205 -35.1867 -31.6790 -32.1329 -35.4205 0.342726 8.46706 11.1681 0.285094 4.34884 6.07544 11.1681 0.285094 0.367206 0.114565 13.7477 3.02971 -0.816048 1.94287 13.2930 3.02971 0.318245 0.245466 -0.182222 7.89794 5.36891 -0.658967 0.174397 7.86629 0.269284 0.0337660 -0.199487 0.530335 5.89453 2.63366 -0.221784 0.0288312 0.342726 0.111495 0.258678 -0.396869 0.304197 4.86159 0.325469 -0.0199092 0.367206 0.114565 0.265211 0.293751 -0.0482493 -0.0280641 3.47962 0.00528634 0.318245 0.245466 -0.182222 0.173244 -0.0665625 0.227001 -0.112593 -0.705509 */