Rate This Document
Findability
Accuracy
Completeness
Readability

P?GEQRF

Calculate the QR factorization of a matrix, that is, .

Interface Definition

C interface:

void psgeqrf_(const int *m, const int *n, float*a, const int *ia, const int *ja, const int *desca, float*tau, float*work, int *lwork, int *info)

void pdgeqrf_(const int *m, const int *n, double *a, const int *ia, const int *ja, const int *desca, double *tau, double *work, int *lwork, int *info)

void pcgeqrf_(const int *m, const int *n, float _Complex *a, const int *ia, const int *ja, const int *desca, float _Complex *tau, float _Complex *work, int *lwork, int *info)

void pzgeqrf_(const int *m, const int *n, double _Complex *a, const int *ia, const int *ja, const int *desca, double _Complex *tau, double _Complex *work, int *lwork, int *info)

Fortran interface:

PSGEQRF(m, n, a, ia, ja, desca, tau, work, lwork, info)

PDGEQRF(m, n, a, ia, ja, desca, tau, work, lwork, info)

PCGEQRF(m, n, a, ia, ja, desca, tau, work, lwork, info)

PZGEQRF(m, n, a, ia, ja, desca, tau, work, lwork, info)

Parameters

Parameter

Type

Scope

Description

Input/Output

m

Integer

Global

Number of rows to be operated, for example, the number of rows in a submatrix

Input

n

Integer

Global

Number of columns to be operated, for example, the number of columns in a submatrix

Input

a

  • A single-precision floating-point array for psgeqrf
  • A double-precision floating-point array for pdgeqrf
  • A single-precision complex number array for pcgeqrf
  • A double-precision complex number array for pzgeqrf

Local

  • Stores matrix A to be factorized before calling this function.
  • After this function is called, a matrix R with a size of min(m,n)*n (when m ≥ n, R is an upper triangular matrix) is stored on and above the diagonal. Elements below the diagonal and tau jointly represent an orthogonal matrix Q.

Input/Output

ia

Integer

Global

Row indices of the submatrix in the global matrix

Input

ja

Integer

Global

Column indices of the submatrix in the global matrix

Input

desca

Integer array

Local and global

Descriptor of distributed matrix A

Input

tau

  • A single-precision floating-point array for psgeqrf
  • A double-precision floating-point array for pdgeqrf
  • A single-precision complex number array for pcgeqrf
  • A double-precision complex number array for pzgeqrf

Local

Elementary reflection coefficient. Its length is min(m, n).

Output

work

  • A single-precision floating-point array for psgeqrf
  • A double-precision floating-point array for pdgeqrf
  • A single-precision complex number array for pcgeqrf
  • A double-precision complex number array for pzgeqrf

Local

Temporary storage space. When you exit, the minimum and optimal lwork values are returned.

Output

lwork

Integer

Local and global

Dimension of the work array

Input

info

Integer

Global

Execution result:

  • 0: The execution is successful.
  • Smaller than 0: The value of the -info-th parameter is invalid.

Output

Dependencies

#include <kscalapack.h>

Examples

    int izero=0;
    int ione=1;
    int myrank_mpi, nprocs_mpi;
    MPI_Init( &argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
 
    int n = 8;       // (Global) Matrix size
    int nprow = 2;   // Number of row procs
    int npcol = 2;   // Number of column procs
    int nb = 4;      // (Global) Block size
    char uplo='L';   // Matrix is lower triangular
    char layout='R'; // Block cyclic, Row major processor mapping
    double tau[8];
    int lwork = 48;
    double work[48];
    printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n");
 
    if(argc > 1) {
        n = atoi(argv[1]);
    }
    if(argc > 2) {
        nb = atoi(argv[2]);
    }
    if(argc > 3) {
        nprow = atoi(argv[3]);
    }
    if(argc > 4) {
        npcol = atoi(argv[4]);
    }
 
    assert(nprow * npcol == nprocs_mpi);
 
    // Initialize BLACS
    int iam, nprocs;
    int zero = 0;
    int ictxt, myrow, mycol;
    blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size
    blacs_get_(&zero, &zero, &ictxt ); // -> Create context
    blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid
    blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col)
 
    // Compute the size of the local matrices
    int mpA    = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A
    int nqA    = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A

    double *A;
    A = (double *)calloc(mpA*nqA,sizeof(double)) ;
    if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
    int k = 0;
    for (int j = 0; j < nqA; j++) { // local col
        int l_j = j / nb; // which block
        int x_j = j % nb; // where within that block
        int J   = (l_j * npcol + mycol) * nb + x_j; // global col
        for (int i = 0; i < mpA; i++) { // local row
            int l_i = i / nb; // which block
            int x_i = i % nb; // where within that block
            int I   = (l_i * nprow + myrow) * nb + x_i; // global row
            assert(I < n);
            assert(J < n);
            if(I == J) {
                A[k] = i + j + 1.5  +  (rand())%10;
            } else {
                A[k] = n + rand()% 10;
            }
            //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]);
            k++;
        }
    }
    int descA[9];
    int info=0;
    int ipiv[10] = {0};
    int lddA = mpA > 1 ? mpA : 1;
    descinit_( descA,  &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info);
    if(info != 0) {
        printf("Error in descinit, info = %d\n", info);
    }
    double MPIt1 = MPI_Wtime();
    printf("[%dx%d] Starting \n", myrow, mycol);
    pdgeqrf_(&n, &n, A,  &ione, &ione, descA, tau, work, &lwork, &info);
    if (info != 0) {
        printf("Error in caculate, info = %d\n", info);
    }
    double MPIt2 = MPI_Wtime();
    printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1);
    free(A);
    blacs_gridexit_(&ictxt);
    MPI_Finalize();
    return 0;
    /* 
     * Origin A: 
    /*  
    4.50000 11.0000 17.0000 8.00000 11.0000 11.0000 17.0000 8.00000
    14.0000 8.50000 9.00000 17.0000 14.0000 13.0000 9.00000 17.0000
    15.0000 14.0000 7.50000 11.0000 15.0000 14.0000 10.0000 11.0000
    13.0000 10.0000 15.0000 13.5000 13.0000 10.0000 15.0000 14.0000
    11.0000 11.0000 17.0000 8.00000 4.50000 11.0000 17.0000 8.00000
    14.0000 13.0000 9.00000 17.0000 14.0000 8.50000 9.00000 17.0000
    15.0000 14.0000 10.0000 11.0000 15.0000 14.0000 7.50000 11.0000
    13.0000 10.0000 15.0000 14.0000 13.0000 10.0000 15.0000 13.5000
    */
        // After PDGEQRF: 
    /*
    -36.3490 -31.6790 -32.1329 -35.4205 -35.1867 -31.6790 -32.1329 -35.4205
    0.342726 8.46706 11.1681 0.285094 4.34884 6.07544 11.1681 0.285094
    0.367206 0.114565 13.7477 3.02971 -0.816048 1.94287 13.2930 3.02971
    0.318245 0.245466 -0.182222 7.89794 5.36891 -0.658967 0.174397 7.86629
    0.269284 0.0337660 -0.199487 0.530335 5.89453 2.63366 -0.221784 0.0288312
    0.342726 0.111495 0.258678 -0.396869 0.304197 4.86159 0.325469 -0.0199092
    0.367206 0.114565 0.265211 0.293751 -0.0482493 -0.0280641 3.47962 0.00528634
    0.318245 0.245466 -0.182222 0.173244 -0.0665625 0.227001 -0.112593 -0.705509
    */