Rate This Document
Findability
Accuracy
Completeness
Readability

P?GETRF

Computes the LU factorization of a general distributed m-by-n matrix sub(A) using partial pivoting with row interchanges.

The operation is defined as A = PLU, where P is a permutation matrix, L is a lower triangular matrix (lower trapezoidal if m > n) with unit diagonal elements, and U is an upper triangular matrix (upper trapezoidal if m < n).

Interface Definition

C interface:

void psgetrf_(const int *m, const int *n, float *a, const int *ia, const int *ja, const int *desca, int *ipiv, int *info)

void pdgetrf_(const int *m, const int *n, double *a, const int *ia, const int *ja, const int *desca, int *ipiv, int *info)

void pcgetrf_(const int *m, const int *n, float _Complex *a, const int *ia, const int *ja, const int *desca, int *ipiv, int *info)

void pzgetrf_(const int *m, const int *n, double _Complex *a, const int *ia, const int *ja, const int *desca, int *ipiv, int *info)

Fortran interface:

PSGETRF(m, n, a, ia, ja, desca, ipiv, info)

PDGETRF(m, n, a, ia, ja, desca, ipiv, info)

PCGETRF(m, n, a, ia, ja, desca, ipiv, info)

PZGETRF(m, n, a, ia, ja, desca, ipiv, info)

Parameters

Parameter

Type

Scope

Description

Input/Output

m

Integer

Global

Number of rows of the distributed submatrix sub(A) to be operated on.

Input

n

Integer

Global

Number of columns of the distributed submatrix sub(A) to be operated on.

Input

a

  • A single-precision floating-point array for psgetrf
  • A double-precision floating-point array for pdgetrf
  • A single-precision complex array for pcgetrf
  • A double-precision complex array for pzgetrf

Local

  • Stores the local M*N part of distributed matrix A before this function is called.
  • Stores the local factorization results L and U after this function is called. The diagonal elements of L (all are 1) are not stored.

Input, output

ia

Integer

Global

Row index in the global matrix A indicating the first row of the submatrix sub(A).

Input

ja

Integer

Global

Column index in the global matrix A indicating the first column of the submatrix sub(A).

Input

desca

Integer array

Local and global

Descriptor of distributed matrix A.

Input

ipiv

Integer array

Local

Array of dimension LOCr(M_A) + MB_A. Contains the pivoting information: local row i of the submatrix was interchanged with global row ipiv[i].

Output

info

Integer

Global

Execution result:

  • 0: The execution is successful.
  • Smaller than 0: If info = -i, the i-th parameter has an invalid value.
  • Greater than 0: If info = i, the i-th diagonal element of matrix U is exactly 0. The factorization has been completed, but U is singular; division by zero will occur if U is subsequently used to solve a system of linear equations.

Output

Dependency

#include <kscalapack.h>

Example

C interface:
    // Example: LU factorization (pdgetrf_) of an n x n matrix distributed
    // block-cyclically over a nprow x npcol process grid.
    // Run with nprow*npcol MPI ranks, e.g.: mpirun -np 4 ./test 8 4 2 2
    int izero = 0;    // constant 0 for ScaLAPACK/BLACS calls
    int ione = 1;     // constant 1: submatrix starts at global element (1,1)
    int myrank_mpi, nprocs_mpi;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
 
    int n = 8;       // (Global) Matrix size
    int nprow = 2;   // Number of row procs
    int npcol = 2;   // Number of column procs
    int nb = 4;      // (Global) Block size
    char layout='R'; // Block cyclic, Row major processor mapping
    
    printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n");
 
    if(argc > 1) {
        n = atoi(argv[1]);
    }
    if(argc > 2) {
        nb = atoi(argv[2]);
    }
    if(argc > 3) {
        nprow = atoi(argv[3]);
    }
    if(argc > 4) {
        npcol = atoi(argv[4]);
    }
    // The process grid must exactly cover the MPI world.
    assert(nprow * npcol == nprocs_mpi);

    // Initialize BLACS
    int iam, nprocs;
    int zero = 0;
    int ictxt, myrow, mycol;
    blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size
    blacs_get_(&zero, &zero, &ictxt ); // -> Create context
    blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid
    blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col)
 
    // Compute the size of the local matrices
    int mpA    = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A
    int nqA    = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A
   
    // Allocate and fill the local part of the distributed matrix A.
    double *A = (double *)calloc(mpA*nqA, sizeof(double));
    if (A == NULL) {
        printf("Error of memory allocation A on proc %dx%d\n", myrow, mycol);
        exit(1); // failure, not success
    }
    // ipiv must hold at least LOCr(M_A) + MB_A entries (ScaLAPACK
    // requirement); a fixed-size array would overflow for larger n.
    int *ipiv = (int *)calloc(mpA + nb, sizeof(int));
    if (ipiv == NULL) {
        printf("Error of memory allocation ipiv on proc %dx%d\n", myrow, mycol);
        exit(1);
    }
    // Map each local (i,j) entry back to its global (I,J) index so every
    // rank fills its own piece of the same deterministic global matrix.
    int k = 0;
    for (int j = 0; j < nqA; j++) { // local col
        int l_j = j / nb; // which block
        int x_j = j % nb; // where within that block
        int J   = (l_j * npcol + mycol) * nb + x_j; // global col
        for (int i = 0; i < mpA; i++) { // local row
            int l_i = i / nb; // which block
            int x_i = i % nb; // where within that block
            int I   = (l_i * nprow + myrow) * nb + x_i; // global row
            assert(I < n);
            assert(J < n);
            if(I == J) {
                A[k] = i + j + (rand())%10;
            } else {
                A[k] = i + j + 5  + (rand())%10;
            }
            //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]);
            k++;
        }
    }
    // Create the array descriptor for the distributed matrix A.
    int descA[9];
    int info=0;
    int lddA = mpA > 1 ? mpA : 1; // local leading dimension, at least 1
    descinit_( descA,  &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info);
    if(info != 0) {
        printf("Error in descinit, info = %d\n", info);
    }
    
    // Run pdgetrf_ and time it.
    double MPIt1 = MPI_Wtime();
    printf("[%dx%d] Starting \n", myrow, mycol);
    pdgetrf_(&n, &n, A, &ione, &ione, descA, ipiv, &info);
    if (info != 0) {
        printf("Error in pdgetrf, info = %d\n", info);
    }
    double MPIt2 = MPI_Wtime();
    printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1);
    free(ipiv);
    free(A);
 
    // Exit and finalize.
    blacs_gridexit_(&ictxt);
    MPI_Finalize();
    return 0;
   
    /* 
     * Origin A: 
     *  
    4.00000 19.0000 25.0000 16.0000 19.0000 19.0000 25.0000 16.0000
    22.0000 8.00000 17.0000 25.0000 22.0000 21.0000 17.0000 25.0000
    23.0000 22.0000 7.00000 19.0000 23.0000 22.0000 18.0000 19.0000
    21.0000 18.0000 23.0000 13.0000 21.0000 18.0000 23.0000 22.0000
    19.0000 19.0000 25.0000 16.0000 4.00000 19.0000 25.0000 16.0000
    22.0000 21.0000 17.0000 25.0000 22.0000 8.00000 17.0000 25.0000
    23.0000 22.0000 18.0000 19.0000 23.0000 22.0000 7.00000 19.0000
    21.0000 18.0000 23.0000 22.0000 21.0000 18.0000 23.0000 13.0000
    */

    // After PDGETRF: 
    /*
    23.0000 22.0000 7.00000 19.0000 23.0000 22.0000 18.0000 19.0000
    0.173913 15.1739 23.7826 12.6957 15.0000 15.1739 21.8696 12.6957
    0.956522 -0.859599 30.7479 17.7393 12.8940 13.0000 18.5817 17.7393
    0.913043 -0.137536 0.646538 -14.0708 -6.27341 -8.40499 -2.44069 -5.07082
    0.826087 0.0544413 0.582891 0.762348 -18.5499 -1.17005 -0.0305972 -6.86113
    0.956522 -0.00286533 0.337340 -0.0624197 0.253277 -17.6137 -6.56767 2.29955
    1.00000 0.00000 0.357749 0.451018 0.0961398 0.0424351 -16.2651 -3.49711
    0.913043 -0.137536 0.646538 0.360379 0.216315 0.290848 -0.0218688 -11.5045
    */