Rate This Document
Findability
Accuracy
Completeness
Readability

P?GESV

Interface Definition

C interface:

void psgesv_(const int *n, const int *nrhs, float *a, const int *ia, const int *ja, const int *desca, int *ipiv, float *b, const int *ib, const int *jb, const int *descb, int *info);

void pdgesv_(const int *n, const int *nrhs, double *a, const int *ia, const int *ja, const int *desca, int *ipiv, double *b, const int *ib, const int *jb, const int *descb, int *info);

void pcgesv_(const int *n, const int *nrhs, float _Complex *a, const int *ia, const int *ja, const int *desca, int *ipiv, float _Complex *b, const int *ib, const int *jb, const int *descb, int *info);

void pzgesv_(const int *n, const int *nrhs, double _Complex *a, const int *ia, const int *ja, const int *desca, int *ipiv, double _Complex *b, const int *ib, const int *jb, const int *descb, int *info);

Fortran interface:

PSGESV(n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)

PDGESV(n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)

PCGESV(n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)

PZGESV(n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)

Parameters

Parameter

Type

Scope

Description

Input/Output

n

Integer

Global

Number of rows and columns in a matrix.

Input

nrhs

Integer

Global

Number of columns in the distributed submatrices sub(B) and X.

Input

a

  • A single-precision floating-point array for psgesv
  • A double-precision floating-point array for pdgesv
  • A complex single-precision array for pcgesv
  • A complex double-precision array for pzgesv

Local

  • Stores the local M*N part of distributed matrix A before this function is called.
  • Stores the local factorization results L and U after this function is called. The diagonal elements of L (all are 1) are not stored.

Input, output

ia

Integer

Global

Row indices of submatrix A in the global matrix.

Input

ja

Integer

Global

Column indices of submatrix A in the global matrix.

Input

desca

Integer array

Local and global

Descriptor of distributed matrix A.

Input

ipiv

Integer

Local

Contains the pivot and swap information.

Output

b

  • A single-precision floating-point array for psgesv
  • A double-precision floating-point array for pdgesv
  • A complex single-precision array for pcgesv
  • A complex double-precision array for pzgesv

Local

  • Stores RHS items before this function is called.
  • Stores the result after this function is called.

Input, output

ib

Integer

Global

Row indices of submatrix B in the global matrix.

Input

jb

Integer

Global

Column indices of submatrix B in the global matrix.

Input

descb

Integer array

Local and global

Descriptor of distributed matrix B.

Input

info

Integer

Global

  • 0: The execution is successful.
  • Smaller than 0: If the value of info is -i, the i-th parameter is invalid.
  • Greater than 0: An algorithm error occurs.

Output

Dependency

#include <kscalapack.h>

Example

    int izero=0;
    int ione=1;
    int myrank_mpi, nprocs_mpi;
    MPI_Init( &argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
 
    int n = 8;       // (Global) Matrix size
    int nprow = 2;   // Number of row procs
    int npcol = 2;   // Number of column procs
    int nb = 4;      // (Global) Block size
    char uplo='L';   // Matrix is lower triangular
    char layout='R'; // Block cyclic, Row major processor mapping
    int nrhs = 1;
 
    printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n");
 
    if(argc > 1) {
        n = atoi(argv[1]);
    }
    if(argc > 2) {
        nb = atoi(argv[2]);
    }
    if(argc > 3) {
        nprow = atoi(argv[3]);
    }
    if(argc > 4) {
        npcol = atoi(argv[4]);
    }
    assert(nprow * npcol == nprocs_mpi);
    // Initialize BLACS
    int iam, nprocs;
    int zero = 0;
    int ictxt, myrow, mycol;
    blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size
    blacs_get_(&zero, &zero, &ictxt ); // -> Create context
    blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid
    blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col)
 
    // Compute the size of the local matrices
    int mpA    = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A
    int nqA    = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A
    int mpB    = numroc_( &n, &nb, &myrow, &izero, &nprow );
    ofstream f1;
    string filename = to_string(myrank_mpi)+"Abegin.dat";
    f1.open(filename);
    double *A;
    A = (double *)calloc(mpA*nqA,sizeof(double)) ;
    if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
    int k = 0;
    for (int j = 0; j < nqA; j++) { // local col
        int l_j = j / nb; // which block
        int x_j = j % nb; // where within that block
        int J   = (l_j * npcol + mycol) * nb + x_j; // global col
        for (int i = 0; i < mpA; i++) { // local row
            int l_i = i / nb; // which block
            int x_i = i % nb; // where within that block
            int I   = (l_i * nprow + myrow) * nb + x_i; // global row
            assert(I < n);
            assert(J < n);
            if(I == J) {
                A[k] = 2*n + 1.5  +  (rand())%10;
            } else {
                A[k] = i + j + rand()% 10;
            }
            //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]);
            f1 <<I << " "<<J << " " << A[k]<<endl;
            k++;
        }
    }
    f1.close();
    
    //creat descriptor
    int descA[9];
    int info=0;
    int ipiv[10] = {0};
    int lddB = mpB > 1 ? mpB : 1;
    descinit_( descA,  &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddB, &info);
    if(info != 0) {
        printf("Error in descinit, info = %d\n", info);
    }
    
    filename = to_string(myrank_mpi)+"Bbegin.dat";
    f1.open(filename);
    double *B;
    B = (double *)calloc(mpA,sizeof(double)) ;
    if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
    k = 0;
    for (int j = 0; j < mpB; j++) { // local col
        int l_i = j / nb; // which block
        int x_i = j % nb; // where within that block
        int I   = (l_i * nprow + myrow) * nb + x_i; // global row
       B[j] = j + 1.5  +  (rand())%10;
        f1 <<I << " " << B[j]<<endl;
    }  
    f1.close();
    int descB[9];
    int nbrhs=1;    
    descinit_( descB,  &n, &nrhs, &nb, &nbrhs, &izero, &izero, &ictxt, &lddB, &info); // nbrhs need to be revised when nrhs!=1
        
 
 
    //run pdpotrf_ and time
    double MPIt1 = MPI_Wtime();
    printf("[%dx%d] Starting \n", myrow, mycol);
    pdgesv_(&n, &nrhs, A, &ione, &ione, descA, ipiv, B, &ione, &ione, descB, &info);
    if (info != 0) {
        printf("Error in caculate, info = %d\n", info);
    }
    filename = to_string(myrank_mpi)+"Bend.dat"; 
    f1.open(filename);
    for (int j = 0; j < mpB; j++) {
        int l_i = j / nb; // which block
        int x_i = j % nb; // where within that block
        int I   = (l_i * nprow + myrow) * nb + x_i; // global row
       f1 <<I<< " " << B[j]<<endl;
    }
    f1.close();
 
    double MPIt2 = MPI_Wtime();
    printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1);
    filename = to_string(myrank_mpi)+"end.dat";
    f1.open(filename);
    k = 0;
    for (int j = 0; j < nqA; j++) { // local col
        int l_j = j / nb; // which block
        int x_j = j % nb; // where within that block
        int J   = (l_j * npcol + mycol) * nb + x_j; // global col
        for (int i = 0; i < mpA; i++) { // local row
            int l_i = i / nb; // which block
            int x_i = i % nb; // where within that block
            int I   = (l_i * nprow + myrow) * nb + x_i; // global row
            assert(I < n);
            assert(J < n);
            f1 <<I << " "<<J << " " << A[k]<<endl;
            k++;
        }
    }
    f1.close();
    free(A);
    /*
    origin A:
[[20.500000  4.000000 11.000000  3.000000  3.000000  4.000000 11.000000 3.000000]
 [ 7.000000 22.500000  4.000000 13.000000  7.000000  7.000000  4.000000 13.000000]
 [ 9.000000  9.000000 19.500000  8.000000  9.000000  9.000000  6.000000 8.000000]
 [ 8.000000  6.000000 12.000000 23.500000  8.000000  6.000000 12.000000 12.000000]
 [ 3.000000  4.000000 11.000000  3.000000 20.500000  4.000000 11.000000 3.000000]
 [ 7.000000  7.000000  4.000000 13.000000  7.000000 22.500000  4.000000 13.000000]
 [ 9.000000  9.000000  6.000000  8.000000  9.000000  9.000000 19.500000 8.000000]
 [ 8.000000  6.000000 12.000000 12.000000  8.000000  6.000000 12.000000 23.500000]]
origin B:
[[ 1.500000]
 [ 8.500000]
 [ 5.500000]
 [10.500000]
 [ 1.500000]
 [ 8.500000]
 [ 5.500000]
 [10.500000]]
X:
[[-0.073846]
 [ 0.069101]
 [ 0.047280]
 [ 0.273735]
 [-0.073846]
 [ 0.069101]
 [ 0.047280]
 [ 0.273735]]
    */