开发者
我要评分
获取效率
正确性
完整性
易理解
在线提单
论坛求助

直接求解器(DIRECT SOLVER)迁移

可替换性

MKL中包含单机版直接求解器PARDISO(Parallel Direct Sparse Solver Interface)和集群版直接求解器PARDISO-Cluster(Parallel Direct Sparse Solver for Cluster Interface)。前者为shared-memory版本的并行稀疏求解器,而后者则和SCADSS一样,支持MPI和OPENMP技术的混合模式。直接求解器包含Analyze、Factorize和Solve三个核心阶段,每个阶段都可以单独调用。这一点与用户的实际问题相关,比如用户在求解部分包含时间项的偏微分方程的时候,稀疏矩阵是保持不变的,因此Analyze和Factorize只需要进行一次,重复调用Solve阶段即可。再比如,稀疏矩阵的数值发生了变化,但是矩阵的非零元位置未发生变化,那么Analyze只需要进行一次,重复调用Solve阶段即可。在这里给出MKL中的direct solver与SCADSS的接口映射关系。

表1 PARDISO-Cluster与SCADSS的替换映射关系

PARDISO-Cluster接口

SCADSS接口

mtype=2;或者mtype = 4;

phase = 11;

cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error);

KmlScadssSpdAnalyzeDI(&handle);或者

KmlScadssHpdAnalyzeZI(&handle);

mtype=2;或者mtype = 4;

phase = 22;

cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error);

KmlScadssSpdFactorizeDI(&handle);或者

KmlScadssHpdFactorizeZI(&handle);

mtype=2;或者mtype = 4;

phase = 33;

cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error);

KmlScadssSpdSolveDI(&handle);或者

KmlScadssHpdSolveZI(&handle);

mtype=2;或者mtype = 4;

phase = -1;

cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error);

KmlScadssSpdCleanDI(&handle);或者

KmlScadssHpdCleanZI(&handle);

  1. MKL中direct solver通过mtype参数确定输入稀疏矩阵的类型,mtype=2表示SPD矩阵,mtype=4则表示HPD矩阵。
  2. phase参数控制MKL direct solver执行的操作,除上表中列举的取值外,还可以取别的值。例如取12时表示执行Analyze,Factorize两个过程。
表2 PARDISO与SCADSS的替换映射关系

PARDISO接口

SCADSS接口

mtype=2;或者mtype = 4;

phase = 11;

pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);

KmlScadssSpdAnalyzeDI(&handle);或者

KmlScadssHpdAnalyzeZI(&handle);

mtype=2;或者mtype = 4;

phase = 22;

pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);

KmlScadssSpdFactorizeDI(&handle);或者

KmlScadssHpdFactorizeZI(&handle);

mtype=2;或者mtype = 4;

phase = 33;

pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);

KmlScadssSpdSolveDI(&handle);或者

KmlScadssHpdSolveZI(&handle);

mtype=2;或者mtype = 4;

phase = -1;

pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);

KmlScadssSpdCleanDI(&handle);或者

KmlScadssHpdCleanZI(&handle);

C语言迁移步骤

  1. 迁移前(调用PARDISO)。
     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    int n = 8;
    int nnz = 17;
    int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17};
    int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7};
    double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0};
    
    int nrhs = 1;     // Number of right hand sides.
    double b[8]={4.0, 9.0, 7.0, 6.0, 9.0, 3.0, 2.0, 5.0};
    double x[8];
    
    int mtype = 2;
    void *pt[64];
    int iparm[64];
    int maxfct, mnum, phase, error, msglvl, perm;
    pardisoinit(pt, &mtype, iparm);
    iparm[34] = 1;
    maxfct = 1;       // Maximum number of numerical factorizations.
    mnum = 1;         // Which factorization to use.
    msglvl = 0;       // Print statistical information in file.
    error = 0;        // Initialize error flag.
    
    phase = 11;
    pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Analyze
    printf ("Reordering completed ... ");
    
    phase = 22;
    pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Factorize
    printf ("\nFactorization completed ... ");
    
    phase = 33;
    pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Solve
    printf("\nSolve completed ... ");
    
    printf("\nThe solution of the system is: ");
    for(int i = 0; i < n; i++){
        printf ("\n x [%d] = % f", i, x[i]);
    }
    printf("\n");
    
    phase = -1;      // Release internal memory.
    pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);
    
    迁移前(调用PARDISO-Cluster)
     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    int n = 8;
    int nnz = 17;
    int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17};
    int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7};
    double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0};
    
    /* RHS and solution vectors. */
    int nrhs = 1;     /* Number of right hand sides. */
    double b[8]={4.0, 9.0, 7.0, 6.0, 9.0, 3.0, 2.0, 5.0};
    double x[8];
    
    int mtype = 2;
    void *pt[64];
    int iparm[64];
    int maxfct, mnum, phase, error, msglvl, perm;
    for (int i = 0; i < 64; i++) {
        iparm[i] = 0;
        pt[i] = 0;
    }
    iparm[0] = 1;
    iparm[1] = 2;
    iparm[17] = -1;
    iparm[18] = -1;
    iparm[34] = 1;
    maxfct = 1;       // Maximum number of numerical factorizations.
    mnum = 1;         // Which factorization to use.
    msglvl = 0;       // Print statistical information in file.
    error = 0;        // Initialize error flag.
    
    MPI_Init(NULL, NULL);
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Fint comm = MPI_Comm_c2f(MPI_COMM_WORLD);
    
    phase = 11;
    cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Analyze
    
    phase = 22;
    cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Factorize
    
    phase = 33;
    cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Solve
    
    if (rank == 0){
        printf("The solution of the system is: ");
        for(int i = 0; i < n; i++){
            printf ("\n x [%d] = %f", i, x[i]);
        }
        printf("\n");
    }
    
    phase = -1;      // Release internal memory.
    cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error);
    
    MPI_Finalize();
    
    迁移后
      1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
     80
     81
     82
     83
     84
     85
     86
     87
     88
     89
     90
     91
     92
     93
     94
     95
     96
     97
     98
     99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
    170
    171
    172
    173
    174
    175
    176
    177
    178
    179
    180
    181
    182
    183
    184
    185
    186
    187
    188
    189
    190
    191
    192
    193
    194
    195
    196
    197
    198
    199
    200
    201
    202
    203
    204
    205
    206
    207
    208
    209
    int n = 8;
    int nrhs = 1;
    int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17};
    int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7};
    double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0};
    
    MPI_Init(NULL, NULL);
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
    KmlSolverMatrixStore storeA;
    storeA.indexType = KMLSS_INDEX_INT32;
    storeA.valueType = KMLSS_VALUE_FP64;
    storeA.format = KMLSS_MATRIX_STORE_CSR;
    if (rank == 0) {
        storeA.nRow = n;
        storeA.nCol = n;
        storeA.csr.rowOffset = ia;
        storeA.csr.colIndex = ja;
        storeA.csr.value = a;
    } else {
        storeA.nRow = 0;
        storeA.nCol = 0;
        storeA.csr.rowOffset = nullptr;
        storeA.csr.colIndex = nullptr;
        storeA.csr.value = nullptr;
    }
    
    KmlSolverMatrixOption optA;
    optA.fieldMask = KMLSS_MATRIX_OPTION_TYPE;
    optA.type = KMLSS_MATRIX_GEN;
    
    KmlScasolverMatrixOption scaOptA;
    if (rank == 0) {
        scaOptA.fieldMask = KMLSS_MATRIX_OPTIONS_GLOBAL_NROWS |
                            KMLSS_MATRIX_OPTIONS_GLOBAL_NCOLS |
                            KMLSS_MATRIX_OPTIONS_PARTITION;
        scaOptA.partition.type = KMLSS_MATRIX_PARTITION_ROW;
        scaOptA.globalNumRows = n;
        scaOptA.globalNumCols = n;
        scaOptA.partition.localBegin = 0;
    } else {
        scaOptA.fieldMask = 0;
    }
    
    KmlScasolverMatrix *A;
    ierr = KmlScasolverMatrixCreate(&A, &storeA, &optA, &scaOptA);
    if (ierr != KMLSS_NO_ERROR) {
        printf("ERROR when create A: %d\n", ierr);
        return 1;
    }
    
    // Create vector b
    double b[8] = {3.0, 1.0, 7.0, -4.0, 5.0, -2.0, 10.0, 10.0};
    KmlSolverMatrixStore storeB;
    storeB.indexType = KMLSS_INDEX_INT32;
    storeB.valueType = KMLSS_VALUE_FP64;
    storeB.format = KMLSS_MATRIX_STORE_DENSE_COL_MAJOR;
    if (rank == 0) {
        storeB.nRow = n;
        storeB.nCol = nrhs;
        storeB.dense.value = b;
        storeB.dense.ld = n;
    } else {
        storeB.nRow = 0;
        storeB.nCol = 0;
        storeB.dense.value = nullptr;
        storeB.dense.ld = 0;
    }
    
    KmlSolverMatrixOption optB;
    optB.fieldMask = KMLSS_MATRIX_OPTION_TYPE;
    optB.type = KMLSS_MATRIX_GEN;
    
    KmlScasolverMatrixOption scaOptB;
    if (rank == 0) {
        scaOptB.fieldMask = KMLSS_MATRIX_OPTIONS_GLOBAL_NROWS |
                            KMLSS_MATRIX_OPTIONS_GLOBAL_NCOLS |
                            KMLSS_MATRIX_OPTIONS_PARTITION;
        scaOptB.partition.type = KMLSS_MATRIX_PARTITION_ROW;
        scaOptB.partition.localBegin = 0;
        scaOptB.globalNumRows = n;
        scaOptB.globalNumCols = nrhs;
    } else {
        scaOptB.fieldMask = 0;
    }
    
    KmlScasolverMatrix *B;
    ierr = KmlScasolverMatrixCreate(&B, &storeB, &optB, &scaOptB);
    if (ierr != KMLSS_NO_ERROR) {
        printf("ERROR when create b: %d\n", ierr);
        return 1;
    }
    
    // Create vector x
    double x[8] = {0};
    KmlSolverMatrixStore storeX;
    storeX.indexType = KMLSS_INDEX_INT32;
    storeX.valueType = KMLSS_VALUE_FP64;
    storeX.format = KMLSS_MATRIX_STORE_DENSE_COL_MAJOR;
    if (rank == 0) {
        storeX.nRow = n;
        storeX.nCol = nrhs;
        storeX.dense.value = x;
        storeX.dense.ld = n;
    } else {
        storeX.nRow = 0;
        storeX.nCol = 0;
        storeX.dense.value = nullptr;
        storeX.dense.ld = 0;
    }
    
    KmlSolverMatrixOption optX;
    optX.fieldMask = KMLSS_MATRIX_OPTION_TYPE;
    optX.type = KMLSS_MATRIX_GEN;
    
    KmlScasolverMatrixOption scaOptX;
    if (rank == 0) {
        scaOptX.fieldMask = KMLSS_MATRIX_OPTIONS_GLOBAL_NROWS |
                            KMLSS_MATRIX_OPTIONS_GLOBAL_NCOLS |
                            KMLSS_MATRIX_OPTIONS_PARTITION;
        scaOptX.partition.type = KMLSS_MATRIX_PARTITION_ROW;
        scaOptX.partition.localBegin = 0;
        scaOptX.globalNumRows = n;
        scaOptX.globalNumCols = nrhs;
    } else {
        scaOptX.fieldMask = 0;
    }
    
    KmlScasolverMatrix *X;
    ierr = KmlScasolverMatrixCreate(&X, &storeX, &optX, &scaOptX);
    if (ierr != KMLSS_NO_ERROR) {
        printf("ERROR when create x: %d\n", ierr);
        return 1;
    }
    
    // Init solver
    KmlDssInitOption opt;
    opt.fieldMask = KMLDSS_INIT_OPTION_BWR_MODE | KMLDSS_INIT_OPTION_NTHREADS;
    opt.bwrMode = KMLDSS_BWR_OFF;
    opt.nThreads = 32;
    
    KmlScadssInitOption scaOpt;
    scaOpt.fieldMask = KMLSCADSS_OPTIONS_COMM;
    scaOpt.comm = comm;
    
    KmlScadssSolver *solver;
    ierr = KmlScadssInit(&solver, &opt, &scaOpt);
    if (ierr != KMLSS_NO_ERROR) {
        printf("ERROR in KmlDssInit: %d\n", ierr);
        return ierr;
    }
    
    // Analyze
    KmlDssAnalyzeOption optAnalyze;
    optAnalyze.fieldMask = KMLDSS_ANALYZE_OPTION_MATCHING_TYPE | KMLDSS_ANALYZE_OPTION_RDR_TYPE |
                               KMLDSS_ANALYZE_OPTION_NTHREADS_RDR;
    optAnalyze.matchingType = KMLDSS_MATCHING_OFF;
    optAnalyze.rdrType = KMLDSS_RDR_KRDR;
    optAnalyze.nThreadsRdr = 1;
    
    KmlScadssAnalyzeOption scaOptAnalyze;
    scaOptAnalyze.fieldMask = 0;
    
    ierr = KmlScadssAnalyze(solver, A, &optAnalyze, &scaOptAnalyze);
    if (ierr != KMLSS_NO_ERROR) {
        printf("ERROR in KmlDssAnalyze: %d\n", ierr);
        return ierr;
    }
    
    // Factorize
    KmlDssFactorizeOption optFact;
    optFact.fieldMask = KMLDSS_FACTORIZE_OPTION_PERTURBATION_THRESHOLD;
    optFact.perturbationThreshold = 1e-8;
    
    KmlScadssFactorizeOption scaOptFact;
    scaOptFact.fieldMask = 0;
    
    ierr = KmlScadssFactorize(solver, A, &optFact, &scaOptFact);
    if (ierr != KMLSS_NO_ERROR) {
        printf("ERROR in KmlDssFactorize: %d\n", ierr);
        return ierr;
    }
    
    // Solve
    KmlDssSolveOption optSolve;
    optSolve.fieldMask = KMLDSS_SOLVE_OPTION_SOLVE_STAGE | KMLDSS_SOLVE_OPTION_REFINE_METHOD;
    optSolve.stage = KMLDSS_SOLVE_ALL;
    optSolve.refineMethod = KMLDSS_REFINE_OFF;
    
    KmlScadssSolveOption scaOptSolve;
    scaOptSolve.fieldMask = 0;
    
    ierr = KmlScadssSolve(solver, B, X, &optSolve, &scaOptSolve);
    if (ierr != KMLSS_NO_ERROR) {
        printf("ERROR in KmlDssSolve: %d\n", ierr);
        return ierr;
    }
    
    // Output result x
    if (rank == 0) {
        printf("Result of first factorize and solve:\n");
        for (int i = 0; i < n; i++) {
            printf("%lf ", x[i]);
        }
        printf("\n");
    }
    MPI_Finalize();
    
  2. 头文件

    迁移前(PARDISO)

    #include "mkl_types.h"

    #include "mkl_cluster_sparse_solver.h"

    迁移前(PARDISO-Cluster)

    #include "mkl_pardiso.h"

    #include "mkl_types.h"

    迁移后

    #include "kml_scadss.h"

  3. 编译链接库。

    替换MKL相关的链接选项, 具体选项请参见《Kunpeng HPCKit 26.0.RC1 安装指南》安装KML。