直接求解器(DIRECT SOLVER)迁移
可替换性
MKL中包含单机版直接求解器PARDISO(Parallel Direct Sparse Solver Interface)和集群版直接求解器PARDISO-Cluster(Parallel Direct Sparse Solver for Cluster Interface)。前者为shared-memory版本的并行稀疏求解器,而后者则和SCADSS一样,支持MPI和OPENMP技术的混合模式。直接求解器包含Analyze、Factorize和Solve三个核心阶段,每个阶段都可以单独调用。这一点与用户的实际问题相关,比如用户在求解部分包含时间项的偏微分方程的时候,稀疏矩阵是保持不变的,因此Analyze和Factorize只需要进行一次,重复调用Solve阶段即可。再比如,稀疏矩阵的数值发生了变化,但是矩阵的非零元位置未发生变化,那么Analyze只需要进行一次,重复调用Solve阶段即可。在这里给出MKL中的direct solver与SCADSS的接口映射关系。
PARDISO-Cluster接口 |
SCADSS接口 |
|---|---|
mtype=2;或者mtype = 4; phase = 11; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); |
KmlScadssSpdAnalyzeDI(&handle);或者 KmlScadssHpdAnalyzeZI(&handle); |
mtype=2;或者mtype = 4; phase = 22; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); |
KmlScadssSpdFactorizeDI(&handle);或者 KmlScadssHpdFactorizeZI(&handle); |
mtype=2;或者mtype = 4; phase = 33; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); |
KmlScadssSpdSolveDI(&handle);或者 KmlScadssHpdSolveZI(&handle); |
mtype=2;或者mtype = 4; phase = -1; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); |
KmlScadssSpdCleanDI(&handle);或者 KmlScadssHpdCleanZI(&handle); |
- MKL中direct solver通过mtype参数确定输入稀疏矩阵的类型,mtype=2表示SPD矩阵,mtype=4则表示HPD矩阵。
- phase参数控制MKL direct solver执行的操作,除上表中列举的取值外,还可以取别的值。例如取12时表示执行Analyze,Factorize两个过程。
PARDISO接口 |
SCADSS接口 |
|---|---|
mtype=2;或者mtype = 4; phase = 11; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); |
KmlScadssSpdAnalyzeDI(&handle);或者 KmlScadssHpdAnalyzeZI(&handle); |
mtype=2;或者mtype = 4; phase = 22; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); |
KmlScadssSpdFactorizeDI(&handle);或者 KmlScadssHpdFactorizeZI(&handle); |
mtype=2;或者mtype = 4; phase = 33; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); |
KmlScadssSpdSolveDI(&handle);或者 KmlScadssHpdSolveZI(&handle); |
mtype=2;或者mtype = 4; phase = -1; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); |
KmlScadssSpdCleanDI(&handle);或者 KmlScadssHpdCleanZI(&handle); |
C语言迁移步骤
- 迁移前(调用PARDISO)。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
int n = 8; int nnz = 17; int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17}; int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7}; double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0}; int nrhs = 1; // Number of right hand sides. double b[8]={4.0, 9.0, 7.0, 6.0, 9.0, 3.0, 2.0, 5.0}; double x[8]; int mtype = 2; void *pt[64]; int iparm[64]; int maxfct, mnum, phase, error, msglvl, perm; pardisoinit(pt, &mtype, iparm); iparm[34] = 1; maxfct = 1; // Maximum number of numerical factorizations. mnum = 1; // Which factorization to use. msglvl = 0; // Print statistical information in file. error = 0; // Initialize error flag. phase = 11; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Analyze printf ("Reordering completed ... "); phase = 22; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Factorize printf ("\nFactorization completed ... "); phase = 33; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Solve printf("\nSolve completed ... "); printf("\nThe solution of the system is: "); for(int i = 0; i < n; i++){ printf ("\n x [%d] = % f", i, x[i]); } printf("\n"); phase = -1; // Release internal memory. pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);
迁移前(调用PARDISO-Cluster)。1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
int n = 8; int nnz = 17; int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17}; int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7}; double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0}; /* RHS and solution vectors. */ int nrhs = 1; /* Number of right hand sides. */ double b[8]={4.0, 9.0, 7.0, 6.0, 9.0, 3.0, 2.0, 5.0}; double x[8]; int mtype = 2; void *pt[64]; int iparm[64]; int maxfct, mnum, phase, error, msglvl, perm; for (int i = 0; i < 64; i++) { iparm[i] = 0; pt[i] = 0; } iparm[0] = 1; iparm[1] = 2; iparm[17] = -1; iparm[18] = -1; iparm[34] = 1; maxfct = 1; // Maximum number of numerical factorizations. mnum = 1; // Which factorization to use. msglvl = 0; // Print statistical information in file. error = 0; // Initialize error flag. MPI_Init(NULL, NULL); int size, rank; MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Fint comm = MPI_Comm_c2f(MPI_COMM_WORLD); phase = 11; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Analyze phase = 22; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Factorize phase = 33; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Solve if (rank == 0){ printf("The solution of the system is: "); for(int i = 0; i < n; i++){ printf ("\n x [%d] = %f", i, x[i]); } printf("\n"); } phase = -1; // Release internal memory. cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); MPI_Finalize();
迁移后:1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
int n = 8; int nrhs = 1; int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17}; int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7}; double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0}; MPI_Init(NULL, NULL); int size, rank; MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); KmlSolverMatrixStore storeA; storeA.indexType = KMLSS_INDEX_INT32; storeA.valueType = KMLSS_VALUE_FP64; storeA.format = KMLSS_MATRIX_STORE_CSR; if (rank == 0) { storeA.nRow = n; storeA.nCol = n; storeA.csr.rowOffset = ia; storeA.csr.colIndex = ja; storeA.csr.value = a; } else { storeA.nRow = 0; storeA.nCol = 0; storeA.csr.rowOffset = nullptr; storeA.csr.colIndex = nullptr; storeA.csr.value = nullptr; } KmlSolverMatrixOption optA; optA.fieldMask = KMLSS_MATRIX_OPTION_TYPE; optA.type = KMLSS_MATRIX_GEN; KmlScasolverMatrixOption scaOptA; if (rank == 0) { scaOptA.fieldMask = KMLSS_MATRIX_OPTIONS_GLOBAL_NROWS | KMLSS_MATRIX_OPTIONS_GLOBAL_NCOLS | KMLSS_MATRIX_OPTIONS_PARTITION; scaOptA.partition.type = KMLSS_MATRIX_PARTITION_ROW; scaOptA.globalNumRows = n; scaOptA.globalNumCols = n; scaOptA.partition.localBegin = 0; } else { scaOptA.fieldMask = 0; } KmlScasolverMatrix *A; ierr = KmlScasolverMatrixCreate(&A, &storeA, &optA, &scaOptA); if (ierr != KMLSS_NO_ERROR) { printf("ERROR when create A: %d\n", ierr); return 1; } // Create vector b double b[8] = {3.0, 1.0, 7.0, -4.0, 5.0, -2.0, 10.0, 10.0}; KmlSolverMatrixStore storeB; storeB.indexType = KMLSS_INDEX_INT32; storeB.valueType = KMLSS_VALUE_FP64; storeB.format = KMLSS_MATRIX_STORE_DENSE_COL_MAJOR; if (rank == 0) { storeB.nRow = n; storeB.nCol = nrhs; storeB.dense.value = b; storeB.dense.ld = n; } else { storeB.nRow = 0; storeB.nCol = 0; storeB.dense.value = nullptr; storeB.dense.ld = 0; } KmlSolverMatrixOption optB; optB.fieldMask = KMLSS_MATRIX_OPTION_TYPE; optB.type = KMLSS_MATRIX_GEN; KmlScasolverMatrixOption scaOptB; if (rank == 0) { scaOptB.fieldMask = KMLSS_MATRIX_OPTIONS_GLOBAL_NROWS | KMLSS_MATRIX_OPTIONS_GLOBAL_NCOLS | KMLSS_MATRIX_OPTIONS_PARTITION; scaOptB.partition.type = KMLSS_MATRIX_PARTITION_ROW; scaOptB.partition.localBegin = 0; scaOptB.globalNumRows = n; scaOptB.globalNumCols = nrhs; } else { scaOptB.fieldMask = 0; } KmlScasolverMatrix *B; ierr = KmlScasolverMatrixCreate(&B, &storeB, &optB, &scaOptB); if (ierr != KMLSS_NO_ERROR) { printf("ERROR when create b: %d\n", ierr); return 1; } // Create vector x double x[8] = {0}; KmlSolverMatrixStore storeX; storeX.indexType = KMLSS_INDEX_INT32; storeX.valueType = KMLSS_VALUE_FP64; storeX.format = KMLSS_MATRIX_STORE_DENSE_COL_MAJOR; if (rank == 0) { storeX.nRow = n; storeX.nCol = nrhs; storeX.dense.value = x; storeX.dense.ld = n; } else { storeX.nRow = 0; storeX.nCol = 0; storeX.dense.value = nullptr; storeX.dense.ld = 0; } KmlSolverMatrixOption optX; optX.fieldMask = KMLSS_MATRIX_OPTION_TYPE; optX.type = KMLSS_MATRIX_GEN; KmlScasolverMatrixOption scaOptX; if (rank == 0) { scaOptX.fieldMask = KMLSS_MATRIX_OPTIONS_GLOBAL_NROWS | KMLSS_MATRIX_OPTIONS_GLOBAL_NCOLS | KMLSS_MATRIX_OPTIONS_PARTITION; scaOptX.partition.type = KMLSS_MATRIX_PARTITION_ROW; scaOptX.partition.localBegin = 0; scaOptX.globalNumRows = n; scaOptX.globalNumCols = nrhs; } else { scaOptX.fieldMask = 0; } KmlScasolverMatrix *X; ierr = KmlScasolverMatrixCreate(&X, &storeX, &optX, &scaOptX); if (ierr != KMLSS_NO_ERROR) { printf("ERROR when create x: %d\n", ierr); return 1; } // Init solver KmlDssInitOption opt; opt.fieldMask = KMLDSS_INIT_OPTION_BWR_MODE | KMLDSS_INIT_OPTION_NTHREADS; opt.bwrMode = KMLDSS_BWR_OFF; opt.nThreads = 32; KmlScadssInitOption scaOpt; scaOpt.fieldMask = KMLSCADSS_OPTIONS_COMM; scaOpt.comm = comm; KmlScadssSolver *solver; ierr = KmlScadssInit(&solver, &opt, &scaOpt); if (ierr != KMLSS_NO_ERROR) { printf("ERROR in KmlDssInit: %d\n", ierr); return ierr; } // Analyze KmlDssAnalyzeOption optAnalyze; optAnalyze.fieldMask = KMLDSS_ANALYZE_OPTION_MATCHING_TYPE | KMLDSS_ANALYZE_OPTION_RDR_TYPE | KMLDSS_ANALYZE_OPTION_NTHREADS_RDR; optAnalyze.matchingType = KMLDSS_MATCHING_OFF; optAnalyze.rdrType = KMLDSS_RDR_KRDR; optAnalyze.nThreadsRdr = 1; KmlScadssAnalyzeOption scaOptAnalyze; scaOptAnalyze.fieldMask = 0; ierr = KmlScadssAnalyze(solver, A, &optAnalyze, &scaOptAnalyze); if (ierr != KMLSS_NO_ERROR) { printf("ERROR in KmlDssAnalyze: %d\n", ierr); return ierr; } // Factorize KmlDssFactorizeOption optFact; optFact.fieldMask = KMLDSS_FACTORIZE_OPTION_PERTURBATION_THRESHOLD; optFact.perturbationThreshold = 1e-8; KmlScadssFactorizeOption scaOptFact; scaOptFact.fieldMask = 0; ierr = KmlScadssFactorize(solver, A, &optFact, &scaOptFact); if (ierr != KMLSS_NO_ERROR) { printf("ERROR in KmlDssFactorize: %d\n", ierr); return ierr; } // Solve KmlDssSolveOption optSolve; optSolve.fieldMask = KMLDSS_SOLVE_OPTION_SOLVE_STAGE | KMLDSS_SOLVE_OPTION_REFINE_METHOD; optSolve.stage = KMLDSS_SOLVE_ALL; optSolve.refineMethod = KMLDSS_REFINE_OFF; KmlScadssSolveOption scaOptSolve; scaOptSolve.fieldMask = 0; ierr = KmlScadssSolve(solver, B, X, &optSolve, &scaOptSolve); if (ierr != KMLSS_NO_ERROR) { printf("ERROR in KmlDssSolve: %d\n", ierr); return ierr; } // Output result x if (rank == 0) { printf("Result of first factorize and solve:\n"); for (int i = 0; i < n; i++) { printf("%lf ", x[i]); } printf("\n"); } MPI_Finalize();
- 头文件
#include "mkl_types.h"
#include "mkl_cluster_sparse_solver.h"
迁移前(PARDISO-Cluster):
#include "mkl_pardiso.h"
#include "mkl_types.h"
迁移后:
#include "kml_scadss.h"
- 编译链接库。