直接求解器(DIRECT SOLVER)迁移
可替换性
MKL中包含单机版直接求解器PARDISO(Parallel Direct Sparse Solver Interface)和集群版直接求解器PARDISO-Cluster(Parallel Direct Sparse Solver for Cluster Interface)。前者为shared-memory版本的并行稀疏求解器,而后者则和SCADSS一样,支持MPI和OPENMP技术的混合模式。直接求解器包含Analyze,Factorize,Solve三个核心阶段,每个阶段都可以单独调用。这一点与用户的实际问题相关,比如用户在求解部分包含时间项的偏微分方程的时候,稀疏矩阵是保持不变的,因此Analyze和Factorize只需要进行一次,重复调用Solve阶段即可。再比如,稀疏矩阵的数值发生了变化,但是矩阵的非零元位置未发生变化,那么Analyze只需要进行一次,重复调用Solve阶段即可。在这里给出MKL中的direct solver与SCADSS的接口映射关系。
PARDISO-Cluster接口 |
SCADSS接口 |
|---|---|
mtype=2;或者mtype = 4; phase = 11; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); |
KmlScadssSpdAnalyzeDI(&handle);或者 KmlScadssHpdAnalyzeZI(&handle); |
mtype=2;或者mtype = 4; phase = 22; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); |
KmlScadssSpdFactorizeDI(&handle);或者 KmlScadssHpdFactorizeZI(&handle); |
mtype=2;或者mtype = 4; phase = 33; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); |
KmlScadssSpdSolveDI(&handle);或者 KmlScadssHpdSolveZI(&handle); |
mtype=2;或者mtype = 4; phase = -1; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); |
KmlScadssSpdCleanDI(&handle);或者 KmlScadssHpdCleanZI(&handle); |
- MKL中direct solver通过mtype参数确定输入稀疏矩阵的类型,mtype=2表示SPD矩阵,mtype=4则表示HPD矩阵。
- phase参数控制MKL direct solver执行的操作,除上表中列举的取值外,还可以取别的值。例如取12时表示执行Analyze,Factorize两个过程。
PARDISO接口 |
SCADSS接口 |
|---|---|
mtype=2;或者mtype = 4; phase = 11; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); |
KmlScadssSpdAnalyzeDI(&handle);或者 KmlScadssHpdAnalyzeZI(&handle); |
mtype=2;或者mtype = 4; phase = 22; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); |
KmlScadssSpdFactorizeDI(&handle);或者 KmlScadssHpdFactorizeZI(&handle); |
mtype=2;或者mtype = 4; phase = 33; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); |
KmlScadssSpdSolveDI(&handle);或者 KmlScadssHpdSolveZI(&handle); |
mtype=2;或者mtype = 4; phase = -1; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); |
KmlScadssSpdCleanDI(&handle);或者 KmlScadssHpdCleanZI(&handle); |
C语言迁移步骤
- 迁移前(调用PARDISO)。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
int n = 8; int nnz = 17; int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17}; int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7}; double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0}; int nrhs = 1; // Number of right hand sides. double b[8]={4.0, 9.0, 7.0, 6.0, 9.0, 3.0, 2.0, 5.0}; double x[8]; int mtype = 2; void *pt[64]; int iparm[64]; int maxfct, mnum, phase, error, msglvl, perm; pardisoinit(pt, &mtype, iparm); iparm[34] = 1; maxfct = 1; // Maximum number of numerical factorizations. mnum = 1; // Which factorization to use. msglvl = 0; // Print statistical information in file. error = 0; // Initialize error flag. phase = 11; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Analyze printf ("Reordering completed ... "); phase = 22; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Factorize printf ("\nFactorization completed ... "); phase = 33; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Solve printf("\nSolve completed ... "); printf("\nThe solution of the system is: "); for(int i = 0; i < n; i++){ printf ("\n x [%d] = % f", i, x[i]); } printf("\n"); phase = -1; // Release internal memory. pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);
迁移前(调用PARDISO-Cluster)。1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
int n = 8; int nnz = 17; int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17}; int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7}; double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0}; /* RHS and solution vectors. */ int nrhs = 1; /* Number of right hand sides. */ double b[8]={4.0, 9.0, 7.0, 6.0, 9.0, 3.0, 2.0, 5.0}; double x[8]; int mtype = 2; void *pt[64]; int iparm[64]; int maxfct, mnum, phase, error, msglvl, perm; for (int i = 0; i < 64; i++) { iparm[i] = 0; pt[i] = 0; } iparm[0] = 1; iparm[1] = 2; iparm[17] = -1; iparm[18] = -1; iparm[34] = 1; maxfct = 1; // Maximum number of numerical factorizations. mnum = 1; // Which factorization to use. msglvl = 0; // Print statistical information in file. error = 0; // Initialize error flag. MPI_Init(NULL, NULL); int size, rank; MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Fint comm = MPI_Comm_c2f(MPI_COMM_WORLD); phase = 11; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Analyze phase = 22; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Factorize phase = 33; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Solve if (rank == 0){ printf("The solution of the system is: "); for(int i = 0; i < n; i++){ printf ("\n x [%d] = % f", i, x[i]); } printf("\n"); } phase = -1; // Release internal memory. cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); MPI_Finalize();
迁移后:1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
int n = 8; int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17}; int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7}; double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0}; MPI_Init(NULL, NULL); int size, rank; MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); KmlScasolverTask* matrix_handle = NULL; int ierr = KmlScadssSpdInitDI(&matrix_handle, n, a, ja, ia, MPI_COMM_WORLD); //Init if (ierr != 0) { printf("\nERROR in KmlScadssSpdInitDI: %d", ierr); } ierr = KmlScadssSpdAnalyzeDI(&matrix_handle); //Analyze phase if (ierr != 0) { printf("\nERROR in KmlScadssSpdAnalyzeDI: %d", ierr); } ierr = KmlScadssSpdFactorizeDI(&matrix_handle); //Factorize phase if (ierr != 0) { printf("\nERROR in KmlScadssSpdFactorizeDI: %d", ierr); } int nrhs = 1; int ldx=n, ldb=n; double b[8]={4.0, 9.0, 7.0, 6.0, 9.0, 3.0, 2.0, 5.0}; double x[8]; ierr = KmlScadssSpdSolveDI(&matrix_handle, nrhs, x, ldx, b, ldb); //Solve phase if (ierr != 0) { printf("\nERROR in KmlScadssSpdSolveDI: %d", ierr); } ierr = KmlScadssSpdCleanDI(&matrix_handle); //clean if (ierr != 0) { printf("\nERROR in KmlScadssSpdcleanDI: %d", ierr); } MPI_Finalize();
- 头文件
#include "mkl_types.h"
#include "mkl_cluster_sparse_solver.h"
迁移前(PARDISO-Cluster):
#include "mkl_pardiso.h"
#include "mkl_types.h"
迁移后:
#include "kml_scadss.h
- 编译链接库。
替换MKL相关的链接选项, 具体选项请参见《Kunpeng HPCKit 25.1.0.SPC001 安装指南》安装KML。