直接求解器（DIRECT SOLVER）迁移

可替换性

MKL中包含单机版直接求解器PARDISO（Parallel Direct Sparse Solver Interface）和集群版直接求解器PARDISO-Cluster（Parallel Direct Sparse Solver for Cluster Interface）。前者为shared-memory版本的并行稀疏求解器，而后者则和SCADSS一样，支持MPI和OPENMP技术的混合模式。直接求解器包含Analyze，Factorize，Solve三个核心阶段，每个阶段都可以单独调用。这一点与用户的实际问题相关，比如用户在求解部分包含时间项的偏微分方程的时候，稀疏矩阵是保持不变的，因此Analyze和Factorize只需要进行一次，重复调用Solve阶段即可。再比如，稀疏矩阵的数值发生了变化，但是矩阵的非零元位置未发生变化，那么Analyze只需要进行一次，重复调用Solve阶段即可。在这里给出MKL中的direct solver与SCADSS的接口映射关系。

表1 PARDISO-Cluster与SCADSS的替换映射关系
PARDISO-Cluster接口	SCADSS接口
mtype=2;或者mtype = 4; phase = 11; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error);	KmlScadssSpdAnalyzeDI(&handle);或者 KmlScadssHpdAnalyzeZI(&handle);
mtype=2;或者mtype = 4; phase = 22; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error);	KmlScadssSpdFactorizeDI(&handle);或者 KmlScadssHpdFactorizeZI(&handle);
mtype=2;或者mtype = 4; phase = 33; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error);	KmlScadssSpdSolveDI(&handle);或者 KmlScadssHpdSolveZI(&handle);
mtype=2;或者mtype = 4; phase = -1; cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error);	KmlScadssSpdCleanDI(&handle);或者 KmlScadssHpdCleanZI(&handle);

MKL中direct solver通过mtype参数确定输入稀疏矩阵的类型，mtype=2表示SPD矩阵，mtype=4则表示HPD矩阵。
phase参数控制MKL direct solver执行的操作，除上表中列举的取值外，还可以取别的值。例如取12时表示执行Analyze，Factorize两个过程。

表2 PARDISO与SCADSS的替换映射关系
PARDISO接口	SCADSS接口
mtype=2;或者mtype = 4; phase = 11; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);	KmlScadssSpdAnalyzeDI(&handle);或者 KmlScadssHpdAnalyzeZI(&handle);
mtype=2;或者mtype = 4; phase = 22; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);	KmlScadssSpdFactorizeDI(&handle);或者 KmlScadssHpdFactorizeZI(&handle);
mtype=2;或者mtype = 4; phase = 33; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);	KmlScadssSpdSolveDI(&handle);或者 KmlScadssHpdSolveZI(&handle);
mtype=2;或者mtype = 4; phase = -1; pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);	KmlScadssSpdCleanDI(&handle);或者 KmlScadssHpdCleanZI(&handle);

C语言迁移步骤

迁移前（调用PARDISO）。

int n = 8;
int nnz = 17;
int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17};
int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7};
double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0};

int nrhs = 1;     // Number of right hand sides.
double b[8]={4.0, 9.0, 7.0, 6.0, 9.0, 3.0, 2.0, 5.0};
double x[8];

int mtype = 2;
void *pt[64];
int iparm[64];
int maxfct, mnum, phase, error, msglvl, perm;
pardisoinit(pt, &mtype, iparm);
iparm[34] = 1;
maxfct = 1;       // Maximum number of numerical factorizations.
mnum = 1;         // Which factorization to use.
msglvl = 0;       // Print statistical information in file.
error = 0;        // Initialize error flag.

phase = 11;
pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Analyze
printf ("Reordering completed ... ");

phase = 22;
pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Factorize
printf ("\nFactorization completed ... ");

phase = 33;
pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error); // Solve
printf("\nSolve completed ... ");

printf("\nThe solution of the system is: ");
for(int i = 0; i < n; i++){
    printf ("\n x [%d] = % f", i, x[i]);
}
printf("\n");

phase = -1;      // Release internal memory.
pardiso(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error);

迁移前（调用PARDISO-Cluster）。

int n = 8;
int nnz = 17;
int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17};
int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7};
double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0};

/* RHS and solution vectors. */
int nrhs = 1;     /* Number of right hand sides. */
double b[8]={4.0, 9.0, 7.0, 6.0, 9.0, 3.0, 2.0, 5.0};
double x[8];

int mtype = 2;
void *pt[64];
int iparm[64];
int maxfct, mnum, phase, error, msglvl, perm;
for (int i = 0; i < 64; i++) {
    iparm[i] = 0;
    pt[i] = 0;
}
iparm[0] = 1;
iparm[1] = 2;
iparm[17] = -1;
iparm[18] = -1;
iparm[34] = 1;
maxfct = 1;       // Maximum number of numerical factorizations.
mnum = 1;         // Which factorization to use.
msglvl = 0;       // Print statistical information in file.
error = 0;        // Initialize error flag.

MPI_Init(NULL, NULL);
int size, rank;
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Fint comm = MPI_Comm_c2f(MPI_COMM_WORLD);

phase = 11;
cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Analyze

phase = 22;
cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Factorize

phase = 33;
cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error); // Solve

if (rank == 0){
    printf("The solution of the system is: ");
    for(int i = 0; i < n; i++){
        printf ("\n x [%d] = % f", i, x[i]);
    }
    printf("\n");
}

phase = -1;      // Release internal memory.
cluster_sparse_solver(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &comm, &error);

MPI_Finalize();

迁移后：

int n = 8;
int ia[9] = {0, 3, 7, 9, 11, 13, 15, 16, 17};
int ja[17] = {0, 3, 4, 1, 2, 3, 5, 2, 7, 3, 6, 4, 5, 5, 7, 6, 7};
double a[17] = {1.0, 1.0, 2.0, 9.0, 2.0, 1.0, -3.0, 3.0, 2.0, 9.0, -5.0, 6.0, 1.0, 4.0, 1.0, 7.0, 2.0};

MPI_Init(NULL, NULL);
int size, rank;
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);

KmlScasolverTask* matrix_handle = NULL;
int ierr = KmlScadssSpdInitDI(&matrix_handle, n, a, ja, ia, MPI_COMM_WORLD); //Init
if (ierr != 0) {
    printf("\nERROR in KmlScadssSpdInitDI: %d", ierr);
}
ierr = KmlScadssSpdAnalyzeDI(&matrix_handle); //Analyze phase
if (ierr != 0) {
    printf("\nERROR in KmlScadssSpdAnalyzeDI: %d", ierr);
}
ierr = KmlScadssSpdFactorizeDI(&matrix_handle); //Factorize phase
if (ierr != 0) {
    printf("\nERROR in KmlScadssSpdFactorizeDI: %d", ierr);
}

int nrhs = 1;
int ldx=n, ldb=n;
double b[8]={4.0, 9.0, 7.0, 6.0, 9.0, 3.0, 2.0, 5.0};
double x[8];

ierr = KmlScadssSpdSolveDI(&matrix_handle, nrhs, x, ldx, b, ldb); //Solve phase
if (ierr != 0) {
    printf("\nERROR in KmlScadssSpdSolveDI: %d", ierr);
}
ierr = KmlScadssSpdCleanDI(&matrix_handle); //clean
if (ierr != 0) {
    printf("\nERROR in KmlScadssSpdcleanDI: %d", ierr);
}
MPI_Finalize();

头文件
 迁移前（PARDISO）：

#include "mkl_types.h"

#include "mkl_cluster_sparse_solver.h"

迁移前（PARDISO-Cluster）：

#include "mkl_pardiso.h"

#include "mkl_types.h"

迁移后：

#include "kml_scadss.h
编译链接库。
替换MKL相关的链接选项，具体选项请参见《Kunpeng HPCKit 25.1.0.SPC001 安装指南》安装KML。