构建机器学习算法加速库适配代码Spark-ml-algo-lib过程:
获取地址:https://github.com/apache/spark/archive/v2.3.2.zip
wget https://github.com/apache/spark/archive/v2.3.2.zip
unzip v2.3.2.zip
获取地址:https://github.com/scalanlp/breeze/archive/releases/v0.13.1.zip
wget https://github.com/scalanlp/breeze/archive/releases/v0.13.1.zip
unzip v0.13.1.zip
cd /opt/
mkdir -p Spark-ml-algo-lib/ml-accelerator/src/main/scala/breeze/optimize
mkdir -p Spark-ml-algo-lib/ml-core/src/main/scala/breeze/numerics
mkdir -p Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/ml/classification
mkdir -p Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/ml/optim/aggregator
mkdir -p Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/ml/optim/loss
mkdir -p Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/ml/recommendation
mkdir -p Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/ml/regression
mkdir -p Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/ml/tree/impl
mkdir -p Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/mllib/clustering
mkdir -p Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/mllib/fpm
mkdir -p Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/mllib/linalg/distributed
mkdir -p Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/mllib/tree
mkdir -p Spark-ml-algo-lib/ml-core/src/main/scala/org/apache/spark/ml/tree/impl
mkdir -p Spark-ml-algo-lib/ml-core/src/main/scala/org/apache/spark/mllib/clustering
mkdir -p Spark-ml-algo-lib/ml-core/src/main/scala/org/apache/spark/mllib/fpm
mkdir -p Spark-ml-algo-lib/ml-core/src/main/scala/org/apache/spark/mllib/tree/impurity
有些文件在复制到目标文件夹后需要改名。
cp /opt/spark-2.3.2/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala /opt/Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
cp /opt/breeze-releases-v0.13.1/math/src/main/scala/breeze/optimize/FirstOrderMinimizer.scala /opt/Spark-ml-algo-lib/ml-accelerator/src/main/scala/breeze/optimize/FirstOrderMinimizerX.scala
Spark-ml-algo-lib工程目录 |
Spark-ml-algo-lib工程文件名 |
Spark原文件所在目录 |
Spark原文件名 |
---|---|---|---|
Spark-ml-algo-lib/ml-accelerator/ src/main/scala/org/apache/spark/ml/classification/ |
GBTClassifier.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/ml/classification/ |
GBTClassifier.scala |
LinearSVC.scala |
LinearSVC.scala |
||
RandomForestClassifier.scala |
RandomForestClassifier.scala |
||
DecisionTreeClassifier.scala |
DecisionTreeClassifier.scala |
||
LogisticRegression.scala |
LogisticRegression.scala |
||
Spark-ml-algo-lib/ml-accelerator/ src/main/scala/org/apache/spark/ml/optim/aggregator/ |
DifferentiableLossAggregatorX.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/ml/optim/aggregator/ |
DifferentiableLossAggregator.scala |
HingeAggregatorX.scala |
HingeAggregator.scala |
||
HuberAggregatorX.scala |
HuberAggregator.scala |
||
LeastSquaresAggregatorX.scala |
LeastSquaresAggregator.scala |
||
LogisticAggregatorX.scala |
LogisticAggregator.scala |
||
Spark-ml-algo-lib/ml-accelerator/ src/main/scala/org/apache/spark/ml/optim/loss/ |
RDDLossFunctionX.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/ml/optim/loss/ |
RDDLossFunction.scala |
Spark-ml-algo-lib/ml-accelerator/ src/main/scala/org/apache/spark/ml/regression/ |
DecisionTreeRegressor.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/ml/regression/ |
DecisionTreeRegressor.scala |
GBTRegressor.scala |
GBTRegressor.scala |
||
LinearRegression.scala |
LinearRegression.scala |
||
RandomForestRegressor.scala |
RandomForestRegressor.scala |
||
Spark-ml-algo-lib/ml-accelerator/ src/main/scala/org/apache/spark/ml/tree/impl/ |
GradientBoostedTrees.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/ml/tree/impl/ |
GradientBoostedTrees.scala |
NodeIdCache.scala |
NodeIdCache.scala |
||
RandomForest.scala |
RandomForest.scala |
||
RandomForest4GBDTX.scala |
RandomForest.scala |
||
RandomForestRaw.scala |
RandomForest.scala |
||
DecisionForest.scala |
RandomForest.scala |
||
Spark-ml-algo-lib/ml-accelerator/ src/main/scala/org/apache/spark/ml/tree/ |
treeParams.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/ml/tree/ |
treeParams.scala |
Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/mllib/clustering/ |
KMACCm.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/mllib/clustering |
KMeans.scala |
KMeans.scala |
KMeans.scala |
||
Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/mllib/linalg/distributed/ |
RowMatrix.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/mllib/linalg/distributed |
RowMatrix.scala |
Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/mllib/linalg/ |
EigenValueDecomposition.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/mllib/linalg |
EigenValueDecomposition.scala |
Spark-ml-algo-lib/ml-accelerator/src/main/scala/org/apache/spark/mllib/tree/ |
DecisionTree.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/mllib/tree |
DecisionTree.scala |
Spark-ml-algo-lib/ml-core/ src/main/scala/org/apache/spark/ml/tree/ |
Node.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/ml/tree/ |
Node.scala |
Split.scala |
Split.scala |
||
Spark-ml-algo-lib/ml-core/ src/main/scala/org/apache/spark/ml/tree/impl |
BaggedPoint.scala |
spark-2.3.2/mllib/src/main/scala/org/ apache/spark/ml/tree/impl/ |
BaggedPoint.scala |
DTFeatureStatsAggregator.scala |
DTStatsAggregator.scala |
||
DTStatsAggregator.scala |
DTStatsAggregator.scala |
||
GradientBoostedTreesCore.scala |
RandomForest.scala |
||
TreePointX.scala |
TreePoint.scala |
||
TreePointY.scala |
TreePoint.scala |
||
Spark-ml-algo-lib/ml-core/src/main/scala/org/apache/spark/mllib/tree/impurity/ |
Entropy.scala |
spark-2.3.2/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity |
Entropy.scala |
Gini.scala |
Gini.scala |
||
Impurities.scala |
Impurities.scala |
||
Impurity.scala |
Impurity.scala |
||
Variance.scala |
Variance.scala |
Spark-ml-algo-lib工程目录 |
Spark-ml-algo-lib工程文件名 |
Breeze原文件所在目录 |
Breeze原文件名 |
---|---|---|---|
Spark-ml-algo-lib/ml-accelerator/ src/main/scala/breeze/optimize |
FirstOrderMinimizerX.scala |
breeze-releases-v0.13.1/math/src/ main/scala/breeze/optimize |
FirstOrderMinimizer.scala |
LBFGSX.scala |
LBFGS.scala |
||
OWLQNX.scala |
OWLQN.scala |
完成4后,Spark-ml-algo-lib工程的目录结构及目录下的文件如下:
Spark-ml-algo-lib ├── ml-accelerator │ └── src │ └── main │ └── scala │ ├── breeze │ │ └── optimize │ │ ├── FirstOrderMinimizerX.scala │ │ ├── LBFGSX.scala │ │ └── OWLQNX.scala │ └── org │ └── apache │ └── spark │ ├── ml │ │ ├── classification │ │ │ ├── DecisionTreeClassifier.scala │ │ │ ├── GBTClassifier.scala │ │ │ ├── LinearSVC.scala │ │ │ ├── LogisticRegression.scala │ │ │ └── RandomForestClassifier.scala │ │ ├── optim │ │ │ ├── aggregator │ │ │ │ ├── DifferentiableLossAggregatorX.scala │ │ │ │ ├── HingeAggregatorX.scala │ │ │ │ ├── HuberAggregatorX.scala │ │ │ │ ├── LeastSquaresAggregatorX.scala │ │ │ │ └── LogisticAggregatorX.scala │ │ │ └── loss │ │ │ └── RDDLossFunctionX.scala │ │ ├── regression │ │ │ ├── DecisionTreeRegressor.scala │ │ │ ├── GBTRegressor.scala │ │ │ ├── LinearRegression.scala │ │ │ └── RandomForestRegressor.scala │ │ └── tree │ │ ├── impl │ │ │ ├── DecisionForest.scala │ │ │ ├── GradientBoostedTrees.scala │ │ │ ├── NodeIdCache.scala │ │ │ ├── RandomForest4GBDTX.scala │ │ │ ├── RandomForestRaw.scala │ │ │ └── RandomForest.scala │ │ └── treeParams.scala │ └── mllib │ ├── clustering │ │ ├── KMACCm.scala │ │ └── KMeans.scala │ ├── linalg │ │ ├── distributed │ │ │ └── RowMatrix.scala │ │ └── EigenValueDecomposition.scala │ └── tree │ └── DecisionTree.scala └── ml-core └── src └── main └── scala └── org └── apache └── spark ├── ml │ └── tree │ ├── impl │ │ ├── BaggedPoint.scala │ │ ├── DTFeatureStatsAggregator.scala │ │ ├── DTStatsAggregator.scala │ │ ├── GradientBoostedTreesCore.scala │ │ ├── TreePointX.scala │ │ └── TreePointY.scala │ ├── Node.scala │ └── Split.scala └── mllib └── tree └── impurity ├── Entropy.scala ├── Gini.scala ├── Impurities.scala ├── Impurity.scala └── Variance.scala
cd /opt/Spark-ml-algo-lib
wget https://github.com/kunpengcompute/Spark-ml-algo-lib/releases/download/v1.1.0/Spark-ml-algo-lib.patch
patch -p1 < Spark-ml-algo-lib.patch
完整的机器学习算法加速库适配代码Spark-ml-algo-lib的目录及目录下的文件如下:
Spark-ml-algo-lib ├── LICENSE ├── ml-accelerator │ ├── pom.xml │ └── src │ └── main │ └── scala │ ├── breeze │ │ └── optimize │ │ ├── FirstOrderMinimizerX.scala │ │ ├── LBFGSX.scala │ │ └── OWLQNX.scala │ └── org │ └── apache │ └── spark │ ├── ml │ │ ├── classification │ │ │ ├── DecisionTreeClassifier.scala │ │ │ ├── GBTClassifier.scala │ │ │ ├── LinearSVC.scala │ │ │ ├── LogisticRegression.scala │ │ │ └── RandomForestClassifier.scala │ │ ├── optim │ │ │ ├── aggregator │ │ │ │ ├── DifferentiableLossAggregatorX.scala │ │ │ │ ├── HingeAggregatorX.scala │ │ │ │ ├── HuberAggregatorX.scala │ │ │ │ ├── LeastSquaresAggregatorX.scala │ │ │ │ └── LogisticAggregatorX.scala │ │ │ └── loss │ │ │ └── RDDLossFunctionX.scala │ │ ├── regression │ │ │ ├── DecisionTreeRegressor.scala │ │ │ ├── GBTRegressor.scala │ │ │ ├── LinearRegression.scala │ │ │ └── RandomForestRegressor.scala │ │ └── tree │ │ ├── impl │ │ │ ├── DecisionForest.scala │ │ │ ├── GradientBoostedTrees.scala │ │ │ ├── NodeIdCache.scala │ │ │ ├── RandomForest4GBDTX.scala │ │ │ ├── RandomForestRaw.scala │ │ │ └── RandomForest.scala │ │ └── treeParams.scala │ └── mllib │ ├── clustering │ │ ├── KMACCm.scala │ │ └── KMeans.scala │ ├── linalg │ │ ├── distributed │ │ │ └── RowMatrix.scala │ │ └── EigenValueDecomposition.scala │ └── tree │ └── DecisionTree.scala ├── ml-core │ ├── pom.xml │ └── src │ └── main │ └── scala │ └── org │ └── apache │ └── spark │ ├── ml │ │ └── tree │ │ ├── impl │ │ │ ├── BaggedPoint.scala │ │ │ ├── DTFeatureStatsAggregator.scala │ │ │ ├── DTStatsAggregator.scala │ │ │ ├── GradientBoostedTreesCore.scala │ │ │ ├── TreePointX.scala │ │ │ └── TreePointY.scala │ │ ├── Node.scala │ │ └── Split.scala │ └── mllib │ └── tree │ └── impurity │ ├── Entropy.scala │ ├── Gini.scala │ ├── Impurities.scala │ ├── Impurity.scala │ └── Variance.scala ├── ml-kernel-client │ ├── pom.xml │ └── src │ └── main │ └── scala │ ├── breeze │ │ ├── linalg │ │ │ ├── blas │ │ │ │ ├── Dgemv.scala │ │ │ │ └── Gramian.scala │ │ │ 
├── DenseMatrixUtil.scala │ │ │ ├── DenseVectorUtil.scala │ │ │ └── lapack │ │ │ └── EigenDecomposition.scala │ │ └── optimize │ │ ├── ACC.scala │ │ ├── LBFGSL.scala │ │ └── OWLQNL.scala │ └── org │ └── apache │ └── spark │ ├── ml │ │ └── tree │ │ └── impl │ │ ├── DTUtils.scala │ │ ├── GradientBoostedTreesUtil.scala │ │ └── RFUtils.scala │ ├── mllib.clustering │ │ └── KmeansUtil.scala │ └── mllib.linalg.distributed │ └── RowMatrixUtil.scala ├── pom.xml ├── README.md └── scalastyle-config.xml