diff --git a/scala/dllib/pom.xml b/scala/dllib/pom.xml
index 5f3b6f97163..626cc2ae76b 100644
--- a/scala/dllib/pom.xml
+++ b/scala/dllib/pom.xml
@@ -332,6 +332,11 @@
       <version>1.7.16</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>com.microsoft.azure</groupId>
+      <artifactId>synapseml-lightgbm_2.12</artifactId>
+      <version>0.9.5</version>
+    </dependency>
@@ -374,6 +379,14 @@
               <exclude>META-INF/maven/com.google.protobuf/protobuf-java/*</exclude>
             </excludes>
           </filter>
+          <filter>
+            <artifact>*:*</artifact>
+            <excludes>
+              <exclude>META-INF/*.SF</exclude>
+              <exclude>META-INF/*.DSA</exclude>
+              <exclude>META-INF/*.RSA</exclude>
+            </excludes>
+          </filter>
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nnframes/NNClassifier.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nnframes/NNClassifier.scala
index b44e7354205..24e5a26cbbf 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nnframes/NNClassifier.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nnframes/NNClassifier.scala
@@ -322,396 +322,3 @@ object NNClassifierModel extends MLReadable[NNClassifierModel[_]] {
new NNClassifierModel.NNClassifierModelReader
}
}
-
-class XGBClassifier (val xgboostParams: Map[String, Any] = Map()) {
- val sc = SparkContext.getOrCreate()
- sc.getConf.set("spark.task.cpus", Engine.coreNumber().toString)
- private val model = new XGBoostClassifier(xgboostParams)
- model.setNthread(Engine.coreNumber())
- model.setNumWorkers(Engine.nodeNumber())
- model.setMaxBins(256)
-
- def setFeaturesCol(featuresColName: String): this.type = {
- model.setFeaturesCol(featuresColName)
- this
- }
-
- def fit(df: DataFrame): XGBClassifierModel = {
- df.repartition(Engine.nodeNumber())
- val xgbmodel = model.fit(df)
- new XGBClassifierModel(xgbmodel)
- }
-
- def setNthread(value: Int): this.type = {
- model.setNthread(value)
- this
- }
-
- def setNumRound(value: Int): this.type = {
- model.setNumRound(value)
- this
- }
-
- def setNumWorkers(value: Int): this.type = {
- model.setNumWorkers(value)
- this
- }
-
- def setEta(value: Double): this.type = {
- model.setEta(value)
- this
- }
-
- def setGamma(value: Int): this.type = {
- model.setGamma(value)
- this
- }
-
- def setMaxDepth(value: Int): this.type = {
- model.setMaxDepth(value)
- this
- }
-
- def setMissing(value: Float): this.type = {
- model.setMissing(value)
- this
- }
-
- def setLabelCol(labelColName: String): this.type = {
- model.setLabelCol(labelColName)
- this
- }
- def setTreeMethod(value: String): this.type = {
- model.setTreeMethod(value)
- this
- }
-
- def setObjective(value: String): this.type = {
- model.setObjective(value)
- this
- }
-
- def setNumClass(value: Int): this.type = {
- model.setNumClass(value)
- this
- }
-
- def setTimeoutRequestWorkers(value: Long): this.type = {
- model.setTimeoutRequestWorkers(value)
- this
- }
-}
-/**
- * [[XGBClassifierModel]] is a trained XGBoost classification model.
- * The prediction column will have the prediction results.
- *
- * @param model trained XGBoostClassificationModel to use in prediction.
- */
-class XGBClassifierModel private[bigdl](
- val model: XGBoostClassificationModel) {
- private var featuresCols: String = null
- private var predictionCol: String = null
-
- def setFeaturesCol(featuresColName: String): this.type = {
- featuresCols = featuresColName
- this
- }
-
- def setPredictionCol(value: String): this.type = {
- predictionCol = value
- this
- }
-
- def setInferBatchSize(value: Int): this.type = {
- model.setInferBatchSize(value)
- this
- }
-
- def transform(dataset: DataFrame): DataFrame = {
- Log4Error.invalidInputError(featuresCols!=None, "Please set feature columns before transform")
- model.setFeaturesCol(featuresCols)
- var output = model.transform(dataset)
- if(predictionCol != null) {
- output = output.withColumnRenamed("prediction", predictionCol)
- }
- output
- }
-
- def save(path: String): Unit = {
- model.write.overwrite().save(path)
- }
-
-}
-
-object XGBClassifierModel {
- def load(path: String, numClass: Int): XGBClassifierModel = {
- new XGBClassifierModel(XGBoostHelper.load(path, numClass))
- }
-
- def load(path: String): XGBClassifierModel = {
- new XGBClassifierModel(XGBoostClassificationModel.load(path))
- }
-
-}
-
-/**
- * [[XGBRegressor]] xgboost wrapper of XGBRegressor.
- */
-class XGBRegressor () {
-
- private val model = new XGBoostRegressor()
- model.setNthread(Engine.coreNumber())
- model.setMaxBins(256)
-
- def setLabelCol(labelColName : String) : this.type = {
- model.setLabelCol(labelColName)
- this
- }
-
- def setFeaturesCol(featuresColName: String): this.type = {
- model.setFeaturesCol(featuresColName)
- this
- }
-
- def fit(df: DataFrame): XGBRegressorModel = {
- df.repartition(Engine.nodeNumber())
- val xgbModel = model.fit(df)
- new XGBRegressorModel(xgbModel)
- }
-
- def setNumRound(value: Int): this.type = {
- model.setNumRound(value)
- this
- }
-
- def setNumWorkers(value: Int): this.type = {
- model.setNumWorkers(value)
- this
- }
-
- def setNthread(value: Int): this.type = {
- model.setNthread(value)
- this
- }
-
- def setSilent(value: Int): this.type = {
- model.setSilent(value)
- this
- }
-
- def setMissing(value: Float): this.type = {
- model.setMissing(value)
- this
- }
-
- def setCheckpointPath(value: String): this.type = {
- model.setCheckpointPath(value)
- this
- }
-
- def setCheckpointInterval(value: Int): this.type = {
- model.setCheckpointInterval(value)
- this
- }
-
- def setSeed(value: Long): this.type = {
- model.setSeed(value)
- this
- }
-
- def setEta(value: Double): this.type = {
- model.setEta(value)
- this
- }
-
- def setGamma(value: Double): this.type = {
- model.setGamma(value)
- this
- }
-
- def setMaxDepth(value: Int): this.type = {
- model.setMaxDepth(value)
- this
- }
-
- def setMinChildWeight(value: Double): this.type = {
- model.setMinChildWeight(value)
- this
- }
-
- def setMaxDeltaStep(value: Double): this.type = {
- model.setMaxDeltaStep(value)
- this
- }
-
- def setColsampleBytree(value: Double): this.type = {
- model.setColsampleBytree(value)
- this
- }
-
- def setColsampleBylevel(value: Double): this.type = {
- model.setColsampleBylevel(value)
- this
- }
-
- def setLambda(value: Double): this.type = {
- model.setLambda(value)
- this
- }
-
- def setAlpha(value: Double): this.type = {
- model.setAlpha(value)
- this
- }
-
- def setTreeMethod(value: String): this.type = {
- model.setTreeMethod(value)
- this
- }
-
- def setGrowPolicy(value: String): this.type = {
- model.setGrowPolicy(value)
- this
- }
-
- def setMaxBins(value: Int): this.type = {
- model.setMaxBins(value)
- this
- }
-
- def setMaxLeaves(value: Int): this.type = {
- model.setMaxLeaves(value)
- this
- }
-
- def setSketchEps(value: Double): this.type = {
- model.setSketchEps(value)
- this
- }
-
- def setScalePosWeight(value: Double): this.type = {
- model.setScalePosWeight(value)
- this
- }
-
- def setSampleType(value: String): this.type = {
- model.setSampleType(value)
- this
- }
-
- def setNormalizeType(value: String): this.type = {
- model.setNormalizeType(value)
- this
- }
-
- def setRateDrop(value: Double): this.type = {
- model.setRateDrop(value)
- this
- }
-
- def setSkipDrop(value: Double): this.type = {
- model.setSkipDrop(value)
- this
- }
-
- def setLambdaBias(value: Double): this.type = {
- model.setLambdaBias(value)
- this
- }
-
- def setObjective(value: String): this.type = {
- model.setObjective(value)
- this
- }
-
- def setObjectiveType(value: String): this.type = {
- model.setObjectiveType(value)
- this
- }
-
- def setSubsample(value: Double): this.type = {
- model.setSubsample(value)
- this
- }
-
- def setBaseScore(value: Double): this.type = {
- model.setBaseScore(value)
- this
- }
-
- def setEvalMetric(value: String): this.type = {
- model.setEvalMetric(value)
- this
- }
-
- def setNumEarlyStoppingRounds(value: Int): this.type = {
- model.setNumEarlyStoppingRounds(value)
- this
- }
-
- def setMaximizeEvaluationMetrics(value: Boolean): this.type = {
- model.setMaximizeEvaluationMetrics(value)
- this
- }
-}
-
-/**
- * [[XGBRegressorModel]] xgboost wrapper of XGBRegressorModel.
- */
-class XGBRegressorModel private[bigdl](val model: XGBoostRegressionModel) {
- var predictionCol: String = null
- var featuresCol: String = "features"
- var featurearray: Array[String] = Array("features")
- def setPredictionCol(value: String): this.type = {
- predictionCol = value
- this
- }
-
- def setInferBatchSize(value: Int): this.type = {
- model.setInferBatchSize(value)
- this
- }
-
- def setFeaturesCol(value: String): this.type = {
- model.setFeaturesCol(value)
- featuresCol = value
- this
- }
-
- def transform(dataset: DataFrame): DataFrame = {
- val featureVectorAssembler = new VectorAssembler()
- .setInputCols(featurearray)
- .setOutputCol("featureAssembledVector")
- val assembledDF = featureVectorAssembler.transform(dataset)
- import org.apache.spark.sql.functions.{col, udf}
- import org.apache.spark.ml.linalg.Vector
- val asDense = udf((v: Vector) => v.toDense)
- val xgbInput = assembledDF.withColumn("DenseFeatures", asDense(col("featureAssembledVector")))
- model.setFeaturesCol("DenseFeatures")
- var output = model.transform(xgbInput).drop("DenseFeatures", "featureAssembledVector")
- if(predictionCol != null) {
- output = output.withColumnRenamed("prediction", predictionCol)
- }
- output
- }
-
- def save(path: String): Unit = {
- model.write.overwrite().save(path)
- }
-}
-
-object XGBRegressorModel {
- /**
- * Load pretrained Zoo XGBRegressorModel.
- */
- def load(path: String): XGBRegressorModel = {
- new XGBRegressorModel(XGBoostRegressionModel.load(path))
- }
-
- /**
- * Load pretrained xgboost XGBoostRegressionModel.
- */
- def loadFromXGB(path: String): XGBRegressorModel = {
- new XGBRegressorModel(XGBoostHelper.load(path))
- }
-}
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nnframes/TreeModel.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nnframes/TreeModel.scala
new file mode 100644
index 00000000000..bcd6a25e964
--- /dev/null
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nnframes/TreeModel.scala
@@ -0,0 +1,797 @@
+/*
+ * Copyright 2016 The BigDL Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.bigdl.dllib.nnframes
+
+import com.intel.analytics.bigdl.dllib.utils.{Engine, Log4Error}
+import org.apache.spark.sql.SparkSession
+import ml.dmlc.xgboost4j.scala.spark._
+import org.apache.spark.ml.feature.VectorAssembler
+import org.apache.spark.sql.DataFrame
+import com.microsoft.azure.synapse.ml.lightgbm.{LightGBMClassificationModel => MLightGBMClassificationModel}
+import com.microsoft.azure.synapse.ml.lightgbm.{LightGBMClassifier => MLightGBMClassifier}
+import com.microsoft.azure.synapse.ml.lightgbm.{LightGBMRegressionModel => MLightGBMRegressionModel}
+import com.microsoft.azure.synapse.ml.lightgbm.{LightGBMRegressor => MLightGBMRegressor}
+
+class XGBClassifier (val xgboostParams: Map[String, Any] = Map()) {
+ val sc = SparkSession.active.sparkContext
+ sc.getConf.set("spark.task.cpus", Engine.coreNumber().toString)
+ private val estimator = new XGBoostClassifier(xgboostParams)
+ estimator.setNthread(Engine.coreNumber())
+ estimator.setNumWorkers(Engine.nodeNumber())
+ estimator.setMaxBins(256)
+
+ def setFeaturesCol(featuresColName: String): this.type = {
+ estimator.setFeaturesCol(featuresColName)
+ this
+ }
+
+ def fit(df: DataFrame): XGBClassifierModel = {
+    // repartition returns a new DataFrame; fit on it so training actually
+    // runs with Engine.nodeNumber() partitions
+    val repartitioned = df.repartition(Engine.nodeNumber())
+    val xgbmodel = estimator.fit(repartitioned)
+ new XGBClassifierModel(xgbmodel)
+ }
+
+ def setNthread(value: Int): this.type = {
+ estimator.setNthread(value)
+ this
+ }
+
+ def setNumRound(value: Int): this.type = {
+ estimator.setNumRound(value)
+ this
+ }
+
+ def setNumWorkers(value: Int): this.type = {
+ estimator.setNumWorkers(value)
+ this
+ }
+
+ def setEta(value: Double): this.type = {
+ estimator.setEta(value)
+ this
+ }
+
+ def setGamma(value: Int): this.type = {
+ estimator.setGamma(value)
+ this
+ }
+
+ def setMaxDepth(value: Int): this.type = {
+ estimator.setMaxDepth(value)
+ this
+ }
+
+ def setMissing(value: Float): this.type = {
+ estimator.setMissing(value)
+ this
+ }
+
+ def setLabelCol(labelColName: String): this.type = {
+ estimator.setLabelCol(labelColName)
+ this
+ }
+ def setTreeMethod(value: String): this.type = {
+ estimator.setTreeMethod(value)
+ this
+ }
+
+ def setObjective(value: String): this.type = {
+ estimator.setObjective(value)
+ this
+ }
+
+ def setNumClass(value: Int): this.type = {
+ estimator.setNumClass(value)
+ this
+ }
+
+ def setTimeoutRequestWorkers(value: Long): this.type = {
+ estimator.setTimeoutRequestWorkers(value)
+ this
+ }
+}
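+
+// Training sketch for [[XGBClassifier]] (illustrative only; `trainDf` and its
+// column names are assumptions, not part of this API):
+//
+//   val classifier = new XGBClassifier(Map("objective" -> "binary:logistic"))
+//     .setFeaturesCol("features")  // an assembled Vector column
+//     .setLabelCol("label")
+//     .setNumRound(10)
+//   val model: XGBClassifierModel = classifier.fit(trainDf)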
+/**
+ * [[XGBClassifierModel]] is a trained XGBoost classification model.
+ * The prediction column will have the prediction results.
+ *
+ * @param model trained XGBoostClassificationModel to use in prediction.
+ */
+class XGBClassifierModel private[bigdl](
+ val model: XGBoostClassificationModel) {
+ private var featuresCols: String = null
+ private var predictionCol: String = null
+
+ def setFeaturesCol(featuresColName: String): this.type = {
+ featuresCols = featuresColName
+ this
+ }
+
+ def setPredictionCol(value: String): this.type = {
+ predictionCol = value
+ this
+ }
+
+ def setInferBatchSize(value: Int): this.type = {
+ model.setInferBatchSize(value)
+ this
+ }
+
+ def transform(dataset: DataFrame): DataFrame = {
+    // featuresCols is a String, so check for null (comparison with None is always true)
+    Log4Error.invalidInputError(featuresCols != null,
+      "Please set feature columns before transform")
+    model.setFeaturesCol(featuresCols)
+    var output = model.transform(dataset)
+    if (predictionCol != null) {
+ output = output.withColumnRenamed("prediction", predictionCol)
+ }
+ output
+ }
+
+ def save(path: String): Unit = {
+ model.write.overwrite().save(path)
+ }
+
+}
+
+object XGBClassifierModel {
+ def load(path: String, numClass: Int): XGBClassifierModel = {
+ new XGBClassifierModel(XGBoostHelper.load(path, numClass))
+ }
+
+ def load(path: String): XGBClassifierModel = {
+ new XGBClassifierModel(XGBoostClassificationModel.load(path))
+ }
+
+}
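+
+// Loading sketch (paths are placeholders): `load(path)` reads a model written
+// by `model.save(path)`, while `load(path, numClass)` goes through
+// XGBoostHelper for models that need the class count supplied explicitly:
+//
+//   val reloaded = XGBClassifierModel.load("/tmp/xgb-classifier")
+//   val binary = XGBClassifierModel.load("/tmp/xgb-native", numClass = 2)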
+
+/**
+ * [[XGBRegressor]] xgboost wrapper of XGBoostRegressor.
+ */
+class XGBRegressor () {
+
+ private val estimator = new XGBoostRegressor()
+ estimator.setNthread(Engine.coreNumber())
+ estimator.setMaxBins(256)
+
+ def setLabelCol(labelColName : String) : this.type = {
+ estimator.setLabelCol(labelColName)
+ this
+ }
+
+ def setFeaturesCol(featuresColName: String): this.type = {
+ estimator.setFeaturesCol(featuresColName)
+ this
+ }
+
+ def fit(df: DataFrame): XGBRegressorModel = {
+    val repartitioned = df.repartition(Engine.nodeNumber())
+    val xgbModel = estimator.fit(repartitioned)
+ new XGBRegressorModel(xgbModel)
+ }
+
+ def setNumRound(value: Int): this.type = {
+ estimator.setNumRound(value)
+ this
+ }
+
+ def setNumWorkers(value: Int): this.type = {
+ estimator.setNumWorkers(value)
+ this
+ }
+
+ def setNthread(value: Int): this.type = {
+ estimator.setNthread(value)
+ this
+ }
+
+ def setSilent(value: Int): this.type = {
+ estimator.setSilent(value)
+ this
+ }
+
+ def setMissing(value: Float): this.type = {
+ estimator.setMissing(value)
+ this
+ }
+
+ def setCheckpointPath(value: String): this.type = {
+ estimator.setCheckpointPath(value)
+ this
+ }
+
+ def setCheckpointInterval(value: Int): this.type = {
+ estimator.setCheckpointInterval(value)
+ this
+ }
+
+ def setSeed(value: Long): this.type = {
+ estimator.setSeed(value)
+ this
+ }
+
+ def setEta(value: Double): this.type = {
+ estimator.setEta(value)
+ this
+ }
+
+ def setGamma(value: Double): this.type = {
+ estimator.setGamma(value)
+ this
+ }
+
+ def setMaxDepth(value: Int): this.type = {
+ estimator.setMaxDepth(value)
+ this
+ }
+
+ def setMinChildWeight(value: Double): this.type = {
+ estimator.setMinChildWeight(value)
+ this
+ }
+
+ def setMaxDeltaStep(value: Double): this.type = {
+ estimator.setMaxDeltaStep(value)
+ this
+ }
+
+ def setColsampleBytree(value: Double): this.type = {
+ estimator.setColsampleBytree(value)
+ this
+ }
+
+ def setColsampleBylevel(value: Double): this.type = {
+ estimator.setColsampleBylevel(value)
+ this
+ }
+
+ def setLambda(value: Double): this.type = {
+ estimator.setLambda(value)
+ this
+ }
+
+ def setAlpha(value: Double): this.type = {
+ estimator.setAlpha(value)
+ this
+ }
+
+ def setTreeMethod(value: String): this.type = {
+ estimator.setTreeMethod(value)
+ this
+ }
+
+ def setGrowPolicy(value: String): this.type = {
+ estimator.setGrowPolicy(value)
+ this
+ }
+
+ def setMaxBins(value: Int): this.type = {
+ estimator.setMaxBins(value)
+ this
+ }
+
+ def setMaxLeaves(value: Int): this.type = {
+ estimator.setMaxLeaves(value)
+ this
+ }
+
+ def setSketchEps(value: Double): this.type = {
+ estimator.setSketchEps(value)
+ this
+ }
+
+ def setScalePosWeight(value: Double): this.type = {
+ estimator.setScalePosWeight(value)
+ this
+ }
+
+ def setSampleType(value: String): this.type = {
+ estimator.setSampleType(value)
+ this
+ }
+
+ def setNormalizeType(value: String): this.type = {
+ estimator.setNormalizeType(value)
+ this
+ }
+
+ def setRateDrop(value: Double): this.type = {
+ estimator.setRateDrop(value)
+ this
+ }
+
+ def setSkipDrop(value: Double): this.type = {
+ estimator.setSkipDrop(value)
+ this
+ }
+
+ def setLambdaBias(value: Double): this.type = {
+ estimator.setLambdaBias(value)
+ this
+ }
+
+ def setObjective(value: String): this.type = {
+ estimator.setObjective(value)
+ this
+ }
+
+ def setObjectiveType(value: String): this.type = {
+ estimator.setObjectiveType(value)
+ this
+ }
+
+ def setSubsample(value: Double): this.type = {
+ estimator.setSubsample(value)
+ this
+ }
+
+ def setBaseScore(value: Double): this.type = {
+ estimator.setBaseScore(value)
+ this
+ }
+
+ def setEvalMetric(value: String): this.type = {
+ estimator.setEvalMetric(value)
+ this
+ }
+
+ def setNumEarlyStoppingRounds(value: Int): this.type = {
+ estimator.setNumEarlyStoppingRounds(value)
+ this
+ }
+
+ def setMaximizeEvaluationMetrics(value: Boolean): this.type = {
+ estimator.setMaximizeEvaluationMetrics(value)
+ this
+ }
+}
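+
+// Configuration sketch for [[XGBRegressor]] (hyperparameter values are
+// illustrative, not recommendations):
+//
+//   val regressor = new XGBRegressor()
+//     .setLabelCol("label")
+//     .setFeaturesCol("features")
+//     .setNumRound(50)
+//     .setMaxDepth(6)
+//     .setEta(0.1)
+//   val model: XGBRegressorModel = regressor.fit(trainDf)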
+
+/**
+ * [[XGBRegressorModel]] xgboost wrapper of XGBoostRegressionModel.
+ */
+class XGBRegressorModel private[bigdl](val model: XGBoostRegressionModel) {
+ var predictionCol: String = null
+ var featuresCol: String = "features"
+ var featurearray: Array[String] = Array("features")
+ def setPredictionCol(value: String): this.type = {
+ predictionCol = value
+ this
+ }
+
+ def setInferBatchSize(value: Int): this.type = {
+ model.setInferBatchSize(value)
+ this
+ }
+
+ def setFeaturesCol(value: String): this.type = {
+ model.setFeaturesCol(value)
+ featuresCol = value
+ this
+ }
+
+ def transform(dataset: DataFrame): DataFrame = {
+ val featureVectorAssembler = new VectorAssembler()
+ .setInputCols(featurearray)
+ .setOutputCol("featureAssembledVector")
+ val assembledDF = featureVectorAssembler.transform(dataset)
+ import org.apache.spark.ml.linalg.Vector
+ import org.apache.spark.sql.functions.{col, udf}
+ val asDense = udf((v: Vector) => v.toDense)
+ val xgbInput = assembledDF.withColumn("DenseFeatures", asDense(col("featureAssembledVector")))
+ model.setFeaturesCol("DenseFeatures")
+ var output = model.transform(xgbInput).drop("DenseFeatures", "featureAssembledVector")
+    if (predictionCol != null) {
+ output = output.withColumnRenamed("prediction", predictionCol)
+ }
+ output
+ }
+
+ def save(path: String): Unit = {
+ model.write.overwrite().save(path)
+ }
+}
+
+object XGBRegressorModel {
+ /**
+ * Load pretrained Zoo XGBRegressorModel.
+ */
+ def load(path: String): XGBRegressorModel = {
+ new XGBRegressorModel(XGBoostRegressionModel.load(path))
+ }
+
+ /**
+ * Load pretrained xgboost XGBoostRegressionModel.
+ */
+ def loadFromXGB(path: String): XGBRegressorModel = {
+ new XGBRegressorModel(XGBoostHelper.load(path))
+ }
+}
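+
+// The two loaders above differ by on-disk format (paths are placeholders):
+//
+//   XGBRegressorModel.load("/tmp/xgb-regressor")        // Spark ML writer format
+//   XGBRegressorModel.loadFromXGB("/tmp/xgb-booster")   // native xgboost model via XGBoostHelper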
+
+/**
+ * [[LightGBMClassifier]] lightGBM wrapper of SynapseML LightGBMClassifier.
+ */
+class LightGBMClassifier {
+
+ val sc = SparkSession.active.sparkContext
+ sc.getConf.set("spark.task.cpus", Engine.coreNumber().toString)
+
+  private val estimator = new MLightGBMClassifier()
+ estimator.setNumThreads(Engine.coreNumber())
+
+ def setLabelCol(labelColName : String) : this.type = {
+ estimator.setLabelCol(labelColName)
+ this
+ }
+
+ def setFeaturesCol(featuresColName: String): this.type = {
+ estimator.setFeaturesCol(featuresColName)
+ this
+ }
+
+ def setBoostingType(value: String): this.type = {
+ estimator.setBoostingType(value)
+ this
+ }
+ // for regularization
+ def setMaxBin(value: Int): this.type = {
+ estimator.setMaxBin(value)
+ this
+ }
+
+ def setNumLeaves(value: Int): this.type = {
+ estimator.setNumLeaves(value)
+ this
+ }
+
+ def setMinDataInLeaf(value: Int): this.type = {
+ estimator.setMinDataInLeaf(value)
+ this
+ }
+
+  // minSumHessianInLeaf is a Double parameter in LightGBM (default 1e-3), so accept Double
+  def setMinSumHessianInLeaf(value: Double): this.type = {
+ estimator.setMinSumHessianInLeaf(value)
+ this
+ }
+
+ def setBaggingFraction(value: Double): this.type = {
+ estimator.setBaggingFraction(value)
+ this
+ }
+
+ def setBaggingFreq(value: Int): this.type = {
+ estimator.setBaggingFreq(value)
+ this
+ }
+
+ def setFeatureFraction(value: Double): this.type = {
+ estimator.setFeatureFraction(value)
+ this
+ }
+
+ def setLambdaL1(value: Double): this.type = {
+ estimator.setLambdaL1(value)
+ this
+ }
+
+ def setLambdaL2(value: Double): this.type = {
+ estimator.setLambdaL2(value)
+ this
+ }
+
+ def setMaxDepth(value: Int): this.type = {
+ estimator.setMaxDepth(value)
+ this
+ }
+
+ def setMinGainToSplit(value: Double): this.type = {
+ estimator.setMinGainToSplit(value)
+ this
+ }
+
+ def setMaxDeltaStep(value: Double): this.type = {
+ estimator.setMaxDeltaStep(value)
+ this
+ }
+
+ def setSkipDrop(value: Double): this.type = {
+ estimator.setSkipDrop(value)
+ this
+ }
+
+ // training
+ def setNumIterations(value: Int): this.type = {
+ estimator.setNumIterations(value)
+ this
+ }
+ def getNumIterations: Int = {
+ estimator.getNumIterations
+ }
+
+ def setLearningRate(value: Double): this.type = {
+ estimator.setLearningRate(value)
+ this
+ }
+
+ def setEarlyStoppingRound(value: Int): this.type = {
+ estimator.setEarlyStoppingRound(value)
+ this
+ }
+
+ def setCategoricalSlotNames(value: Array[String]): this.type = {
+ estimator.setCategoricalSlotNames(value)
+ this
+ }
+ def setCategoricalSlotIndexes(value: Array[Int]): this.type = {
+ estimator.setCategoricalSlotIndexes(value)
+ this
+ }
+
+ def setObjective(value: String): this.type = {
+ estimator.setObjective(value)
+ this
+ }
+
+
+ def setIsUnbalance(value: Boolean): this.type = {
+ estimator.setIsUnbalance(value)
+ this
+ }
+ def setNumThreads(value: Int): this.type = {
+ estimator.setNumThreads(value)
+ this
+ }
+ def fit(df: DataFrame): LightGBMClassifierModel = {
+    val repartitioned = df.repartition(Engine.nodeNumber())
+    val lightgbmmodel = estimator.fit(repartitioned)
+ new LightGBMClassifierModel(lightgbmmodel)
+ }
+}
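+
+// Training sketch for [[LightGBMClassifier]] (column names and parameter
+// values are assumptions for illustration):
+//
+//   val classifier = new LightGBMClassifier()
+//     .setFeaturesCol("features")
+//     .setLabelCol("label")
+//     .setObjective("binary")
+//     .setNumIterations(100)
+//     .setIsUnbalance(true)
+//   val model: LightGBMClassifierModel = classifier.fit(trainDf)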
+
+/**
+ * [[LightGBMClassifierModel]] is a trained LightGBM classification model.
+ * The prediction column will have the prediction results.
+ *
+ * @param model trained MLightGBMClassificationModel to use in prediction.
+ */
+class LightGBMClassifierModel private[bigdl](val model: MLightGBMClassificationModel) {
+ private var featuresCols: String = "features"
+ private var predictionCol: String = "prediction"
+
+ def setFeaturesCol(featuresColName: String): this.type = {
+ featuresCols = featuresColName
+ this
+ }
+
+ def setPredictionCol(value: String): this.type = {
+ predictionCol = value
+ this
+ }
+
+ def transform(dataset: DataFrame): DataFrame = {
+    // featuresCols is a String, so check for null (comparison with None is always true)
+    Log4Error.invalidInputError(featuresCols != null,
+      "Please set feature columns before transform")
+    model.setFeaturesCol(featuresCols)
+    var output = model.transform(dataset)
+    if (predictionCol != null) {
+ output = output.withColumnRenamed("prediction", predictionCol)
+ }
+ output
+ }
+
+ def saveNativeModel(path: String): Unit = {
+ model.saveNativeModel(path, overwrite = true)
+ }
+}
+
+object LightGBMClassifierModel {
+ def loadNativeModel(path: String): LightGBMClassifierModel = {
+ new LightGBMClassifierModel(MLightGBMClassificationModel.loadNativeModelFromFile(path))
+ }
+}
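+
+// Persistence round trip (path is a placeholder): saveNativeModel writes the
+// LightGBM native text format, which loadNativeModel reads back:
+//
+//   model.saveNativeModel("/tmp/lgbm-classifier")
+//   val reloaded = LightGBMClassifierModel.loadNativeModel("/tmp/lgbm-classifier")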
+
+/**
+ * [[LightGBMRegressor]] lightGBM wrapper of SynapseML LightGBMRegressor.
+ */
+class LightGBMRegressor {
+
+ val sc = SparkSession.active.sparkContext
+ sc.getConf.set("spark.task.cpus", Engine.coreNumber().toString)
+
+ private val estimator = new MLightGBMRegressor()
+ estimator.setNumThreads(Engine.coreNumber())
+
+ def setAlpha(value: Double): this.type = {
+ estimator.setAlpha(value)
+ this
+ }
+
+ def setLabelCol(labelColName : String) : this.type = {
+ estimator.setLabelCol(labelColName)
+ this
+ }
+
+ def setFeaturesCol(featuresColName: String): this.type = {
+ estimator.setFeaturesCol(featuresColName)
+ this
+ }
+
+ def setBoostingType(value: String): this.type = {
+ estimator.setBoostingType(value)
+ this
+ }
+ // for regularization
+ def setMaxBin(value: Int): this.type = {
+ estimator.setMaxBin(value)
+ this
+ }
+
+ def setNumLeaves(value: Int): this.type = {
+ estimator.setNumLeaves(value)
+ this
+ }
+
+ def setMinDataInLeaf(value: Int): this.type = {
+ estimator.setMinDataInLeaf(value)
+ this
+ }
+
+  // minSumHessianInLeaf is a Double parameter in LightGBM (default 1e-3), so accept Double
+  def setMinSumHessianInLeaf(value: Double): this.type = {
+ estimator.setMinSumHessianInLeaf(value)
+ this
+ }
+
+ def setBaggingFraction(value: Double): this.type = {
+ estimator.setBaggingFraction(value)
+ this
+ }
+
+ def setBaggingFreq(value: Int): this.type = {
+ estimator.setBaggingFreq(value)
+ this
+ }
+
+ def setFeatureFraction(value: Double): this.type = {
+ estimator.setFeatureFraction(value)
+ this
+ }
+
+ def setLambdaL1(value: Double): this.type = {
+ estimator.setLambdaL1(value)
+ this
+ }
+
+ def setLambdaL2(value: Double): this.type = {
+ estimator.setLambdaL2(value)
+ this
+ }
+
+ def setMaxDepth(value: Int): this.type = {
+ estimator.setMaxDepth(value)
+ this
+ }
+
+ def setMinGainToSplit(value: Double): this.type = {
+ estimator.setMinGainToSplit(value)
+ this
+ }
+
+ def setMaxDeltaStep(value: Double): this.type = {
+ estimator.setMaxDeltaStep(value)
+ this
+ }
+
+ def setSkipDrop(value: Double): this.type = {
+ estimator.setSkipDrop(value)
+ this
+ }
+
+ // training
+ def setNumIterations(value: Int): this.type = {
+ estimator.setNumIterations(value)
+ this
+ }
+
+
+ def setLearningRate(value: Double): this.type = {
+ estimator.setLearningRate(value)
+ this
+ }
+
+ def setEarlyStoppingRound(value: Int): this.type = {
+ estimator.setEarlyStoppingRound(value)
+ this
+ }
+
+ def setCategoricalSlotNames(value: Array[String]): this.type = {
+ estimator.setCategoricalSlotNames(value)
+ this
+ }
+ def setCategoricalSlotIndexes(value: Array[Int]): this.type = {
+ estimator.setCategoricalSlotIndexes(value)
+ this
+ }
+
+ def setObjective(value: String): this.type = {
+ estimator.setObjective(value)
+ this
+ }
+
+ def setNumThreads(value: Int): this.type = {
+ estimator.setNumThreads(value)
+ this
+ }
+
+ def fit(df: DataFrame): LightGBMRegressorModel = {
+    val repartitioned = df.repartition(Engine.nodeNumber())
+    val lightgbmmodel = estimator.fit(repartitioned)
+ new LightGBMRegressorModel(lightgbmmodel)
+ }
+}
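+
+// Regression sketch (values illustrative):
+//
+//   val regressor = new LightGBMRegressor()
+//     .setLabelCol("label")
+//     .setObjective("regression")
+//     .setNumIterations(100)
+//     .setLearningRate(0.05)
+//   val model: LightGBMRegressorModel = regressor.fit(trainDf)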
+
+/**
+ * [[LightGBMRegressorModel]] lightGBM wrapper of SynapseML LightGBMRegressionModel.
+ */
+class LightGBMRegressorModel private[bigdl](val model: MLightGBMRegressionModel) {
+ var predictionCol: String = null
+ var featuresCol: String = "features"
+ var featurearray: Array[String] = Array("features")
+ def setPredictionCol(value: String): this.type = {
+ predictionCol = value
+ this
+ }
+
+ def setFeaturesCol(value: String): this.type = {
+ model.setFeaturesCol(value)
+ featuresCol = value
+ this
+ }
+
+ def transform(dataset: DataFrame): DataFrame = {
+ val featureVectorAssembler = new VectorAssembler()
+ .setInputCols(featurearray)
+ .setOutputCol("featureAssembledVector")
+ val assembledDF = featureVectorAssembler.transform(dataset)
+ import org.apache.spark.ml.linalg.Vector
+ import org.apache.spark.sql.functions.{col, udf}
+ val asDense = udf((v: Vector) => v.toDense)
+    val lgbmInput = assembledDF.withColumn("DenseFeatures", asDense(col("featureAssembledVector")))
+    model.setFeaturesCol("DenseFeatures")
+    var output = model.transform(lgbmInput).drop("DenseFeatures", "featureAssembledVector")
+    if (predictionCol != null) {
+ output = output.withColumnRenamed("prediction", predictionCol)
+ }
+ output
+ }
+
+ def saveNativeModel(path: String): Unit = {
+ model.saveNativeModel(path, overwrite = true)
+ }
+}
+
+object LightGBMRegressorModel {
+ /**
+   * Load a pretrained LightGBMRegressorModel saved in LightGBM native format.
+ */
+ def loadNativeModel(path: String): LightGBMRegressorModel = {
+ new LightGBMRegressorModel(MLightGBMRegressionModel.loadNativeModelFromFile(path))
+ }
+}
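+
+// Prediction sketch (`reloadedModel` and `testDf` are assumed): transform
+// assembles the "features" column internally, so callers only rename the
+// output column if needed:
+//
+//   val scored = reloadedModel.setPredictionCol("pred").transform(testDf)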
+
diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/nnframes/LightGBMTrainSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/nnframes/LightGBMTrainSpec.scala
new file mode 100644
index 00000000000..aaf3ab0a047
--- /dev/null
+++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/nnframes/LightGBMTrainSpec.scala
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2016 The BigDL Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.bigdl.dllib.nnframes
+
+import com.intel.analytics.bigdl.dllib.keras.ZooSpecHelper
+import com.intel.analytics.bigdl.dllib.utils.{Engine, TestUtils}
+import org.apache.spark.SparkContext
+import org.apache.spark.ml.feature.VectorAssembler
+import org.apache.spark.sql.{SQLContext, SparkSession}
+import org.apache.spark.SparkConf
+
+
+class LightGBMTrainSpec extends ZooSpecHelper {
+ var sc : SparkContext = _
+ var sqlContext : SQLContext = _
+
+ override def doBefore(): Unit = {
+ val conf = new SparkConf().setAppName("Test lightGBM").setMaster("local[1]")
+ sc = SparkContext.getOrCreate(conf)
+ sqlContext = new SQLContext(sc)
+ }
+
+ override def doAfter(): Unit = {
+ if (sc != null) {
+ sc.stop()
+ }
+ }
+
+ "LightGBMClassifer train" should "work" in {
+ val spark = SparkSession.builder().getOrCreate()
+ import spark.implicits._
+ Engine.init
+ val df = Seq(
+ (1.0, 2.0, 3.0, 4.0, 1),
+ (1.0, 3.0, 8.0, 2.0, 0)
+ ).toDF("f1", "f2", "f3", "f4", "label")
+ val vectorAssembler = new VectorAssembler()
+ .setInputCols(Array("f1", "f2", "f3", "f4"))
+ .setOutputCol("features")
+ val assembledDf = vectorAssembler.transform(df).select("features", "label").cache()
+ if (spark.version.substring(0, 3).toDouble >= 3.1) {
+      val lightGBMclassifier = new LightGBMClassifier()
+      val model1 = lightGBMclassifier.fit(assembledDf)
+      val res1 = model1.transform(assembledDf)
+ TestUtils.conditionFailTest(res1.count() == 2)
+ }
+ }
+
+ "LightGBMClassifer save" should "work" in {
+ val spark = SparkSession.builder().getOrCreate()
+ import spark.implicits._
+ Engine.init
+ val df = Seq(
+ (1.0, 2.0, 3.0, 4.0, 1),
+ (1.0, 3.0, 8.0, 2.0, 0)
+ ).toDF("f1", "f2", "f3", "f4", "label")
+ val vectorAssembler = new VectorAssembler()
+ .setInputCols(Array("f1", "f2", "f3", "f4"))
+ .setOutputCol("features")
+ val assembledDf = vectorAssembler.transform(df).select("features", "label").cache()
+ if (spark.version.substring(0, 3).toDouble >= 3.1) {
+ val lightGBMclassifier = new LightGBMClassifier()
+ val model = lightGBMclassifier.fit(assembledDf)
+ model.saveNativeModel("/tmp/lightgbm/classifier1")
+ val model2 = LightGBMClassifierModel.loadNativeModel("/tmp/lightgbm/classifier1")
+ val res2 = model2.transform(assembledDf)
+ TestUtils.conditionFailTest(res2.count() == 2)
+ }
+ }
+
+ "LightGBMRegressor train" should "work" in {
+ val spark = SparkSession.builder().getOrCreate()
+ import spark.implicits._
+ Engine.init
+ val df = Seq(
+ (1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 1.0f, 2.0f, 4.0f, 8.0f, 3.0f, 116.3668f),
+ (1.0f, 3.0f, 8.0f, 6.0f, 5.0f, 9.0f, 5.0f, 6.0f, 7.0f, 4.0f, 116.367f),
+ (2.0f, 1.0f, 5.0f, 7.0f, 6.0f, 7.0f, 4.0f, 1.0f, 2.0f, 3.0f, 116.367f),
+ (2.0f, 1.0f, 4.0f, 3.0f, 6.0f, 1.0f, 3.0f, 2.0f, 1.0f, 3.0f, 116.3668f)
+ ).toDF("f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "label")
+ val vectorAssembler = new VectorAssembler()
+ .setInputCols(Array("f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10"))
+ .setOutputCol("features")
+ val assembledDf = vectorAssembler.transform(df).select("features", "label").cache()
+ if (spark.version.substring(0, 3).toDouble >= 3.1) {
+ val lightGBMRegressor = new LightGBMRegressor()
+ val regressorModel0 = lightGBMRegressor.fit(assembledDf)
+ val y0 = regressorModel0.transform(assembledDf)
+ regressorModel0.saveNativeModel("/tmp/test")
+ val model = LightGBMRegressorModel.loadNativeModel("/tmp/test")
+ val y0_0 = model.transform(assembledDf)
+ TestUtils.conditionFailTest(y0.count() == 4)
+ TestUtils.conditionFailTest(y0_0.count() == 4)
+ }
+ }
+}
+
diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/nnframes/XgboostTrainSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/nnframes/XgboostTrainSpec.scala
index 5c138ad3d21..ec5c3a62a4e 100644
--- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/nnframes/XgboostTrainSpec.scala
+++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/nnframes/XgboostTrainSpec.scala
@@ -16,10 +16,10 @@
package com.intel.analytics.bigdl.dllib.nnframes
-import com.intel.analytics.bigdl.dllib.utils.Engine
+import com.intel.analytics.bigdl.dllib.utils.{Engine, TestUtils}
import com.intel.analytics.bigdl.dllib.keras.ZooSpecHelper
import org.apache.spark.SparkContext
-import org.apache.spark.ml.feature.{VectorAssembler}
+import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.sql.{SQLContext, SparkSession}
class XgboostTrainSpec extends ZooSpecHelper {
@@ -38,7 +38,7 @@ class XgboostTrainSpec extends ZooSpecHelper {
}
}
-/*
+
"XGBClassifer train" should "work" in {
if (!(scala.util.Properties.isMac || scala.util.Properties.isWin)) {
val spark = SparkSession.builder().getOrCreate()
@@ -57,13 +57,9 @@ class XgboostTrainSpec extends ZooSpecHelper {
xgbCf0.setNumRound(10)
xgbCf0.setNthread(1)
val model = xgbCf0.fit(assembledDf)
-
- model.setFeaturesCol(Array("f1", "f2", "f3", "f4"))
- // testdf = df.cache()
- println("the df is: ")
- df.show()
- val res = model.transform(df)
- print(res)
+ model.setFeaturesCol("features")
+ model.save("/tmp/xgboost")
+ val res = model.transform(assembledDf)
res.show()
}
}
@@ -90,9 +86,9 @@ class XgboostTrainSpec extends ZooSpecHelper {
xgbRegressorModel0.save("/tmp/test")
val model = XGBRegressorModel.load("/tmp/test")
val y0_0 = model.transform(assembledDf)
- TestUtils.conditionFailTest(y0_0.except(y0).count()==0)
+ y0_0.show()
+ TestUtils.conditionFailTest(y0_0.except(y0).count() == 0)
}
}
-*/
}
diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/parameters/FP16ParameterSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/parameters/FP16ParameterSpec.scala
similarity index 100%
rename from scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/parameters/FP16ParameterSpec.scala
rename to scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/parameters/FP16ParameterSpec.scala