diff --git a/dl/src/test/integration-test.robot b/dl/src/test/integration-test.robot index c6b98c7a6ed..53d8e4033fe 100644 --- a/dl/src/test/integration-test.robot +++ b/dl/src/test/integration-test.robot @@ -59,24 +59,24 @@ Remove Input Run Spark Test [Arguments] ${submit} ${spark_master} DownLoad Input - Log To Console begin lenet Train ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-cores 16 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 256 -e 3 - Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-cores 16 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 256 -e 3 + Log To Console begin lenet Train ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-cores 16 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 256 -e 3 --optimizerVersion "optimizerV2" + Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-cores 16 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 256 -e 3 --optimizerVersion "optimizerV2" Log To Console begin lenet Train local[4] - Run Shell ${submit} --master local[4] --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f /tmp/mnist -b 120 -e 1 + Run Shell ${submit} --master local[4] --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f /tmp/mnist -b 120 -e 1 --optimizerVersion "optimizerV2" Log To Console begin autoencoder Train - Run Shell ${submit} --master ${spark_master} --executor-cores 4 
--total-executor-cores 8 --class com.intel.analytics.bigdl.models.autoencoder.Train ${jar_path} -b 120 -e 1 -f /tmp/mnist + Run Shell ${submit} --master ${spark_master} --executor-cores 4 --total-executor-cores 8 --class com.intel.analytics.bigdl.models.autoencoder.Train ${jar_path} -b 120 -e 1 -f /tmp/mnist --optimizerVersion "optimizerV2" Log To Console begin PTBWordLM - Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 40g --executor-memory 40g --executor-cores 8 --total-executor-cores 8 --class com.intel.analytics.bigdl.example.languagemodel.PTBWordLM ${jar_path} -f ./simple-examples/data -b 120 --numLayers 2 --vocab 10001 --hidden 650 --numSteps 35 --learningRate 0.005 -e 1 --learningRateDecay 0.001 --keepProb 0.5 --overWrite + Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 40g --executor-memory 40g --executor-cores 8 --total-executor-cores 8 --class com.intel.analytics.bigdl.example.languagemodel.PTBWordLM ${jar_path} -f ./simple-examples/data -b 120 --numLayers 2 --vocab 10001 --hidden 650 --numSteps 35 --learningRate 0.005 -e 1 --learningRateDecay 0.001 --keepProb 0.5 --overWrite --optimizerVersion "optimizerV2" Log To Console begin resnet Train - Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-memory 5g --executor-cores 8 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.resnet.TrainCIFAR10 ${jar_path} -f /tmp/cifar --batchSize 448 --optnet true --depth 20 --classes 10 --shortcutType A --nEpochs 1 --learningRate 0.1 + Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-memory 5g --executor-cores 8 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.resnet.TrainCIFAR10 ${jar_path} 
-f /tmp/cifar --batchSize 448 --optnet true --depth 20 --classes 10 --shortcutType A --nEpochs 1 --learningRate 0.1 --optimizerVersion "optimizerV2" Log To Console begin DLClassifierLeNet - Run Shell ${submit} --master ${spark_master} --executor-cores 16 --total-executor-cores 16 --driver-memory 5g --executor-memory 30g --class com.intel.analytics.bigdl.example.MLPipeline.DLClassifierLeNet ${jar_path} -b 1200 -f /tmp/mnist --maxEpoch 1 + Run Shell ${submit} --master ${spark_master} --executor-cores 16 --total-executor-cores 16 --driver-memory 5g --executor-memory 30g --class com.intel.analytics.bigdl.example.MLPipeline.DLClassifierLeNet ${jar_path} -b 1200 -f /tmp/mnist --maxEpoch 1 Log To Console begin rnn Train - Run Shell ${submit} --master ${spark_master} --driver-memory 5g --executor-memory 5g --executor-cores 12 --total-executor-cores 12 --class com.intel.analytics.bigdl.models.rnn.Train ${jar_path} -f ./ -s ./models --nEpochs 1 --checkpoint ./model/ -b 12 + Run Shell ${submit} --master ${spark_master} --driver-memory 5g --executor-memory 5g --executor-cores 12 --total-executor-cores 12 --class com.intel.analytics.bigdl.models.rnn.Train ${jar_path} -f ./ -s ./models --nEpochs 1 --checkpoint ./model/ -b 12 --optimizerVersion "optimizerV2" Log To Console begin inceptionV1 train - Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 20g --executor-memory 40g --executor-cores 10 --total-executor-cores 20 --class com.intel.analytics.bigdl.models.inception.TrainInceptionV1 ${jar_path} -b 40 -f ${imagenet_test_data_source} --learningRate 0.1 -i 100 + Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 20g --executor-memory 40g --executor-cores 10 --total-executor-cores 20 --class com.intel.analytics.bigdl.models.inception.TrainInceptionV1 ${jar_path} -b 40 -f ${imagenet_test_data_source} --learningRate 0.1 -i 100 
--optimizerVersion "optimizerV2" Log To Console begin text classification - Run Shell ${submit} --master ${spark_master} --driver-memory 5g --executor-memory 5g --total-executor-cores 32 --executor-cores 8 --class com.intel.analytics.bigdl.example.textclassification.TextClassifier ${jar_path} --batchSize 128 --baseDir /tmp/text_data --partitionNum 32 + Run Shell ${submit} --master ${spark_master} --driver-memory 5g --executor-memory 5g --total-executor-cores 32 --executor-cores 8 --class com.intel.analytics.bigdl.example.textclassification.TextClassifier ${jar_path} --batchSize 128 --baseDir /tmp/text_data --partitionNum 32 Remove Input Spark2.2 Test Suite @@ -125,18 +125,18 @@ Yarn Test Suite Log To Console begin text classification Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --conf spark.yarn.executor.memoryOverhead=40000 --executor-cores 10 --num-executors 2 --driver-memory 20g --executor-memory 40g --class com.intel.analytics.bigdl.example.textclassification.TextClassifier ${jar_path} --batchSize 240 --baseDir /tmp/text_data --partitionNum 4 Log To Console begin lenet - Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 120 -e 3 + Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 120 -e 3 --optimizerVersion "optimizerV2" Log To Console begin autoencoder Train - Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g 
--class com.intel.analytics.bigdl.models.autoencoder.Train ${jar_path} -b 120 -e 1 -f /tmp/mnist + Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.autoencoder.Train ${jar_path} -b 120 -e 1 -f /tmp/mnist --optimizerVersion "optimizerV2" Log To Console begin resnet Train - Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.resnet.TrainCIFAR10 ${jar_path} -f /tmp/cifar --batchSize 120 --optnet true --depth 20 --classes 10 --shortcutType A --nEpochs 1 --learningRate 0.1 + Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.resnet.TrainCIFAR10 ${jar_path} -f /tmp/cifar --batchSize 120 --optnet true --depth 20 --classes 10 --shortcutType A --nEpochs 1 --learningRate 0.1 --optimizerVersion "optimizerV2" Log To Console begin rnn Train - Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.rnn.Train ${jar_path} -f ./ -s ./models --nEpochs 1 --checkpoint ./model/ -b 120 + Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.rnn.Train ${jar_path} -f ./ -s ./models --nEpochs 1 --checkpoint ./model/ -b 120 --optimizerVersion "optimizerV2" Log To Console begin PTBWordLM - Run Shell ${submit} --master yarn --deploy-mode client --conf 
"spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 8 --num-executors 1 --driver-memory 20g --executor-memory 40g --class com.intel.analytics.bigdl.example.languagemodel.PTBWordLM ${jar_path} -f ./simple-examples/data -b 120 --numLayers 2 --vocab 10001 --hidden 650 --numSteps 35 --learningRate 0.005 -e 1 --learningRateDecay 0.001 --keepProb 0.5 --overWrite + Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 8 --num-executors 1 --driver-memory 20g --executor-memory 40g --class com.intel.analytics.bigdl.example.languagemodel.PTBWordLM ${jar_path} -f ./simple-examples/data -b 120 --numLayers 2 --vocab 10001 --hidden 650 --numSteps 35 --learningRate 0.005 -e 1 --learningRateDecay 0.001 --keepProb 0.5 --overWrite --optimizerVersion "optimizerV2" Log To Console begin inceptionV1 train - Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 2 --driver-memory 20g --executor-memory 40g --class com.intel.analytics.bigdl.models.inception.TrainInceptionV1 ${jar_path} -b 40 -f ${imagenet_test_data_source} --learningRate 0.1 -i 100 - Run Shell ${submit} --master yarn --deploy-mode client --executor-memory 2g --driver-memory 2g --executor-cores 10 --num-executors 2 --properties-file ${curdir}/dist/conf/spark-bigdl.conf --jars ${jar_path} --py-files ${curdir}/dist/lib/bigdl-${version}-python-api.zip --conf spark.driver.extraClassPath=${jar_path} --conf spark.executor.extraClassPath=bigdl-${version}-jar-with-dependencies.jar ${curdir}/pyspark/bigdl/models/lenet/lenet5.py -b 200 --action train --endTriggerType epoch --endTriggerNum 1 + Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 2 --driver-memory 20g --executor-memory 40g --class 
com.intel.analytics.bigdl.models.inception.TrainInceptionV1 ${jar_path} -b 40 -f ${imagenet_test_data_source} --learningRate 0.1 -i 100 --optimizerVersion "optimizerV2" + Run Shell ${submit} --master yarn --deploy-mode client --executor-memory 2g --driver-memory 2g --executor-cores 10 --num-executors 2 --properties-file ${curdir}/dist/conf/spark-bigdl.conf --jars ${jar_path} --py-files ${curdir}/dist/lib/bigdl-${version}-python-api.zip --conf spark.driver.extraClassPath=${jar_path} --conf spark.executor.extraClassPath=bigdl-${version}-jar-with-dependencies.jar ${curdir}/pyspark/bigdl/models/lenet/lenet5.py -b 200 --action train --endTriggerType epoch --endTriggerNum 1 Remove Environment Variable http_proxy https_proxy Remove Input diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/PTBWordLM.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/PTBWordLM.scala index ad4077d7295..66a80440485 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/PTBWordLM.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/PTBWordLM.scala @@ -22,7 +22,7 @@ import com.intel.analytics.bigdl.dataset.{DataSet, SampleToMiniBatch} import com.intel.analytics.bigdl.nn.{CrossEntropyCriterion, Module, TimeDistributedCriterion} import com.intel.analytics.bigdl.optim._ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric._ -import com.intel.analytics.bigdl.utils.Engine +import com.intel.analytics.bigdl.utils.{Engine, OptimizerV1, OptimizerV2} import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext import com.intel.analytics.bigdl.example.languagemodel.Utils._ @@ -82,6 +82,13 @@ object PTBWordLM { keepProb = param.keepProb) } + if (param.optimizerVersion.isDefined) { + param.optimizerVersion.get.toLowerCase match { + case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1) + case "optimizerv2" => 
Engine.setOptimizerVersion(OptimizerV2) + case other => throw new IllegalArgumentException(s"Unsupported optimizerVersion: $other") + } /* unknown values are rejected explicitly rather than surfacing as a MatchError */ + } val optimMethod = if (param.stateSnapshot.isDefined) { OptimMethod.load[Float](param.stateSnapshot.get) } else { diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/README.md b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/README.md index 7f813287534..31aacc5cea5 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/README.md +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/README.md @@ -47,4 +47,5 @@ In the above commands: ```--numSteps```: number of words per record in LM ```--overWrite```: do overwrite when saving checkpoint ```--keepProb```: the probability to do dropout -```--withTransformerModel```: use transformer model in this LM \ No newline at end of file +```--withTransformerModel```: use transformer model in this LM +```--optimizerVersion```: option can be used to set DistriOptimizer version, the value can be "optimizerV1" or "optimizerV2" \ No newline at end of file diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/Utils.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/Utils.scala index e3e2cf96e2e..bdeb381df73 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/Utils.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/languagemodel/Utils.scala @@ -51,7 +51,8 @@ object Utils { numSteps: Int = 20, overWriteCheckpoint: Boolean = false, keepProb: Float = 2.0f, - withTransformerModel: Boolean = false) + withTransformerModel: Boolean = false, + optimizerVersion: Option[String] = None) val trainParser = new OptionParser[TrainParams]("BigDL ptbModel Train Example") { opt[String]('f', "dataFolder") @@ -114,5 +115,9 @@ object Utils { opt[Boolean]("withTransformerModel") .text("Use transformer model in this 
LM") .action((x, c) => c.copy(withTransformerModel = true)) + + opt[String]("optimizerVersion") + .text("state optimizer version") + .action((x, c) => c.copy(optimizerVersion = Some(x))) } } diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/README.md b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/README.md index 12928534f25..35bdb816eef 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/README.md +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/README.md @@ -65,5 +65,6 @@ Next just run the following command to run the code: --regRate # number of L2 regularization rate, default is 1e-4 --p # number of dropout probability rate, default is 0.5 --epoch # number of epochs, default is 5 + --optimizerVersion # option to set DistriOptimizer version, default is "optimizerV1" ``` diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/Train.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/Train.scala index a1722a55c67..bd7c8785e5d 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/Train.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/Train.scala @@ -23,7 +23,7 @@ import com.intel.analytics.bigdl.nn.{TimeDistributedCriterion, _} import com.intel.analytics.bigdl.numeric.NumericFloat import com.intel.analytics.bigdl.optim._ import com.intel.analytics.bigdl.tensor.Tensor -import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, T} +import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, OptimizerV1, OptimizerV2, T} import org.apache.log4j.{Level => Levle4j, Logger => Logger4j} import org.apache.spark.SparkContext import org.slf4j.{Logger, LoggerFactory} @@ -93,6 +93,13 @@ object Train { val trainRDD = 
toSample(trainTreeRDD, trainLabelRDD, trainSentenceRDD) val devRDD = toSample(devTreeRDD, devLabelRDD, devSentenceRDD) + /* Apply the requested DistriOptimizer version; fail fast on unknown values. */ + param.optimizerVersion.map(_.toLowerCase).foreach { + case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1) + case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2) + case other => throw new IllegalArgumentException(s"Unsupported optimizerVersion: $other") + } + val optimizer = Optimizer( model = TreeLSTMSentiment(word2VecTensor, param.hiddenSize, classNum, param.p), sampleRDD = trainRDD, diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/Utils.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/Utils.scala index 016016eb234..6d6a5728fd1 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/Utils.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/example/treeLSTMSentiment/Utils.scala @@ -234,6 +234,9 @@ object Utils { opt[String]('e', "epoch") .text("max epoch") .action((x, c) => c.copy(epoch = x.toInt)) + opt[String]("optimizerVersion") + .text("state optimizer version") + .action((x, c) => c.copy(optimizerVersion = Some(x))) } case class TreeLSTMSentimentParam ( @@ -243,6 +246,7 @@ object Utils { override val learningRate: Double = 0.05, regRate: Double = 1e-4, p: Double = 0.5, - epoch: Int = 5 + epoch: Int = 5, + optimizerVersion: Option[String] = None ) extends AbstractTextClassificationParams } diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/autoencoder/Train.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/autoencoder/Train.scala index 88a065ec4a3..9212ef36201 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/autoencoder/Train.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/autoencoder/Train.scala @@ -26,7 +26,7 @@ import com.intel.analytics.bigdl.optim._ import com.intel.analytics.bigdl.tensor.Tensor import 
com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric._ -import com.intel.analytics.bigdl.utils.{Engine, T, Table} +import com.intel.analytics.bigdl.utils.{Engine, OptimizerV1, OptimizerV2, T, Table} import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext @@ -73,6 +73,13 @@ object Train { if (param.graphModel) Autoencoder.graph(classNum = 32) else Autoencoder(classNum = 32) } + /* Apply the requested DistriOptimizer version; fail fast on unknown values. */ + param.optimizerVersion.map(_.toLowerCase).foreach { + case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1) + case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2) + case other => throw new IllegalArgumentException(s"Unsupported optimizerVersion: $other") + } + val optimMethod = if (param.stateSnapshot.isDefined) { OptimMethod.load[Float](param.stateSnapshot.get) } else { diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/autoencoder/Utils.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/autoencoder/Utils.scala index 3da797dbf62..533a9c3d11c 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/autoencoder/Utils.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/autoencoder/Utils.scala @@ -34,7 +34,8 @@ object Utils { stateSnapshot: Option[String] = None, batchSize: Int = 150, maxEpoch: Int = 10, - graphModel: Boolean = false + graphModel: Boolean = false, + optimizerVersion: Option[String] = None ) val trainParser = new OptionParser[TrainParams]("BigDL Autoencoder on MNIST") { @@ -59,6 +60,9 @@ object Utils { opt[Unit]('g', "graphModel") .text("use graph model") .action((x, c) => c.copy(graphModel = true)) + opt[String]("optimizerVersion") + .text("state optimizer version") + .action((x, c) => c.copy(optimizerVersion = Some(x))) } private[bigdl] def load(featureFile: Path, labelFile: Path): Array[ByteRecord] = { diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/Options.scala 
b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/Options.scala index 685c3d5c198..127a7141395 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/Options.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/Options.scala @@ -39,7 +39,8 @@ object Options { warmupEpoch: Option[Int] = None, gradientL2NormThreshold: Option[Double] = None, gradientMin: Option[Double] = None, - gradientMax: Option[Double] = None + gradientMax: Option[Double] = None, + optimizerVersion: Option[String] = None ) val trainParser = new OptionParser[TrainParams]("BigDL Inception Example") { @@ -99,6 +100,9 @@ object Options { opt[Double]("gradientMin") .text("min gradient clipping by") .action((x, c) => c.copy(gradientMin = Some(x))) + opt[String]("optimizerVersion") + .text("state optimizer version") + .action((x, c) => c.copy(optimizerVersion = Some(x))) } case class TestParams( diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/README.md b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/README.md index 397654145a4..4bc58823cb7 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/README.md +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/README.md @@ -95,6 +95,7 @@ policy. * --gradientL2NormThreshold: optional. Gradient L2-Norm threshold used for norm2 gradient clipping. * --gradientMin: optional. Max gradient clipping by value, used in constant gradient clipping. * --gradientMax: optional. Min gradient clipping by value, used in constant gradient clipping. +* --optimizerVersion: option can be used to set DistriOptimizer version, the value can be "optimizerV1" or "optimizerV2". 
## Test the Model * Spark standalone, example command diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/Train.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/Train.scala index dfb3b7434fb..3d9cecc6d4f 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/Train.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/inception/Train.scala @@ -19,7 +19,7 @@ import com.intel.analytics.bigdl._ import com.intel.analytics.bigdl.nn.{ClassNLLCriterion, Module} import com.intel.analytics.bigdl.optim.SGD.{MultiStep, Poly, SequentialSchedule, Warmup} import com.intel.analytics.bigdl.optim._ -import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, T, Table} +import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, OptimizerV1, OptimizerV2, T, Table} import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext @@ -71,6 +71,13 @@ object TrainInceptionV1 { val warmupIteration = param.warmupEpoch.getOrElse(0) * iterationPerEpoch + /* Apply the requested DistriOptimizer version; fail fast on unknown values. */ + param.optimizerVersion.map(_.toLowerCase).foreach { + case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1) + case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2) + case other => throw new IllegalArgumentException(s"Unsupported optimizerVersion: $other") + } + val optimMethod = if (param.stateSnapshot.isDefined) { OptimMethod.load[Float](param.stateSnapshot.get) } else { diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/README.md b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/README.md index 97be7f5a1bf..a06b0b01f8e 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/README.md +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/README.md @@ -111,3 +111,4 @@ In the above command * -f: where you put your MNIST data * --model: the model snapshot file * -b: The mini-batch size. 
It is expected that the mini-batch size is a multiple of node_number * core_number. +* --optimizerVersion: option can be used to set DistriOptimizer version, the value can be "optimizerV1" or "optimizerV2". diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/Train.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/Train.scala index 34d772281bb..b60f2bab3cc 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/Train.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/Train.scala @@ -62,6 +62,13 @@ object Train { case MklDnn => CrossEntropyCriterion() } + /* Apply the requested DistriOptimizer version; fail fast on unknown values. */ + param.optimizerVersion.map(_.toLowerCase).foreach { + case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1) + case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2) + case other => throw new IllegalArgumentException(s"Unsupported optimizerVersion: $other") + } + val optimMethod = if (param.stateSnapshot.isDefined) { OptimMethod.load[Float](param.stateSnapshot.get) } else { diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/Utils.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/Utils.scala index c78a89b205f..d62ed98c136 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/Utils.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/lenet/Utils.scala @@ -20,7 +20,7 @@ import java.nio.ByteBuffer import java.nio.file.{Files, Path, Paths} import com.intel.analytics.bigdl.dataset.ByteRecord -import com.intel.analytics.bigdl.utils.File +import com.intel.analytics.bigdl.utils.{File, OptimizerVersion} import scopt.OptionParser object Utils { @@ -42,7 +42,8 @@ object Utils { coreNumber: Int = -1, nodeNumber: Int = -1, overWriteCheckpoint: Boolean = false, - graphModel: Boolean = false + graphModel: Boolean = false, + optimizerVersion: Option[String] = None ) val trainParser = new OptionParser[TrainParams]("BigDL Lenet 
Train Example") { @@ -79,6 +80,9 @@ object Utils { opt[Unit]('g', "graphModel") .text("use graph model") .action((x, c) => c.copy(graphModel = true)) + opt[String]("optimizerVersion") + .text("state optimizer version") + .action((x, c) => c.copy(optimizerVersion = Some(x))) } case class TestParams( diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/README.md b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/README.md index 49180b6e597..c4ea1a76ba8 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/README.md +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/README.md @@ -73,6 +73,7 @@ We support Local and Spark versions of training. Users can define env Engine.setOptimizerVersion(OptimizerV1) + case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2) + } + } + val optimMethod = if (param.stateSnapshot.isDefined) { OptimMethod.load[Float](param.stateSnapshot.get) } else { diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/TrainImageNet.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/TrainImageNet.scala index 0a3828f3b7d..e598f9e0308 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/TrainImageNet.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/TrainImageNet.scala @@ -89,6 +89,13 @@ object TrainImageNet { println(model) + /* Apply the requested DistriOptimizer version; fail fast on unknown values. */ + param.optimizerVersion.map(_.toLowerCase).foreach { + case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1) + case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2) + case other => throw new IllegalArgumentException(s"Unsupported optimizerVersion: $other") + } + val optimMethod = if (param.stateSnapshot.isDefined) { val optim = OptimMethod.load[Float](param.stateSnapshot.get).asInstanceOf[SGD[Float]] val baseLr = param.learningRate diff --git 
a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/Utils.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/Utils.scala index 6bead538804..7b58e3d7a88 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/Utils.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/Utils.scala @@ -44,7 +44,8 @@ object Utils { nesterov: Boolean = true, graphModel: Boolean = false, warmupEpoch: Int = 0, - maxLr: Double = 0.0) + maxLr: Double = 0.0, + optimizerVersion: Option[String] = None) val trainParser = new OptionParser[TrainParams]("BigDL ResNet Example") { head("Train ResNet model on single node") @@ -102,6 +103,9 @@ object Utils { opt[Double]("maxLr") .text("maxLr") .action((x, c) => c.copy(maxLr = x)) + opt[String]("optimizerVersion") + .text("state optimizer version") + .action((x, c) => c.copy(optimizerVersion = Some(x))) } case class TestParams( diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/rnn/Train.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/rnn/Train.scala index a123bd57e25..45495c7b0ed 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/rnn/Train.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/rnn/Train.scala @@ -24,7 +24,7 @@ import com.intel.analytics.bigdl.dataset.text.utils.SentenceToken import com.intel.analytics.bigdl.nn.{CrossEntropyCriterion, Module, TimeDistributedCriterion} import com.intel.analytics.bigdl.optim._ import com.intel.analytics.bigdl.tensor.{Storage, Tensor} -import com.intel.analytics.bigdl.utils.{Engine, T, Table} +import com.intel.analytics.bigdl.utils.{Engine, OptimizerV1, OptimizerV2, T, Table} import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric._ import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext @@ -101,6 +101,13 @@ object Train { curModel } + if 
(param.optimizerVersion.isDefined) { + param.optimizerVersion.get.toLowerCase match { + case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1) + case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2) + case other => throw new IllegalArgumentException(s"Unsupported optimizerVersion: $other") + } /* unknown values are rejected explicitly rather than surfacing as a MatchError */ + } val optimMethod = if (param.stateSnapshot.isDefined) { OptimMethod.load[Float](param.stateSnapshot.get) } else { diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/rnn/Utils.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/rnn/Utils.scala index bd43f7468c5..6694d2c95bf 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/rnn/Utils.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/rnn/Utils.scala @@ -51,7 +51,8 @@ object Utils { nEpochs: Int = 30, sentFile: Option[String] = None, tokenFile: Option[String] = None, - overWriteCheckpoint: Boolean = false) + overWriteCheckpoint: Boolean = false, + optimizerVersion: Option[String] = None) val trainParser = new OptionParser[TrainParams]("BigDL SimpleRNN Train Example") { opt[String]('f', "dataFolder") @@ -123,6 +124,9 @@ object Utils { opt[Unit]("overWrite") .text("overwrite checkpoint files") .action( (_, c) => c.copy(overWriteCheckpoint = true) ) + opt[String]("optimizerVersion") + .text("state optimizer version") + .action((x, c) => c.copy(optimizerVersion = Some(x))) } case class TestParams( diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/README.md b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/README.md index d5729ed20d2..e6dd5385769 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/README.md +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/README.md @@ -58,6 +58,7 @@ there are some files already exist in the folder, the old file will not be overw safety of your model files. * -b: The mini-batch size. 
It is expected that the mini-batch size is a multiple of node_number * core_number. * --summary: Where you store the training metainfo, which can be visualized in tensorboard +* --optimizerVersion: option to set DistriOptimizer version, the value can be "optimizerV1" or "optimizerV2". ## Test Model Example command for running in Spark local mode ``` diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Train.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Train.scala index 911b031af7d..056e9f44809 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Train.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Train.scala @@ -24,7 +24,7 @@ import com.intel.analytics.bigdl.dataset.image._ import com.intel.analytics.bigdl.nn.{ClassNLLCriterion, Module} import com.intel.analytics.bigdl.optim._ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric._ -import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, T, Table} +import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, OptimizerV1, OptimizerV2, T, Table} import com.intel.analytics.bigdl.visualization.{TrainSummary, ValidationSummary} import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext @@ -53,6 +53,17 @@ object Train { if (param.graphModel) VggForCifar10.graph(classNum = 10) else VggForCifar10(classNum = 10) } + // Select the optimizer implementation requested via --optimizerVersion, if given. + param.optimizerVersion.foreach { version => + version.toLowerCase match { + case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1) + case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2) + // Fail with a descriptive error instead of an opaque scala.MatchError. + case _ => throw new IllegalArgumentException( + s"Unsupported optimizerVersion '$version', expected optimizerV1 or optimizerV2") + } + } + val optimMethod = if (param.stateSnapshot.isDefined) { OptimMethod.load[Float](param.stateSnapshot.get) } else { diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/TrainImageNet.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/TrainImageNet.scala index
9e454b201a4..00d6495187f 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/TrainImageNet.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/TrainImageNet.scala @@ -20,7 +20,7 @@ import com.intel.analytics.bigdl.nn import com.intel.analytics.bigdl.nn.{CrossEntropyCriterion, Module, SoftmaxWithCriterion} import com.intel.analytics.bigdl.optim.SGD.{Poly, SequentialSchedule, Warmup} import com.intel.analytics.bigdl.optim._ -import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, MklBlas, MklDnn} +import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, MklBlas, MklDnn, OptimizerV1, OptimizerV2} import com.intel.analytics.bigdl.visualization.TrainSummary import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext @@ -61,6 +61,17 @@ object TrainImageNet { println(model) + // Select the optimizer implementation requested via --optimizerVersion, if given. + param.optimizerVersion.foreach { version => + version.toLowerCase match { + case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1) + case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2) + // Fail with a descriptive error instead of an opaque scala.MatchError. + case _ => throw new IllegalArgumentException( + s"Unsupported optimizerVersion '$version', expected optimizerV1 or optimizerV2") + } + } + val optimMethod = if (param.stateSnapshot.isDefined) { OptimMethod.load[Float](param.stateSnapshot.get).asInstanceOf[SGD[Float]] } else { diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Utils.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Utils.scala index 25ede36e984..4dc1d6f2fb2 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Utils.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Utils.scala @@ -57,7 +57,8 @@ object Utils { checkpointIteration: Int = 1000, maxLr: Double = 0.06, warmupEpoch: Option[Int] = None, - gradientL2NormThreshold: Option[Double] = None + gradientL2NormThreshold: Option[Double] = None, + optimizerVersion: Option[String] = None ) val trainParser = new OptionParser[TrainParams]("BigDL Vgg Example") { @@ -115,6 +116,9 @@ object Utils {
opt[Double]("gradientL2NormThreshold") .text("gradient L2-Norm threshold") .action((x, c) => c.copy(gradientL2NormThreshold = Some(x))) + opt[String]("optimizerVersion") + .text("state optimizer version") + .action((x, c) => c.copy(optimizerVersion = Some(x))) } case class TestParams(