Skip to content

Commit

Permalink
Test examples with DistriOptimizerV2 (intel-analytics#3007)
Browse files Browse the repository at this point in the history
* enable Scala examples with DistriOptimizerV2

* update example's readme

* update integration test
  • Loading branch information
Le-Zheng authored Jun 30, 2020
1 parent 5864d2c commit 91945b0
Show file tree
Hide file tree
Showing 25 changed files with 144 additions and 35 deletions.
34 changes: 17 additions & 17 deletions dl/src/test/integration-test.robot
Original file line number Diff line number Diff line change
Expand Up @@ -59,24 +59,24 @@ Remove Input
Run Spark Test
[Arguments] ${submit} ${spark_master}
DownLoad Input
Log To Console begin lenet Train ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-cores 16 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 256 -e 3
Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-cores 16 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 256 -e 3
Log To Console begin lenet Train ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-cores 16 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 256 -e 3 --optimizerVersion "optimizerV2"
Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-cores 16 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 256 -e 3 --optimizerVersion "optimizerV2"
Log To Console begin lenet Train local[4]
Run Shell ${submit} --master local[4] --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f /tmp/mnist -b 120 -e 1
Run Shell ${submit} --master local[4] --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f /tmp/mnist -b 120 -e 1 --optimizerVersion "optimizerV2"
Log To Console begin autoencoder Train
Run Shell ${submit} --master ${spark_master} --executor-cores 4 --total-executor-cores 8 --class com.intel.analytics.bigdl.models.autoencoder.Train ${jar_path} -b 120 -e 1 -f /tmp/mnist
Run Shell ${submit} --master ${spark_master} --executor-cores 4 --total-executor-cores 8 --class com.intel.analytics.bigdl.models.autoencoder.Train ${jar_path} -b 120 -e 1 -f /tmp/mnist --optimizerVersion "optimizerV2"
Log To Console begin PTBWordLM
Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 40g --executor-memory 40g --executor-cores 8 --total-executor-cores 8 --class com.intel.analytics.bigdl.example.languagemodel.PTBWordLM ${jar_path} -f ./simple-examples/data -b 120 --numLayers 2 --vocab 10001 --hidden 650 --numSteps 35 --learningRate 0.005 -e 1 --learningRateDecay 0.001 --keepProb 0.5 --overWrite
Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 40g --executor-memory 40g --executor-cores 8 --total-executor-cores 8 --class com.intel.analytics.bigdl.example.languagemodel.PTBWordLM ${jar_path} -f ./simple-examples/data -b 120 --numLayers 2 --vocab 10001 --hidden 650 --numSteps 35 --learningRate 0.005 -e 1 --learningRateDecay 0.001 --keepProb 0.5 --overWrite --optimizerVersion "optimizerV2"
Log To Console begin resnet Train
Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-memory 5g --executor-cores 8 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.resnet.TrainCIFAR10 ${jar_path} -f /tmp/cifar --batchSize 448 --optnet true --depth 20 --classes 10 --shortcutType A --nEpochs 1 --learningRate 0.1
Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 5g --executor-memory 5g --executor-cores 8 --total-executor-cores 32 --class com.intel.analytics.bigdl.models.resnet.TrainCIFAR10 ${jar_path} -f /tmp/cifar --batchSize 448 --optnet true --depth 20 --classes 10 --shortcutType A --nEpochs 1 --learningRate 0.1 --optimizerVersion "optimizerV2"
Log To Console begin DLClassifierLeNet
Run Shell ${submit} --master ${spark_master} --executor-cores 16 --total-executor-cores 16 --driver-memory 5g --executor-memory 30g --class com.intel.analytics.bigdl.example.MLPipeline.DLClassifierLeNet ${jar_path} -b 1200 -f /tmp/mnist --maxEpoch 1
Run Shell ${submit} --master ${spark_master} --executor-cores 16 --total-executor-cores 16 --driver-memory 5g --executor-memory 30g --class com.intel.analytics.bigdl.example.MLPipeline.DLClassifierLeNet ${jar_path} -b 1200 -f /tmp/mnist --maxEpoch 1
Log To Console begin rnn Train
Run Shell ${submit} --master ${spark_master} --driver-memory 5g --executor-memory 5g --executor-cores 12 --total-executor-cores 12 --class com.intel.analytics.bigdl.models.rnn.Train ${jar_path} -f ./ -s ./models --nEpochs 1 --checkpoint ./model/ -b 12
Run Shell ${submit} --master ${spark_master} --driver-memory 5g --executor-memory 5g --executor-cores 12 --total-executor-cores 12 --class com.intel.analytics.bigdl.models.rnn.Train ${jar_path} -f ./ -s ./models --nEpochs 1 --checkpoint ./model/ -b 12 --optimizerVersion "optimizerV2"
Log To Console begin inceptionV1 train
Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 20g --executor-memory 40g --executor-cores 10 --total-executor-cores 20 --class com.intel.analytics.bigdl.models.inception.TrainInceptionV1 ${jar_path} -b 40 -f ${imagenet_test_data_source} --learningRate 0.1 -i 100
Run Shell ${submit} --master ${spark_master} --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --driver-memory 20g --executor-memory 40g --executor-cores 10 --total-executor-cores 20 --class com.intel.analytics.bigdl.models.inception.TrainInceptionV1 ${jar_path} -b 40 -f ${imagenet_test_data_source} --learningRate 0.1 -i 100 --optimizerVersion "optimizerV2"
Log To Console begin text classification
Run Shell ${submit} --master ${spark_master} --driver-memory 5g --executor-memory 5g --total-executor-cores 32 --executor-cores 8 --class com.intel.analytics.bigdl.example.textclassification.TextClassifier ${jar_path} --batchSize 128 --baseDir /tmp/text_data --partitionNum 32
Run Shell ${submit} --master ${spark_master} --driver-memory 5g --executor-memory 5g --total-executor-cores 32 --executor-cores 8 --class com.intel.analytics.bigdl.example.textclassification.TextClassifier ${jar_path} --batchSize 128 --baseDir /tmp/text_data --partitionNum 32
Remove Input

Spark2.2 Test Suite
Expand Down Expand Up @@ -125,18 +125,18 @@ Yarn Test Suite
Log To Console begin text classification
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --conf spark.yarn.executor.memoryOverhead=40000 --executor-cores 10 --num-executors 2 --driver-memory 20g --executor-memory 40g --class com.intel.analytics.bigdl.example.textclassification.TextClassifier ${jar_path} --batchSize 240 --baseDir /tmp/text_data --partitionNum 4
Log To Console begin lenet
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 120 -e 3
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.lenet.Train ${jar_path} -f ${mnist_data_source} -b 120 -e 3 --optimizerVersion "optimizerV2"
Log To Console begin autoencoder Train
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.autoencoder.Train ${jar_path} -b 120 -e 1 -f /tmp/mnist
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.autoencoder.Train ${jar_path} -b 120 -e 1 -f /tmp/mnist --optimizerVersion "optimizerV2"
Log To Console begin resnet Train
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.resnet.TrainCIFAR10 ${jar_path} -f /tmp/cifar --batchSize 120 --optnet true --depth 20 --classes 10 --shortcutType A --nEpochs 1 --learningRate 0.1
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.resnet.TrainCIFAR10 ${jar_path} -f /tmp/cifar --batchSize 120 --optnet true --depth 20 --classes 10 --shortcutType A --nEpochs 1 --learningRate 0.1 --optimizerVersion "optimizerV2"
Log To Console begin rnn Train
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.rnn.Train ${jar_path} -f ./ -s ./models --nEpochs 1 --checkpoint ./model/ -b 120
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 3 --driver-memory 20g --class com.intel.analytics.bigdl.models.rnn.Train ${jar_path} -f ./ -s ./models --nEpochs 1 --checkpoint ./model/ -b 120 --optimizerVersion "optimizerV2"
Log To Console begin PTBWordLM
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 8 --num-executors 1 --driver-memory 20g --executor-memory 40g --class com.intel.analytics.bigdl.example.languagemodel.PTBWordLM ${jar_path} -f ./simple-examples/data -b 120 --numLayers 2 --vocab 10001 --hidden 650 --numSteps 35 --learningRate 0.005 -e 1 --learningRateDecay 0.001 --keepProb 0.5 --overWrite
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 8 --num-executors 1 --driver-memory 20g --executor-memory 40g --class com.intel.analytics.bigdl.example.languagemodel.PTBWordLM ${jar_path} -f ./simple-examples/data -b 120 --numLayers 2 --vocab 10001 --hidden 650 --numSteps 35 --learningRate 0.005 -e 1 --learningRateDecay 0.001 --keepProb 0.5 --overWrite --optimizerVersion "optimizerV2"
Log To Console begin inceptionV1 train
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 2 --driver-memory 20g --executor-memory 40g --class com.intel.analytics.bigdl.models.inception.TrainInceptionV1 ${jar_path} -b 40 -f ${imagenet_test_data_source} --learningRate 0.1 -i 100
Run Shell ${submit} --master yarn --deploy-mode client --executor-memory 2g --driver-memory 2g --executor-cores 10 --num-executors 2 --properties-file ${curdir}/dist/conf/spark-bigdl.conf --jars ${jar_path} --py-files ${curdir}/dist/lib/bigdl-${version}-python-api.zip --conf spark.driver.extraClassPath=${jar_path} --conf spark.executor.extraClassPath=bigdl-${version}-jar-with-dependencies.jar ${curdir}/pyspark/bigdl/models/lenet/lenet5.py -b 200 --action train --endTriggerType epoch --endTriggerNum 1
Run Shell ${submit} --master yarn --deploy-mode client --conf "spark.serializer=org.apache.spark.serializer.JavaSerializer" --executor-cores 10 --num-executors 2 --driver-memory 20g --executor-memory 40g --class com.intel.analytics.bigdl.models.inception.TrainInceptionV1 ${jar_path} -b 40 -f ${imagenet_test_data_source} --learningRate 0.1 -i 100 --optimizerVersion "optimizerV2"
Run Shell ${submit} --master yarn --deploy-mode client --executor-memory 2g --driver-memory 2g --executor-cores 10 --num-executors 2 --properties-file ${curdir}/dist/conf/spark-bigdl.conf --jars ${jar_path} --py-files ${curdir}/dist/lib/bigdl-${version}-python-api.zip --conf spark.driver.extraClassPath=${jar_path} --conf spark.executor.extraClassPath=bigdl-${version}-jar-with-dependencies.jar ${curdir}/pyspark/bigdl/models/lenet/lenet5.py -b 200 --action train --endTriggerType epoch --endTriggerNum 1
Remove Environment Variable http_proxy https_proxy
Remove Input

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import com.intel.analytics.bigdl.dataset.{DataSet, SampleToMiniBatch}
import com.intel.analytics.bigdl.nn.{CrossEntropyCriterion, Module, TimeDistributedCriterion}
import com.intel.analytics.bigdl.optim._
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric._
import com.intel.analytics.bigdl.utils.Engine
import com.intel.analytics.bigdl.utils.{Engine, OptimizerV1, OptimizerV2}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkContext
import com.intel.analytics.bigdl.example.languagemodel.Utils._
Expand Down Expand Up @@ -82,6 +82,13 @@ object PTBWordLM {
keepProb = param.keepProb)
}

if (param.optimizerVersion.isDefined) {
param.optimizerVersion.get.toLowerCase match {
case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1)
case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2)
}
}

val optimMethod = if (param.stateSnapshot.isDefined) {
OptimMethod.load[Float](param.stateSnapshot.get)
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,5 @@ In the above commands:
```--numSteps```: number of words per record in LM
```--overWrite```: do overwrite when saving checkpoint
```--keepProb```: the probability to do dropout
```--withTransformerModel```: use transformer model in this LM
```--withTransformerModel```: use transformer model in this LM
```--optimizerVersion```: option to set the DistriOptimizer version; the value can be "optimizerV1" or "optimizerV2"
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ object Utils {
numSteps: Int = 20,
overWriteCheckpoint: Boolean = false,
keepProb: Float = 2.0f,
withTransformerModel: Boolean = false)
withTransformerModel: Boolean = false,
optimizerVersion: Option[String] = None)

val trainParser = new OptionParser[TrainParams]("BigDL ptbModel Train Example") {
opt[String]('f', "dataFolder")
Expand Down Expand Up @@ -114,5 +115,9 @@ object Utils {
opt[Boolean]("withTransformerModel")
.text("Use transformer model in this LM")
.action((x, c) => c.copy(withTransformerModel = true))

opt[String]("optimizerVersion")
.text("state optimizer version")
.action((x, c) => c.copy(optimizerVersion = Some(x)))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,6 @@ Next just run the following command to run the code:
--regRate # number of L2 regularization rate, default is 1e-4
--p # number of dropout probability rate, default is 0.5
--epoch # number of epochs, default is 5
--optimizerVersion # option to set the DistriOptimizer version, default is "optimizerV1"
```
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import com.intel.analytics.bigdl.nn.{TimeDistributedCriterion, _}
import com.intel.analytics.bigdl.numeric.NumericFloat
import com.intel.analytics.bigdl.optim._
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, T}
import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, OptimizerV1, OptimizerV2, T}
import org.apache.log4j.{Level => Levle4j, Logger => Logger4j}
import org.apache.spark.SparkContext
import org.slf4j.{Logger, LoggerFactory}
Expand Down Expand Up @@ -93,6 +93,13 @@ object Train {
val trainRDD = toSample(trainTreeRDD, trainLabelRDD, trainSentenceRDD)
val devRDD = toSample(devTreeRDD, devLabelRDD, devSentenceRDD)

if (param.optimizerVersion.isDefined) {
param.optimizerVersion.get.toLowerCase match {
case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1)
case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2)
}
}

val optimizer = Optimizer(
model = TreeLSTMSentiment(word2VecTensor, param.hiddenSize, classNum, param.p),
sampleRDD = trainRDD,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,9 @@ object Utils {
opt[String]('e', "epoch")
.text("max epoch")
.action((x, c) => c.copy(epoch = x.toInt))
opt[String]("optimizerVersion")
.text("state optimizer version")
.action((x, c) => c.copy(optimizerVersion = Some(x)))
}

case class TreeLSTMSentimentParam (
Expand All @@ -243,6 +246,7 @@ object Utils {
override val learningRate: Double = 0.05,
regRate: Double = 1e-4,
p: Double = 0.5,
epoch: Int = 5
epoch: Int = 5,
optimizerVersion: Option[String] = None
) extends AbstractTextClassificationParams
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import com.intel.analytics.bigdl.optim._
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric._
import com.intel.analytics.bigdl.utils.{Engine, T, Table}
import com.intel.analytics.bigdl.utils.{Engine, OptimizerV1, OptimizerV2, T, Table}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkContext

Expand Down Expand Up @@ -73,6 +73,13 @@ object Train {
if (param.graphModel) Autoencoder.graph(classNum = 32) else Autoencoder(classNum = 32)
}

if (param.optimizerVersion.isDefined) {
param.optimizerVersion.get.toLowerCase match {
case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1)
case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2)
}
}

val optimMethod = if (param.stateSnapshot.isDefined) {
OptimMethod.load[Float](param.stateSnapshot.get)
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ object Utils {
stateSnapshot: Option[String] = None,
batchSize: Int = 150,
maxEpoch: Int = 10,
graphModel: Boolean = false
graphModel: Boolean = false,
optimizerVersion: Option[String] = None
)

val trainParser = new OptionParser[TrainParams]("BigDL Autoencoder on MNIST") {
Expand All @@ -59,6 +60,9 @@ object Utils {
opt[Unit]('g', "graphModel")
.text("use graph model")
.action((x, c) => c.copy(graphModel = true))
opt[String]("optimizerVersion")
.text("state optimizer version")
.action((x, c) => c.copy(optimizerVersion = Some(x)))
}

private[bigdl] def load(featureFile: Path, labelFile: Path): Array[ByteRecord] = {
Expand Down
Loading

0 comments on commit 91945b0

Please sign in to comment.