From 437a6d7b470c441cb4763f8c2fd515a0cd3217d7 Mon Sep 17 00:00:00 2001 From: wtaozhang <1299799152@qq.com> Date: Mon, 27 May 2019 23:41:56 +0800 Subject: [PATCH] Solve conflicts --- .../spark/automl/tuner/acquisition/UCB.scala | 60 ++++++++++++++ .../automl/tuner/config/EarlyStopping.scala | 79 +++++++++++++++++++ .../spark/ml/core/AutoOfflineLearner.scala | 22 +++++- .../tencent/angel/spark/ml/AutoGPLRTest.scala | 7 +- 4 files changed, 162 insertions(+), 6 deletions(-) create mode 100644 spark-on-angel/automl/src/main/scala/com/tencent/angel/spark/automl/tuner/acquisition/UCB.scala create mode 100644 spark-on-angel/automl/src/main/scala/com/tencent/angel/spark/automl/tuner/config/EarlyStopping.scala diff --git a/spark-on-angel/automl/src/main/scala/com/tencent/angel/spark/automl/tuner/acquisition/UCB.scala b/spark-on-angel/automl/src/main/scala/com/tencent/angel/spark/automl/tuner/acquisition/UCB.scala new file mode 100644 index 000000000..d7a91cb30 --- /dev/null +++ b/spark-on-angel/automl/src/main/scala/com/tencent/angel/spark/automl/tuner/acquisition/UCB.scala @@ -0,0 +1,60 @@ +/* + * Tencent is pleased to support the open source community by making Angel available. + * + * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * + * https://opensource.org/licenses/Apache-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. 
+ *
+ */
+
+
+package com.tencent.angel.spark.automl.tuner.acquisition
+
+import com.tencent.angel.spark.automl.tuner.surrogate.Surrogate
+import org.apache.commons.logging.{Log, LogFactory}
+import org.apache.spark.ml.linalg.{Vector, Vectors}
+
+/**
+  * Upper confidence bound (UCB) acquisition function: `mean + beta * std`.
+  *
+  * @param surrogate model whose `predict` supplies the (mean, variance) pair used here
+  * @param beta : Controls the upper confidence bound (weight of the std term)
+  *             Assume :
+  *             - t: number of iteration
+  *             - d: dimension of optimization space
+  *             - v: hyperparameter v = 1
+  *             - delta: small constant 0.1 (prob of regret)
+  *             Suggest value:beta = sqrt( v* (2* log( (t**(d/2. + 2))*(pi**2)/(3. * delta) )))
+  */
+class UCB(
+           override val surrogate: Surrogate,
+           val beta: Double = 100)
+  extends Acquisition(surrogate) {
+
+  // Log under this class's own name.
+  val LOG: Log = LogFactory.getLog(classOf[UCB])
+
+  /**
+    * Scores a candidate point.
+    *
+    * @param X          candidate point
+    * @param derivative not used; the returned gradient is always a zero vector
+    * @return (UCB value, zero vector of length X.size)
+    */
+  override def compute(X: Vector, derivative: Boolean = false): (Double, Vector) = {
+    val pred = surrogate.predict(X) // (mean, variance)
+    val mean: Double = pred._1
+    // Clamp at zero so a slightly negative variance cannot turn the score into NaN.
+    val std: Double = math.sqrt(math.max(pred._2, 0.0))
+    // With std == 0 the bound is simply the mean, so no special case is needed.
+    (mean + beta * std, Vectors.dense(new Array[Double](X.size)))
+  }
+}
\ No newline at end of file
diff --git a/spark-on-angel/automl/src/main/scala/com/tencent/angel/spark/automl/tuner/config/EarlyStopping.scala b/spark-on-angel/automl/src/main/scala/com/tencent/angel/spark/automl/tuner/config/EarlyStopping.scala
new file mode 100644
index 000000000..b5cd73d28
--- /dev/null
+++ b/spark-on-angel/automl/src/main/scala/com/tencent/angel/spark/automl/tuner/config/EarlyStopping.scala
@@ -0,0 +1,79 @@
+/*
+ * Tencent is pleased to support the open source community by making Angel available.
+ *
+ * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/Apache-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ */
+
+
+package com.tencent.angel.spark.automl.tuner.config
+
+/**
+  * Early-stopping monitor: sets `earlyStop` once the validation score has failed to improve
+  * for `patience` consecutive calls to `update`.
+  *
+  * @param patience : How many consecutive non-improving updates to tolerate.
+  *                 Default: 5. With a value <= 0 the first non-improving update stops.
+  * @param minDelta : Minimum change that counts as an improvement. Default: 0.0.
+  *                 Its sign is flipped at construction when `minimize` is true.
+  * @param minimize : Whether to minimize or maximize the val_score
+  *                 Default: false
+  */
+class EarlyStopping(patience: Int = 5,
+                    var minDelta: Double = 0.0,
+                    minimize: Boolean = false) {
+
+  /** Number of consecutive updates without improvement. */
+  var counter: Int = 0
+
+  /** Best score seen so far; starts at the worst possible value for the chosen direction. */
+  var bestScore: Double = if (minimize) Double.PositiveInfinity else Double.NegativeInfinity
+
+  /** Becomes true once `counter` reaches `patience`. */
+  var earlyStop: Boolean = false
+
+  /** Public alias of the `patience` constructor argument. */
+  val pat: Int = patience
+
+  /** True when `a` is strictly greater than `b`. */
+  def greater(a: Double, b: Double): Boolean = a > b
+
+  /** True when `a` is strictly less than `b`; a tie or a NaN is not an improvement. */
+  def less(a: Double, b: Double): Boolean = a < b
+
+  /** Comparison deciding whether a candidate score beats `bestScore`. */
+  var monitorOp: (Double, Double) => Boolean = if (minimize) less _ else greater _
+
+  // Negated margin lets `update` test `score + margin < bestScore` when minimizing.
+  if (minimize) {
+    minDelta = -minDelta
+  }
+
+  /**
+    * Records a new validation score and updates `bestScore`, `counter` and `earlyStop`.
+    *
+    * @param val_score latest validation score
+    */
+  def update(val_score: Double): Unit = {
+    if (monitorOp(val_score - minDelta, bestScore)) {
+      bestScore = val_score
+      counter = 0
+    } else {
+      counter += 1
+      println(s"EarlyStopping counter: ${counter} out of ${patience}")
+      if (counter >= patience) {
+        earlyStop = true
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/spark-on-angel/mllib/src/main/scala/com/tencent/angel/spark/ml/core/AutoOfflineLearner.scala b/spark-on-angel/mllib/src/main/scala/com/tencent/angel/spark/ml/core/AutoOfflineLearner.scala
index
59675aadd..702411dd1 100644 --- a/spark-on-angel/mllib/src/main/scala/com/tencent/angel/spark/ml/core/AutoOfflineLearner.scala +++ b/spark-on-angel/mllib/src/main/scala/com/tencent/angel/spark/ml/core/AutoOfflineLearner.scala @@ -23,7 +23,7 @@ import com.tencent.angel.ml.core.optimizer.loss.{L2Loss, LogLoss} import com.tencent.angel.ml.feature.LabeledData import com.tencent.angel.ml.math2.matrix.{BlasDoubleMatrix, BlasFloatMatrix} import com.tencent.angel.spark.context.PSContext -import com.tencent.angel.spark.automl.tuner.config.{Configuration, ConfigurationSpace} +import com.tencent.angel.spark.automl.tuner.config.{Configuration, ConfigurationSpace,EarlyStopping} import com.tencent.angel.spark.automl.tuner.parameter.{ParamConfig, ParamParser, ParamSpace} import com.tencent.angel.spark.automl.tuner.solver.Solver import com.tencent.angel.spark.automl.utils.AutoMLException @@ -37,7 +37,8 @@ import scala.collection.mutable import scala.reflect.ClassTag import scala.util.Random -class AutoOfflineLearner(var tuneIter: Int = 20, var minimize: Boolean = true, var surrogate: String = "GaussianProcess") { +class AutoOfflineLearner(var tuneIter: Int = 20, var minimize: Boolean = true, var surrogate: String = "GaussianProcess", + var earlyStopping: EarlyStopping = new EarlyStopping(patience = 0)) { // Shared configuration with Angel-PS val conf = SharedConf.get() @@ -221,7 +222,22 @@ class AutoOfflineLearner(var tuneIter: Int = 20, var minimize: Boolean = true, v resetParam(paramMap) model.resetParam(paramMap).graph.init(0) val result = train(data, model) - solver.feed(config, result._1) + if(earlyStopping.pat > 0){ + earlyStopping.update(result._1) + if (earlyStopping.earlyStop) { + println("Early stopping") + val result: (Vector, Double) = solver.optimal + solver.stop + println(s"Best configuration ${result._1.toArray.mkString(",")}, best performance: ${result._2}") + return + } + else { + solver.feed(config, result._1) + } + } + else { + solver.feed(config, result._1) + } 
} } val result: (Vector, Double) = solver.optimal diff --git a/spark-on-angel/mllib/src/test/scala/com/tencent/angel/spark/ml/AutoGPLRTest.scala b/spark-on-angel/mllib/src/test/scala/com/tencent/angel/spark/ml/AutoGPLRTest.scala index 0bc5cedd2..33790fa6b 100644 --- a/spark-on-angel/mllib/src/test/scala/com/tencent/angel/spark/ml/AutoGPLRTest.scala +++ b/spark-on-angel/mllib/src/test/scala/com/tencent/angel/spark/ml/AutoGPLRTest.scala @@ -4,6 +4,7 @@ import com.tencent.angel.RunningMode import com.tencent.angel.conf.AngelConf import com.tencent.angel.ml.core.conf.{MLConf, SharedConf} import com.tencent.angel.ml.matrix.RowType +import com.tencent.angel.spark.automl.tuner.config.EarlyStopping import com.tencent.angel.spark.ml.classification.LogisticRegression import com.tencent.angel.spark.ml.core.AutoOfflineLearner @@ -15,7 +16,7 @@ class AutoGPLRTest extends PSFunSuite with SharedPSContext { override def beforeAll(): Unit = { super.beforeAll() - input = "../data/census/census_148d_train.libsvm" + input = "../../data/census/census_148d_train.libsvm" // build SharedConf with params SharedConf.get() @@ -38,8 +39,8 @@ class AutoGPLRTest extends PSFunSuite with SharedPSContext { SharedConf.get().set(MLConf.ML_AUTO_TUNER_MODEL, "GaussianProcess") SharedConf.get().set(MLConf.ML_AUTO_TUNER_PARAMS, "ml.learn.rate|C|double|0.1:1:100#ml.learn.decay|D|float|0,0.01,0.1") - - learner = new AutoOfflineLearner().init() + val Earlystop = new EarlyStopping(patience = 5, minimize = false, minDelta = 0.01) + learner = new AutoOfflineLearner(earlyStopping = Earlystop).init() } override def afterAll(): Unit = {