Merge pull request #776 from zwt233/Auto-ML
Add Early Stopping function and a new acquisition function (GP-UCB)
bluesjjw authored May 28, 2019
2 parents d29e2a7 + 437a6d7 commit 8e2fac5
Showing 4 changed files with 162 additions and 6 deletions.
@@ -0,0 +1,60 @@
/*
* Tencent is pleased to support the open source community by making Angel available.
*
* Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* https://opensource.org/licenses/Apache-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*
*/


package com.tencent.angel.spark.automl.tuner.acquisition

import com.tencent.angel.spark.automl.tuner.surrogate.Surrogate
import org.apache.commons.logging.{Log, LogFactory}
import org.apache.spark.ml.linalg.{Vector, Vectors}

/**
  * Upper confidence bound (GP-UCB) acquisition function.
  *
  * @param surrogate : The surrogate model that provides the predicted mean and variance.
  * @param beta : Controls the width of the upper confidence bound.
  *             Assume:
  *             - t: number of iterations
  *             - d: dimension of the optimization space
  *             - v: hyperparameter, v = 1
  *             - delta: small constant, 0.1 (probability of regret)
  *             Suggested value: beta = sqrt(v * (2 * log((t ** (d/2 + 2)) * (pi ** 2) / (3 * delta))))
  */
class UCB(
           override val surrogate: Surrogate,
           val beta: Double = 100)
  extends Acquisition(surrogate) {

  val LOG: Log = LogFactory.getLog(classOf[Surrogate])

  override def compute(X: Vector, derivative: Boolean = false): (Double, Vector) = {
    val pred = surrogate.predict(X) // (mean, variance)

    val m: Double = pred._1
    val s: Double = Math.sqrt(pred._2)

    if (s == 0) {
      // If the standard deviation is zero, x has been observed on all instances;
      // with a random-forest surrogate the std should never be exactly 0.0.
      (0.0, Vectors.dense(new Array[Double](X.size)))
    } else {
      val ucb = m + beta * s
      (ucb, Vectors.dense(new Array[Double](X.size)))
    }
  }
}
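
For reference, a minimal, self-contained sketch (not part of this commit) of how the suggested beta from the scaladoc above could be evaluated per iteration and combined with a predicted mean and standard deviation. The object name and the example values (d, mean, std) are hypothetical.

import scala.math.{log, pow, sqrt, Pi}

object UCBBetaSketch {

  // beta = sqrt(v * (2 * log((t ** (d/2 + 2)) * (pi ** 2) / (3 * delta)))), as suggested above
  def suggestedBeta(t: Int, d: Int, v: Double = 1.0, delta: Double = 0.1): Double =
    sqrt(v * 2.0 * log(pow(t, d / 2.0 + 2.0) * Pi * Pi / (3.0 * delta)))

  def main(args: Array[String]): Unit = {
    val d = 2                     // dimension of the optimization space (example value)
    val (mean, std) = (0.3, 0.05) // hypothetical surrogate prediction at a candidate point
    for (t <- 1 to 5) {
      val beta = suggestedBeta(t, d)
      println(f"iter $t: beta = $beta%.3f, ucb = ${mean + beta * std}%.3f")
    }
  }
}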
@@ -0,0 +1,79 @@
/*
* Tencent is pleased to support the open source community by making Angel available.
*
* Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* https://opensource.org/licenses/Apache-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*
*/


package com.tencent.angel.spark.automl.tuner.config

/**
  * Early stopping criterion for the tuning loop.
  *
  * @param patience : How many iterations to wait after the last time the validation score improved.
  *                 Default: 5
  * @param minDelta : Minimum change in the validation score that counts as an improvement.
  *                 Default: 0.0
  * @param minimize : Whether the validation score should be minimized (true) or maximized (false).
  *                 Default: false
  */
class EarlyStopping(patience: Int = 5,
                    var minDelta: Double = 0.0,
                    minimize: Boolean = false) {

  var counter: Int = 0
  var bestScore: Double = Double.NegativeInfinity
  var earlyStop: Boolean = false
  // Exposed so callers can check whether early stopping is enabled (patience > 0).
  val pat = patience

  def greater(a: Double, b: Double): Boolean = a > b

  def less(a: Double, b: Double): Boolean = a < b

  var monitorOp = greater _

  // When minimizing, flip the comparison, the sign of minDelta, and the initial best score.
  if (minimize) {
    monitorOp = less _
    minDelta = -minDelta
    bestScore = Double.PositiveInfinity
  }

  def update(val_score: Double): Unit = {
    val score = val_score
    if (monitorOp(score - minDelta, bestScore)) {
      // Improvement: remember the best score and reset the counter.
      bestScore = score
      counter = 0
    } else {
      counter += 1
      println(s"EarlyStopping counter: ${counter} out of ${patience}")
      if (counter >= patience) {
        earlyStop = true
      }
    }
  }
}
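
A minimal usage sketch (not part of the diff) of the new EarlyStopping class. The validation scores below are made up; in AutoOfflineLearner the score comes from train(data, model).

import com.tencent.angel.spark.automl.tuner.config.EarlyStopping

object EarlyStoppingSketch {
  def main(args: Array[String]): Unit = {
    // Maximize the validation score, require an improvement of at least 0.01,
    // and stop after 2 consecutive iterations without improvement.
    val stopper = new EarlyStopping(patience = 2, minDelta = 0.01, minimize = false)

    val valScores = Seq(0.70, 0.75, 0.752, 0.751, 0.749) // hypothetical scores
    for (score <- valScores if !stopper.earlyStop) {
      stopper.update(score)
    }
    // After 0.75 the gains are below minDelta, so the counter reaches patience,
    // earlyStop flips to true, and the tuning loop would report the optimum and stop.
    println(s"best score: ${stopper.bestScore}, stopped early: ${stopper.earlyStop}")
  }
}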
@@ -23,7 +23,7 @@ import com.tencent.angel.ml.core.optimizer.loss.{L2Loss, LogLoss}
import com.tencent.angel.ml.feature.LabeledData
import com.tencent.angel.ml.math2.matrix.{BlasDoubleMatrix, BlasFloatMatrix}
import com.tencent.angel.spark.context.PSContext
import com.tencent.angel.spark.automl.tuner.config.{Configuration, ConfigurationSpace}
import com.tencent.angel.spark.automl.tuner.config.{Configuration, ConfigurationSpace, EarlyStopping}
import com.tencent.angel.spark.automl.tuner.parameter.{ParamConfig, ParamParser, ParamSpace}
import com.tencent.angel.spark.automl.tuner.solver.Solver
import com.tencent.angel.spark.automl.utils.AutoMLException
@@ -37,7 +37,8 @@ import scala.collection.mutable
import scala.reflect.ClassTag
import scala.util.Random

class AutoOfflineLearner(var tuneIter: Int = 20, var minimize: Boolean = true, var surrogate: String = "GaussianProcess") {
class AutoOfflineLearner(var tuneIter: Int = 20, var minimize: Boolean = true, var surrogate: String = "GaussianProcess",
var earlyStopping: EarlyStopping = new EarlyStopping(patience = 0)) {

// Shared configuration with Angel-PS
val conf = SharedConf.get()
@@ -221,7 +222,22 @@ class AutoOfflineLearner(var tuneIter: Int = 20, var minimize: Boolean = true, v
resetParam(paramMap)
model.resetParam(paramMap).graph.init(0)
val result = train(data, model)
solver.feed(config, result._1)
if (earlyStopping.pat > 0) {
  earlyStopping.update(result._1)
  if (earlyStopping.earlyStop) {
    println("Early stopping")
    val result: (Vector, Double) = solver.optimal
    solver.stop
    println(s"Best configuration ${result._1.toArray.mkString(",")}, best performance: ${result._2}")
    return
  } else {
    solver.feed(config, result._1)
  }
} else {
  solver.feed(config, result._1)
}
}
}
val result: (Vector, Double) = solver.optimal
@@ -4,6 +4,7 @@ import com.tencent.angel.RunningMode
import com.tencent.angel.conf.AngelConf
import com.tencent.angel.ml.core.conf.{MLConf, SharedConf}
import com.tencent.angel.ml.matrix.RowType
import com.tencent.angel.spark.automl.tuner.config.EarlyStopping
import com.tencent.angel.spark.ml.classification.LogisticRegression
import com.tencent.angel.spark.ml.core.AutoOfflineLearner

@@ -15,7 +16,7 @@ class AutoGPLRTest extends PSFunSuite with SharedPSContext {
override def beforeAll(): Unit = {
super.beforeAll()

input = "../data/census/census_148d_train.libsvm"
input = "../../data/census/census_148d_train.libsvm"

// build SharedConf with params
SharedConf.get()
@@ -38,8 +39,8 @@ class AutoGPLRTest extends PSFunSuite with SharedPSContext {
SharedConf.get().set(MLConf.ML_AUTO_TUNER_MODEL, "GaussianProcess")
SharedConf.get().set(MLConf.ML_AUTO_TUNER_PARAMS,
"ml.learn.rate|C|double|0.1:1:100#ml.learn.decay|D|float|0,0.01,0.1")

learner = new AutoOfflineLearner().init()
val Earlystop = new EarlyStopping(patience = 5, minimize = false, minDelta = 0.01)
learner = new AutoOfflineLearner(earlyStopping = Earlystop).init()
}

override def afterAll(): Unit = {
