Skip to content

Commit

Permalink
More mima excludes, added lots of warnings to not use impurity
Browse files Browse the repository at this point in the history
  • Loading branch information
vlad17 committed Nov 1, 2016
1 parent d3b948b commit 5f54f4d
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,8 @@ import org.apache.spark.sql.types.DoubleType
* - In the case of squared error loss, variance impurity and mean leaf estimates happen
* to make the SGB and TreeBoost algorithms identical.
*
* [[GBTClassifier]] will use the usual `"loss-based"` impurity by default, conforming to
* [[GBTClassifier]] will use the `"loss-based"` impurity by default, conforming to
* TreeBoost behavior. For SGB, set impurity to `"variance"`.
* To use TreeBoost, set impurity to `"loss-based"`.
*
* Currently, however, even TreeBoost behavior uses variance impurity for split selection for
* ease and speed. This is the approach `R`'s
Expand All @@ -70,7 +69,7 @@ import org.apache.spark.sql.types.DoubleType
@Since("1.4.0")
class GBTClassifier @Since("1.4.0") (
@Since("1.4.0") override val uid: String)
extends Classifier[Vector, GBTClassifier, GBTClassificationModel]
extends Predictor[Vector, GBTClassifier, GBTClassificationModel]
with GBTClassifierParams with DefaultParamsWritable with Logging {

@Since("1.4.0")
Expand Down Expand Up @@ -102,6 +101,18 @@ class GBTClassifier @Since("1.4.0") (
/**
 * Sets the checkpoint interval by forwarding to the inherited setter.
 * NOTE(review): presumably overridden only to attach the `@Since` annotation and keep the
 * `this.type` return for chaining on this class - confirm against the parent trait.
 *
 * @param value new checkpoint interval
 * @return this
 */
@Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

/**
 * Sets the impurity criterion for this GBT classifier by forwarding to the inherited setter.
 * Per the `impurity` param documentation, the supported values are `"loss-based"`
 * (the default) and `"variance"`, matched case-insensitively.
 *
 * Impurity-setting is currently only offered as a way to recover pre-2.0.2 Spark GBT
 * behavior (which is Stochastic Gradient Boosting): set impurity to `"variance"` for this.
 *
 * Deprecated as of 2.0.2: control over impurity is considered an implementation detail of
 * GBTs and will be removed.
 *
 * @param value new impurity value
 * @return this
 */
@Since("1.4.0")
@deprecated(
"Control over impurity will be removed, as it is an implementation detail of GBTs",
"2.0.2")
override def setImpurity(value: String): this.type = super.setImpurity(value)

// Parameters from TreeEnsembleParams:

@Since("1.4.0")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,16 @@ class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)

/**
 * Sets the impurity criterion for this GBT regressor by forwarding to the inherited setter.
 * Per the GBT regressor `impurity` param documentation, the supported values are
 * `"loss-based"` and `"variance"` (the default), matched case-insensitively.
 *
 * Note that the loss-based impurity is currently NOT compatible with absolute loss.
 *
 * Impurity-setting is currently only offered as a way to recover pre-2.0.2 Spark GBT
 * behavior (which is Stochastic Gradient Boosting): set impurity to `"variance"` for this.
 *
 * Deprecated as of 2.0.2: control over impurity is considered an implementation detail of
 * GBTs and will be removed.
 *
 * @param value new impurity value
 * @return this
 */
@Since("1.4.0")
@deprecated(
"Control over impurity will be removed, as it is an implementation detail of GBTs",
"2.0.2")
override def setImpurity(value: String): this.type = super.setImpurity(value)

// Parameters from TreeEnsembleParams:
Expand Down
10 changes: 8 additions & 2 deletions mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
Original file line number Diff line number Diff line change
Expand Up @@ -516,8 +516,11 @@ private[ml] trait GBTClassifierParams extends GBTParams with TreeClassifierParam
* Also used for terminal leaf value prediction.
* Supported: "loss-based" (default) and "variance"
*
* @group param
* @group expertParam
*/
@deprecated(
"Control over impurity will be removed, as it is an implementation detail of GBTs",
"2.0.2")
override val impurity: Param[String] = new Param[String](this, "impurity", "Criterion used for" +
" information gain calculation (case-insensitive). Supported options:" +
s" ${GBTClassifierParams.supportedImpurities.mkString(", ")}",
Expand Down Expand Up @@ -590,8 +593,11 @@ private[ml] trait GBTRegressorParams extends GBTParams with TreeRegressorParams
* Also used for terminal leaf value prediction.
* Supported: "loss-based" and "variance" (default)
*
* @group param
* @group expertParam
*/
@deprecated(
"Control over impurity will be removed, as it is an implementation detail of GBTs",
"2.0.2")
override val impurity: Param[String] = new Param[String](this, "impurity", "Criterion used for" +
" information gain calculation (case-insensitive). Supported options:" +
s" ${GBTRegressorParams.supportedImpurities.mkString(", ")}",
Expand Down
8 changes: 7 additions & 1 deletion project/MimaExcludes.scala
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,13 @@ object MimaExcludes {
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassifier.getOldBoostingStrategy"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.getOldBoostingStrategy"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressor.getOldBoostingStrategy"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.getOldBoostingStrategy")
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.getOldBoostingStrategy"),
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeRegressorParamsWithDefault.org$apache$spark$ml$tree$TreeRegressorParamsWithDefault$_setter_$impurity_="),
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeClassifierParamsWithDefault.org$apache$spark$ml$tree$TreeClassifierParamsWithDefault$_setter_$impurity_="),
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeClassifierParamsWithDefault.org$apache$spark$ml$tree$TreeClassifierParamsWithDefault$_setter_$impurity_="),
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeRegressorParamsWithDefault.org$apache$spark$ml$tree$TreeRegressorParamsWithDefault$_setter_$impurity_="),
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeClassifierParamsWithDefault.org$apache$spark$ml$tree$TreeClassifierParamsWithDefault$_setter_$impurity_="),
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeRegressorParamsWithDefault.org$apache$spark$ml$tree$TreeRegressorParamsWithDefault$_setter_$impurity_=")
)
}

Expand Down

0 comments on commit 5f54f4d

Please sign in to comment.