diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
index 37eaaa28a9619..2026d5ba5270c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -41,7 +41,7 @@ class NaiveBayesModel(
private val brzTheta = new BDM[Double](theta.length, theta(0).length)
{
- // Need to put an extra pair of braces to prevent Scala treat `i` as a member.
+ // Need to put an extra pair of braces to prevent Scala from treating `i` as a member.
var i = 0
while (i < theta.length) {
var j = 0
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
index dee9594a9dd79..04e7e4241910e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
@@ -33,13 +33,15 @@ import org.apache.spark.util.random.XORShiftRandom
import org.apache.spark.mllib.linalg.{Vector, Vectors}
/**
+ * EXPERIMENTAL
+ *
* A class that implements a decision tree algorithm for classification and regression. It
* supports both continuous and categorical features.
* @param strategy The configuration parameters for the tree algorithm which specify the type
* of algorithm (classification, regression, etc.), feature type (continuous,
* categorical), depth of the tree, quantile calculation strategy, etc.
*/
-class DecisionTree private(val strategy: Strategy) extends Serializable with Logging {
+class DecisionTree (private val strategy: Strategy) extends Serializable with Logging {
/**
* Method to train a decision tree model over an RDD
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala
index df565f3eb8859..0cbe7d73cddad 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala
@@ -40,4 +40,4 @@ class Strategy (
val maxDepth: Int,
val maxBins: Int = 100,
val quantileCalculationStrategy: QuantileStrategy = Sort,
- val categoricalFeaturesInfo: Map[Int,Int] = Map[Int,Int]()) extends Serializable
+ val categoricalFeaturesInfo: Map[Int, Int] = Map[Int, Int]()) extends Serializable
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala
index b93995fcf9441..beec48bb3a108 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala
@@ -18,20 +18,24 @@
package org.apache.spark.mllib.tree.impurity
/**
+ * EXPERIMENTAL
+ *
* Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during
* binary classification.
*/
object Entropy extends Impurity {
- def log2(x: Double) = scala.math.log(x) / scala.math.log(2)
+ private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2)
/**
+ * DEVELOPER API
+ *
* entropy calculation
* @param c0 count of instances with label 0
* @param c1 count of instances with label 1
* @return entropy value
*/
- def calculate(c0: Double, c1: Double): Double = {
+ override def calculate(c0: Double, c1: Double): Double = {
if (c0 == 0 || c1 == 0) {
0
} else {
@@ -42,6 +46,6 @@ object Entropy extends Impurity {
}
}
- def calculate(count: Double, sum: Double, sumSquares: Double): Double =
+ override def calculate(count: Double, sum: Double, sumSquares: Double): Double =
throw new UnsupportedOperationException("Entropy.calculate")
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala
index c0407554a91b3..5babe7d10d111 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala
@@ -18,6 +18,8 @@
package org.apache.spark.mllib.tree.impurity
/**
+ * EXPERIMENTAL
+ *
* Class for calculating the
* [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]]
* during binary classification.
@@ -25,6 +27,8 @@ package org.apache.spark.mllib.tree.impurity
object Gini extends Impurity {
/**
+ * DEVELOPER API
+ *
* Gini coefficient calculation
* @param c0 count of instances with label 0
* @param c1 count of instances with label 1
@@ -41,6 +45,6 @@ object Gini extends Impurity {
}
}
- def calculate(count: Double, sum: Double, sumSquares: Double): Double =
+ override def calculate(count: Double, sum: Double, sumSquares: Double): Double =
throw new UnsupportedOperationException("Gini.calculate")
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
index 43f296ac56bc8..e6fa115030e7a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
@@ -18,12 +18,14 @@
package org.apache.spark.mllib.tree.impurity
/**
+ * EXPERIMENTAL
+ *
* Trait for calculating information gain.
*/
trait Impurity extends Serializable {
/**
- * DEVELOPER API - UNSTABLE
+ * DEVELOPER API
*
* information calculation for binary classification
* @param c0 count of instances with label 0
@@ -33,7 +35,7 @@ trait Impurity extends Serializable {
def calculate(c0 : Double, c1 : Double): Double
/**
- * DEVELOPER API - UNSTABLE
+ * DEVELOPER API
*
* information calculation for regression
* @param count number of instances
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
index b74577dcec167..7be3b9236ecd9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
@@ -18,6 +18,8 @@
package org.apache.spark.mllib.tree.impurity
/**
+ * EXPERIMENTAL
+ *
* Class for calculating variance during regression
*/
object Variance extends Impurity {
@@ -25,6 +27,8 @@ object Variance extends Impurity {
throw new UnsupportedOperationException("Variance.calculate")
/**
+ * DEVELOPER API
+ *
* variance calculation
* @param count number of instances
* @param sum sum of labels
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Bin.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Bin.scala
index a57faa13745f7..2d71e1e366069 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Bin.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Bin.scala
@@ -30,4 +30,5 @@ import org.apache.spark.mllib.tree.configuration.FeatureType._
* @param featureType type of feature -- categorical or continuous
* @param category categorical label value accepted in the bin
*/
+private[tree]
case class Bin(lowSplit: Split, highSplit: Split, featureType: FeatureType, category: Double)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
index a6dca84a2ce09..e336ea74e3b76 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
@@ -22,6 +22,8 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.Vector
/**
+ * EXPERIMENTAL
+ *
* Model to store the decision tree parameters
* @param topNode root node
* @param algo algorithm type -- classification or regression
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Filter.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Filter.scala
index ebc9595eafef3..2deaf4ae8dcab 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Filter.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Filter.scala
@@ -22,7 +22,7 @@ package org.apache.spark.mllib.tree.model
* @param split split specifying the feature index, type and threshold
* @param comparison integer specifying <,=,>
*/
-case class Filter(split: Split, comparison: Int) {
+private[tree] case class Filter(split: Split, comparison: Int) {
// Comparison -1,0,1 signifies <.=,>
override def toString = " split = " + split + "comparison = " + comparison
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala
index 99bf79cf12e45..aa1a478ea41b5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala
@@ -18,6 +18,8 @@
package org.apache.spark.mllib.tree.model
/**
+ * DEVELOPER API
+ *
* Information gain statistics for each split
* @param gain information gain value
* @param impurity current node impurity
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
index aac3f9ce308f7..361361f937c76 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
@@ -22,6 +22,8 @@ import org.apache.spark.mllib.tree.configuration.FeatureType._
import org.apache.spark.mllib.linalg.Vector
/**
+ * DEVELOPER API
+ *
* Node in a decision tree
* @param id integer node id
* @param predict predicted value at the node
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
index 4e64a81dda74e..1ceb64ca44290 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
@@ -30,7 +30,7 @@ case class Split(
feature: Int,
threshold: Double,
featureType: FeatureType,
- categories: List[Double]){
+ categories: List[Double]) {
override def toString =
"Feature = " + feature + ", threshold = " + threshold + ", featureType = " + featureType +
@@ -42,7 +42,7 @@ case class Split(
* @param feature feature index
* @param featureType type of feature -- categorical or continuous
*/
-class DummyLowSplit(feature: Int, featureType: FeatureType)
+private[tree] class DummyLowSplit(feature: Int, featureType: FeatureType)
extends Split(feature, Double.MinValue, featureType, List())
/**
@@ -50,7 +50,7 @@ class DummyLowSplit(feature: Int, featureType: FeatureType)
* @param feature feature index
* @param featureType type of feature -- categorical or continuous
*/
-class DummyHighSplit(feature: Int, featureType: FeatureType)
+private[tree] class DummyHighSplit(feature: Int, featureType: FeatureType)
extends Split(feature, Double.MaxValue, featureType, List())
/**
@@ -59,6 +59,6 @@ class DummyHighSplit(feature: Int, featureType: FeatureType)
* @param feature feature index
* @param featureType type of feature -- categorical or continuous
*/
-class DummyCategoricalSplit(feature: Int, featureType: FeatureType)
+private[tree] class DummyCategoricalSplit(feature: Int, featureType: FeatureType)
extends Split(feature, Double.MaxValue, featureType, List())