Skip to content

Commit

Permalink
Make some variables final and use IntParam and DoubleParam
Browse files Browse the repository at this point in the history
  • Loading branch information
yu-iskw committed Jul 3, 2015
1 parent 19326f8 commit 2f392e1
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 15 deletions.
29 changes: 15 additions & 14 deletions mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
package org.apache.spark.ml.clustering

import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.param.{Param, Params, IntParam, DoubleParam, ParamMap}
import org.apache.spark.ml.param.shared.{HasFeaturesCol, HasMaxIter, HasPredictionCol, HasSeed}
import org.apache.spark.ml.param.{Param, ParamMap, Params}
import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.mllib.clustering.{KMeans => MLlibKMeans, KMeansModel => MLlibKMeansModel}
Expand All @@ -40,7 +40,7 @@ private[clustering] trait KMeansParams
* Set the number of clusters to create (k). Must be > 1. Default: 2.
* @group param
*/
val k = new Param[Int](this, "k", "number of clusters to create", (x: Int) => x > 1)
final val k = new IntParam(this, "k", "number of clusters to create", (x: Int) => x > 1)

/** @group getParam */
def getK: Int = $(k)
Expand All @@ -51,8 +51,8 @@ private[clustering] trait KMeansParams
* return the best clustering found over any run. Must be >= 1. Default: 1.
* @group param
*/
val runs = new Param[Int](this, "runs", "number of runs of the algorithm to execute in parallel",
(value: Int) => value >= 1)
final val runs = new IntParam(this, "runs",
"number of runs of the algorithm to execute in parallel", (value: Int) => value >= 1)

/** @group getParam */
def getRuns: Int = $(runs)
Expand All @@ -62,7 +62,7 @@ private[clustering] trait KMeansParams
* If all centers move less than this Euclidean distance, we stop iterating for that run. Default: 1e-4.
* @group param
*/
val epsilon = new Param[Double](this, "epsilon", "distance threshold")
final val epsilon = new DoubleParam(this, "epsilon", "distance threshold")

/** @group getParam */
def getEpsilon: Double = $(epsilon)
Expand All @@ -73,7 +73,7 @@ private[clustering] trait KMeansParams
* (Bahmani et al., Scalable K-Means++, VLDB 2012). Default: k-means||.
* @group param
*/
val initMode = new Param[String](this, "initMode", "initialization algorithm",
final val initMode = new Param[String](this, "initMode", "initialization algorithm",
(value: String) => MLlibKMeans.validateInitializationMode(value))

/** @group getParam */
Expand All @@ -84,7 +84,7 @@ private[clustering] trait KMeansParams
* setting -- the default of 5 is almost always enough. Must be > 0. Default: 5.
* @group param
*/
val initSteps = new Param[Int](this, "initSteps", "number of steps for k-means||",
final val initSteps = new IntParam(this, "initSteps", "number of steps for k-means||",
(value: Int) => value > 0)

/** @group getParam */
Expand Down Expand Up @@ -139,13 +139,14 @@ class KMeansModel private[ml] (
@Experimental
class KMeans(override val uid: String) extends Estimator[KMeansModel] with KMeansParams {

setDefault(k, 2)
setDefault(maxIter, 20)
setDefault(runs, 1)
setDefault(initMode, MLlibKMeans.K_MEANS_PARALLEL)
setDefault(initSteps, 5)
setDefault(epsilon, 1e-4)
setDefault(seed, Utils.random.nextLong())
setDefault(
k -> 2,
maxIter -> 20,
runs -> 1,
initMode -> MLlibKMeans.K_MEANS_PARALLEL,
initSteps -> 5,
epsilon -> 1e-4,
seed -> Utils.random.nextLong())

override def copy(extra: ParamMap): Estimator[KMeansModel] = defaultCopy(extra)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ object KMeansSuite {

class KMeansSuite extends SparkFunSuite with MLlibTestSparkContext {

val k = 5
final val k = 5
@transient var dataset: DataFrame = _

override def beforeAll(): Unit = {
Expand Down

0 comments on commit 2f392e1

Please sign in to comment.