From d0885526e16c3d5b798d92bf98873cfd67f15301 Mon Sep 17 00:00:00 2001
From: Xiangrui Meng <meng@databricks.com>
Date: Sun, 30 Mar 2014 18:39:09 -0700
Subject: [PATCH] replace implicit SparkContext conversion with explicit MLContext.apply factory

---
 .../org/apache/spark/mllib/MLContext.scala    | 23 +++++++++++++------
 .../apache/spark/mllib/MLContextSuite.scala   |  7 +++---
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/MLContext.scala b/mllib/src/main/scala/org/apache/spark/mllib/MLContext.scala
index fb4d458cd8a09..3cd09b0d48113 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/MLContext.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/MLContext.scala
@@ -23,7 +23,12 @@ import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.rdd.RDD
 
-class MLContext(self: SparkContext) {
+/**
+ * Provides methods related to machine learning on top of [[org.apache.spark.SparkContext]].
+ *
+ * @param sparkContext a [[org.apache.spark.SparkContext]] instance
+ */
+class MLContext(val sparkContext: SparkContext) {
   /**
    * Reads labeled data in the LIBSVM format into an RDD[LabeledPoint].
    * The LIBSVM format is a text-based format used by LIBSVM and LIBLINEAR.
@@ -34,16 +39,16 @@ class MLContext(self: SparkContext) {
    * where the feature indices are converted to zero-based.
    *
    * @param path file or directory path in any Hadoop-supported file system URI
-   * @param numFeatures number of features, it will be determined from input
-   *                    if a non-positive value is given
-   *@param labelParser parser for labels, default: _.toDouble
+   * @param numFeatures number of features, which will be determined from the input data if a
+   *                    non-positive value is given. The default value is 0.
+   * @param labelParser parser for labels, default: _.toDouble
    * @return labeled data stored as an RDD[LabeledPoint]
    */
   def libSVMFile(
       path: String,
-      numFeatures: Int,
+      numFeatures: Int = 0,
       labelParser: String => Double = _.toDouble): RDD[LabeledPoint] = {
-    val parsed = self.textFile(path).map(_.trim).filter(!_.isEmpty).map(_.split(' '))
+    val parsed = sparkContext.textFile(path).map(_.trim).filter(!_.isEmpty).map(_.split(' '))
     // Determine number of features.
     val d = if (numFeatures > 0) {
       numFeatures
@@ -70,5 +75,9 @@ class MLContext(self: SparkContext) {
 }
 
 object MLContext {
-  implicit def sparkContextToMLContext(sc: SparkContext): MLContext = new MLContext(sc)
+  /**
+   * Creates an [[org.apache.spark.mllib.MLContext]] instance from
+   * an [[org.apache.spark.SparkContext]] instance.
+   */
+  def apply(sc: SparkContext): MLContext = new MLContext(sc)
 }
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/MLContextSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/MLContextSuite.scala
index 743102b54fa9e..05be434590c48 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/MLContextSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/MLContextSuite.scala
@@ -24,7 +24,6 @@ import org.scalatest.FunSuite
 import com.google.common.base.Charsets
 import com.google.common.io.Files
 
-import org.apache.spark.mllib.MLContext._
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.util.LocalSparkContext
 
@@ -40,8 +39,10 @@ class MLContextSuite extends FunSuite with LocalSparkContext {
     val file = new File(tempDir.getPath, "part-00000")
     Files.write(lines, file, Charsets.US_ASCII)
 
-    val pointsWithNumFeatures = sc.libSVMFile(tempDir.toURI.toString, 6).collect()
-    val pointsWithoutNumFeatures = sc.libSVMFile(tempDir.toURI.toString, 0).collect()
+    val mlc = MLContext(sc)
+
+    val pointsWithNumFeatures = mlc.libSVMFile(tempDir.toURI.toString, 6).collect()
+    val pointsWithoutNumFeatures = mlc.libSVMFile(tempDir.toURI.toString, 0).collect()
 
     for (points <- Seq(pointsWithNumFeatures, pointsWithoutNumFeatures)) {
       assert(points.length === 3)