Commit

deprecate defaultMinSplits
CodingCat committed Apr 18, 2014
1 parent ba2c663 commit 4b60541
Showing 4 changed files with 29 additions and 16 deletions.
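For orientation, here is a minimal sketch (not part of the commit) of how calling code migrates once defaultMinSplits is deprecated; the SparkConf setup and the input path are illustrative assumptions.

    import org.apache.spark.{SparkConf, SparkContext}

    object MinPartitionsMigration {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("migration").setMaster("local[2]"))

        // Before: still compiles, but now triggers a deprecation warning.
        val before = sc.textFile("/tmp/input.txt", sc.defaultMinSplits)

        // After: identical value (math.min(defaultParallelism, 2)), non-deprecated name.
        val after = sc.textFile("/tmp/input.txt", sc.defaultMinPartitions)

        println(s"min partitions hint: ${sc.defaultMinPartitions}")
        sc.stop()
      }
    }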
core/src/main/scala/org/apache/spark/SparkContext.scala: 21 changes (13 additions & 8 deletions)
@@ -427,7 +427,7 @@ class SparkContext(config: SparkConf) extends Logging {
    * Read a text file from HDFS, a local file system (available on all nodes), or any
    * Hadoop-supported file system URI, and return it as an RDD of Strings.
    */
-  def textFile(path: String, minPartitions: Int = defaultMinSplits): RDD[String] = {
+  def textFile(path: String, minPartitions: Int = defaultMinPartitions): RDD[String] = {
     hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text],
       minPartitions).map(pair => pair._2.toString)
   }
@@ -459,7 +459,8 @@ class SparkContext(config: SparkConf) extends Logging {
    *
    * @param minPartitions A suggestion value of the minimal splitting number for input data.
    */
-  def wholeTextFiles(path: String, minPartitions: Int = defaultMinSplits): RDD[(String, String)] = {
+  def wholeTextFiles(path: String, minPartitions: Int = defaultMinPartitions):
+  RDD[(String, String)] = {
     val job = new NewHadoopJob(hadoopConfiguration)
     NewFileInputFormat.addInputPath(job, new Path(path))
     val updateConf = job.getConfiguration
@@ -493,7 +494,7 @@ class SparkContext(config: SparkConf) extends Logging {
       inputFormatClass: Class[_ <: InputFormat[K, V]],
       keyClass: Class[K],
       valueClass: Class[V],
-      minPartitions: Int = defaultMinSplits
+      minPartitions: Int = defaultMinPartitions
       ): RDD[(K, V)] = {
     // Add necessary security credentials to the JobConf before broadcasting it.
     SparkHadoopUtil.get.addCredentials(conf)
@@ -512,7 +513,7 @@ class SparkContext(config: SparkConf) extends Logging {
       inputFormatClass: Class[_ <: InputFormat[K, V]],
       keyClass: Class[K],
       valueClass: Class[V],
-      minPartitions: Int = defaultMinSplits
+      minPartitions: Int = defaultMinPartitions
       ): RDD[(K, V)] = {
     // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it.
     val confBroadcast = broadcast(new SerializableWritable(hadoopConfiguration))
@@ -565,7 +566,7 @@ class SparkContext(config: SparkConf) extends Logging {
    */
   def hadoopFile[K, V, F <: InputFormat[K, V]](path: String)
       (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] =
-    hadoopFile[K, V, F](path, defaultMinSplits)
+    hadoopFile[K, V, F](path, defaultMinPartitions)
 
   /** Get an RDD for a Hadoop file with an arbitrary new API InputFormat. */
   def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]]
@@ -641,7 +642,7 @@ class SparkContext(config: SparkConf) extends Logging {
    * */
   def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V]
       ): RDD[(K, V)] =
-    sequenceFile(path, keyClass, valueClass, defaultMinSplits)
+    sequenceFile(path, keyClass, valueClass, defaultMinPartitions)
 
   /**
    * Version of sequenceFile() for types implicitly convertible to Writables through a
@@ -665,7 +666,7 @@ class SparkContext(config: SparkConf) extends Logging {
    * a `map` function.
    */
   def sequenceFile[K, V]
-      (path: String, minPartitions: Int = defaultMinSplits)
+      (path: String, minPartitions: Int = defaultMinPartitions)
       (implicit km: ClassTag[K], vm: ClassTag[V],
        kcf: () => WritableConverter[K], vcf: () => WritableConverter[V])
       : RDD[(K, V)] = {
@@ -688,7 +689,7 @@ class SparkContext(config: SparkConf) extends Logging {
    */
   def objectFile[T: ClassTag](
       path: String,
-      minPartitions: Int = defaultMinSplits
+      minPartitions: Int = defaultMinPartitions
       ): RDD[T] = {
     sequenceFile(path, classOf[NullWritable], classOf[BytesWritable], minPartitions)
       .flatMap(x => Utils.deserialize[Array[T]](x._2.getBytes))
@@ -1183,8 +1184,12 @@ class SparkContext(config: SparkConf) extends Logging {
   def defaultParallelism: Int = taskScheduler.defaultParallelism
 
   /** Default min number of partitions for Hadoop RDDs when not given by user */
+  @deprecated("use defaultMinPartitions", "1.0.0")
   def defaultMinSplits: Int = math.min(defaultParallelism, 2)
 
+  /** Default min number of partitions for Hadoop RDDs when not given by user */
+  def defaultMinPartitions: Int = math.min(defaultParallelism, 2)
+
   private val nextShuffleId = new AtomicInteger(0)
 
   private[spark] def newShuffleId(): Int = nextShuffleId.getAndIncrement()
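As a standalone illustration of the Scala @deprecated pattern used above (class, member names, and values invented, not Spark code), the old accessor keeps its behaviour while scalac warns its callers:

    object DeprecationSketch {
      class Context(parallelism: Int) {
        @deprecated("use defaultMinPartitions", "1.0.0")
        def defaultMinSplits: Int = math.min(parallelism, 2)

        // New name, same value; existing callers of the old name keep working.
        def defaultMinPartitions: Int = math.min(parallelism, 2)
      }

      def main(args: Array[String]): Unit = {
        val ctx = new Context(parallelism = 8)
        println(ctx.defaultMinSplits)     // 2, flagged as deprecated when compiled with -deprecation
        println(ctx.defaultMinPartitions) // 2, no warning
      }
    }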
@@ -109,9 +109,17 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
   /** Default level of parallelism to use when not given by user (e.g. parallelize and makeRDD). */
   def defaultParallelism: java.lang.Integer = sc.defaultParallelism
 
-  /** Default min number of partitions for Hadoop RDDs when not given by user */
+  /**
+   * Default min number of partitions for Hadoop RDDs when not given by user.
+   * @deprecated As of Spark 1.0.0, defaultMinSplits is deprecated, use
+   *             {@link #defaultMinPartitions()} instead
+   */
+  @Deprecated
   def defaultMinSplits: java.lang.Integer = sc.defaultMinSplits
 
+  /** Default min number of partitions for Hadoop RDDs when not given by user */
+  def defaultMinPartitions: java.lang.Integer = sc.defaultMinPartitions
+
   /** Distribute a local Scala collection to form an RDD. */
   def parallelize[T](list: java.util.List[T], numSlices: Int): JavaRDD[T] = {
     implicit val ctag: ClassTag[T] = fakeClassTag
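The Java-facing API pairs the java.lang.Deprecated annotation with a javadoc @deprecated tag, since those are what Java callers and generated docs see. A hedged sketch of the same pairing on an invented wrapper class (not Spark code):

    // Sketch: a Scala class exposing a Java-friendly API, deprecating the old accessor
    // for Java callers (@Deprecated annotation) and documenting the replacement in javadoc.
    class JavaFacingContext(numPartitions: Int) {
      /**
       * @deprecated use {@link #defaultMinPartitions()} instead
       */
      @Deprecated
      def defaultMinSplits: java.lang.Integer = defaultMinPartitions

      def defaultMinPartitions: java.lang.Integer = numPartitions
    }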
mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala: 12 changes (6 additions & 6 deletions)
@@ -57,16 +57,16 @@ object MLUtils {
    * @param labelParser parser for labels, default: 1.0 if label > 0.5 or 0.0 otherwise
    * @param numFeatures number of features, which will be determined from the input data if a
    *                    negative value is given. The default value is -1.
-   * @param minSplits min number of partitions, default: sc.defaultMinSplits
+   * @param minPartitions min number of partitions, default: sc.defaultMinPartitions
    * @return labeled data stored as an RDD[LabeledPoint]
    */
   def loadLibSVMData(
       sc: SparkContext,
       path: String,
       labelParser: LabelParser,
       numFeatures: Int,
-      minSplits: Int): RDD[LabeledPoint] = {
-    val parsed = sc.textFile(path, minSplits)
+      minPartitions: Int): RDD[LabeledPoint] = {
+    val parsed = sc.textFile(path, minPartitions)
       .map(_.trim)
       .filter(!_.isEmpty)
       .map(_.split(' '))
@@ -101,7 +101,7 @@ object MLUtils {
    * with number of features determined automatically and the default number of partitions.
    */
   def loadLibSVMData(sc: SparkContext, path: String): RDD[LabeledPoint] =
-    loadLibSVMData(sc, path, BinaryLabelParser, -1, sc.defaultMinSplits)
+    loadLibSVMData(sc, path, BinaryLabelParser, -1, sc.defaultMinPartitions)
 
   /**
    * Loads labeled data in the LIBSVM format into an RDD[LabeledPoint],
@@ -112,7 +112,7 @@ object MLUtils {
       sc: SparkContext,
       path: String,
       labelParser: LabelParser): RDD[LabeledPoint] =
-    loadLibSVMData(sc, path, labelParser, -1, sc.defaultMinSplits)
+    loadLibSVMData(sc, path, labelParser, -1, sc.defaultMinPartitions)
 
   /**
    * Loads labeled data in the LIBSVM format into an RDD[LabeledPoint],
@@ -124,7 +124,7 @@ object MLUtils {
       path: String,
       labelParser: LabelParser,
       numFeatures: Int): RDD[LabeledPoint] =
-    loadLibSVMData(sc, path, labelParser, numFeatures, sc.defaultMinSplits)
+    loadLibSVMData(sc, path, labelParser, numFeatures, sc.defaultMinPartitions)
 
   /**
    * :: Experimental ::
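A usage sketch of the MLUtils entry point touched above, from the caller's side; the app name, local master, and data path are placeholders, and the signature is taken from the diff:

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.mllib.regression.LabeledPoint
    import org.apache.spark.mllib.util.MLUtils
    import org.apache.spark.rdd.RDD

    object LoadLibSVMSketch {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("libsvm").setMaster("local[2]"))

        // Default overload: features inferred, sc.defaultMinPartitions used for partitioning.
        // The path below is a placeholder; point it at a real LIBSVM file.
        val data: RDD[LabeledPoint] = MLUtils.loadLibSVMData(sc, "data/sample_libsvm_data.txt")

        println(s"loaded ${data.count()} labeled points")
        sc.stop()
      }
    }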
@@ -52,7 +52,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
   // Choose the minimum number of splits. If mapred.map.tasks is set, then use that unless
   // it is smaller than what Spark suggests.
   private val _minSplitsPerRDD = math.max(
-    sc.hiveconf.getInt("mapred.map.tasks", 1), sc.sparkContext.defaultMinSplits)
+    sc.hiveconf.getInt("mapred.map.tasks", 1), sc.sparkContext.defaultMinPartitions)
 
   // TODO: set aws s3 credentials.
 
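For the Hive table reader above, the partition arithmetic in plain form; the configuration numbers below are invented to show the math.max/math.min interaction:

    object MinSplitsPerRddSketch {
      def main(args: Array[String]): Unit = {
        val defaultParallelism = 8   // stand-in for sc.sparkContext.defaultParallelism
        val mapredMapTasks = 10      // stand-in for hiveconf "mapred.map.tasks" (defaults to 1 if unset)

        val defaultMinPartitions = math.min(defaultParallelism, 2) // 2, as defined in SparkContext
        val minSplitsPerRDD = math.max(mapredMapTasks, defaultMinPartitions)

        println(s"defaultMinPartitions=$defaultMinPartitions, minSplitsPerRDD=$minSplitsPerRDD") // 2 and 10
      }
    }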