
Commit

Code review feedback
pwendell committed Apr 5, 2014
1 parent 2f706f1 commit cd7a465
Showing 15 changed files with 50 additions and 13 deletions.
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/Aggregator.scala
@@ -21,13 +21,13 @@ import org.apache.spark.util.collection.{AppendOnlyMap, ExternalAppendOnlyMap}

/**
* <span class="badge badge-red">DEVELOPER API - UNSTABLE</span>
*
* A set of functions used to aggregate data.
*
* @param createCombiner function to create the initial value of the aggregation.
* @param mergeValue function to merge a new value into the aggregation result.
* @param mergeCombiners function to merge outputs from multiple mergeValue function.
*/

case class Aggregator[K, V, C] (
createCombiner: V => C,
mergeValue: (C, V) => C,
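For context, a minimal sketch (not part of this diff) of how the three functions fit together: an Aggregator that builds a per-key (sum, count) pair, from which an average can later be derived. It assumes nothing beyond the constructor shown above.

import org.apache.spark.Aggregator

// createCombiner builds the initial combiner from the first value seen for a key,
// mergeValue folds another value into an existing combiner, and mergeCombiners
// merges partial combiners produced on different partitions.
val avgAggregator = Aggregator[String, Double, (Double, Long)](
  createCombiner = v => (v, 1L),
  mergeValue = (acc, v) => (acc._1 + v, acc._2 + 1L),
  mergeCombiners = (a, b) => (a._1 + b._1, a._2 + b._2)
)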
2 changes: 2 additions & 0 deletions core/src/main/scala/org/apache/spark/FutureAction.scala
@@ -86,6 +86,8 @@ trait FutureAction[T] extends Future[T] {


/**
* <span class="badge badge-red">EXPERIMENTAL API</span>
*
* A [[FutureAction]] holding the result of an action that triggers a single job. Examples include
* count, collect, reduce.
*/
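A rough usage sketch, assuming the asynchronous actions (such as countAsync) that return a FutureAction via the implicit conversions in SparkContext._, and an existing SparkContext named sc:

import scala.concurrent.Await
import scala.concurrent.duration._
import org.apache.spark.SparkContext._  // assumed: brings rdd.countAsync() into scope

// countAsync submits the job without blocking and returns a FutureAction[Long],
// which can be waited on, cancelled, or given completion callbacks.
val future = sc.parallelize(1 to 100).countAsync()
val n = Await.result(future, 30.seconds)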
core/src/main/scala/org/apache/spark/InterruptibleIterator.scala
@@ -21,7 +21,7 @@ package org.apache.spark
* An iterator that wraps around an existing iterator to provide task killing functionality.
* It works by checking the interrupted flag in [[TaskContext]].
*/
class InterruptibleIterator[+T](val context: TaskContext, val delegate: Iterator[T])
private[spark] class InterruptibleIterator[+T](val context: TaskContext, val delegate: Iterator[T])
extends Iterator[T] {

def hasNext: Boolean = !context.interrupted && delegate.hasNext
11 changes: 10 additions & 1 deletion core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -184,7 +184,7 @@ class SparkContext(
jars.foreach(addJar)
}

def warnSparkMem(value: String): String = {
private def warnSparkMem(value: String): String = {
logWarning("Using SPARK_MEM to set amount of memory to use per executor process is " +
"deprecated, please use spark.executor.memory instead.")
value
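For reference, a sketch of the replacement configuration mentioned in the warning, setting spark.executor.memory through SparkConf instead of the SPARK_MEM environment variable (the app name and master below are illustrative):

import org.apache.spark.{SparkConf, SparkContext}

// Set executor memory explicitly rather than relying on the deprecated SPARK_MEM.
val conf = new SparkConf()
  .setAppName("memory-config-example")
  .setMaster("local[2]")
  .set("spark.executor.memory", "2g")
val sc = new SparkContext(conf)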
@@ -665,6 +665,11 @@ class SparkContext(
postEnvironmentUpdate()
}

/**
* <span class="badge badge-red">DEVELOPER API - UNSTABLE</span>
*
* Register a listener to receive up-calls from events that happen during execution.
*/
def addSparkListener(listener: SparkListener) {
listenerBus.addListener(listener)
}
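A hedged sketch of registering a custom listener; the event class names follow the org.apache.spark.scheduler listener API and may differ slightly between versions, and sc is an existing SparkContext:

import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd}

// A listener that logs a line whenever a job finishes.
class JobEndLogger extends SparkListener {
  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    println("Job finished: " + jobEnd)
  }
}

sc.addSparkListener(new JobEndLogger)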
@@ -974,6 +979,8 @@
}

/**
* <span class="badge badge-red">DEVELOPER API - UNSTABLE</span>
*
* Run a job that can return approximate results.
*/
def runApproximateJob[T, U, R](
@@ -991,6 +998,8 @@
}

/**
* <span class="badge badge-red">EXPERIMENTAL API</span>
*
* Submit a job for execution and return a FutureJob holding the result.
*/
def submitJob[T, U, R](
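A sketch of how submitJob might be called, assuming the five-argument form (rdd, per-partition function, partitions, per-partition result handler, final result); the master and app name are illustrative:

import org.apache.spark.{SparkConf, SparkContext}

val sc = new SparkContext(new SparkConf().setAppName("submit-job-sketch").setMaster("local[2]"))
val data = sc.parallelize(1 to 1000, 4)
val sizes = new Array[Long](4)

// Count the elements of each partition without blocking the caller; the returned
// future completes once every requested partition has reported its result.
val future = sc.submitJob(
  data,
  (it: Iterator[Int]) => it.size.toLong,            // runs on each partition
  0 until 4,                                        // partitions to compute
  (index: Int, size: Long) => sizes(index) = size,  // called as each partition finishes
  sizes.sum                                         // overall result held by the FutureAction
)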
core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
@@ -21,6 +21,8 @@ import org.apache.spark.SecurityManager
import org.apache.spark.SparkConf

/**
* <span class="badge badge-red">DEVELOPER API - UNSTABLE</span>
*
* An interface for all the broadcast implementations in Spark (to allow
* multiple broadcast implementations). SparkContext uses a user-specified
* BroadcastFactory implementation to instantiate a particular broadcast for the
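For context, the implementation is chosen through configuration rather than instantiated directly; a hedged sketch, where the spark.broadcast.factory key and the factory class name are assumptions based on the Spark 1.x codebase:

import org.apache.spark.SparkConf

// Ask Spark to use the torrent-based broadcast implementation instead of the default.
val conf = new SparkConf()
  .set("spark.broadcast.factory", "org.apache.spark.broadcast.TorrentBroadcastFactory")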
9 changes: 9 additions & 0 deletions core/src/main/scala/org/apache/spark/package.scala
@@ -32,6 +32,15 @@ package org.apache
*
* Java programmers should reference the [[spark.api.java]] package
* for Spark programming APIs in Java.
*
* Classes and methods marked with <span class="badge badge-red">EXPERIMENTAL API</span> are
* user-facing features which have not been officially adopted by the Spark project. These are
* subject to change or removal in minor releases.
*
* Classes and methods marked with <span class="badge badge-red">DEVELOPER API - UNSTABLE</span>
* are intended for advanced users who want to extend Spark through lower-level interfaces. These are
* subject to change or removal in minor releases.
*
*/
package object spark {
// For package docs only
8 changes: 6 additions & 2 deletions core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
@@ -51,13 +51,17 @@ private[spark] class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep]
}

/**
* <span class="badge badge-red">DEVELOPER API - UNSTABLE</span>
*
* An RDD that cogroups its parents. For each key k in parent RDDs, the resulting RDD contains a
* tuple with the list of values for that key.
*
* Note: This is an internal API. We recommend users call RDD.cogroup(...) instead of
* instantiating this directly.
* @param rdds parent RDDs.
* @param part partitioner used to partition the shuffle output.
* @param part partitioner used to partition the shuffle output
*/
private[spark]
class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: Partitioner)
extends RDD[(K, Seq[Seq[_]])](rdds.head.context, Nil) {

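As recommended above, user code would go through cogroup rather than constructing this RDD directly; a small sketch, assuming an existing SparkContext named sc:

import org.apache.spark.SparkContext._  // implicit conversion to pair-RDD operations

val scores = sc.parallelize(Seq(("alice", 1), ("bob", 2)))
val ages   = sc.parallelize(Seq(("alice", 30), ("carol", 25)))

// cogroup builds a CoGroupedRDD internally: each key is paired with the values
// found for it in both input RDDs.
val grouped = scores.cogroup(ages)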
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala
@@ -22,7 +22,7 @@ import scala.reflect.ClassTag
import org.apache.spark.{Partition, SparkContext, TaskContext}

/**
* An RDD that is empty, i.e. has no element in it.
* An RDD that has no partitions and no elements.
*/
private[spark] class EmptyRDD[T: ClassTag](sc: SparkContext) extends RDD[T](sc, Nil) {

12 changes: 10 additions & 2 deletions core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -513,6 +513,8 @@ abstract class RDD[T: ClassTag](
}

/**
* <span class="badge badge-red">DEVELOPER API - UNSTABLE</span>
*
* Return a new RDD by applying a function to each partition of this RDD. This is a variant of
* mapPartitions that also passes the TaskContext into the closure.
*/
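A short sketch of the variant described above; the TaskContext field name partitionId is an assumption that may differ by version, and sc is an existing SparkContext:

// Tag every element with the id of the partition that processed it.
val tagged = sc.parallelize(1 to 10, 2).mapPartitionsWithContext { (context, iter) =>
  iter.map(x => (context.partitionId, x))
}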
@@ -775,7 +777,9 @@ abstract class RDD[T: ClassTag](
def count(): Long = sc.runJob(this, Utils.getIteratorSize _).sum

/**
* (Experimental) Approximate version of count() that returns a potentially incomplete result
* <span class="badge badge-red">EXPERIMENTAL API</span>
*
* Approximate version of count() that returns a potentially incomplete result
* within a timeout, even if not all tasks have finished.
*/
def countApprox(timeout: Long, confidence: Double = 0.95): PartialResult[BoundedDouble] = {
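For illustration, assuming an existing SparkContext named sc; the result is a PartialResult whose value is a bounded estimate rather than an exact count:

// Wait at most 10 seconds; initialValue holds the estimate computed within the
// timeout, as a BoundedDouble with low/high bounds.
val approx = sc.parallelize(1 to 1000000).countApprox(timeout = 10000L, confidence = 0.95)
println(approx.initialValue)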
@@ -821,7 +825,9 @@
}

/**
* (Experimental) Approximate version of countByValue().
* <span class="badge badge-red">EXPERIMENTAL API</span>
*
* Approximate version of countByValue().
*/
def countByValueApprox(
timeout: Long,
@@ -843,6 +849,8 @@
}

/**
* <span class="badge badge-red">EXPERIMENTAL API</span>
*
* Return approximate number of distinct elements in the RDD.
*
* The accuracy of approximation can be controlled through the relative standard deviation
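A brief sketch of the method referenced above, with relativeSD as the relative standard deviation parameter and sc an existing SparkContext:

// Lower relativeSD means a more accurate estimate at the cost of more memory.
val estimate = sc.parallelize(Seq(1, 2, 2, 3, 3, 3)).countApproxDistinct(relativeSD = 0.05)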
core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
@@ -28,6 +28,8 @@ import org.apache.spark._
import org.apache.spark.executor.TaskMetrics

/**
* <span class="badge badge-red">DEVELOPER API - UNSTABLE</span>
*
* A logger class to record runtime information for jobs in Spark. This class outputs one log file
* for each Spark job, containing task start/stop and shuffle information. JobLogger is a subclass
* of SparkListener; use addSparkListener to add JobLogger to a SparkContext after the SparkContext
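As the doc above suggests, the logger is attached through addSparkListener; a sketch assuming the no-argument constructor and an existing SparkContext named sc:

import org.apache.spark.scheduler.JobLogger

// Write per-job log files for this SparkContext's jobs.
sc.addSparkListener(new JobLogger())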
core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
@@ -21,6 +21,7 @@ import collection.mutable.ArrayBuffer

// Information about a specific split instance: handles both split instance types
// so that we do not need to worry about the differences.
private[spark]
class SplitInfo(val inputFormatClazz: Class[_], val hostLocation: String, val path: String,
val length: Long, val underlyingSplit: Any) {
override def toString(): String = {
core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala
@@ -24,13 +24,11 @@ import scala.collection.JavaConverters._
import scala.collection.generic.Growable

/**
* <span class="badge badge-red">DEVELOPER API - UNSTABLE</span>
*
* Bounded priority queue. This class wraps the original PriorityQueue
* class and modifies it such that only the top K elements are retained.
* The top K elements are defined by an implicit Ordering[A].
*/
class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
extends Iterable[A] with Growable[A] with Serializable {

private val underlying = new JPriorityQueue[A](maxSize, ord)
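Since the class is now private[spark], here is only a conceptual sketch of the top-K idea it implements: a fixed-size min-heap where a new element replaces the current minimum only if it ranks higher. The helper below is standalone and not part of Spark.

import java.util.{PriorityQueue => JPriorityQueue}
import scala.collection.JavaConverters._

// Returns the k largest elements of xs, largest first (assumes k >= 1).
def topK(xs: Seq[Int], k: Int): Seq[Int] = {
  val heap = new JPriorityQueue[Int](k, Ordering.Int)  // smallest retained element on top
  xs.foreach { x =>
    if (heap.size < k) {
      heap.add(x)
    } else if (Ordering.Int.gt(x, heap.peek())) {
      heap.poll()   // drop the current minimum
      heap.add(x)   // and keep the larger element instead
    }
  }
  heap.asScala.toSeq.sorted(Ordering.Int.reverse)
}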
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/util/Vector.scala
@@ -21,6 +21,7 @@ import scala.util.Random

import org.apache.spark.util.random.XORShiftRandom

@deprecated("Use Vector from Spark's mllib.linalg package instead.", "1.0.0")
class Vector(val elements: Array[Double]) extends Serializable {
def length = elements.length

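Per the deprecation message, new code would use the mllib linear algebra types instead; a hedged sketch, assuming the Vectors factory in org.apache.spark.mllib.linalg:

import org.apache.spark.mllib.linalg.Vectors

// A dense vector backed by an Array[Double], replacing org.apache.spark.util.Vector.
val v = Vectors.dense(1.0, 2.0, 3.0)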
3 changes: 2 additions & 1 deletion graphx/src/main/scala/org/apache/spark/graphx/package.scala
@@ -22,7 +22,8 @@ import org.apache.spark.util.collection.OpenHashSet
/**
* <span class="badge badge-red">ALPHA COMPONENT</span>
*
* GraphX is a graph processing framework built on top of Spark. */
* GraphX is a graph processing framework built on top of Spark.
*/
package object graphx {
/**
* A 64-bit vertex identifier that uniquely identifies a vertex within a graph. It does not need
@@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.storage
package org.apache.spark.tools

import java.util.concurrent.{CountDownLatch, Executors}
import java.util.concurrent.atomic.AtomicLong
Expand Down
