Skip to content

Commit

Permalink
Merge pull request apache#249 from ngbinh/partitionInJavaSortByKey
Browse files Browse the repository at this point in the history
Expose numPartitions parameter in JavaPairRDD.sortByKey()

This change makes Java and Scala API on sortByKey() the same.
  • Loading branch information
JoshRosen committed Dec 14, 2013
2 parents 97ac060 + 0b494f7 commit 2fd781d
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,20 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kClassTag: ClassTag[K
fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending))
}

/**
* Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling
* `collect` or `save` on the resulting RDD will return or output an ordered list of records
* (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in
* order of the keys).
*/
def sortByKey(comp: Comparator[K], ascending: Boolean, numPartitions: Int): JavaPairRDD[K, V] = {
class KeyOrdering(val a: K) extends Ordered[K] {
override def compare(b: K) = comp.compare(a, b)
}
implicit def toOrdered(x: K): Ordered[K] = new KeyOrdering(x)
fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending, numPartitions))
}

/**
* Return an RDD with the keys of each tuple.
*/
Expand Down

0 comments on commit 2fd781d

Please sign in to comment.