Adds sortByKey()/sortByValue() extensions for Dataset<Tuple2>, Dataset<Arity2> and
Dataset<Pair> to Dataset.kt, plus one test per tuple type in DatasetFunctionTest.kt.
Each extension delegates to Dataset.sort(String) using the field name of the wrapped
type ("_1"/"_2" for Tuple2 and Arity2, "first"/"second" for Pair). The @JvmName
annotations give the overloads distinct JVM names.

NOTE(review): this patch was reconstructed from a copy in which line breaks were
collapsed and every `<...>` generic argument list was stripped. Type-parameter names,
indentation, and the position of blank context lines are inferred, and the heading
text after the first `@@` is left as found. Regenerate with `git diff` from the
repository before applying.

diff --git a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt
index b5070b84..31227762 100644
--- a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt
+++ b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt
@@ -309,6 +309,30 @@ inline fun Dataset.fullJoin(
  */
 inline fun <reified T> Dataset<T>.sort(columns: (Dataset<T>) -> Array<Column>): Dataset<T> = sort(*columns(this))
 
+/** Returns a dataset sorted by the first (`_1`) value of each [Tuple2] inside. */
+@JvmName("sortByTuple2Key")
+fun <T1, T2> Dataset<Tuple2<T1, T2>>.sortByKey(): Dataset<Tuple2<T1, T2>> = sort("_1")
+
+/** Returns a dataset sorted by the second (`_2`) value of each [Tuple2] inside. */
+@JvmName("sortByTuple2Value")
+fun <T1, T2> Dataset<Tuple2<T1, T2>>.sortByValue(): Dataset<Tuple2<T1, T2>> = sort("_2")
+
+/** Returns a dataset sorted by the first (`_1`) value of each [Arity2] inside. */
+@JvmName("sortByArity2Key")
+fun <T1, T2> Dataset<Arity2<T1, T2>>.sortByKey(): Dataset<Arity2<T1, T2>> = sort("_1")
+
+/** Returns a dataset sorted by the second (`_2`) value of each [Arity2] inside. */
+@JvmName("sortByArity2Value")
+fun <T1, T2> Dataset<Arity2<T1, T2>>.sortByValue(): Dataset<Arity2<T1, T2>> = sort("_2")
+
+/** Returns a dataset sorted by the first (`first`) value of each [Pair] inside. */
+@JvmName("sortByPairKey")
+fun <T1, T2> Dataset<Pair<T1, T2>>.sortByKey(): Dataset<Pair<T1, T2>> = sort("first")
+
+/** Returns a dataset sorted by the second (`second`) value of each [Pair] inside. */
+@JvmName("sortByPairValue")
+fun <T1, T2> Dataset<Pair<T1, T2>>.sortByValue(): Dataset<Pair<T1, T2>> = sort("second")
+
 /**
  * This function creates block, where one can call any further computations on already cached dataset
  * Data will be unpersisted automatically at the end of computation
diff --git a/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt
index f352ff68..495948e3 100644
--- a/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt
+++ b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt
@@ -282,6 +282,42 @@ class DatasetFunctionTest : ShouldSpec({
 
                 dataset6.toList<List<Int>>() shouldBe listOf(listOf(1, 2, 3), listOf(4, 5, 6))
             }
+
+            should("Sort Arity2 Dataset") {
+                val list = listOf(
+                    c(1, 6),
+                    c(2, 5),
+                    c(3, 4),
+                )
+                val dataset = list.toDS()
+
+                dataset.sortByKey().collectAsList() shouldBe list.sortedBy { it._1 }
+                dataset.sortByValue().collectAsList() shouldBe list.sortedBy { it._2 }
+            }
+
+            should("Sort Tuple2 Dataset") {
+                val list = listOf(
+                    Tuple2(1, 6),
+                    Tuple2(2, 5),
+                    Tuple2(3, 4),
+                )
+                val dataset = list.toDS()
+
+                dataset.sortByKey().collectAsList() shouldBe list.sortedBy { it._1 }
+                dataset.sortByValue().collectAsList() shouldBe list.sortedBy { it._2 }
+            }
+
+            should("Sort Pair Dataset") {
+                val list = listOf(
+                    Pair(1, 6),
+                    Pair(2, 5),
+                    Pair(3, 4),
+                )
+                val dataset = list.toDS()
+
+                dataset.sortByKey().collectAsList() shouldBe list.sortedBy { it.first }
+                dataset.sortByValue().collectAsList() shouldBe list.sortedBy { it.second }
+            }
         }
     }
 
@@ -401,6 +437,7 @@ class DatasetFunctionTest : ShouldSpec({
 
                 b.count() shouldBe 1
             }
+
         }
     }
 })