diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt index 3311301bb..41bfc3158 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt @@ -30,15 +30,9 @@ import org.jetbrains.kotlinx.dataframe.api.SplitWithTransform import org.jetbrains.kotlinx.dataframe.api.Update import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.asDataFrame -import org.jetbrains.kotlinx.dataframe.api.at import org.jetbrains.kotlinx.dataframe.api.columnsCount -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.frames -import org.jetbrains.kotlinx.dataframe.api.into import org.jetbrains.kotlinx.dataframe.api.isColumnGroup import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.api.values import org.jetbrains.kotlinx.dataframe.codeGen.CodeWithConverter import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnReference @@ -340,35 +334,3 @@ public fun KotlinKernelHost.useSchemas(schemaClasses: Iterable>) { public fun KotlinKernelHost.useSchemas(vararg schemaClasses: KClass<*>): Unit = useSchemas(schemaClasses.asIterable()) public inline fun KotlinKernelHost.useSchema(): Unit = useSchemas(T::class) - -/** - * Converts [dataframeLike] to [AnyFrame]. - * If [dataframeLike] is already [AnyFrame] then it is returned as is. - * If it's not possible to convert [dataframeLike] to [AnyFrame] then [IllegalArgumentException] is thrown. - */ -internal fun convertToDataFrame(dataframeLike: Any): AnyFrame = - when (dataframeLike) { - is Pivot<*> -> dataframeLike.frames().toDataFrame() - is ReducedPivot<*> -> dataframeLike.values().toDataFrame() - is PivotGroupBy<*> -> dataframeLike.frames() - is ReducedPivotGroupBy<*> -> dataframeLike.values() - is SplitWithTransform<*, *, *> -> dataframeLike.into() - is Split<*, *> -> dataframeLike.toDataFrame() - is Merge<*, *, *> -> dataframeLike.into("merged") - is Gather<*, *, *, *> -> dataframeLike.into("key", "value") - is Update<*, *> -> dataframeLike.df - is Convert<*, *> -> dataframeLike.df - is FormattedFrame<*> -> dataframeLike.df - is AnyCol -> dataFrameOf(dataframeLike) - is AnyRow -> dataframeLike.toDataFrame() - is GroupBy<*, *> -> dataframeLike.toDataFrame() - is AnyFrame -> dataframeLike - is DisableRowsLimitWrapper -> dataframeLike.value - is MoveClause<*, *> -> dataframeLike.df - is RenameClause<*, *> -> dataframeLike.df - is ReplaceClause<*, *> -> dataframeLike.df - is GroupClause<*, *> -> dataframeLike.into("untitled") - is InsertClause<*> -> dataframeLike.at(0) - is FormatClause<*, *> -> dataframeLike.df - else -> throw IllegalArgumentException("Unsupported type: ${dataframeLike::class}") - } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt index 7add25c68..c8f3debd5 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt @@ -3,12 +3,21 @@ package org.jetbrains.kotlinx.dataframe.jupyter import com.beust.klaxon.json import org.jetbrains.kotlinx.dataframe.api.rows import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.io.* +import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData +import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration +import org.jetbrains.kotlinx.dataframe.io.encodeFrame +import org.jetbrains.kotlinx.dataframe.io.toHTML +import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.convertToDataFrame import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.size -import org.jetbrains.kotlinx.jupyter.api.* import org.jetbrains.kotlinx.jupyter.api.HtmlData +import org.jetbrains.kotlinx.jupyter.api.JupyterClientType +import org.jetbrains.kotlinx.jupyter.api.KotlinKernelVersion +import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult +import org.jetbrains.kotlinx.jupyter.api.Notebook import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration +import org.jetbrains.kotlinx.jupyter.api.mimeResult +import org.jetbrains.kotlinx.jupyter.api.renderHtmlAsIFrameIfNeeded /** Starting from this version, dataframe integration will respond with additional data for rendering in Kotlin Notebooks plugin. */ private const val MIN_KERNEL_VERSION_FOR_NEW_TABLES_UI = "0.11.0.311" diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt index 28315785f..8119e989a 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt @@ -1,14 +1,45 @@ package org.jetbrains.kotlinx.dataframe.jupyter +import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.api.Convert +import org.jetbrains.kotlinx.dataframe.api.FormatClause +import org.jetbrains.kotlinx.dataframe.api.FormattedFrame +import org.jetbrains.kotlinx.dataframe.api.Gather +import org.jetbrains.kotlinx.dataframe.api.GroupBy +import org.jetbrains.kotlinx.dataframe.api.GroupClause +import org.jetbrains.kotlinx.dataframe.api.InsertClause +import org.jetbrains.kotlinx.dataframe.api.Merge +import org.jetbrains.kotlinx.dataframe.api.MoveClause +import org.jetbrains.kotlinx.dataframe.api.Pivot +import org.jetbrains.kotlinx.dataframe.api.PivotGroupBy +import org.jetbrains.kotlinx.dataframe.api.ReducedGroupBy +import org.jetbrains.kotlinx.dataframe.api.ReducedPivot +import org.jetbrains.kotlinx.dataframe.api.ReducedPivotGroupBy +import org.jetbrains.kotlinx.dataframe.api.RenameClause +import org.jetbrains.kotlinx.dataframe.api.ReplaceClause +import org.jetbrains.kotlinx.dataframe.api.Split +import org.jetbrains.kotlinx.dataframe.api.SplitWithTransform +import org.jetbrains.kotlinx.dataframe.api.Update +import org.jetbrains.kotlinx.dataframe.api.at +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.frames +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.sortBy +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.api.values +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator /** * A class with utility methods for Kotlin Notebook Plugin integration. - * Kotlin Notebook Plugin is acts as a client of Kotlin Jupyter kernel and use this functionality + * Kotlin Notebook Plugin acts as a client of Kotlin Jupyter kernel and uses this functionality * for dynamic pagination when rendering dataframes. - * The plugin sends Kotlin following code to the kernel to evaluate - * DISPLAY(KotlinNotebooksPluginUtils.getRowsSubsetForRendering(Out[x], 0, 20), "") + * The plugin sends the following code to the kernel to evaluate: + * DISPLAY(KotlinNotebooksPluginUtils.getRowsSubsetForRendering(Out[...], 0, 20), "") */ public object KotlinNotebookPluginUtils { /** @@ -30,4 +61,110 @@ public object KotlinNotebookPluginUtils { */ public fun getRowsSubsetForRendering(df: AnyFrame, startIdx: Int, endIdx: Int): DisableRowsLimitWrapper = DisableRowsLimitWrapper(df.filter { it.index() in startIdx until endIdx }) + + /** + * Sorts a dataframe-like object by multiple columns. + * + * @param dataFrameLike The dataframe-like object to sort. + * @param columnPaths The list of columns to sort by. Each element in the list represents a column path + * @param desc The list of booleans indicating whether each column should be sorted in descending order. + * The size of this list should be the same as the size of the `columns` list. + * + * @throws IllegalArgumentException if `dataFrameLike` is `null`. + * + * @return The sorted dataframe. + */ + public fun sortByColumns( + dataFrameLike: Any?, + columnPaths: List>, + desc: List + ): AnyFrame = when (dataFrameLike) { + null -> throw IllegalArgumentException("Dataframe is null") + else -> sortByColumns(convertToDataFrame(dataFrameLike), columnPaths, desc) + } + + /** + * Sorts the given data frame by the specified columns. + * + * @param df The data frame to be sorted. + * @param columnPaths The paths of the columns to be sorted. Each path is represented as a list of strings. + * @param isDesc A list of booleans indicating whether each column should be sorted in descending order. + * The size of this list must be equal to the size of the columnPaths list. + * @return The sorted data frame. + */ + public fun sortByColumns(df: AnyFrame, columnPaths: List>, isDesc: List): AnyFrame = + df.sortBy { + require(columnPaths.all { it.isNotEmpty() }) + require(columnPaths.size == isDesc.size) + + val sortKeys = columnPaths.map { path -> + ColumnPath(path) + } + + (sortKeys zip isDesc).map { (key, desc) -> + if (desc) key.desc() else key + }.toColumnSet() + } + + /** + * Converts [dataframeLike] to [AnyFrame]. + * If [dataframeLike] is already [AnyFrame] then it is returned as is. + * If it's not possible to convert [dataframeLike] to [AnyFrame] then [IllegalArgumentException] is thrown. + */ + public fun convertToDataFrame(dataframeLike: Any): AnyFrame = + when (dataframeLike) { + is Pivot<*> -> dataframeLike.frames().toDataFrame() + is ReducedGroupBy<*, *> -> dataframeLike.values() + is ReducedPivot<*> -> dataframeLike.values().toDataFrame() + is PivotGroupBy<*> -> dataframeLike.frames() + is ReducedPivotGroupBy<*> -> dataframeLike.values() + is SplitWithTransform<*, *, *> -> dataframeLike.into() + is Split<*, *> -> dataframeLike.toDataFrame() + is Merge<*, *, *> -> dataframeLike.into( + generateRandomVariationOfColumnName( + "merged", + dataframeLike.df.columnNames() + ) + ) + + is Gather<*, *, *, *> -> dataframeLike.into( + generateRandomVariationOfColumnName("key", dataframeLike.df.columnNames()), + generateRandomVariationOfColumnName("value", dataframeLike.df.columnNames()) + ) + + is Update<*, *> -> dataframeLike.df + is Convert<*, *> -> dataframeLike.df + is FormattedFrame<*> -> dataframeLike.df + is AnyCol -> dataFrameOf(dataframeLike) + is AnyRow -> dataframeLike.toDataFrame() + is GroupBy<*, *> -> dataframeLike.toDataFrame() + is AnyFrame -> dataframeLike + is DisableRowsLimitWrapper -> dataframeLike.value + is MoveClause<*, *> -> dataframeLike.df + is RenameClause<*, *> -> dataframeLike.df + is ReplaceClause<*, *> -> dataframeLike.df + is GroupClause<*, *> -> dataframeLike.into( + generateRandomVariationOfColumnName( + "untitled", + dataframeLike.df.columnNames() + ) + ) + + is InsertClause<*> -> dataframeLike.at(0) + is FormatClause<*, *> -> dataframeLike.df + else -> throw IllegalArgumentException("Unsupported type: ${dataframeLike::class}") + } + + /** + * Generates a random variation of a column name that is unique among the provided used names. + * + * @param preferredName The preferred name for the column. + * @param usedNames The list of already used column names. + * @return A unique random variation of the preferred name. + */ + public fun generateRandomVariationOfColumnName( + preferredName: String, + usedNames: List = emptyList() + ): String = + ColumnNameGenerator(usedNames).addUnique(preferredName) } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt index 11be55cb9..1f711cef8 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt @@ -1,6 +1,11 @@ package org.jetbrains.kotlinx.dataframe.jupyter -import com.beust.klaxon.* +import com.beust.klaxon.JsonArray +import com.beust.klaxon.JsonObject +import com.beust.klaxon.Parser +import io.kotest.assertions.throwables.shouldNotThrow +import io.kotest.matchers.comparables.shouldBeGreaterThan +import io.kotest.matchers.comparables.shouldBeLessThan import io.kotest.matchers.shouldBe import io.kotest.matchers.string.shouldContain import io.kotest.matchers.string.shouldNotContain @@ -79,8 +84,7 @@ class RenderingTests : JupyterReplTestCase() { @Test fun `test kotlin notebook plugin utils rows subset`() { - @Language("kts") - val result = exec( + val json = executeScriptAndParseDataframeResult( """ data class Row(val id: Int) val df = (1..100).map { Row(it) }.toDataFrame() @@ -88,16 +92,29 @@ class RenderingTests : JupyterReplTestCase() { """.trimIndent() ) - val json = parseDataframeJson(result) - - json.int("nrow") shouldBe 30 - json.int("ncol") shouldBe 1 + assertDataFrameDimensions(json, 30, 1) val rows = json.array>("kotlin_dataframe")!! rows.getObj(0).int("id") shouldBe 21 rows.getObj(rows.lastIndex).int("id") shouldBe 50 } + /** + * Executes the given `script` and parses the resulting DataFrame as a `JsonObject`. + * + * @param script the script to be executed + * @return the parsed DataFrame result as a `JsonObject` + */ + private fun executeScriptAndParseDataframeResult(@Language("kts") script: String): JsonObject { + val result = exec(script) + return parseDataframeJson(result) + } + + private fun assertDataFrameDimensions(json: JsonObject, expectedRows: Int, expectedColumns: Int) { + json.int("nrow") shouldBe expectedRows + json.int("ncol") shouldBe expectedColumns + } + private fun parseDataframeJson(result: MimeTypedResult): JsonObject { val parser = Parser.default() return parser.parse(StringBuilder(result["application/kotlindataframe+json"]!!)) as JsonObject @@ -106,23 +123,121 @@ class RenderingTests : JupyterReplTestCase() { private fun JsonArray<*>.getObj(index: Int) = this.get(index) as JsonObject @Test - fun `test kotlin notebook plugin utils groupby`() { - @Language("kts") - val result = exec( + fun `test kotlin notebook plugin utils sort by one column asc`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("id")), listOf(false)) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + assertSortedById(json, false) + } + + @Suppress("UNCHECKED_CAST") + private fun assertSortedById(json: JsonObject, desc: Boolean) { + val rows = json["kotlin_dataframe"] as JsonArray + var previousId = if (desc) 101 else 0 + rows.forEach { row -> + val currentId = row.int("id")!! + if (desc) currentId shouldBeLessThan previousId else currentId shouldBeGreaterThan previousId + previousId = currentId + } + } + + @Test + fun `test kotlin notebook plugin utils sort by one column desc`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("id")), listOf(true)) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + assertSortedById(json, true) + } + + @Suppress("UNCHECKED_CAST") + @Test + fun `test kotlin notebook plugin utils sort by multiple columns`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.getRowsSubsetForRendering( + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("category"), listOf("id")), listOf(true, false)), + 0, 100 + ) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + + val rows = json["kotlin_dataframe"] as JsonArray + assertSortedByCategory(rows) + assertSortedById(rows) + } + + private fun assertSortedByCategory(rows: JsonArray) { + rows.forEachIndexed { i, row -> + val currentCategory = row.string("category") + if (i < 50) currentCategory shouldBe "odd" + else currentCategory shouldBe "even" + } + } + + private fun assertSortedById(rows: JsonArray) { + var previousCategory = "odd" + var previousId = 0 + for (row in rows) { + val currentCategory = row.string("category")!! + val currentId = row.int("id")!! + + if (previousCategory == "odd" && currentCategory == "even") { + previousId shouldBeGreaterThan currentId + } else if (previousCategory == currentCategory) { + previousId shouldBeLessThan currentId + } + + previousCategory = currentCategory + previousId = currentId + } + } + + @Test + fun `test kotlin dataframe conversion groupby`() { + val json = executeScriptAndParseDataframeResult( """ data class Row(val id: Int, val group: Int) val df = (1..100).map { Row(it, if (it <= 50) 1 else 2) }.toDataFrame() - KotlinNotebookPluginUtils.getRowsSubsetForRendering(df.groupBy("group"), 0, 10) + KotlinNotebookPluginUtils.convertToDataFrame(df.groupBy("group")) """.trimIndent() ) - val json = parseDataframeJson(result) - - json.int("nrow") shouldBe 2 - json.int("ncol") shouldBe 2 + assertDataFrameDimensions(json, 2, 2) val rows = json.array>("kotlin_dataframe")!! rows.getObj(0).array("group1")!!.size shouldBe 50 rows.getObj(1).array("group1")!!.size shouldBe 50 } + + // Regression KTNB-424 + @Test + fun `test kotlin dataframe conversion ReducedGroupBy`() { + shouldNotThrow { + val json = executeScriptAndParseDataframeResult( + """ + data class Row(val id: Int, val group: Int) + val df = (1..100).map { Row(it, if (it <= 50) 1 else 2) }.toDataFrame() + KotlinNotebookPluginUtils.convertToDataFrame(df.groupBy("group").first()) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 2, 2) + } + } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt index 3311301bb..41bfc3158 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt @@ -30,15 +30,9 @@ import org.jetbrains.kotlinx.dataframe.api.SplitWithTransform import org.jetbrains.kotlinx.dataframe.api.Update import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.asDataFrame -import org.jetbrains.kotlinx.dataframe.api.at import org.jetbrains.kotlinx.dataframe.api.columnsCount -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.frames -import org.jetbrains.kotlinx.dataframe.api.into import org.jetbrains.kotlinx.dataframe.api.isColumnGroup import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.api.values import org.jetbrains.kotlinx.dataframe.codeGen.CodeWithConverter import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnReference @@ -340,35 +334,3 @@ public fun KotlinKernelHost.useSchemas(schemaClasses: Iterable>) { public fun KotlinKernelHost.useSchemas(vararg schemaClasses: KClass<*>): Unit = useSchemas(schemaClasses.asIterable()) public inline fun KotlinKernelHost.useSchema(): Unit = useSchemas(T::class) - -/** - * Converts [dataframeLike] to [AnyFrame]. - * If [dataframeLike] is already [AnyFrame] then it is returned as is. - * If it's not possible to convert [dataframeLike] to [AnyFrame] then [IllegalArgumentException] is thrown. - */ -internal fun convertToDataFrame(dataframeLike: Any): AnyFrame = - when (dataframeLike) { - is Pivot<*> -> dataframeLike.frames().toDataFrame() - is ReducedPivot<*> -> dataframeLike.values().toDataFrame() - is PivotGroupBy<*> -> dataframeLike.frames() - is ReducedPivotGroupBy<*> -> dataframeLike.values() - is SplitWithTransform<*, *, *> -> dataframeLike.into() - is Split<*, *> -> dataframeLike.toDataFrame() - is Merge<*, *, *> -> dataframeLike.into("merged") - is Gather<*, *, *, *> -> dataframeLike.into("key", "value") - is Update<*, *> -> dataframeLike.df - is Convert<*, *> -> dataframeLike.df - is FormattedFrame<*> -> dataframeLike.df - is AnyCol -> dataFrameOf(dataframeLike) - is AnyRow -> dataframeLike.toDataFrame() - is GroupBy<*, *> -> dataframeLike.toDataFrame() - is AnyFrame -> dataframeLike - is DisableRowsLimitWrapper -> dataframeLike.value - is MoveClause<*, *> -> dataframeLike.df - is RenameClause<*, *> -> dataframeLike.df - is ReplaceClause<*, *> -> dataframeLike.df - is GroupClause<*, *> -> dataframeLike.into("untitled") - is InsertClause<*> -> dataframeLike.at(0) - is FormatClause<*, *> -> dataframeLike.df - else -> throw IllegalArgumentException("Unsupported type: ${dataframeLike::class}") - } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt index 7add25c68..c8f3debd5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt @@ -3,12 +3,21 @@ package org.jetbrains.kotlinx.dataframe.jupyter import com.beust.klaxon.json import org.jetbrains.kotlinx.dataframe.api.rows import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.io.* +import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData +import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration +import org.jetbrains.kotlinx.dataframe.io.encodeFrame +import org.jetbrains.kotlinx.dataframe.io.toHTML +import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.convertToDataFrame import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.size -import org.jetbrains.kotlinx.jupyter.api.* import org.jetbrains.kotlinx.jupyter.api.HtmlData +import org.jetbrains.kotlinx.jupyter.api.JupyterClientType +import org.jetbrains.kotlinx.jupyter.api.KotlinKernelVersion +import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult +import org.jetbrains.kotlinx.jupyter.api.Notebook import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration +import org.jetbrains.kotlinx.jupyter.api.mimeResult +import org.jetbrains.kotlinx.jupyter.api.renderHtmlAsIFrameIfNeeded /** Starting from this version, dataframe integration will respond with additional data for rendering in Kotlin Notebooks plugin. */ private const val MIN_KERNEL_VERSION_FOR_NEW_TABLES_UI = "0.11.0.311" diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt index 28315785f..8119e989a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt @@ -1,14 +1,45 @@ package org.jetbrains.kotlinx.dataframe.jupyter +import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.api.Convert +import org.jetbrains.kotlinx.dataframe.api.FormatClause +import org.jetbrains.kotlinx.dataframe.api.FormattedFrame +import org.jetbrains.kotlinx.dataframe.api.Gather +import org.jetbrains.kotlinx.dataframe.api.GroupBy +import org.jetbrains.kotlinx.dataframe.api.GroupClause +import org.jetbrains.kotlinx.dataframe.api.InsertClause +import org.jetbrains.kotlinx.dataframe.api.Merge +import org.jetbrains.kotlinx.dataframe.api.MoveClause +import org.jetbrains.kotlinx.dataframe.api.Pivot +import org.jetbrains.kotlinx.dataframe.api.PivotGroupBy +import org.jetbrains.kotlinx.dataframe.api.ReducedGroupBy +import org.jetbrains.kotlinx.dataframe.api.ReducedPivot +import org.jetbrains.kotlinx.dataframe.api.ReducedPivotGroupBy +import org.jetbrains.kotlinx.dataframe.api.RenameClause +import org.jetbrains.kotlinx.dataframe.api.ReplaceClause +import org.jetbrains.kotlinx.dataframe.api.Split +import org.jetbrains.kotlinx.dataframe.api.SplitWithTransform +import org.jetbrains.kotlinx.dataframe.api.Update +import org.jetbrains.kotlinx.dataframe.api.at +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.frames +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.sortBy +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.api.values +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator /** * A class with utility methods for Kotlin Notebook Plugin integration. - * Kotlin Notebook Plugin is acts as a client of Kotlin Jupyter kernel and use this functionality + * Kotlin Notebook Plugin acts as a client of Kotlin Jupyter kernel and uses this functionality * for dynamic pagination when rendering dataframes. - * The plugin sends Kotlin following code to the kernel to evaluate - * DISPLAY(KotlinNotebooksPluginUtils.getRowsSubsetForRendering(Out[x], 0, 20), "") + * The plugin sends the following code to the kernel to evaluate: + * DISPLAY(KotlinNotebooksPluginUtils.getRowsSubsetForRendering(Out[...], 0, 20), "") */ public object KotlinNotebookPluginUtils { /** @@ -30,4 +61,110 @@ public object KotlinNotebookPluginUtils { */ public fun getRowsSubsetForRendering(df: AnyFrame, startIdx: Int, endIdx: Int): DisableRowsLimitWrapper = DisableRowsLimitWrapper(df.filter { it.index() in startIdx until endIdx }) + + /** + * Sorts a dataframe-like object by multiple columns. + * + * @param dataFrameLike The dataframe-like object to sort. + * @param columnPaths The list of columns to sort by. Each element in the list represents a column path + * @param desc The list of booleans indicating whether each column should be sorted in descending order. + * The size of this list should be the same as the size of the `columns` list. + * + * @throws IllegalArgumentException if `dataFrameLike` is `null`. + * + * @return The sorted dataframe. + */ + public fun sortByColumns( + dataFrameLike: Any?, + columnPaths: List>, + desc: List + ): AnyFrame = when (dataFrameLike) { + null -> throw IllegalArgumentException("Dataframe is null") + else -> sortByColumns(convertToDataFrame(dataFrameLike), columnPaths, desc) + } + + /** + * Sorts the given data frame by the specified columns. + * + * @param df The data frame to be sorted. + * @param columnPaths The paths of the columns to be sorted. Each path is represented as a list of strings. + * @param isDesc A list of booleans indicating whether each column should be sorted in descending order. + * The size of this list must be equal to the size of the columnPaths list. + * @return The sorted data frame. + */ + public fun sortByColumns(df: AnyFrame, columnPaths: List>, isDesc: List): AnyFrame = + df.sortBy { + require(columnPaths.all { it.isNotEmpty() }) + require(columnPaths.size == isDesc.size) + + val sortKeys = columnPaths.map { path -> + ColumnPath(path) + } + + (sortKeys zip isDesc).map { (key, desc) -> + if (desc) key.desc() else key + }.toColumnSet() + } + + /** + * Converts [dataframeLike] to [AnyFrame]. + * If [dataframeLike] is already [AnyFrame] then it is returned as is. + * If it's not possible to convert [dataframeLike] to [AnyFrame] then [IllegalArgumentException] is thrown. + */ + public fun convertToDataFrame(dataframeLike: Any): AnyFrame = + when (dataframeLike) { + is Pivot<*> -> dataframeLike.frames().toDataFrame() + is ReducedGroupBy<*, *> -> dataframeLike.values() + is ReducedPivot<*> -> dataframeLike.values().toDataFrame() + is PivotGroupBy<*> -> dataframeLike.frames() + is ReducedPivotGroupBy<*> -> dataframeLike.values() + is SplitWithTransform<*, *, *> -> dataframeLike.into() + is Split<*, *> -> dataframeLike.toDataFrame() + is Merge<*, *, *> -> dataframeLike.into( + generateRandomVariationOfColumnName( + "merged", + dataframeLike.df.columnNames() + ) + ) + + is Gather<*, *, *, *> -> dataframeLike.into( + generateRandomVariationOfColumnName("key", dataframeLike.df.columnNames()), + generateRandomVariationOfColumnName("value", dataframeLike.df.columnNames()) + ) + + is Update<*, *> -> dataframeLike.df + is Convert<*, *> -> dataframeLike.df + is FormattedFrame<*> -> dataframeLike.df + is AnyCol -> dataFrameOf(dataframeLike) + is AnyRow -> dataframeLike.toDataFrame() + is GroupBy<*, *> -> dataframeLike.toDataFrame() + is AnyFrame -> dataframeLike + is DisableRowsLimitWrapper -> dataframeLike.value + is MoveClause<*, *> -> dataframeLike.df + is RenameClause<*, *> -> dataframeLike.df + is ReplaceClause<*, *> -> dataframeLike.df + is GroupClause<*, *> -> dataframeLike.into( + generateRandomVariationOfColumnName( + "untitled", + dataframeLike.df.columnNames() + ) + ) + + is InsertClause<*> -> dataframeLike.at(0) + is FormatClause<*, *> -> dataframeLike.df + else -> throw IllegalArgumentException("Unsupported type: ${dataframeLike::class}") + } + + /** + * Generates a random variation of a column name that is unique among the provided used names. + * + * @param preferredName The preferred name for the column. + * @param usedNames The list of already used column names. + * @return A unique random variation of the preferred name. + */ + public fun generateRandomVariationOfColumnName( + preferredName: String, + usedNames: List = emptyList() + ): String = + ColumnNameGenerator(usedNames).addUnique(preferredName) } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt index 11be55cb9..1f711cef8 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt @@ -1,6 +1,11 @@ package org.jetbrains.kotlinx.dataframe.jupyter -import com.beust.klaxon.* +import com.beust.klaxon.JsonArray +import com.beust.klaxon.JsonObject +import com.beust.klaxon.Parser +import io.kotest.assertions.throwables.shouldNotThrow +import io.kotest.matchers.comparables.shouldBeGreaterThan +import io.kotest.matchers.comparables.shouldBeLessThan import io.kotest.matchers.shouldBe import io.kotest.matchers.string.shouldContain import io.kotest.matchers.string.shouldNotContain @@ -79,8 +84,7 @@ class RenderingTests : JupyterReplTestCase() { @Test fun `test kotlin notebook plugin utils rows subset`() { - @Language("kts") - val result = exec( + val json = executeScriptAndParseDataframeResult( """ data class Row(val id: Int) val df = (1..100).map { Row(it) }.toDataFrame() @@ -88,16 +92,29 @@ class RenderingTests : JupyterReplTestCase() { """.trimIndent() ) - val json = parseDataframeJson(result) - - json.int("nrow") shouldBe 30 - json.int("ncol") shouldBe 1 + assertDataFrameDimensions(json, 30, 1) val rows = json.array>("kotlin_dataframe")!! rows.getObj(0).int("id") shouldBe 21 rows.getObj(rows.lastIndex).int("id") shouldBe 50 } + /** + * Executes the given `script` and parses the resulting DataFrame as a `JsonObject`. + * + * @param script the script to be executed + * @return the parsed DataFrame result as a `JsonObject` + */ + private fun executeScriptAndParseDataframeResult(@Language("kts") script: String): JsonObject { + val result = exec(script) + return parseDataframeJson(result) + } + + private fun assertDataFrameDimensions(json: JsonObject, expectedRows: Int, expectedColumns: Int) { + json.int("nrow") shouldBe expectedRows + json.int("ncol") shouldBe expectedColumns + } + private fun parseDataframeJson(result: MimeTypedResult): JsonObject { val parser = Parser.default() return parser.parse(StringBuilder(result["application/kotlindataframe+json"]!!)) as JsonObject @@ -106,23 +123,121 @@ class RenderingTests : JupyterReplTestCase() { private fun JsonArray<*>.getObj(index: Int) = this.get(index) as JsonObject @Test - fun `test kotlin notebook plugin utils groupby`() { - @Language("kts") - val result = exec( + fun `test kotlin notebook plugin utils sort by one column asc`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("id")), listOf(false)) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + assertSortedById(json, false) + } + + @Suppress("UNCHECKED_CAST") + private fun assertSortedById(json: JsonObject, desc: Boolean) { + val rows = json["kotlin_dataframe"] as JsonArray + var previousId = if (desc) 101 else 0 + rows.forEach { row -> + val currentId = row.int("id")!! + if (desc) currentId shouldBeLessThan previousId else currentId shouldBeGreaterThan previousId + previousId = currentId + } + } + + @Test + fun `test kotlin notebook plugin utils sort by one column desc`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("id")), listOf(true)) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + assertSortedById(json, true) + } + + @Suppress("UNCHECKED_CAST") + @Test + fun `test kotlin notebook plugin utils sort by multiple columns`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.getRowsSubsetForRendering( + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("category"), listOf("id")), listOf(true, false)), + 0, 100 + ) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + + val rows = json["kotlin_dataframe"] as JsonArray + assertSortedByCategory(rows) + assertSortedById(rows) + } + + private fun assertSortedByCategory(rows: JsonArray) { + rows.forEachIndexed { i, row -> + val currentCategory = row.string("category") + if (i < 50) currentCategory shouldBe "odd" + else currentCategory shouldBe "even" + } + } + + private fun assertSortedById(rows: JsonArray) { + var previousCategory = "odd" + var previousId = 0 + for (row in rows) { + val currentCategory = row.string("category")!! + val currentId = row.int("id")!! + + if (previousCategory == "odd" && currentCategory == "even") { + previousId shouldBeGreaterThan currentId + } else if (previousCategory == currentCategory) { + previousId shouldBeLessThan currentId + } + + previousCategory = currentCategory + previousId = currentId + } + } + + @Test + fun `test kotlin dataframe conversion groupby`() { + val json = executeScriptAndParseDataframeResult( """ data class Row(val id: Int, val group: Int) val df = (1..100).map { Row(it, if (it <= 50) 1 else 2) }.toDataFrame() - KotlinNotebookPluginUtils.getRowsSubsetForRendering(df.groupBy("group"), 0, 10) + KotlinNotebookPluginUtils.convertToDataFrame(df.groupBy("group")) """.trimIndent() ) - val json = parseDataframeJson(result) - - json.int("nrow") shouldBe 2 - json.int("ncol") shouldBe 2 + assertDataFrameDimensions(json, 2, 2) val rows = json.array>("kotlin_dataframe")!! rows.getObj(0).array("group1")!!.size shouldBe 50 rows.getObj(1).array("group1")!!.size shouldBe 50 } + + // Regression KTNB-424 + @Test + fun `test kotlin dataframe conversion ReducedGroupBy`() { + shouldNotThrow { + val json = executeScriptAndParseDataframeResult( + """ + data class Row(val id: Int, val group: Int) + val df = (1..100).map { Row(it, if (it <= 50) 1 else 2) }.toDataFrame() + KotlinNotebookPluginUtils.convertToDataFrame(df.groupBy("group").first()) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 2, 2) + } + } }