From bfde84140652351a2b50fdbbb5c09ed0074c65f6 Mon Sep 17 00:00:00 2001 From: Nikita Ermolenko Date: Mon, 13 Nov 2023 13:31:49 +0200 Subject: [PATCH 1/4] Move `convertToDataFrame` to KotlinNotebookPluginUtils and fix possible column name clashes Moved the `convertToDataFrame` function to KotlinNotebookPluginUtils and made it public to make it accessible in the Kotlin Notebook plugin. An auxiliary method to generate unique variations of a string was added to fix an issue with column name clashes when converting an intermediate object to a dataframe. Fixes KTNB-424, KTNB-415 --- .../kotlinx/dataframe/jupyter/Integration.kt | 38 --------- .../dataframe/jupyter/JupyterHtmlRenderer.kt | 1 + .../jupyter/KotlinNotebookPluginUtils.kt | 81 +++++++++++++++++++ .../kotlinx/dataframe/jupyter/Integration.kt | 38 --------- .../dataframe/jupyter/JupyterHtmlRenderer.kt | 13 ++- .../jupyter/KotlinNotebookPluginUtils.kt | 81 +++++++++++++++++++ 6 files changed, 174 insertions(+), 78 deletions(-) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt index 3311301bb..41bfc3158 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt @@ -30,15 +30,9 @@ import org.jetbrains.kotlinx.dataframe.api.SplitWithTransform import org.jetbrains.kotlinx.dataframe.api.Update import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.asDataFrame -import org.jetbrains.kotlinx.dataframe.api.at import org.jetbrains.kotlinx.dataframe.api.columnsCount -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.frames -import org.jetbrains.kotlinx.dataframe.api.into import org.jetbrains.kotlinx.dataframe.api.isColumnGroup import 
org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.api.values import org.jetbrains.kotlinx.dataframe.codeGen.CodeWithConverter import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnReference @@ -340,35 +334,3 @@ public fun KotlinKernelHost.useSchemas(schemaClasses: Iterable<KClass<*>>) { public fun KotlinKernelHost.useSchemas(vararg schemaClasses: KClass<*>): Unit = useSchemas(schemaClasses.asIterable()) public inline fun <reified T> KotlinKernelHost.useSchema(): Unit = useSchemas(T::class) - -/** - * Converts [dataframeLike] to [AnyFrame]. - * If [dataframeLike] is already [AnyFrame] then it is returned as is. - * If it's not possible to convert [dataframeLike] to [AnyFrame] then [IllegalArgumentException] is thrown. - */ -internal fun convertToDataFrame(dataframeLike: Any): AnyFrame = - when (dataframeLike) { - is Pivot<*> -> dataframeLike.frames().toDataFrame() - is ReducedPivot<*> -> dataframeLike.values().toDataFrame() - is PivotGroupBy<*> -> dataframeLike.frames() - is ReducedPivotGroupBy<*> -> dataframeLike.values() - is SplitWithTransform<*, *, *> -> dataframeLike.into() - is Split<*, *> -> dataframeLike.toDataFrame() - is Merge<*, *, *> -> dataframeLike.into("merged") - is Gather<*, *, *, *> -> dataframeLike.into("key", "value") - is Update<*, *> -> dataframeLike.df - is Convert<*, *> -> dataframeLike.df - is FormattedFrame<*> -> dataframeLike.df - is AnyCol -> dataFrameOf(dataframeLike) - is AnyRow -> dataframeLike.toDataFrame() - is GroupBy<*, *> -> dataframeLike.toDataFrame() - is AnyFrame -> dataframeLike - is DisableRowsLimitWrapper -> dataframeLike.value - is MoveClause<*, *> -> dataframeLike.df - is RenameClause<*, *> -> dataframeLike.df - is ReplaceClause<*, *> -> dataframeLike.df - is GroupClause<*, *> -> dataframeLike.into("untitled") - is InsertClause<*> -> dataframeLike.at(0) - is FormatClause<*, *> -> dataframeLike.df - 
else -> throw IllegalArgumentException("Unsupported type: ${dataframeLike::class}") - } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt index 7add25c68..3b3963c55 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt @@ -4,6 +4,7 @@ import com.beust.klaxon.json import org.jetbrains.kotlinx.dataframe.api.rows import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.io.* +import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.convertToDataFrame import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.size import org.jetbrains.kotlinx.jupyter.api.* diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt index 28315785f..dd8ebc5b3 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt @@ -1,7 +1,39 @@ package org.jetbrains.kotlinx.dataframe.jupyter +import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.api.Convert +import org.jetbrains.kotlinx.dataframe.api.FormatClause +import org.jetbrains.kotlinx.dataframe.api.FormattedFrame +import org.jetbrains.kotlinx.dataframe.api.Gather +import org.jetbrains.kotlinx.dataframe.api.GroupBy +import org.jetbrains.kotlinx.dataframe.api.GroupClause 
+import org.jetbrains.kotlinx.dataframe.api.InsertClause +import org.jetbrains.kotlinx.dataframe.api.Merge +import org.jetbrains.kotlinx.dataframe.api.MoveClause +import org.jetbrains.kotlinx.dataframe.api.Pivot +import org.jetbrains.kotlinx.dataframe.api.PivotGroupBy +import org.jetbrains.kotlinx.dataframe.api.ReducedGroupBy +import org.jetbrains.kotlinx.dataframe.api.ReducedPivot +import org.jetbrains.kotlinx.dataframe.api.ReducedPivotGroupBy +import org.jetbrains.kotlinx.dataframe.api.RenameClause +import org.jetbrains.kotlinx.dataframe.api.ReplaceClause +import org.jetbrains.kotlinx.dataframe.api.Split +import org.jetbrains.kotlinx.dataframe.api.SplitWithTransform +import org.jetbrains.kotlinx.dataframe.api.Update +import org.jetbrains.kotlinx.dataframe.api.at +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.frames +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.sortBy +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.api.values +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import kotlin.random.Random + /** * A class with utility methods for Kotlin Notebook Plugin integration. @@ -30,4 +62,53 @@ public object KotlinNotebookPluginUtils { */ public fun getRowsSubsetForRendering(df: AnyFrame, startIdx: Int, endIdx: Int): DisableRowsLimitWrapper = DisableRowsLimitWrapper(df.filter { it.index() in startIdx until endIdx }) + /** + * Converts [dataframeLike] to [AnyFrame]. + * If [dataframeLike] is already [AnyFrame] then it is returned as is. + * If it's not possible to convert [dataframeLike] to [AnyFrame] then [IllegalArgumentException] is thrown. 
+ */ + public fun convertToDataFrame(dataframeLike: Any): AnyFrame = + when (dataframeLike) { + is Pivot<*> -> dataframeLike.frames().toDataFrame() + is ReducedGroupBy<*, *> -> dataframeLike.values() + is ReducedPivot<*> -> dataframeLike.values().toDataFrame() + is PivotGroupBy<*> -> dataframeLike.frames() + is ReducedPivotGroupBy<*> -> dataframeLike.values() + is SplitWithTransform<*, *, *> -> dataframeLike.into() + is Split<*, *> -> dataframeLike.toDataFrame() + is Merge<*, *, *> -> dataframeLike.into(generateRandomVariationOfString("merged")) + is Gather<*, *, *, *> -> dataframeLike.into( + generateRandomVariationOfString("key"), + generateRandomVariationOfString("value") + ) + is Update<*, *> -> dataframeLike.df + is Convert<*, *> -> dataframeLike.df + is FormattedFrame<*> -> dataframeLike.df + is AnyCol -> dataFrameOf(dataframeLike) + is AnyRow -> dataframeLike.toDataFrame() + is GroupBy<*, *> -> dataframeLike.toDataFrame() + is AnyFrame -> dataframeLike + is DisableRowsLimitWrapper -> dataframeLike.value + is MoveClause<*, *> -> dataframeLike.df + is RenameClause<*, *> -> dataframeLike.df + is ReplaceClause<*, *> -> dataframeLike.df + is GroupClause<*, *> -> dataframeLike.into(generateRandomVariationOfString("untitled")) + is InsertClause<*> -> dataframeLike.at(0) + is FormatClause<*, *> -> dataframeLike.df + else -> throw IllegalArgumentException("Unsupported type: ${dataframeLike::class}") + } + + /** + * Generates a random variation of the given string by appending a unique hash to it. 
+ * + * @param str the original string to generate variation from + * @return a random variation of the original string + */ + public fun generateRandomVariationOfString(str: String): String { + val timeStamp = System.currentTimeMillis() + val random = Random.Default.nextInt() + val hash = "${timeStamp}_$random".hashCode() + + return "${str}_${String.format("%08X", hash)}" // get only 8 symbols from hash + } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt index 3311301bb..41bfc3158 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt @@ -30,15 +30,9 @@ import org.jetbrains.kotlinx.dataframe.api.SplitWithTransform import org.jetbrains.kotlinx.dataframe.api.Update import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.asDataFrame -import org.jetbrains.kotlinx.dataframe.api.at import org.jetbrains.kotlinx.dataframe.api.columnsCount -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.frames -import org.jetbrains.kotlinx.dataframe.api.into import org.jetbrains.kotlinx.dataframe.api.isColumnGroup import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.api.values import org.jetbrains.kotlinx.dataframe.codeGen.CodeWithConverter import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnReference @@ -340,35 +334,3 @@ public fun KotlinKernelHost.useSchemas(schemaClasses: Iterable<KClass<*>>) { public fun KotlinKernelHost.useSchemas(vararg schemaClasses: KClass<*>): Unit = useSchemas(schemaClasses.asIterable()) public inline fun <reified T> KotlinKernelHost.useSchema(): Unit = useSchemas(T::class) - -/** - * Converts [dataframeLike] 
to [AnyFrame]. - * If [dataframeLike] is already [AnyFrame] then it is returned as is. - * If it's not possible to convert [dataframeLike] to [AnyFrame] then [IllegalArgumentException] is thrown. - */ -internal fun convertToDataFrame(dataframeLike: Any): AnyFrame = - when (dataframeLike) { - is Pivot<*> -> dataframeLike.frames().toDataFrame() - is ReducedPivot<*> -> dataframeLike.values().toDataFrame() - is PivotGroupBy<*> -> dataframeLike.frames() - is ReducedPivotGroupBy<*> -> dataframeLike.values() - is SplitWithTransform<*, *, *> -> dataframeLike.into() - is Split<*, *> -> dataframeLike.toDataFrame() - is Merge<*, *, *> -> dataframeLike.into("merged") - is Gather<*, *, *, *> -> dataframeLike.into("key", "value") - is Update<*, *> -> dataframeLike.df - is Convert<*, *> -> dataframeLike.df - is FormattedFrame<*> -> dataframeLike.df - is AnyCol -> dataFrameOf(dataframeLike) - is AnyRow -> dataframeLike.toDataFrame() - is GroupBy<*, *> -> dataframeLike.toDataFrame() - is AnyFrame -> dataframeLike - is DisableRowsLimitWrapper -> dataframeLike.value - is MoveClause<*, *> -> dataframeLike.df - is RenameClause<*, *> -> dataframeLike.df - is ReplaceClause<*, *> -> dataframeLike.df - is GroupClause<*, *> -> dataframeLike.into("untitled") - is InsertClause<*> -> dataframeLike.at(0) - is FormatClause<*, *> -> dataframeLike.df - else -> throw IllegalArgumentException("Unsupported type: ${dataframeLike::class}") - } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt index 7add25c68..c8f3debd5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt @@ -3,12 +3,21 @@ package org.jetbrains.kotlinx.dataframe.jupyter import com.beust.klaxon.json import org.jetbrains.kotlinx.dataframe.api.rows import 
org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.io.* +import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData +import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration +import org.jetbrains.kotlinx.dataframe.io.encodeFrame +import org.jetbrains.kotlinx.dataframe.io.toHTML +import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.convertToDataFrame import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.size -import org.jetbrains.kotlinx.jupyter.api.* import org.jetbrains.kotlinx.jupyter.api.HtmlData +import org.jetbrains.kotlinx.jupyter.api.JupyterClientType +import org.jetbrains.kotlinx.jupyter.api.KotlinKernelVersion +import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult +import org.jetbrains.kotlinx.jupyter.api.Notebook import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration +import org.jetbrains.kotlinx.jupyter.api.mimeResult +import org.jetbrains.kotlinx.jupyter.api.renderHtmlAsIFrameIfNeeded /** Starting from this version, dataframe integration will respond with additional data for rendering in Kotlin Notebooks plugin. 
*/ private const val MIN_KERNEL_VERSION_FOR_NEW_TABLES_UI = "0.11.0.311" diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt index 28315785f..dd8ebc5b3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt @@ -1,7 +1,39 @@ package org.jetbrains.kotlinx.dataframe.jupyter +import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.api.Convert +import org.jetbrains.kotlinx.dataframe.api.FormatClause +import org.jetbrains.kotlinx.dataframe.api.FormattedFrame +import org.jetbrains.kotlinx.dataframe.api.Gather +import org.jetbrains.kotlinx.dataframe.api.GroupBy +import org.jetbrains.kotlinx.dataframe.api.GroupClause +import org.jetbrains.kotlinx.dataframe.api.InsertClause +import org.jetbrains.kotlinx.dataframe.api.Merge +import org.jetbrains.kotlinx.dataframe.api.MoveClause +import org.jetbrains.kotlinx.dataframe.api.Pivot +import org.jetbrains.kotlinx.dataframe.api.PivotGroupBy +import org.jetbrains.kotlinx.dataframe.api.ReducedGroupBy +import org.jetbrains.kotlinx.dataframe.api.ReducedPivot +import org.jetbrains.kotlinx.dataframe.api.ReducedPivotGroupBy +import org.jetbrains.kotlinx.dataframe.api.RenameClause +import org.jetbrains.kotlinx.dataframe.api.ReplaceClause +import org.jetbrains.kotlinx.dataframe.api.Split +import org.jetbrains.kotlinx.dataframe.api.SplitWithTransform +import org.jetbrains.kotlinx.dataframe.api.Update +import org.jetbrains.kotlinx.dataframe.api.at +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.frames +import 
org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.sortBy +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.api.values +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import kotlin.random.Random + /** * A class with utility methods for Kotlin Notebook Plugin integration. @@ -30,4 +62,53 @@ public object KotlinNotebookPluginUtils { */ public fun getRowsSubsetForRendering(df: AnyFrame, startIdx: Int, endIdx: Int): DisableRowsLimitWrapper = DisableRowsLimitWrapper(df.filter { it.index() in startIdx until endIdx }) + /** + * Converts [dataframeLike] to [AnyFrame]. + * If [dataframeLike] is already [AnyFrame] then it is returned as is. + * If it's not possible to convert [dataframeLike] to [AnyFrame] then [IllegalArgumentException] is thrown. + */ + public fun convertToDataFrame(dataframeLike: Any): AnyFrame = + when (dataframeLike) { + is Pivot<*> -> dataframeLike.frames().toDataFrame() + is ReducedGroupBy<*, *> -> dataframeLike.values() + is ReducedPivot<*> -> dataframeLike.values().toDataFrame() + is PivotGroupBy<*> -> dataframeLike.frames() + is ReducedPivotGroupBy<*> -> dataframeLike.values() + is SplitWithTransform<*, *, *> -> dataframeLike.into() + is Split<*, *> -> dataframeLike.toDataFrame() + is Merge<*, *, *> -> dataframeLike.into(generateRandomVariationOfString("merged")) + is Gather<*, *, *, *> -> dataframeLike.into( + generateRandomVariationOfString("key"), + generateRandomVariationOfString("value") + ) + is Update<*, *> -> dataframeLike.df + is Convert<*, *> -> dataframeLike.df + is FormattedFrame<*> -> dataframeLike.df + is AnyCol -> dataFrameOf(dataframeLike) + is AnyRow -> dataframeLike.toDataFrame() + is GroupBy<*, *> -> dataframeLike.toDataFrame() + is AnyFrame -> dataframeLike + is DisableRowsLimitWrapper -> dataframeLike.value + is MoveClause<*, *> -> dataframeLike.df + is RenameClause<*, *> -> 
dataframeLike.df + is ReplaceClause<*, *> -> dataframeLike.df + is GroupClause<*, *> -> dataframeLike.into(generateRandomVariationOfString("untitled")) + is InsertClause<*> -> dataframeLike.at(0) + is FormatClause<*, *> -> dataframeLike.df + else -> throw IllegalArgumentException("Unsupported type: ${dataframeLike::class}") + } + + /** + * Generates a random variation of the given string by appending a unique hash to it. + * + * @param str the original string to generate variation from + * @return a random variation of the original string + */ + public fun generateRandomVariationOfString(str: String): String { + val timeStamp = System.currentTimeMillis() + val random = Random.Default.nextInt() + val hash = "${timeStamp}_$random".hashCode() + + return "${str}_${String.format("%08X", hash)}" // get only 8 symbols from hash + } } From 19a9e4f8c4588afbb138539a77c0a1f482087d47 Mon Sep 17 00:00:00 2001 From: Nikita Ermolenko Date: Mon, 13 Nov 2023 13:34:40 +0200 Subject: [PATCH 2/4] Add sorting functionality to KotlinNotebookPluginUtils Introduced two public methods named `sortByColumns` in KotlinNotebookPluginUtils for sorting a dataframe-like object, or a specific dataframe by multiple columns. This added functionality will aid in sorting dataframes in Kotlin Notebook plugin. Implemented measures to handle null dataframes and conflicting column sizes. Fixes KTNB-426. 
Co-authored-by: Jolan Rensen --- .../dataframe/jupyter/JupyterHtmlRenderer.kt | 12 ++++- .../jupyter/KotlinNotebookPluginUtils.kt | 46 ++++++++++++++++++- .../jupyter/KotlinNotebookPluginUtils.kt | 46 ++++++++++++++++++- 3 files changed, 100 insertions(+), 4 deletions(-) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt index 3b3963c55..c8f3debd5 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt @@ -3,13 +3,21 @@ package org.jetbrains.kotlinx.dataframe.jupyter import com.beust.klaxon.json import org.jetbrains.kotlinx.dataframe.api.rows import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.io.* +import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData +import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration +import org.jetbrains.kotlinx.dataframe.io.encodeFrame +import org.jetbrains.kotlinx.dataframe.io.toHTML import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.convertToDataFrame import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.size -import org.jetbrains.kotlinx.jupyter.api.* import org.jetbrains.kotlinx.jupyter.api.HtmlData +import org.jetbrains.kotlinx.jupyter.api.JupyterClientType +import org.jetbrains.kotlinx.jupyter.api.KotlinKernelVersion +import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult +import org.jetbrains.kotlinx.jupyter.api.Notebook import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration +import org.jetbrains.kotlinx.jupyter.api.mimeResult +import org.jetbrains.kotlinx.jupyter.api.renderHtmlAsIFrameIfNeeded /** Starting from this version, dataframe integration will respond with additional data 
for rendering in Kotlin Notebooks plugin. */ private const val MIN_KERNEL_VERSION_FOR_NEW_TABLES_UI = "0.11.0.311" diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt index dd8ebc5b3..cf200ee74 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt @@ -34,7 +34,6 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import kotlin.random.Random - /** * A class with utility methods for Kotlin Notebook Plugin integration. * Kotlin Notebook Plugin is acts as a client of Kotlin Jupyter kernel and use this functionality @@ -62,6 +61,51 @@ public object KotlinNotebookPluginUtils { */ public fun getRowsSubsetForRendering(df: AnyFrame, startIdx: Int, endIdx: Int): DisableRowsLimitWrapper = DisableRowsLimitWrapper(df.filter { it.index() in startIdx until endIdx }) + + /** + * Sorts a dataframe-like object by multiple columns. + * + * @param dataFrameLike The dataframe-like object to sort. + * @param columnPaths The list of columns to sort by. Each element in the list represents a column path. + * @param desc The list of booleans indicating whether each column should be sorted in descending order. + * The size of this list should be the same as the size of the `columnPaths` list. + * + * @throws IllegalArgumentException if `dataFrameLike` is `null` or of an unsupported type, or if `columnPaths` and `desc` differ in size. + * + * @return The sorted dataframe. 
+ */ + public fun sortByColumns( + dataFrameLike: Any?, + columnPaths: List<List<String>>, + desc: List<Boolean> + ): AnyFrame = when (dataFrameLike) { + null -> throw IllegalArgumentException("Dataframe is null") + else -> sortByColumns(convertToDataFrame(dataFrameLike), columnPaths, desc) + } + + /** + * Sorts the given data frame by the specified columns. + * + * @param df The data frame to be sorted. + * @param columnPaths The paths of the columns to be sorted. Each path is represented as a list of strings. + * @param isDesc A list of booleans indicating whether each column should be sorted in descending order. + * The size of this list must be equal to the size of the columnPaths list. + * @return The sorted data frame. + */ + public fun sortByColumns(df: AnyFrame, columnPaths: List<List<String>>, isDesc: List<Boolean>): AnyFrame = + df.sortBy { + require(columnPaths.all { it.isNotEmpty() }) + require(columnPaths.size == isDesc.size) + + val sortKeys = columnPaths.map { path -> + ColumnPath(path) + } + + (sortKeys zip isDesc).map { (key, desc) -> + if (desc) key.desc() else key + }.toColumnSet() + } + /** * Converts [dataframeLike] to [AnyFrame]. * If [dataframeLike] is already [AnyFrame] then it is returned as is. diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt index dd8ebc5b3..cf200ee74 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt @@ -34,7 +34,6 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import kotlin.random.Random - /** * A class with utility methods for Kotlin Notebook Plugin integration. 
* Kotlin Notebook Plugin is acts as a client of Kotlin Jupyter kernel and use this functionality @@ -62,6 +61,51 @@ public object KotlinNotebookPluginUtils { */ public fun getRowsSubsetForRendering(df: AnyFrame, startIdx: Int, endIdx: Int): DisableRowsLimitWrapper = DisableRowsLimitWrapper(df.filter { it.index() in startIdx until endIdx }) + + /** + * Sorts a dataframe-like object by multiple columns. + * + * @param dataFrameLike The dataframe-like object to sort. + * @param columnPaths The list of columns to sort by. Each element in the list represents a column path. + * @param desc The list of booleans indicating whether each column should be sorted in descending order. + * The size of this list should be the same as the size of the `columnPaths` list. + * + * @throws IllegalArgumentException if `dataFrameLike` is `null` or of an unsupported type, or if `columnPaths` and `desc` differ in size. + * + * @return The sorted dataframe. + */ + public fun sortByColumns( + dataFrameLike: Any?, + columnPaths: List<List<String>>, + desc: List<Boolean> + ): AnyFrame = when (dataFrameLike) { + null -> throw IllegalArgumentException("Dataframe is null") + else -> sortByColumns(convertToDataFrame(dataFrameLike), columnPaths, desc) + } + + /** + * Sorts the given data frame by the specified columns. + * + * @param df The data frame to be sorted. + * @param columnPaths The paths of the columns to be sorted. Each path is represented as a list of strings. + * @param isDesc A list of booleans indicating whether each column should be sorted in descending order. + * The size of this list must be equal to the size of the columnPaths list. + * @return The sorted data frame. 
+ */ + public fun sortByColumns(df: AnyFrame, columnPaths: List<List<String>>, isDesc: List<Boolean>): AnyFrame = + df.sortBy { + require(columnPaths.all { it.isNotEmpty() }) + require(columnPaths.size == isDesc.size) + + val sortKeys = columnPaths.map { path -> + ColumnPath(path) + } + + (sortKeys zip isDesc).map { (key, desc) -> + if (desc) key.desc() else key + }.toColumnSet() + } + /** * Converts [dataframeLike] to [AnyFrame]. 
* If [dataframeLike] is already [AnyFrame] then it is returned as is. From 99cf8206290649badf9ce0da9a90c4aeedb7f6ce Mon Sep 17 00:00:00 2001 From: Nikita Ermolenko Date: Mon, 13 Nov 2023 13:36:33 +0200 Subject: [PATCH 3/4] Add tests for 'RenderingTests.kt' --- .../dataframe/jupyter/RenderingTests.kt | 145 ++++++++++++++++-- .../dataframe/jupyter/RenderingTests.kt | 145 ++++++++++++++++-- 2 files changed, 260 insertions(+), 30 deletions(-) diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt index 11be55cb9..1f711cef8 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt @@ -1,6 +1,11 @@ package org.jetbrains.kotlinx.dataframe.jupyter -import com.beust.klaxon.* +import com.beust.klaxon.JsonArray +import com.beust.klaxon.JsonObject +import com.beust.klaxon.Parser +import io.kotest.assertions.throwables.shouldNotThrow +import io.kotest.matchers.comparables.shouldBeGreaterThan +import io.kotest.matchers.comparables.shouldBeLessThan import io.kotest.matchers.shouldBe import io.kotest.matchers.string.shouldContain import io.kotest.matchers.string.shouldNotContain @@ -79,8 +84,7 @@ class RenderingTests : JupyterReplTestCase() { @Test fun `test kotlin notebook plugin utils rows subset`() { - @Language("kts") - val result = exec( + val json = executeScriptAndParseDataframeResult( """ data class Row(val id: Int) val df = (1..100).map { Row(it) }.toDataFrame() @@ -88,16 +92,29 @@ class RenderingTests : JupyterReplTestCase() { """.trimIndent() ) - val json = parseDataframeJson(result) - - json.int("nrow") shouldBe 30 - json.int("ncol") shouldBe 1 + assertDataFrameDimensions(json, 30, 1) val rows = json.array>("kotlin_dataframe")!! 
rows.getObj(0).int("id") shouldBe 21 rows.getObj(rows.lastIndex).int("id") shouldBe 50 } + /** + * Executes the given `script` and parses the resulting DataFrame as a `JsonObject`. + * + * @param script the script to be executed + * @return the parsed DataFrame result as a `JsonObject` + */ + private fun executeScriptAndParseDataframeResult(@Language("kts") script: String): JsonObject { + val result = exec(script) + return parseDataframeJson(result) + } + + private fun assertDataFrameDimensions(json: JsonObject, expectedRows: Int, expectedColumns: Int) { + json.int("nrow") shouldBe expectedRows + json.int("ncol") shouldBe expectedColumns + } + private fun parseDataframeJson(result: MimeTypedResult): JsonObject { val parser = Parser.default() return parser.parse(StringBuilder(result["application/kotlindataframe+json"]!!)) as JsonObject @@ -106,23 +123,121 @@ class RenderingTests : JupyterReplTestCase() { private fun JsonArray<*>.getObj(index: Int) = this.get(index) as JsonObject @Test - fun `test kotlin notebook plugin utils groupby`() { - @Language("kts") - val result = exec( + fun `test kotlin notebook plugin utils sort by one column asc`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("id")), listOf(false)) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + assertSortedById(json, false) + } + + @Suppress("UNCHECKED_CAST") + private fun assertSortedById(json: JsonObject, desc: Boolean) { + val rows = json["kotlin_dataframe"] as JsonArray + var previousId = if (desc) 101 else 0 + rows.forEach { row -> + val currentId = row.int("id")!! 
+ if (desc) currentId shouldBeLessThan previousId else currentId shouldBeGreaterThan previousId + previousId = currentId + } + } + + @Test + fun `test kotlin notebook plugin utils sort by one column desc`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("id")), listOf(true)) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + assertSortedById(json, true) + } + + @Suppress("UNCHECKED_CAST") + @Test + fun `test kotlin notebook plugin utils sort by multiple columns`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.getRowsSubsetForRendering( + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("category"), listOf("id")), listOf(true, false)), + 0, 100 + ) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + + val rows = json["kotlin_dataframe"] as JsonArray + assertSortedByCategory(rows) + assertSortedById(rows) + } + + private fun assertSortedByCategory(rows: JsonArray) { + rows.forEachIndexed { i, row -> + val currentCategory = row.string("category") + if (i < 50) currentCategory shouldBe "odd" + else currentCategory shouldBe "even" + } + } + + private fun assertSortedById(rows: JsonArray) { + var previousCategory = "odd" + var previousId = 0 + for (row in rows) { + val currentCategory = row.string("category")!! + val currentId = row.int("id")!! 
+ + if (previousCategory == "odd" && currentCategory == "even") { + previousId shouldBeGreaterThan currentId + } else if (previousCategory == currentCategory) { + previousId shouldBeLessThan currentId + } + + previousCategory = currentCategory + previousId = currentId + } + } + + @Test + fun `test kotlin dataframe conversion groupby`() { + val json = executeScriptAndParseDataframeResult( """ data class Row(val id: Int, val group: Int) val df = (1..100).map { Row(it, if (it <= 50) 1 else 2) }.toDataFrame() - KotlinNotebookPluginUtils.getRowsSubsetForRendering(df.groupBy("group"), 0, 10) + KotlinNotebookPluginUtils.convertToDataFrame(df.groupBy("group")) """.trimIndent() ) - val json = parseDataframeJson(result) - - json.int("nrow") shouldBe 2 - json.int("ncol") shouldBe 2 + assertDataFrameDimensions(json, 2, 2) val rows = json.array>("kotlin_dataframe")!! rows.getObj(0).array("group1")!!.size shouldBe 50 rows.getObj(1).array("group1")!!.size shouldBe 50 } + + // Regression KTNB-424 + @Test + fun `test kotlin dataframe conversion ReducedGroupBy`() { + shouldNotThrow { + val json = executeScriptAndParseDataframeResult( + """ + data class Row(val id: Int, val group: Int) + val df = (1..100).map { Row(it, if (it <= 50) 1 else 2) }.toDataFrame() + KotlinNotebookPluginUtils.convertToDataFrame(df.groupBy("group").first()) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 2, 2) + } + } } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt index 11be55cb9..1f711cef8 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt @@ -1,6 +1,11 @@ package org.jetbrains.kotlinx.dataframe.jupyter -import com.beust.klaxon.* +import com.beust.klaxon.JsonArray +import com.beust.klaxon.JsonObject +import com.beust.klaxon.Parser +import 
io.kotest.assertions.throwables.shouldNotThrow +import io.kotest.matchers.comparables.shouldBeGreaterThan +import io.kotest.matchers.comparables.shouldBeLessThan import io.kotest.matchers.shouldBe import io.kotest.matchers.string.shouldContain import io.kotest.matchers.string.shouldNotContain @@ -79,8 +84,7 @@ class RenderingTests : JupyterReplTestCase() { @Test fun `test kotlin notebook plugin utils rows subset`() { - @Language("kts") - val result = exec( + val json = executeScriptAndParseDataframeResult( """ data class Row(val id: Int) val df = (1..100).map { Row(it) }.toDataFrame() @@ -88,16 +92,29 @@ class RenderingTests : JupyterReplTestCase() { """.trimIndent() ) - val json = parseDataframeJson(result) - - json.int("nrow") shouldBe 30 - json.int("ncol") shouldBe 1 + assertDataFrameDimensions(json, 30, 1) val rows = json.array>("kotlin_dataframe")!! rows.getObj(0).int("id") shouldBe 21 rows.getObj(rows.lastIndex).int("id") shouldBe 50 } + /** + * Executes the given `script` and parses the resulting DataFrame as a `JsonObject`. 
+ * + * @param script the script to be executed + * @return the parsed DataFrame result as a `JsonObject` + */ + private fun executeScriptAndParseDataframeResult(@Language("kts") script: String): JsonObject { + val result = exec(script) + return parseDataframeJson(result) + } + + private fun assertDataFrameDimensions(json: JsonObject, expectedRows: Int, expectedColumns: Int) { + json.int("nrow") shouldBe expectedRows + json.int("ncol") shouldBe expectedColumns + } + private fun parseDataframeJson(result: MimeTypedResult): JsonObject { val parser = Parser.default() return parser.parse(StringBuilder(result["application/kotlindataframe+json"]!!)) as JsonObject @@ -106,23 +123,121 @@ class RenderingTests : JupyterReplTestCase() { private fun JsonArray<*>.getObj(index: Int) = this.get(index) as JsonObject @Test - fun `test kotlin notebook plugin utils groupby`() { - @Language("kts") - val result = exec( + fun `test kotlin notebook plugin utils sort by one column asc`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("id")), listOf(false)) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + assertSortedById(json, false) + } + + @Suppress("UNCHECKED_CAST") + private fun assertSortedById(json: JsonObject, desc: Boolean) { + val rows = json["kotlin_dataframe"] as JsonArray + var previousId = if (desc) 101 else 0 + rows.forEach { row -> + val currentId = row.int("id")!! 
+ if (desc) currentId shouldBeLessThan previousId else currentId shouldBeGreaterThan previousId + previousId = currentId + } + } + + @Test + fun `test kotlin notebook plugin utils sort by one column desc`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("id")), listOf(true)) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + assertSortedById(json, true) + } + + @Suppress("UNCHECKED_CAST") + @Test + fun `test kotlin notebook plugin utils sort by multiple columns`() { + val json = executeScriptAndParseDataframeResult( + """ + data class CustomRow(val id: Int, val category: String) + val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame() + KotlinNotebookPluginUtils.getRowsSubsetForRendering( + KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("category"), listOf("id")), listOf(true, false)), + 0, 100 + ) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 100, 2) + + val rows = json["kotlin_dataframe"] as JsonArray + assertSortedByCategory(rows) + assertSortedById(rows) + } + + private fun assertSortedByCategory(rows: JsonArray) { + rows.forEachIndexed { i, row -> + val currentCategory = row.string("category") + if (i < 50) currentCategory shouldBe "odd" + else currentCategory shouldBe "even" + } + } + + private fun assertSortedById(rows: JsonArray) { + var previousCategory = "odd" + var previousId = 0 + for (row in rows) { + val currentCategory = row.string("category")!! + val currentId = row.int("id")!! 
+ + if (previousCategory == "odd" && currentCategory == "even") { + previousId shouldBeGreaterThan currentId + } else if (previousCategory == currentCategory) { + previousId shouldBeLessThan currentId + } + + previousCategory = currentCategory + previousId = currentId + } + } + + @Test + fun `test kotlin dataframe conversion groupby`() { + val json = executeScriptAndParseDataframeResult( """ data class Row(val id: Int, val group: Int) val df = (1..100).map { Row(it, if (it <= 50) 1 else 2) }.toDataFrame() - KotlinNotebookPluginUtils.getRowsSubsetForRendering(df.groupBy("group"), 0, 10) + KotlinNotebookPluginUtils.convertToDataFrame(df.groupBy("group")) """.trimIndent() ) - val json = parseDataframeJson(result) - - json.int("nrow") shouldBe 2 - json.int("ncol") shouldBe 2 + assertDataFrameDimensions(json, 2, 2) val rows = json.array>("kotlin_dataframe")!! rows.getObj(0).array("group1")!!.size shouldBe 50 rows.getObj(1).array("group1")!!.size shouldBe 50 } + + // Regression KTNB-424 + @Test + fun `test kotlin dataframe conversion ReducedGroupBy`() { + shouldNotThrow { + val json = executeScriptAndParseDataframeResult( + """ + data class Row(val id: Int, val group: Int) + val df = (1..100).map { Row(it, if (it <= 50) 1 else 2) }.toDataFrame() + KotlinNotebookPluginUtils.convertToDataFrame(df.groupBy("group").first()) + """.trimIndent() + ) + + assertDataFrameDimensions(json, 2, 2) + } + } } From d15ecda24c913356cc1a64706cafd16dbce0196b Mon Sep 17 00:00:00 2001 From: Nikita Ermolenko Date: Mon, 13 Nov 2023 16:45:15 +0200 Subject: [PATCH 4/4] Refactor column name generation in KotlinNotebookPluginUtils Updated the 'generateRandomVariationOfString' function to 'generateRandomVariationOfColumnName' in 'KotlinNotebookPluginUtils.kt' to ensure uniqueness among used names. ColumnNameGenerator was used to create unique column names based on preferred name and used column names. 
--- .../jupyter/KotlinNotebookPluginUtils.kt | 48 ++++++++++++------- .../jupyter/KotlinNotebookPluginUtils.kt | 48 ++++++++++++------- 2 files changed, 60 insertions(+), 36 deletions(-) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt index cf200ee74..8119e989a 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt @@ -32,14 +32,14 @@ import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.api.values import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.toColumnSet -import kotlin.random.Random +import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator /** * A class with utility methods for Kotlin Notebook Plugin integration. - * Kotlin Notebook Plugin is acts as a client of Kotlin Jupyter kernel and use this functionality + * Kotlin Notebook Plugin acts as a client of Kotlin Jupyter kernel and uses this functionality * for dynamic pagination when rendering dataframes. 
- * The plugin sends Kotlin following code to the kernel to evaluate - * DISPLAY(KotlinNotebooksPluginUtils.getRowsSubsetForRendering(Out[x], 0, 20), "") + * The plugin sends the following code to the kernel to evaluate: + * DISPLAY(KotlinNotebooksPluginUtils.getRowsSubsetForRendering(Out[...], 0, 20), "") */ public object KotlinNotebookPluginUtils { /** @@ -120,11 +120,18 @@ public object KotlinNotebookPluginUtils { is ReducedPivotGroupBy<*> -> dataframeLike.values() is SplitWithTransform<*, *, *> -> dataframeLike.into() is Split<*, *> -> dataframeLike.toDataFrame() - is Merge<*, *, *> -> dataframeLike.into(generateRandomVariationOfString("merged")) + is Merge<*, *, *> -> dataframeLike.into( + generateRandomVariationOfColumnName( + "merged", + dataframeLike.df.columnNames() + ) + ) + is Gather<*, *, *, *> -> dataframeLike.into( - generateRandomVariationOfString("key"), - generateRandomVariationOfString("value") + generateRandomVariationOfColumnName("key", dataframeLike.df.columnNames()), + generateRandomVariationOfColumnName("value", dataframeLike.df.columnNames()) ) + is Update<*, *> -> dataframeLike.df is Convert<*, *> -> dataframeLike.df is FormattedFrame<*> -> dataframeLike.df @@ -136,23 +143,28 @@ public object KotlinNotebookPluginUtils { is MoveClause<*, *> -> dataframeLike.df is RenameClause<*, *> -> dataframeLike.df is ReplaceClause<*, *> -> dataframeLike.df - is GroupClause<*, *> -> dataframeLike.into(generateRandomVariationOfString("untitled")) + is GroupClause<*, *> -> dataframeLike.into( + generateRandomVariationOfColumnName( + "untitled", + dataframeLike.df.columnNames() + ) + ) + is InsertClause<*> -> dataframeLike.at(0) is FormatClause<*, *> -> dataframeLike.df else -> throw IllegalArgumentException("Unsupported type: ${dataframeLike::class}") } /** - * Generates a random variation of the given string by appending a unique hash to it. + * Generates a random variation of a column name that is unique among the provided used names. 
* - * @param str the original string to generate variation from - * @return a random variation of the original string + * @param preferredName The preferred name for the column. + * @param usedNames The list of already used column names. + * @return A unique random variation of the preferred name. */ - public fun generateRandomVariationOfString(str: String): String { - val timeStamp = System.currentTimeMillis() - val random = Random.Default.nextInt() - val hash = "${timeStamp}_$random".hashCode() - - return "${str}_${String.format("%08X", hash)}" // get only 8 symbols from hash - } + public fun generateRandomVariationOfColumnName( + preferredName: String, + usedNames: List = emptyList() + ): String = + ColumnNameGenerator(usedNames).addUnique(preferredName) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt index cf200ee74..8119e989a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt @@ -32,14 +32,14 @@ import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.api.values import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.toColumnSet -import kotlin.random.Random +import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator /** * A class with utility methods for Kotlin Notebook Plugin integration. - * Kotlin Notebook Plugin is acts as a client of Kotlin Jupyter kernel and use this functionality + * Kotlin Notebook Plugin acts as a client of Kotlin Jupyter kernel and uses this functionality * for dynamic pagination when rendering dataframes. 
- * The plugin sends Kotlin following code to the kernel to evaluate - * DISPLAY(KotlinNotebooksPluginUtils.getRowsSubsetForRendering(Out[x], 0, 20), "") + * The plugin sends the following code to the kernel to evaluate: + * DISPLAY(KotlinNotebooksPluginUtils.getRowsSubsetForRendering(Out[...], 0, 20), "") */ public object KotlinNotebookPluginUtils { /** @@ -120,11 +120,18 @@ public object KotlinNotebookPluginUtils { is ReducedPivotGroupBy<*> -> dataframeLike.values() is SplitWithTransform<*, *, *> -> dataframeLike.into() is Split<*, *> -> dataframeLike.toDataFrame() - is Merge<*, *, *> -> dataframeLike.into(generateRandomVariationOfString("merged")) + is Merge<*, *, *> -> dataframeLike.into( + generateRandomVariationOfColumnName( + "merged", + dataframeLike.df.columnNames() + ) + ) + is Gather<*, *, *, *> -> dataframeLike.into( - generateRandomVariationOfString("key"), - generateRandomVariationOfString("value") + generateRandomVariationOfColumnName("key", dataframeLike.df.columnNames()), + generateRandomVariationOfColumnName("value", dataframeLike.df.columnNames()) ) + is Update<*, *> -> dataframeLike.df is Convert<*, *> -> dataframeLike.df is FormattedFrame<*> -> dataframeLike.df @@ -136,23 +143,28 @@ public object KotlinNotebookPluginUtils { is MoveClause<*, *> -> dataframeLike.df is RenameClause<*, *> -> dataframeLike.df is ReplaceClause<*, *> -> dataframeLike.df - is GroupClause<*, *> -> dataframeLike.into(generateRandomVariationOfString("untitled")) + is GroupClause<*, *> -> dataframeLike.into( + generateRandomVariationOfColumnName( + "untitled", + dataframeLike.df.columnNames() + ) + ) + is InsertClause<*> -> dataframeLike.at(0) is FormatClause<*, *> -> dataframeLike.df else -> throw IllegalArgumentException("Unsupported type: ${dataframeLike::class}") } /** - * Generates a random variation of the given string by appending a unique hash to it. + * Generates a random variation of a column name that is unique among the provided used names. 
* - * @param str the original string to generate variation from - * @return a random variation of the original string + * @param preferredName The preferred name for the column. + * @param usedNames The list of already used column names. + * @return A unique random variation of the preferred name. */ - public fun generateRandomVariationOfString(str: String): String { - val timeStamp = System.currentTimeMillis() - val random = Random.Default.nextInt() - val hash = "${timeStamp}_$random".hashCode() - - return "${str}_${String.format("%08X", hash)}" // get only 8 symbols from hash - } + public fun generateRandomVariationOfColumnName( + preferredName: String, + usedNames: List = emptyList() + ): String = + ColumnNameGenerator(usedNames).addUnique(preferredName) }