Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KTNB-693 Send the full dataframe schema as metadata #706

Merged
merged 3 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import org.jetbrains.kotlinx.dataframe.ColumnsContainer
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.api.indices
import org.jetbrains.kotlinx.dataframe.api.isList
import org.jetbrains.kotlinx.dataframe.api.name
import org.jetbrains.kotlinx.dataframe.api.rows
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.api.take
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
Expand All @@ -22,12 +22,16 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAM
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPE
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPES
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
import org.jetbrains.kotlinx.dataframe.io.arrayColumnName
import org.jetbrains.kotlinx.dataframe.io.valueColumnName
import org.jetbrains.kotlinx.dataframe.name
import org.jetbrains.kotlinx.dataframe.ncol
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import org.jetbrains.kotlinx.dataframe.typeClass
import java.awt.image.BufferedImage
import java.io.IOException
Expand All @@ -53,9 +57,13 @@ internal object SerializationKeys {
const val VERSION = "\$version"
const val COLUMNS = "columns"
const val KOTLIN_DATAFRAME = "kotlin_dataframe"
const val TYPE = "type"
const val TYPES = "types"
}

internal const val SERIALIZATION_VERSION = "2.0.0"
// Version string emitted under the "$version" key of the serialized data frame.
// See docs/serialization_format.md for a description of
// serialization versions and format.
internal const val SERIALIZATION_VERSION = "2.1.0"

internal fun KlaxonJson.encodeRowWithMetadata(
frame: ColumnsContainer<*>,
Expand All @@ -65,24 +73,39 @@ internal fun KlaxonJson.encodeRowWithMetadata(
): JsonObject? {
val values = frame.columns().map { col ->
when (col) {
is ColumnGroup<*> -> obj(
DATA to encodeRowWithMetadata(col, index, rowLimit, imageEncodingOptions),
METADATA to obj(KIND to ColumnKind.Group.toString())
)

is ColumnGroup<*> -> {
val schema = col.schema()
obj(
DATA to encodeRowWithMetadata(col, index, rowLimit, imageEncodingOptions),
METADATA to obj(
KIND to ColumnKind.Group.toString(),
COLUMNS to schema.columns.keys,
TYPES to schema.columns.values.map { columnSchema ->
createJsonTypeDescriptor(columnSchema)
}
),
)
}
is FrameColumn<*> -> {
val data = if (rowLimit == null) encodeFrameWithMetadata(col[index], null, imageEncodingOptions)
else encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, imageEncodingOptions)
val data = if (rowLimit == null) {
encodeFrameWithMetadata(col[index], null, imageEncodingOptions)
} else {
encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, imageEncodingOptions)
}
val schema = col.schema.value
obj(
DATA to data,
METADATA to obj(
KIND to ColumnKind.Frame.toString(),
COLUMNS to schema.columns.keys,
TYPES to schema.columns.values.map { columnSchema ->
createJsonTypeDescriptor(columnSchema)
},
NCOL to col[index].ncol,
NROW to col[index].nrow
)
)
}

else -> encodeValue(col, index, imageEncodingOptions)
}.let { col.name to it }
}
Expand Down Expand Up @@ -148,6 +171,16 @@ private fun encodeBufferedImageAsBase64(
}
}

/**
 * Builds the JSON type descriptor for a single column schema.
 *
 * The descriptor always stores the column kind under [KIND]. For value
 * columns it additionally stores the string form of the column type
 * under [TYPE]; other kinds get no type entry.
 */
private fun createJsonTypeDescriptor(columnSchema: ColumnSchema): JsonObject {
    val kind = columnSchema.kind
    val descriptor = mutableMapOf<String, Any?>(KIND to kind.toString())
    // Only value columns carry a concrete element type worth reporting.
    if (kind == ColumnKind.Value) {
        descriptor[TYPE] = columnSchema.type.toString()
    }
    return JsonObject(descriptor)
}

internal fun KlaxonJson.encodeFrameWithMetadata(
frame: AnyFrame,
rowLimit: Int? = null,
Expand Down Expand Up @@ -257,6 +290,9 @@ internal fun KlaxonJson.encodeDataFrameWithMetadata(
VERSION to SERIALIZATION_VERSION,
METADATA to obj(
COLUMNS to frame.columnNames(),
TYPES to frame.schema().columns.values.map { colSchema ->
createJsonTypeDescriptor(colSchema)
},
NROW to frame.rowsCount(),
NCOL to frame.columnsCount()
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ package org.jetbrains.kotlinx.dataframe.jupyter

import com.beust.klaxon.json
import org.jetbrains.kotlinx.dataframe.api.take
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
Expand Down Expand Up @@ -69,10 +73,10 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
!ideBuildNumber.supportsDynamicNestedTables() -> {
json {
obj(
"nrow" to df.size.nrow,
"ncol" to df.size.ncol,
"columns" to df.columnNames(),
"kotlin_dataframe" to encodeFrame(df.take(limit)),
NROW to df.size.nrow,
NCOL to df.size.ncol,
COLUMNS to df.columnNames(),
KOTLIN_DATAFRAME to encodeFrame(df.take(limit)),
)
}.toJsonString()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import org.jetbrains.kotlinx.dataframe.ColumnsContainer
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.api.indices
import org.jetbrains.kotlinx.dataframe.api.isList
import org.jetbrains.kotlinx.dataframe.api.name
import org.jetbrains.kotlinx.dataframe.api.rows
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.api.take
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
Expand All @@ -22,12 +22,16 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAM
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPE
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPES
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
import org.jetbrains.kotlinx.dataframe.io.arrayColumnName
import org.jetbrains.kotlinx.dataframe.io.valueColumnName
import org.jetbrains.kotlinx.dataframe.name
import org.jetbrains.kotlinx.dataframe.ncol
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import org.jetbrains.kotlinx.dataframe.typeClass
import java.awt.image.BufferedImage
import java.io.IOException
Expand All @@ -53,9 +57,13 @@ internal object SerializationKeys {
const val VERSION = "\$version"
const val COLUMNS = "columns"
const val KOTLIN_DATAFRAME = "kotlin_dataframe"
const val TYPE = "type"
const val TYPES = "types"
}

internal const val SERIALIZATION_VERSION = "2.0.0"
// Version string emitted under the "$version" key of the serialized data frame.
// See docs/serialization_format.md for a description of
// serialization versions and format.
internal const val SERIALIZATION_VERSION = "2.1.0"

internal fun KlaxonJson.encodeRowWithMetadata(
frame: ColumnsContainer<*>,
Expand All @@ -65,24 +73,39 @@ internal fun KlaxonJson.encodeRowWithMetadata(
): JsonObject? {
val values = frame.columns().map { col ->
when (col) {
is ColumnGroup<*> -> obj(
DATA to encodeRowWithMetadata(col, index, rowLimit, imageEncodingOptions),
METADATA to obj(KIND to ColumnKind.Group.toString())
)

is ColumnGroup<*> -> {
val schema = col.schema()
obj(
DATA to encodeRowWithMetadata(col, index, rowLimit, imageEncodingOptions),
METADATA to obj(
KIND to ColumnKind.Group.toString(),
COLUMNS to schema.columns.keys,
TYPES to schema.columns.values.map { columnSchema ->
createJsonTypeDescriptor(columnSchema)
}
),
)
}
is FrameColumn<*> -> {
val data = if (rowLimit == null) encodeFrameWithMetadata(col[index], null, imageEncodingOptions)
else encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, imageEncodingOptions)
val data = if (rowLimit == null) {
encodeFrameWithMetadata(col[index], null, imageEncodingOptions)
} else {
encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, imageEncodingOptions)
}
val schema = col.schema.value
obj(
DATA to data,
METADATA to obj(
KIND to ColumnKind.Frame.toString(),
COLUMNS to schema.columns.keys,
TYPES to schema.columns.values.map { columnSchema ->
createJsonTypeDescriptor(columnSchema)
},
NCOL to col[index].ncol,
NROW to col[index].nrow
)
)
}

else -> encodeValue(col, index, imageEncodingOptions)
}.let { col.name to it }
}
Expand Down Expand Up @@ -148,6 +171,16 @@ private fun encodeBufferedImageAsBase64(
}
}

/**
 * Builds the JSON type descriptor for a single column schema.
 *
 * The descriptor always stores the column kind under [KIND]. For value
 * columns it additionally stores the string form of the column type
 * under [TYPE]; other kinds get no type entry.
 */
private fun createJsonTypeDescriptor(columnSchema: ColumnSchema): JsonObject {
    val kind = columnSchema.kind
    val descriptor = mutableMapOf<String, Any?>(KIND to kind.toString())
    // Only value columns carry a concrete element type worth reporting.
    if (kind == ColumnKind.Value) {
        descriptor[TYPE] = columnSchema.type.toString()
    }
    return JsonObject(descriptor)
}

internal fun KlaxonJson.encodeFrameWithMetadata(
frame: AnyFrame,
rowLimit: Int? = null,
Expand Down Expand Up @@ -257,6 +290,9 @@ internal fun KlaxonJson.encodeDataFrameWithMetadata(
VERSION to SERIALIZATION_VERSION,
METADATA to obj(
COLUMNS to frame.columnNames(),
TYPES to frame.schema().columns.values.map { colSchema ->
createJsonTypeDescriptor(colSchema)
},
NROW to frame.rowsCount(),
NCOL to frame.columnsCount()
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ package org.jetbrains.kotlinx.dataframe.jupyter

import com.beust.klaxon.json
import org.jetbrains.kotlinx.dataframe.api.take
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
Expand Down Expand Up @@ -65,14 +69,15 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
if (notebook.kernelVersion >= KotlinKernelVersion.from(MIN_KERNEL_VERSION_FOR_NEW_TABLES_UI)!!) {
val ideBuildNumber = KotlinNotebookPluginUtils.getKotlinNotebookIDEBuildNumber()

// TODO: Do we need to handle the improved metadata here as well?
val jsonEncodedDf = when {
!ideBuildNumber.supportsDynamicNestedTables() -> {
json {
obj(
"nrow" to df.size.nrow,
"ncol" to df.size.ncol,
"columns" to df.columnNames(),
"kotlin_dataframe" to encodeFrame(df.take(limit)),
NROW to df.size.nrow,
NCOL to df.size.ncol,
COLUMNS to df.columnNames(),
KOTLIN_DATAFRAME to encodeFrame(df.take(limit)),
)
}.toJsonString()
}
Expand Down
Loading