Merge pull request #747 from Kotlin/castTo

Add castTo to help working with implicitly generated schemas in notebooks and plugin
Kotlin · Jun 21, 2024 · 35d56dd · 35d56dd
2 parents 2e3acfe + 9ca36e8
commit 35d56dd
Show file tree

Hide file tree

Showing 4 changed files with 73 additions and 0 deletions.
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt
@@ -33,6 +33,13 @@ public inline fun <reified T> AnyFrame.cast(verify: Boolean = true): DataFrame<T
 ).cast()
 else cast()
 
+public inline fun <reified T> AnyFrame.castTo( // casts the receiver to the schema type T carried by `df`
+    @Suppress("UNUSED_PARAMETER") df: DataFrame<T>, // never read in the body; it only pins T at the call site
+    verify: Boolean = true // forwarded unchanged to cast<T>(verify = ...)
+): DataFrame<T> {
+    return cast<T>(verify = verify)
+}
+
 public fun <T> AnyRow.cast(): DataRow<T> = this as DataRow<T>
 
 public inline fun <reified T> AnyRow.cast(verify: Boolean = true): DataRow<T> = df().cast<T>(verify)[0]

diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt
@@ -14,13 +14,15 @@ import org.jetbrains.kotlinx.dataframe.api.at
 import org.jetbrains.kotlinx.dataframe.api.by
 import org.jetbrains.kotlinx.dataframe.api.byName
 import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.castTo
 import org.jetbrains.kotlinx.dataframe.api.colsOf
 import org.jetbrains.kotlinx.dataframe.api.column
 import org.jetbrains.kotlinx.dataframe.api.columnGroup
 import org.jetbrains.kotlinx.dataframe.api.columnOf
 import org.jetbrains.kotlinx.dataframe.api.concat
 import org.jetbrains.kotlinx.dataframe.api.convert
 import org.jetbrains.kotlinx.dataframe.api.convertTo
+import org.jetbrains.kotlinx.dataframe.api.count
 import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
 import org.jetbrains.kotlinx.dataframe.api.default
 import org.jetbrains.kotlinx.dataframe.api.dropNulls
@@ -100,6 +102,7 @@ import org.jetbrains.kotlinx.dataframe.explainer.PluginCallbackProxy
 import org.jetbrains.kotlinx.dataframe.explainer.TransformDataFrameExpressions
 import org.jetbrains.kotlinx.dataframe.impl.api.mapNotNullValues
 import org.jetbrains.kotlinx.dataframe.indices
+import org.jetbrains.kotlinx.dataframe.io.readJson
 import org.jetbrains.kotlinx.dataframe.io.readJsonStr
 import org.jetbrains.kotlinx.dataframe.io.renderToString
 import org.jetbrains.kotlinx.dataframe.testResource
@@ -1421,4 +1424,33 @@ class Modify : TestBase() {
                | 1    kotlin    /kotlin           180
                |""".trimMargin()
     }
+
+    @DataSchema
+    interface ImplicitSchema { // schema given to `sample` in the castTo test via cast<ImplicitSchema>()
+        val perf: Double // the single column; read as `perf` in the castTo sample's count predicate
+    }
+
+    @Test
+    @Ignore // the body is extracted into cast.md as the castToGenerateSchema sample
+    @Suppress("UNUSED_VARIABLE") // `sample` is declared for the docs snippet and never used here
+    fun castToGenerateSchema() {
+        // SampleStart
+        val sample = DataFrame.readJson("sample.json")
+        // SampleEnd
+    }
+
+    @Test
+    @Suppress("KotlinConstantConditions") // presumably triggered by iterating the always-empty `files` list
+    fun castTo() {
+        val sample = dataFrameOf("perf")(10.0, 20.0, 12.0).cast<ImplicitSchema>() // typed so `perf` resolves in the sample
+        val files = listOf<String>() // empty on purpose: the loop body is not intended to run
+        // SampleStart
+        for (file in files) {
+            // df here is expected to have the same structure as sample
+            val df = DataFrame.readJson(file).castTo(sample)
+            val count = df.count { perf > 10.0 }
+            println("$file: $count")
+        }
+        // SampleEnd
+    }
 }
diff --git a/docs/StardustDocs/images/implicitlyGeneratedSchema.png b/docs/StardustDocs/images/implicitlyGeneratedSchema.png
diff --git a/docs/StardustDocs/topics/cast.md b/docs/StardustDocs/topics/cast.md
@@ -26,3 +26,37 @@ df.cast<Person>()
 ```
 
 To convert [`DataFrame`](DataFrame.md) columns to match given schema, use [`convertTo`](convertTo.md) operation.
+
+**Reusing implicitly generated schema**
+
+```kotlin
+castTo<T>(df: DataFrame<T>)
+```
+
+In notebooks, dataframe types are implicitly generated.
+
+![Implicitly generated schema](implicitlyGeneratedSchema.png)
+
+This type can be referred to, but its name will change whenever you re-execute cells.
+Here is how you can do it in a more robust way:
+
+<!---FUN castToGenerateSchema-->
+
+```kotlin
+val sample = DataFrame.readJson("sample.json")
+```
+
+<!---END-->
+
+<!---FUN castTo-->
+
+```kotlin
+for (file in files) {
+    // df here is expected to have the same structure as sample
+    val df = DataFrame.readJson(file).castTo(sample)
+    val count = df.count { perf > 10.0 }
+    println("$file: $count")
+}
+```
+
+<!---END-->