diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt index 58f155a99..4451d8e83 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt @@ -33,6 +33,13 @@ public inline fun AnyFrame.cast(verify: Boolean = true): DataFrame AnyFrame.castTo( + @Suppress("UNUSED_PARAMETER") df: DataFrame, + verify: Boolean = true +): DataFrame { + return cast(verify = verify) +} + public fun AnyRow.cast(): DataRow = this as DataRow public inline fun AnyRow.cast(verify: Boolean = true): DataRow = df().cast(verify)[0] diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt index ffd880c48..e53503342 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt @@ -14,6 +14,7 @@ import org.jetbrains.kotlinx.dataframe.api.at import org.jetbrains.kotlinx.dataframe.api.by import org.jetbrains.kotlinx.dataframe.api.byName import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.castTo import org.jetbrains.kotlinx.dataframe.api.colsOf import org.jetbrains.kotlinx.dataframe.api.column import org.jetbrains.kotlinx.dataframe.api.columnGroup @@ -21,6 +22,7 @@ import org.jetbrains.kotlinx.dataframe.api.columnOf import org.jetbrains.kotlinx.dataframe.api.concat import org.jetbrains.kotlinx.dataframe.api.convert import org.jetbrains.kotlinx.dataframe.api.convertTo +import org.jetbrains.kotlinx.dataframe.api.count import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.default import org.jetbrains.kotlinx.dataframe.api.dropNulls @@ -100,6 +102,7 @@ import org.jetbrains.kotlinx.dataframe.explainer.PluginCallbackProxy import 
org.jetbrains.kotlinx.dataframe.explainer.TransformDataFrameExpressions import org.jetbrains.kotlinx.dataframe.impl.api.mapNotNullValues import org.jetbrains.kotlinx.dataframe.indices +import org.jetbrains.kotlinx.dataframe.io.readJson import org.jetbrains.kotlinx.dataframe.io.readJsonStr import org.jetbrains.kotlinx.dataframe.io.renderToString import org.jetbrains.kotlinx.dataframe.testResource @@ -1421,4 +1424,33 @@ class Modify : TestBase() { | 1 kotlin /kotlin 180 |""".trimMargin() } + + @DataSchema + interface ImplicitSchema { + val perf: Double + } + + @Test + @Ignore + @Suppress("UNUSED_VARIABLE") + fun castToGenerateSchema() { + // SampleStart + val sample = DataFrame.readJson("sample.json") + // SampleEnd + } + + @Test + @Suppress("KotlinConstantConditions") + fun castTo() { + val sample = dataFrameOf("perf")(10.0, 20.0, 12.0).cast() + val files = listOf() // not intended to run + // SampleStart + for (file in files) { + // df here is expected to have the same structure as sample + val df = DataFrame.readJson(file).castTo(sample) + val count = df.count { perf > 10.0 } + println("$file: $count") + } + // SampleEnd + } } diff --git a/docs/StardustDocs/images/implicitlyGeneratedSchema.png b/docs/StardustDocs/images/implicitlyGeneratedSchema.png new file mode 100644 index 000000000..cf496d3dd Binary files /dev/null and b/docs/StardustDocs/images/implicitlyGeneratedSchema.png differ diff --git a/docs/StardustDocs/topics/cast.md b/docs/StardustDocs/topics/cast.md index 7150dc30b..f467d855e 100644 --- a/docs/StardustDocs/topics/cast.md +++ b/docs/StardustDocs/topics/cast.md @@ -26,3 +26,37 @@ df.cast() ``` To convert [`DataFrame`](DataFrame.md) columns to match given schema, use [`convertTo`](convertTo.md) operation. + +**Reusing implicitly generated schema** + +```kotlin +castTo(df: DataFrame) +``` + +In notebooks, dataframe types are implicitly generated. 
+ +![Implicitly generated schema](implicitlyGeneratedSchema.png) + +This type can be referred to, but its name will change whenever you re-execute cells. +Here is how you can do it in a more robust way: + + + +```kotlin +val sample = DataFrame.readJson("sample.json") +``` + + + + + +```kotlin +for (file in files) { + // df here is expected to have the same structure as sample + val df = DataFrame.readJson(file).castTo(sample) + val count = df.count { perf > 10.0 } + println("$file: $count") +} +``` + +