From e9355027860e691d51753b01063edef8cb6ed865 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 28 Dec 2022 16:23:34 +0100 Subject: [PATCH 01/50] added jcp support --- build.gradle.kts | 9 +++++++ core/build.gradle.kts | 59 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index ced84ec8a..c92ea0d4a 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -3,6 +3,15 @@ import org.jetbrains.kotlinx.publisher.apache2 import org.jetbrains.kotlinx.publisher.developer import org.jetbrains.kotlinx.publisher.githubRepo +buildscript { + repositories { + mavenCentral() + } + dependencies { + classpath("com.igormaznitsa:jcp:7.0.5") + } +} + @Suppress("DSL_SCOPE_VIOLATION", "UnstableApiUsage") plugins { kotlin("jvm") version libs.versions.kotlin diff --git a/core/build.gradle.kts b/core/build.gradle.kts index dd5ab8281..ddc63bd87 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -1,3 +1,8 @@ +import com.igormaznitsa.jcp.gradle.JcpTask +import kotlinx.kover.api.KoverTaskExtension +import org.gradle.jvm.tasks.Jar +import org.jetbrains.dataframe.gradle.DataSchemaVisibility + @Suppress("DSL_SCOPE_VIOLATION", "UnstableApiUsage") plugins { kotlin("jvm") @@ -10,6 +15,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") + id("com.igormaznitsa.jcp") } group = "org.jetbrains.kotlinx" @@ -127,16 +133,61 @@ tasks.withType { targetCompatibility = JavaVersion.VERSION_1_8.toString() } -tasks.withType { +// Setup preprocessing with JCP for main sources + +val kotlinMainSources = kotlin.sourceSets.main.get().kotlin.sourceDirectories + +val preprocessMain by tasks.creating(JcpTask::class) { + sources.set(kotlinMainSources.filter { "ksp" !in it.path }) + clearTarget.set(true) + fileExtensions.set(listOf("kt")) + vars.set( + mapOf() + ) + outputs.upToDateWhen { target.get().exists() } +} + +tasks.compileKotlin { 
dependsOn(tasks.lintKotlin) kotlinOptions { - freeCompilerArgs = freeCompilerArgs + listOf("-Xinline-classes", "-Xopt-in=kotlin.RequiresOptIn") + freeCompilerArgs += listOf("-Xinline-classes", "-Xopt-in=kotlin.RequiresOptIn") + } + + dependsOn(preprocessMain) + outputs.upToDateWhen { + preprocessMain.outcomingFiles.files.isEmpty() } + + doFirst { + kotlin { + sourceSets { + main { + kotlin.setSrcDirs( + kotlinMainSources.filter { "ksp" in it.path } + preprocessMain.target.get() + ) + } + } + } + } + + doLast { + kotlin { + sourceSets { + main { + kotlin.setSrcDirs(kotlinMainSources) + } + } + } + } +} + +tasks.withType { + duplicatesStrategy = DuplicatesStrategy.EXCLUDE } tasks.test { maxHeapSize = "2048m" - extensions.configure(kotlinx.kover.api.KoverTaskExtension::class) { + extensions.configure(KoverTaskExtension::class) { excludes.set( listOf( "org.jetbrains.kotlinx.dataframe.jupyter.*", @@ -174,7 +225,7 @@ artifacts { dataframes { schema { sourceSet = "test" - visibility = org.jetbrains.dataframe.gradle.DataSchemaVisibility.IMPLICIT_PUBLIC + visibility = DataSchemaVisibility.IMPLICIT_PUBLIC data = "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv" name = "org.jetbrains.kotlinx.dataframe.samples.api.Repository" } From fcbbf8e356ba97045f59e2817031a95c2ef4a86a Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 28 Dec 2022 16:24:19 +0100 Subject: [PATCH 02/50] added sample/test for reusable comments in add.kt --- .../jetbrains/kotlinx/dataframe/api/add.kt | 27 +++++-------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt index 513840c08..4208a8bd2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -17,7 +17,6 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor 
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException -import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException import org.jetbrains.kotlinx.dataframe.impl.api.insertImpl import org.jetbrains.kotlinx.dataframe.impl.columns.resolveSingle import kotlin.reflect.KProperty @@ -26,53 +25,39 @@ import kotlin.reflect.KProperty * `add` operation adds new columns to DataFrame. */ +//#local ADD = " * Original [DataFrame] is not modified.\n *\n * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names\n * @throws [org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException] if columns in expected result have different sizes\n * @return new [DataFrame] with added columns" + // region Add existing columns /** * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list. * - * Original [DataFrame] is not modified. - * +/*$ADD$*/ * @param columns columns to add - * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names - * @throws [UnequalColumnSizesException] if columns in expected result have different sizes - * @return new [DataFrame] with added columns */ public fun DataFrame.add(vararg columns: AnyBaseCol): DataFrame = addAll(columns.asIterable()) /** * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list. * - * Original [DataFrame] is not modified. 
- * +/*$ADD$*/ * @param columns columns to add - * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names - * @throws [UnequalColumnSizesException] if columns in expected result have different sizes - * @return new [DataFrame] with added columns */ public fun DataFrame.addAll(columns: Iterable): DataFrame = dataFrameOf(columns() + columns).cast() /** * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. * - * Original [DataFrame] is not modified. - * +/*$ADD$*/ * @param dataFrames dataFrames to get columns from - * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names - * @throws [UnequalColumnSizesException] if columns in expected result have different sizes - * @return new [DataFrame] with added columns */ public fun DataFrame.add(vararg dataFrames: AnyFrame): DataFrame = addAll(dataFrames.asIterable()) /** * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. * - * Original [DataFrame] is not modified. - * +/*$ADD$*/ * @param dataFrames dataFrames to get columns from - * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names - * @throws [UnequalColumnSizesException] if columns in expected result have different sizes - * @return new [DataFrame] with added columns */ @JvmName("addAllFrames") public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = addAll(dataFrames.flatMap { it.columns() }) From 1c1fefec3e604380d194965c16361b6d1e00de39 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 28 Dec 2022 16:48:03 +0100 Subject: [PATCH 03/50] jcp now only gets parsed for building the jar! 
--- core/build.gradle.kts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index ddc63bd87..5a60dd970 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -153,10 +153,14 @@ tasks.compileKotlin { freeCompilerArgs += listOf("-Xinline-classes", "-Xopt-in=kotlin.RequiresOptIn") } - dependsOn(preprocessMain) outputs.upToDateWhen { preprocessMain.outcomingFiles.files.isEmpty() } +} + +tasks.withType { + duplicatesStrategy = DuplicatesStrategy.EXCLUDE + dependsOn(preprocessMain) doFirst { kotlin { @@ -181,10 +185,6 @@ tasks.compileKotlin { } } -tasks.withType { - duplicatesStrategy = DuplicatesStrategy.EXCLUDE -} - tasks.test { maxHeapSize = "2048m" extensions.configure(KoverTaskExtension::class) { From 12af079491b58307b375d6cda70c170e75e69b81 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 29 Dec 2022 13:35:33 +0100 Subject: [PATCH 04/50] updated examples --- .../jetbrains/kotlinx/dataframe/api/add.kt | 31 +++++++++++++++---- .../jetbrains/kotlinx/dataframe/api/add.txt | 5 +++ 2 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.txt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt index 4208a8bd2..6fa7e9d1e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -17,6 +17,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException +import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException import org.jetbrains.kotlinx.dataframe.impl.api.insertImpl import 
org.jetbrains.kotlinx.dataframe.impl.columns.resolveSingle import kotlin.reflect.KProperty @@ -25,7 +26,19 @@ import kotlin.reflect.KProperty * `add` operation adds new columns to DataFrame. */ -//#local ADD = " * Original [DataFrame] is not modified.\n *\n * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names\n * @throws [org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException] if columns in expected result have different sizes\n * @return new [DataFrame] with added columns" +/** either +//#local ADD1234 = evalFile("add.txt") + */ + +/** or +//#local ADD0 = " * Original [DataFrame] is not modified." +//#local ADD1 = " *" +//#local ADD2 = " * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names" +//#local ADD3 = " * @throws [UnequalColumnSizesException] if columns in expected result have different sizes" +//#local ADD4 = " * @return new [DataFrame] with added columns" +// +//#local ADD = ADD0 + "\n" + ADD1 + "\n" + ADD2 + "\n" + ADD3 + "\n" + ADD4 + */ // region Add existing columns @@ -43,7 +56,8 @@ public fun DataFrame.add(vararg columns: AnyBaseCol): DataFrame = addA /*$ADD$*/ * @param columns columns to add */ -public fun DataFrame.addAll(columns: Iterable): DataFrame = dataFrameOf(columns() + columns).cast() +public fun DataFrame.addAll(columns: Iterable): DataFrame = + dataFrameOf(columns() + columns).cast() /** * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. 
@@ -60,7 +74,8 @@ public fun DataFrame.add(vararg dataFrames: AnyFrame): DataFrame = add * @param dataFrames dataFrames to get columns from */ @JvmName("addAllFrames") -public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = addAll(dataFrames.flatMap { it.columns() }) +public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = + addAll(dataFrames.flatMap { it.columns() }) // endregion @@ -151,11 +166,15 @@ public class AddDsl(@PublishedApi internal val df: DataFrame) : ColumnsCon return df.mapToColumn("", Infer.Nulls, expression) } - public inline infix fun String.from(noinline expression: RowExpression): Boolean = add(this, Infer.Nulls, expression) + public inline infix fun String.from(noinline expression: RowExpression): Boolean = + add(this, Infer.Nulls, expression) // TODO: use path instead of name - public inline infix fun ColumnAccessor.from(noinline expression: RowExpression): Boolean = name().from(expression) - public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = add(name, Infer.Nulls, expression) + public inline infix fun ColumnAccessor.from(noinline expression: RowExpression): Boolean = + name().from(expression) + + public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = + add(name, Infer.Nulls, expression) public infix fun String.from(column: Column): Boolean = add(column.rename(this)) public inline infix fun ColumnAccessor.from(column: ColumnReference): Boolean = name() from column diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.txt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.txt new file mode 100644 index 000000000..791d99b89 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.txt @@ -0,0 +1,5 @@ + * Original [DataFrame] is not modified. 
+ * + * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names + * @throws [org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException] if columns in expected result have different sizes + * @return new [DataFrame] with added columns From e4442fedada772dd970756f47b21c4c8055d4611 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 18 Jan 2023 16:06:19 +0100 Subject: [PATCH 05/50] now using kdocIncludeGradlePlugin --- core/build.gradle.kts | 101 +++++++++--------- .../jetbrains/kotlinx/dataframe/api/add.kt | 53 ++++----- settings.gradle.kts | 15 +++ 3 files changed, 81 insertions(+), 88 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 5a60dd970..fab1207f1 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -1,7 +1,5 @@ -import com.igormaznitsa.jcp.gradle.JcpTask -import kotlinx.kover.api.KoverTaskExtension +import nl.jolanrensen.kdocInclude.ProcessKdocIncludeTask import org.gradle.jvm.tasks.Jar -import org.jetbrains.dataframe.gradle.DataSchemaVisibility @Suppress("DSL_SCOPE_VIOLATION", "UnstableApiUsage") plugins { @@ -15,7 +13,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.igormaznitsa.jcp") + id("com.github.jolanrensen.kdocIncludeGradlePlugin") version "main-SNAPSHOT" } group = "org.jetbrains.kotlinx" @@ -58,6 +56,48 @@ kotlin.sourceSets { } } +// Backup the kotlin source files location +val kotlinMainSources = kotlin.sourceSets.main.get().kotlin.sourceDirectories + +val processKdocIncludeMain by tasks.creating(ProcessKdocIncludeTask::class) { + sources.set( + kotlinMainSources + .filterNot { "build/generated" in it.path } // Exclude generated sources + ) + debug.set(true) +} + +// Modify all Jar tasks such that before running the Kotlin sources are set to +// the target of processKdocIncludeMain and they are returned back to normal afterwards. 
+tasks.withType { + dependsOn(processKdocIncludeMain) + outputs.upToDateWhen { false } + + doFirst { + kotlin { + sourceSets { + main { + kotlin.setSrcDirs( + processKdocIncludeMain.targets + + kotlinMainSources.filter { "build/generated" in it.path } // Include generated sources (which were excluded above) + ) + } + } + } + } + + doLast { + kotlin { + sourceSets { + main { + kotlin.setSrcDirs(kotlinMainSources) + } + } + } + } +} + + tasks.lintKotlinMain { exclude("**/*keywords*/**") exclude { @@ -133,61 +173,16 @@ tasks.withType { targetCompatibility = JavaVersion.VERSION_1_8.toString() } -// Setup preprocessing with JCP for main sources - -val kotlinMainSources = kotlin.sourceSets.main.get().kotlin.sourceDirectories - -val preprocessMain by tasks.creating(JcpTask::class) { - sources.set(kotlinMainSources.filter { "ksp" !in it.path }) - clearTarget.set(true) - fileExtensions.set(listOf("kt")) - vars.set( - mapOf() - ) - outputs.upToDateWhen { target.get().exists() } -} - -tasks.compileKotlin { +tasks.withType { dependsOn(tasks.lintKotlin) kotlinOptions { - freeCompilerArgs += listOf("-Xinline-classes", "-Xopt-in=kotlin.RequiresOptIn") - } - - outputs.upToDateWhen { - preprocessMain.outcomingFiles.files.isEmpty() - } -} - -tasks.withType { - duplicatesStrategy = DuplicatesStrategy.EXCLUDE - dependsOn(preprocessMain) - - doFirst { - kotlin { - sourceSets { - main { - kotlin.setSrcDirs( - kotlinMainSources.filter { "ksp" in it.path } + preprocessMain.target.get() - ) - } - } - } - } - - doLast { - kotlin { - sourceSets { - main { - kotlin.setSrcDirs(kotlinMainSources) - } - } - } + freeCompilerArgs = freeCompilerArgs + listOf("-Xinline-classes", "-Xopt-in=kotlin.RequiresOptIn") } } tasks.test { maxHeapSize = "2048m" - extensions.configure(KoverTaskExtension::class) { + extensions.configure(kotlinx.kover.api.KoverTaskExtension::class) { excludes.set( listOf( "org.jetbrains.kotlinx.dataframe.jupyter.*", @@ -225,7 +220,7 @@ artifacts { dataframes { schema { sourceSet = 
"test" - visibility = DataSchemaVisibility.IMPLICIT_PUBLIC + visibility = org.jetbrains.dataframe.gradle.DataSchemaVisibility.IMPLICIT_PUBLIC data = "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv" name = "org.jetbrains.kotlinx.dataframe.samples.api.Repository" } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt index 6fa7e9d1e..4ab4a1fa3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -26,56 +26,43 @@ import kotlin.reflect.KProperty * `add` operation adds new columns to DataFrame. */ -/** either -//#local ADD1234 = evalFile("add.txt") - */ - -/** or -//#local ADD0 = " * Original [DataFrame] is not modified." -//#local ADD1 = " *" -//#local ADD2 = " * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names" -//#local ADD3 = " * @throws [UnequalColumnSizesException] if columns in expected result have different sizes" -//#local ADD4 = " * @return new [DataFrame] with added columns" -// -//#local ADD = ADD0 + "\n" + ADD1 + "\n" + ADD2 + "\n" + ADD3 + "\n" + ADD4 +/** + * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list. + * + * Original [DataFrame] is not modified. + * + * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names + * @throws [UnequalColumnSizesException] if columns in expected result have different sizes + * @return new [DataFrame] with added columns */ +private interface Add // region Add existing columns /** - * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list. 
- * -/*$ADD$*/ + * @include [Add] * @param columns columns to add */ public fun DataFrame.add(vararg columns: AnyBaseCol): DataFrame = addAll(columns.asIterable()) /** - * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list. - * -/*$ADD$*/ + * @include [Add] * @param columns columns to add */ -public fun DataFrame.addAll(columns: Iterable): DataFrame = - dataFrameOf(columns() + columns).cast() +public fun DataFrame.addAll(columns: Iterable): DataFrame = dataFrameOf(columns() + columns).cast() /** - * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. - * -/*$ADD$*/ + * @include [Add] * @param dataFrames dataFrames to get columns from */ public fun DataFrame.add(vararg dataFrames: AnyFrame): DataFrame = addAll(dataFrames.asIterable()) /** - * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. - * -/*$ADD$*/ + * @include [Add] * @param dataFrames dataFrames to get columns from */ @JvmName("addAllFrames") -public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = - addAll(dataFrames.flatMap { it.columns() }) +public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = addAll(dataFrames.flatMap { it.columns() }) // endregion @@ -166,15 +153,11 @@ public class AddDsl(@PublishedApi internal val df: DataFrame) : ColumnsCon return df.mapToColumn("", Infer.Nulls, expression) } - public inline infix fun String.from(noinline expression: RowExpression): Boolean = - add(this, Infer.Nulls, expression) + public inline infix fun String.from(noinline expression: RowExpression): Boolean = add(this, Infer.Nulls, expression) // TODO: use path instead of name - public inline infix fun ColumnAccessor.from(noinline expression: RowExpression): Boolean = - name().from(expression) - - public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = - add(name, Infer.Nulls, 
expression) + public inline infix fun ColumnAccessor.from(noinline expression: RowExpression): Boolean = name().from(expression) + public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = add(name, Infer.Nulls, expression) public infix fun String.from(column: Column): Boolean = add(column.rename(this)) public inline infix fun ColumnAccessor.from(column: ColumnReference): Boolean = name() from column diff --git a/settings.gradle.kts b/settings.gradle.kts index 24f580668..e8686a670 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -28,6 +28,21 @@ pluginManagement { repositories { mavenLocal() gradlePluginPortal() + maven(url = "https://jitpack.io") + } + + resolutionStrategy { + eachPlugin { + requested.apply { + val jitpackPlugins = listOf( + "com.github.jolanrensen.kdocIncludeGradlePlugin", + ) + if ("$id" in jitpackPlugins) { + val (_, _, user, name) = "$id".split(".", limit = 4) + useModule("com.github.$user:$name:$version") + } + } + } } } include("dataframe-excel") From da0be85dd1180bec81a65a5cda64d2d8faca16eb Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 18 Jan 2023 16:08:41 +0100 Subject: [PATCH 06/50] now using kdocIncludeGradlePlugin --- .../main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.txt | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.txt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.txt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.txt deleted file mode 100644 index 791d99b89..000000000 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.txt +++ /dev/null @@ -1,5 +0,0 @@ - * Original [DataFrame] is not modified. 
- * - * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names - * @throws [org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException] if columns in expected result have different sizes - * @return new [DataFrame] with added columns From d7dd6bc2fec47ba676615259b3bb14be98dad23a Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 23 Jan 2023 14:58:05 +0100 Subject: [PATCH 07/50] upgraded to docProcessor --- core/build.gradle.kts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index fab1207f1..5e94b4e51 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -1,4 +1,5 @@ -import nl.jolanrensen.kdocInclude.ProcessKdocIncludeTask +import nl.jolanrensen.docProcessor.* +import nl.jolanrensen.docProcessor.defaultProcessors.* import org.gradle.jvm.tasks.Jar @Suppress("DSL_SCOPE_VIOLATION", "UnstableApiUsage") @@ -13,7 +14,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.kdocIncludeGradlePlugin") version "main-SNAPSHOT" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.1" } group = "org.jetbrains.kotlinx" @@ -59,12 +60,11 @@ kotlin.sourceSets { // Backup the kotlin source files location val kotlinMainSources = kotlin.sourceSets.main.get().kotlin.sourceDirectories -val processKdocIncludeMain by tasks.creating(ProcessKdocIncludeTask::class) { - sources.set( - kotlinMainSources - .filterNot { "build/generated" in it.path } // Exclude generated sources - ) - debug.set(true) +val processKdocIncludeMain by creatingProcessDocTask( + sources = kotlinMainSources.filterNot { "build/generated" in it.path } // Exclude generated sources +) { + processors = listOf(INCLUDE_DOC_PROCESSOR) + debug = true } // Modify all Jar tasks such that before running the Kotlin sources are set to From c017319ee6e2350100d9da68013eedd2cf50c80f Mon Sep 
17 00:00:00 2001 From: Jolan Rensen Date: Mon, 23 Jan 2023 20:57:17 +0100 Subject: [PATCH 08/50] removed add again, fixed some small thing --- build.gradle.kts | 9 ----- .../jetbrains/kotlinx/dataframe/api/add.kt | 35 +++++++++++++------ settings.gradle.kts | 2 +- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index c92ea0d4a..ced84ec8a 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -3,15 +3,6 @@ import org.jetbrains.kotlinx.publisher.apache2 import org.jetbrains.kotlinx.publisher.developer import org.jetbrains.kotlinx.publisher.githubRepo -buildscript { - repositories { - mavenCentral() - } - dependencies { - classpath("com.igormaznitsa:jcp:7.0.5") - } -} - @Suppress("DSL_SCOPE_VIOLATION", "UnstableApiUsage") plugins { kotlin("jvm") version libs.versions.kotlin diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt index 4ab4a1fa3..513840c08 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -26,40 +26,53 @@ import kotlin.reflect.KProperty * `add` operation adds new columns to DataFrame. */ +// region Add existing columns + /** * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list. * * Original [DataFrame] is not modified. 
* + * @param columns columns to add * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names * @throws [UnequalColumnSizesException] if columns in expected result have different sizes * @return new [DataFrame] with added columns */ -private interface Add - -// region Add existing columns - -/** - * @include [Add] - * @param columns columns to add - */ public fun DataFrame.add(vararg columns: AnyBaseCol): DataFrame = addAll(columns.asIterable()) /** - * @include [Add] + * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list. + * + * Original [DataFrame] is not modified. + * * @param columns columns to add + * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names + * @throws [UnequalColumnSizesException] if columns in expected result have different sizes + * @return new [DataFrame] with added columns */ public fun DataFrame.addAll(columns: Iterable): DataFrame = dataFrameOf(columns() + columns).cast() /** - * @include [Add] + * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. + * + * Original [DataFrame] is not modified. + * * @param dataFrames dataFrames to get columns from + * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names + * @throws [UnequalColumnSizesException] if columns in expected result have different sizes + * @return new [DataFrame] with added columns */ public fun DataFrame.add(vararg dataFrames: AnyFrame): DataFrame = addAll(dataFrames.asIterable()) /** - * @include [Add] + * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. + * + * Original [DataFrame] is not modified. 
+ * * @param dataFrames dataFrames to get columns from + * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names + * @throws [UnequalColumnSizesException] if columns in expected result have different sizes + * @return new [DataFrame] with added columns */ @JvmName("addAllFrames") public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = addAll(dataFrames.flatMap { it.columns() }) diff --git a/settings.gradle.kts b/settings.gradle.kts index e8686a670..c908e5c26 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -35,7 +35,7 @@ pluginManagement { eachPlugin { requested.apply { val jitpackPlugins = listOf( - "com.github.jolanrensen.kdocIncludeGradlePlugin", + "com.github.jolanrensen.docProcessorGradlePlugin", ) if ("$id" in jitpackPlugins) { val (_, _, user, name) = "$id".split(".", limit = 4) From 63ccc2914137605d748f6a0c2d0b41c11b78d3e9 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 24 Jan 2023 18:24:05 +0100 Subject: [PATCH 09/50] started on example doc structure using doc processor --- core/build.gradle.kts | 8 +- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 163 ++++++++++++------ .../jetbrains/kotlinx/dataframe/api/update.kt | 18 ++ .../dataframe/documentation/AccessApi.kt | 70 ++++++++ .../documentation/DocumentationUrls.kt | 33 ++++ .../documentation/samples/ApiLevels.kt | 132 ++++++++++++++ 6 files changed, 366 insertions(+), 58 deletions(-) create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/samples/ApiLevels.kt diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 5e94b4e51..64ade3457 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,8 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") 
id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.1" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.3" +// id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } group = "org.jetbrains.kotlinx" @@ -63,7 +64,10 @@ val kotlinMainSources = kotlin.sourceSets.main.get().kotlin.sourceDirectories val processKdocIncludeMain by creatingProcessDocTask( sources = kotlinMainSources.filterNot { "build/generated" in it.path } // Exclude generated sources ) { - processors = listOf(INCLUDE_DOC_PROCESSOR) + processors = listOf( + INCLUDE_DOC_PROCESSOR, + SAMPLE_DOC_PROCESSOR, + ) debug = true } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index 14daf12e8..f9b87caaa 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -9,6 +9,7 @@ import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.documentation.* import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns import org.jetbrains.kotlinx.dataframe.kind @@ -17,20 +18,70 @@ import kotlin.reflect.KProperty // region fillNulls -public fun DataFrame.fillNulls(cols: ColumnsSelector): Update = - update(cols).where { it == null } - -public fun DataFrame.fillNulls(vararg cols: String): Update = - fillNulls { cols.toColumns() } - -public fun DataFrame.fillNulls(vararg cols: KProperty): Update = - fillNulls { cols.toColumns() } - -public fun DataFrame.fillNulls(vararg cols: ColumnReference): Update = - fillNulls { cols.toColumns() } - -public fun DataFrame.fillNulls(cols: Iterable>): Update = - 
fillNulls { cols.toColumnSet() } +/** + * Replace `null` values with given value or expression. + * Specific case of [UpdateOperation]. + * TODO samples + * + * @include [DocumentationUrls.Fill.FillNulls] + */ +internal interface FillNullsOperation + +/** + * @include [FillNullsOperation] + * + * @param columns The [ColumnsSelector] used to select columns to update. + */ +public fun DataFrame.fillNulls(columns: ColumnsSelector): Update = + update(columns).where { it == null } + +/** + * @include [FillNullsOperation] + * + * @receiver [DataFrame] + * @param T T + * @param C C + * @param columns Cols + * @return [Update] + */ +public fun DataFrame.fillNulls(vararg columns: String): Update = + fillNulls { columns.toColumns() } + +/** + * @include [FillNullsOperation] + * + * @receiver [DataFrame] + * @param T T + * @param C C + * @param columns Cols + * @return [Update] + */ +public fun DataFrame.fillNulls(vararg columns: KProperty): Update = + fillNulls { columns.toColumns() } + +/** + * @include [FillNullsOperation] + * + * @receiver [DataFrame] + * @param T T + * @param C C + * @param columns Cols + * @return [Update] + */ +public fun DataFrame.fillNulls(vararg columns: ColumnReference): Update = + fillNulls { columns.toColumns() } + +/** + * @include [FillNullsOperation] + * + * @receiver [DataFrame] + * @param T T + * @param C C + * @param columns Cols + * @return [Update] + */ +public fun DataFrame.fillNulls(columns: Iterable>): Update = + fillNulls { columns.toColumnSet() } // endregion @@ -56,64 +107,64 @@ internal inline val Float?.isNA: Boolean get() = this == null || this.isNaN() // region fillNaNs -public fun DataFrame.fillNaNs(cols: ColumnsSelector): Update = - update(cols).where { it.isNaN } +public fun DataFrame.fillNaNs(columns: ColumnsSelector): Update = + update(columns).where { it.isNaN } -public fun DataFrame.fillNaNs(vararg cols: String): Update = - fillNaNs { cols.toColumns() } +public fun DataFrame.fillNaNs(vararg columns: String): Update = + 
fillNaNs { columns.toColumns() } -public fun DataFrame.fillNaNs(vararg cols: KProperty): Update = - fillNaNs { cols.toColumns() } +public fun DataFrame.fillNaNs(vararg columns: KProperty): Update = + fillNaNs { columns.toColumns() } -public fun DataFrame.fillNaNs(vararg cols: ColumnReference): Update = - fillNaNs { cols.toColumns() } +public fun DataFrame.fillNaNs(vararg columns: ColumnReference): Update = + fillNaNs { columns.toColumns() } -public fun DataFrame.fillNaNs(cols: Iterable>): Update = - fillNaNs { cols.toColumnSet() } +public fun DataFrame.fillNaNs(columns: Iterable>): Update = + fillNaNs { columns.toColumnSet() } // endregion // region fillNA -public fun DataFrame.fillNA(cols: ColumnsSelector): Update = - update(cols).where { it.isNA } +public fun DataFrame.fillNA(columns: ColumnsSelector): Update = + update(columns).where { it.isNA } -public fun DataFrame.fillNA(vararg cols: String): Update = - fillNA { cols.toColumns() } +public fun DataFrame.fillNA(vararg columns: String): Update = + fillNA { columns.toColumns() } -public fun DataFrame.fillNA(vararg cols: KProperty): Update = - fillNA { cols.toColumns() } +public fun DataFrame.fillNA(vararg columns: KProperty): Update = + fillNA { columns.toColumns() } -public fun DataFrame.fillNA(vararg cols: ColumnReference): Update = - fillNA { cols.toColumns() } +public fun DataFrame.fillNA(vararg columns: ColumnReference): Update = + fillNA { columns.toColumns() } -public fun DataFrame.fillNA(cols: Iterable>): Update = - fillNA { cols.toColumnSet() } +public fun DataFrame.fillNA(columns: Iterable>): Update = + fillNA { columns.toColumnSet() } // endregion // region dropNulls public fun DataFrame.dropNulls(whereAllNull: Boolean = false, selector: ColumnsSelector): DataFrame { - val cols = this[selector] - return if (whereAllNull) drop { row -> cols.all { col -> col[row] == null } } - else drop { row -> cols.any { col -> col[row] == null } } + val columns = this[selector] + return if (whereAllNull) drop { row -> 
columns.all { col -> col[row] == null } } + else drop { row -> columns.any { col -> col[row] == null } } } public fun DataFrame.dropNulls(whereAllNull: Boolean = false): DataFrame = dropNulls(whereAllNull) { all() } -public fun DataFrame.dropNulls(vararg cols: KProperty<*>, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumns() } +public fun DataFrame.dropNulls(vararg columns: KProperty<*>, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } -public fun DataFrame.dropNulls(vararg cols: String, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumns() } +public fun DataFrame.dropNulls(vararg columns: String, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } -public fun DataFrame.dropNulls(vararg cols: Column, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumns() } +public fun DataFrame.dropNulls(vararg columns: Column, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } -public fun DataFrame.dropNulls(cols: Iterable, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumnSet() } +public fun DataFrame.dropNulls(columns: Iterable, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumnSet() } public fun DataColumn.dropNulls(): DataColumn = (if (!hasNulls()) this else filter { it != null }) as DataColumn @@ -123,23 +174,23 @@ public fun DataColumn.dropNulls(): DataColumn = // region dropNA public fun DataFrame.dropNA(whereAllNA: Boolean = false, selector: ColumnsSelector): DataFrame { - val cols = this[selector] + val columns = this[selector] - return if (whereAllNA) drop { cols.all { this[it].isNA } } - else drop { cols.any { this[it].isNA } } + return if (whereAllNA) drop { columns.all { this[it].isNA } } + else drop { columns.any { this[it].isNA } } } -public fun 
DataFrame.dropNA(vararg cols: KProperty<*>, whereAllNA: Boolean = false): DataFrame = - dropNA(whereAllNA) { cols.toColumns() } +public fun DataFrame.dropNA(vararg columns: KProperty<*>, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumns() } -public fun DataFrame.dropNA(vararg cols: String, whereAllNA: Boolean = false): DataFrame = - dropNA(whereAllNA) { cols.toColumns() } +public fun DataFrame.dropNA(vararg columns: String, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumns() } -public fun DataFrame.dropNA(vararg cols: Column, whereAllNA: Boolean = false): DataFrame = - dropNA(whereAllNA) { cols.toColumns() } +public fun DataFrame.dropNA(vararg columns: Column, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumns() } -public fun DataFrame.dropNA(cols: Iterable, whereAllNA: Boolean = false): DataFrame = - dropNA(whereAllNA) { cols.toColumnSet() } +public fun DataFrame.dropNA(columns: Iterable, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumnSet() } public fun DataFrame.dropNA(whereAllNA: Boolean = false): DataFrame = dropNA(whereAllNA) { all() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index b0e6d9a1e..e89a13d5c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -19,6 +19,24 @@ import org.jetbrains.kotlinx.dataframe.impl.headPlusArray import org.jetbrains.kotlinx.dataframe.index import kotlin.reflect.KProperty +/** + * Returns [DataFrame] with changed values in some cells. + * Column types can not be changed. + * + * TODO + */ +internal interface UpdateOperation { + + /** @param columns The [ColumnsSelector] used to select the columns of this [DataFrame] to update. 
*/ + val columnsSelector: Nothing + + /** @param columns An [Iterable] of [ColumnReference]s belonging to this [DataFrame] to update. */ + val columnReferenceIterable: Nothing + + /** @param columns The column names belonging to this [DataFrame] to update. */ + val stringColumns: Nothing +} + public fun DataFrame.update(columns: ColumnsSelector): Update = Update(this, null, columns) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt new file mode 100644 index 000000000..b7b541fe4 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt @@ -0,0 +1,70 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.documentation.samples.ApiLevels as ApiLevelsSample + +/** + * By nature data frames are dynamic objects, column labels depend on the input source and also new columns could be added + * or deleted while wrangling. Kotlin, in contrast, is a statically typed language and all types are defined and verified + * ahead of execution. That's why creating a flexible, handy, and, at the same time, safe API to a data frame is tricky. + * + * In `Kotlin DataFrame` we provide four different ways to access columns, and, while they are essentially different, they + * look pretty similar in the data wrangling DSL. These include: + * - [StringApi] + * - [ColumnAccessorsApi] + * - [KPropertiesApi] + * - [ExtensionPropertiesApi] + * + * @include [DocumentationUrls.AccessApis] + */ +internal interface AccessApi { + + /** + * String API. + * In this [AccessApi], columns are accessed by a [String] representing their name. + * Type-checking is done at runtime, name-checking too. + * + * @include [DocumentationUrls.AccessApis.StringApi] + * + * For example: + * @sample [ApiLevelsSample.strings] + */ + interface StringApi + + /** + * Column Accessors API. 
+ * In this [AccessApi], every column has a descriptor; + * a variable that represents its name and type. + * + * @include [DocumentationUrls.AccessApis.ColumnAccessorsApi] + * + * For example: + * @sample [ApiLevelsSample.accessors3] + */ + interface ColumnAccessorsApi + + /** + * KProperties API. + * In this [AccessApi], columns accessed by the + * [`KProperty`](https://kotlinlang.org/docs/reflection.html#property-references) + * of some class. + * The name and type of column should match the name and type of property, respectively. + * + * @include [DocumentationUrls.AccessApis.KPropertiesApi] + * + * For example: + * @sample [ApiLevelsSample.kproperties1] + */ + interface KPropertiesApi + + /** + * Extension Properties API. + * In this [AccessApi], extension access properties are generated based on the dataframe schema. + * The name and type of properties are inferred from the name and type of the corresponding columns. + * + * @include [DocumentationUrls.AccessApis.ExtensionPropertiesApi] + * + * For example: + * @sample [ApiLevelsSample.extensionProperties1] + */ + interface ExtensionPropertiesApi +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt new file mode 100644 index 000000000..84a17e89b --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -0,0 +1,33 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +internal interface DocumentationUrls { + + /** [See `fill` documentation.](https://kotlin.github.io/dataframe/fill.html) */ + interface Fill { + + /** [See `fillNulls` documentation](https://kotlin.github.io/dataframe/fill.html#fillnulls) */ + interface FillNulls + + /** [See `fillNaNs` documentation](https://kotlin.github.io/dataframe/fill.html#fillnans) */ + interface FillNaNs + + /** [See `fillNA` 
documentation](https://kotlin.github.io/dataframe/fill.html#fillna) */ + interface FillNA + } + + /** [See Access APIs documentation.](https://kotlin.github.io/dataframe/apilevels.html) */ + interface AccessApis { + + /** [See String API documentation.](https://kotlin.github.io/dataframe/stringapi.html) */ + interface StringApi + + /** [See Column Accessors API documentation.](https://kotlin.github.io/dataframe/columnaccessorsapi.html) */ + interface ColumnAccessorsApi + + /** [See KProperties API documentation.](https://kotlin.github.io/dataframe/kpropertiesapi.html) */ + interface KPropertiesApi + + /** [See Extension Properties API documentation.](https://kotlin.github.io/dataframe/extensionpropertiesapi.html) */ + interface ExtensionPropertiesApi + } +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/samples/ApiLevels.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/samples/ApiLevels.kt new file mode 100644 index 000000000..699763eb1 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/samples/ApiLevels.kt @@ -0,0 +1,132 @@ +@file:Suppress("RemoveExplicitTypeArguments") + +package org.jetbrains.kotlinx.dataframe.documentation.samples + +import org.jetbrains.kotlinx.dataframe.ColumnsContainer +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.ColumnName +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.dropNulls +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.toListOf +import org.jetbrains.kotlinx.dataframe.io.read + +internal interface ApiLevels { + + fun strings() { + DataFrame.read("titanic.csv") + .add("lastName") { 
"name"().split(",").last() } + .dropNulls("age") + .filter { + "survived"() && + "home"().endsWith("NY") && + "age"() in 10..20 + } + } + + fun accessors1() { + val survived by column() // accessor for Boolean column with name 'survived' + val home by column() + val age by column() + val name by column() + val lastName by column() + } + + fun accessors2() { + val survived by column() + val home by column() + val age by column() + val name by column() + val lastName by column() + + DataFrame.read("titanic.csv") + .add(lastName) { name().split(",").last() } + .dropNulls { age } + .filter { survived() && home().endsWith("NY") && age()!! in 10..20 } + } + + fun accessors3() { + val survived by column() + val home by column() + val age by column() + val name by column() + val lastName by column() + + DataFrame.read("titanic.csv") + .add(lastName) { name().split(",").last() } + .dropNulls { age } + .filter { survived() && home().endsWith("NY") && age()!! in 10..20 } + } + + fun kproperties1() { + data class Passenger( + val survived: Boolean, + val home: String, + val age: Int, + val lastName: String + ) + + val passengers = DataFrame.read("titanic.csv") + .add(Passenger::lastName) { "name"().split(",").last() } + .dropNulls(Passenger::age) + .filter { + it[Passenger::survived] && + it[Passenger::home].endsWith("NY") && + it[Passenger::age] in 10..20 + } + .toListOf() + } + + fun kproperties2() { + data class Passenger( + @ColumnName("survived") val isAlive: Boolean, + @ColumnName("home") val city: String, + val name: String + ) + + val passengers = DataFrame.read("titanic.csv") + .filter { it.get(Passenger::city).endsWith("NY") } + .toListOf() + } + + // @DataSchema + interface TitanicPassenger { + val survived: Boolean + val home: String + val age: Int + val name: String + } + + fun extensionProperties2() { + val df = DataFrame.read("titanic.csv").cast() + + df.add("lastName") { name.split(",").last() } + .dropNulls { age } + .filter { survived && home.endsWith("NY") 
&& age in 10..20 } + } + + fun extensionProperties1() { + val df = DataFrame.read("titanic.csv") + } +} + +internal val ColumnsContainer.age: DataColumn @JvmName("TitanicPassenger_age") get() = this["age"] as DataColumn +internal val DataRow.age: Int @JvmName("TitanicPassenger_age") get() = this["age"] as Int +internal val ColumnsContainer.age: DataColumn @JvmName("NullableTitanicPassenger_age") get() = this["age"] as DataColumn +internal val DataRow.age: Int? @JvmName("NullableTitanicPassenger_age") get() = this["age"] as Int? +internal val ColumnsContainer.home: DataColumn @JvmName("TitanicPassenger_home") get() = this["home"] as DataColumn +internal val DataRow.home: String @JvmName("TitanicPassenger_home") get() = this["home"] as String +internal val ColumnsContainer.home: DataColumn @JvmName("NullableTitanicPassenger_home") get() = this["home"] as DataColumn +internal val DataRow.home: String? @JvmName("NullableTitanicPassenger_home") get() = this["home"] as String? +internal val ColumnsContainer.name: DataColumn @JvmName("TitanicPassenger_name") get() = this["name"] as DataColumn +internal val DataRow.name: String @JvmName("TitanicPassenger_name") get() = this["name"] as String +internal val ColumnsContainer.name: DataColumn @JvmName("NullableTitanicPassenger_name") get() = this["name"] as DataColumn +internal val DataRow.name: String? @JvmName("NullableTitanicPassenger_name") get() = this["name"] as String? +internal val ColumnsContainer.survived: DataColumn @JvmName("TitanicPassenger_survived") get() = this["survived"] as DataColumn +internal val DataRow.survived: Boolean @JvmName("TitanicPassenger_survived") get() = this["survived"] as Boolean +internal val ColumnsContainer.survived: DataColumn @JvmName("NullableTitanicPassenger_survived") get() = this["survived"] as DataColumn +internal val DataRow.survived: Boolean? @JvmName("NullableTitanicPassenger_survived") get() = this["survived"] as Boolean? 
From 970873252f88210bb9e8d2270feb799529e76fcb Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 26 Jan 2023 23:12:07 +0100 Subject: [PATCH 10/50] starting with update operation as template to figure out where to go next --- core/build.gradle.kts | 4 +- .../jetbrains/kotlinx/dataframe/api/update.kt | 150 +++++++++++++++++- .../dataframe/documentation/AccessApi.kt | 32 +++- .../documentation/DocumentationUrls.kt | 8 +- .../documentation/samples/ApiLevels.kt | 14 ++ 5 files changed, 190 insertions(+), 18 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 64ade3457..70c10d4d1 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.3" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.6" // id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } @@ -67,6 +67,8 @@ val processKdocIncludeMain by creatingProcessDocTask( processors = listOf( INCLUDE_DOC_PROCESSOR, SAMPLE_DOC_PROCESSOR, + INCLUDE_FILE_DOC_PROCESSOR, + COMMENT_DOC_PROCESSOR, ) debug = true } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index e89a13d5c..651f9a3d9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dataframe.RowValueExpression import org.jetbrains.kotlinx.dataframe.RowValueFilter import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.documentation.AccessApi import org.jetbrains.kotlinx.dataframe.impl.api.updateImpl import org.jetbrains.kotlinx.dataframe.impl.api.updateWithValuePerColumnImpl import 
org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet @@ -21,33 +22,139 @@ import kotlin.reflect.KProperty /** * Returns [DataFrame] with changed values in some cells. + * * Column types can not be changed. * - * TODO + * Usage: + * + * [update] { columns } + * + * [.[where] { [rowCondition][UpdateOperation.Where.Predicate] } ] + * + * [.[at] ([rowIndices][UpdateOperation.At.RowIndices]) ] + * + * .[with][Update.with] { [rowExpression][UpdateOperation.With.Expression] } | .[notNull] { rowExpression } | .[perCol] { colExpression } | .[perRowCol] { rowColExpression } | .[withValue] (value) | .[withNull] () | .[withZero] () | .[asFrame] { frameExpression } + * + * @comment TODO + * rowExpression: DataRow.(OldValue) -> NewValue + * colExpression: DataColumn.(DataColumn) -> NewValue + * rowColExpression: DataRow.(DataColumn) -> NewValue + * frameExpression: DataFrame.(DataFrame) -> DataFrame + * */ internal interface UpdateOperation { /** @param columns The [ColumnsSelector] used to select the columns of this [DataFrame] to update. */ - val columnsSelector: Nothing + interface ColumnsSelectorParam + + /** @param columns An [Iterable] of [ColumnReference]s of this [DataFrame] to update. */ + interface ColumnReferenceIterableParam - /** @param columns An [Iterable] of [ColumnReference]s belonging to this [DataFrame] to update. */ - val columnReferenceIterable: Nothing + /** @param columns The [ColumnReference]s of this [DataFrame] to update. */ + interface ColumnReferencesParam + + /** @param columns The [KProperty] values corresponding to columns of this [DataFrame] to update. */ + interface KPropertyColumnsParam /** @param columns The column names belonging to this [DataFrame] to update. */ - val stringColumns: Nothing + interface StringColumnsParam + + /** + * Only update the columns that pass a certain [predicate][UpdateOperation.Where.Predicate]. 
+ * + * For example: + * ```kotlin + * df.update { city }.where { name.firstName == "Alice" }.withValue("Paris") + * ``` + */ + interface Where { + + /** The condition for rows to be included. A filter if you will. + * + * Can be seen as [DataRow].(oldValue: [C]) -> [Boolean] */ + interface Predicate + } + + /** + * Only update the columns at certain given [row indices][UpdateOperation.At.RowIndices]: + * + * Either a [Collection]<[Int]>, an [IntRange], or just `vararg` indices. + * + * For example: + * ```kotlin + * df.update { city }.at(5..10).withValue("Paris") + * ``` + */ + interface At { + + /** The indices of the rows to update. */ + interface RowIndices + } + + /** + * Update the selected columns using the given [expression][With.Expression]. + * + * For example: + * ```kotlin + * df.update { city }.with { name.firstName + " from " + it } + * ``` + */ + interface With { + + /** The expression to update the selected columns with. + * + * Can be seen as [DataRow].(oldValue: [C]) -> newValue: [C]? 
+ * */ + interface Expression + } } +/** + * @include [UpdateOperation] + * @include [AccessApi.AnyApiLink] + * @include [UpdateOperation.ColumnsSelectorParam] + */ public fun DataFrame.update(columns: ColumnsSelector): Update = Update(this, null, columns) -public fun DataFrame.update(columns: Iterable>): Update = - update { columns.toColumnSet() } - +/** + * @include [UpdateOperation] + * API: + * - {@include [AccessApi.StringApiLink]} + * + * @include [UpdateOperation.StringColumnsParam] + */ public fun DataFrame.update(vararg columns: String): Update = update { columns.toColumns() } + +/** + * @include [UpdateOperation] + * API: + * - {@include [AccessApi.KPropertiesApiLink]} + * + * @include [UpdateOperation.KPropertyColumnsParam] + */ public fun DataFrame.update(vararg columns: KProperty): Update = update { columns.toColumns() } + +/** + * @include [UpdateOperation] + * API: + * - {@include [AccessApi.ColumnAccessorsApiLink]} + * + * @include [UpdateOperation.ColumnReferencesParam] + */ public fun DataFrame.update(vararg columns: ColumnReference): Update = update { columns.toColumns() } +/** + * @include [UpdateOperation] + * API: + * - {@include [AccessApi.ColumnAccessorsApiLink]} + * + * @include [UpdateOperation.ColumnReferenceIterableParam] + */ +public fun DataFrame.update(columns: Iterable>): Update = + update { columns.toColumnSet() } + public data class Update( val df: DataFrame, val filter: RowValueFilter?, @@ -57,11 +164,33 @@ public data class Update( Update(df, filter as RowValueFilter?, columns as ColumnsSelector) } +/** + * @include [UpdateOperation.Where] + * + * @param predicate {@include [UpdateOperation.Where.Predicate]} + */ public fun Update.where(predicate: RowValueFilter): Update = copy(filter = filter and predicate) +/** + * @include [UpdateOperation.At] + * + * @param rowIndices {@include [UpdateOperation.At.RowIndices]} + */ public fun Update.at(rowIndices: Collection): Update = where { index in rowIndices } + +/** + * @include 
[UpdateOperation.At] + * + * @param rowIndices {@include [UpdateOperation.At.RowIndices]} + */ public fun Update.at(vararg rowIndices: Int): Update = at(rowIndices.toSet()) + +/** + * @include [UpdateOperation.At] + * + * @param rowRange {@include [UpdateOperation.At.RowIndices]} + */ public fun Update.at(rowRange: IntRange): Update = where { index in rowRange } public infix fun Update.perRowCol(expression: RowColumnExpression): DataFrame = @@ -69,6 +198,11 @@ public infix fun Update.perRowCol(expression: RowColumnExpression = AddDataRow.(C) -> R +/** + * @include [UpdateOperation.With] + * + * @param expression {@include [UpdateOperation.With.Expression]} + */ public infix fun Update.with(expression: UpdateExpression): DataFrame = updateImpl { row, _, value -> expression(row, value) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt index b7b541fe4..3217ddde7 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt @@ -14,34 +14,50 @@ import org.jetbrains.kotlinx.dataframe.documentation.samples.ApiLevels as ApiLev * - [KPropertiesApi] * - [ExtensionPropertiesApi] * - * @include [DocumentationUrls.AccessApis] + * For more information: {@include [DocumentationUrls.AccessApis]} + * + * @comment We can link to here whenever we want to explain the different access APIs. */ internal interface AccessApi { + /** API: + * - {@include [AccessApi.ExtensionPropertiesApiLink]} + * - {@include [AccessApi.KPropertiesApiLink]} + * - {@include [AccessApi.ColumnAccessorsApiLink]} + * - {@include [AccessApi.StringApiLink]} + * */ + interface AnyApiLink + /** * String API. * In this [AccessApi], columns are accessed by a [String] representing their name. * Type-checking is done at runtime, name-checking too. 
* - * @include [DocumentationUrls.AccessApis.StringApi] + * For more information: {@include [DocumentationUrls.AccessApis.StringApi]} * * For example: * @sample [ApiLevelsSample.strings] */ interface StringApi + /** String API [AccessApi.StringApi]. */ + interface StringApiLink + /** * Column Accessors API. * In this [AccessApi], every column has a descriptor; * a variable that represents its name and type. * - * @include [DocumentationUrls.AccessApis.ColumnAccessorsApi] + * For more information: {@include [DocumentationUrls.AccessApis.ColumnAccessorsApi]} * * For example: * @sample [ApiLevelsSample.accessors3] */ interface ColumnAccessorsApi + /** Column Accessors API [AccessApi.ColumnAccessorsApi]. */ + interface ColumnAccessorsApiLink + /** * KProperties API. * In this [AccessApi], columns accessed by the @@ -49,22 +65,28 @@ internal interface AccessApi { * of some class. * The name and type of column should match the name and type of property, respectively. * - * @include [DocumentationUrls.AccessApis.KPropertiesApi] + * For more information: {@include [DocumentationUrls.AccessApis.KPropertiesApi]} * * For example: * @sample [ApiLevelsSample.kproperties1] */ interface KPropertiesApi + /** KProperties API [AccessApi.KPropertiesApi]. */ + interface KPropertiesApiLink + /** * Extension Properties API. * In this [AccessApi], extension access properties are generated based on the dataframe schema. * The name and type of properties are inferred from the name and type of the corresponding columns. * - * @include [DocumentationUrls.AccessApis.ExtensionPropertiesApi] + * For more information: {@include [DocumentationUrls.AccessApis.ExtensionPropertiesApi]} * * For example: * @sample [ApiLevelsSample.extensionProperties1] */ interface ExtensionPropertiesApi + + /** Extension Properties API [AccessApi.ExtensionPropertiesApi]. 
*/ + interface ExtensionPropertiesApiLink } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index 84a17e89b..011579998 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -1,17 +1,17 @@ package org.jetbrains.kotlinx.dataframe.documentation -internal interface DocumentationUrls { +private interface DocumentationUrls { /** [See `fill` documentation.](https://kotlin.github.io/dataframe/fill.html) */ interface Fill { - /** [See `fillNulls` documentation](https://kotlin.github.io/dataframe/fill.html#fillnulls) */ + /** [See `fillNulls` documentation.](https://kotlin.github.io/dataframe/fill.html#fillnulls) */ interface FillNulls - /** [See `fillNaNs` documentation](https://kotlin.github.io/dataframe/fill.html#fillnans) */ + /** [See `fillNaNs` documentation.](https://kotlin.github.io/dataframe/fill.html#fillnans) */ interface FillNaNs - /** [See `fillNA` documentation](https://kotlin.github.io/dataframe/fill.html#fillna) */ + /** [See `fillNA` documentation.](https://kotlin.github.io/dataframe/fill.html#fillna) */ interface FillNA } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/samples/ApiLevels.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/samples/ApiLevels.kt index 699763eb1..81c553b0c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/samples/ApiLevels.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/samples/ApiLevels.kt @@ -18,6 +18,7 @@ import org.jetbrains.kotlinx.dataframe.io.read internal interface ApiLevels { fun strings() { + // SampleStart DataFrame.read("titanic.csv") .add("lastName") { "name"().split(",").last() } .dropNulls("age") @@ -26,17 +27,21 @@ internal 
interface ApiLevels { "home"().endsWith("NY") && "age"() in 10..20 } + // SampleEnd } fun accessors1() { + // SampleStart val survived by column() // accessor for Boolean column with name 'survived' val home by column() val age by column() val name by column() val lastName by column() + // SampleEnd } fun accessors2() { + // SampleStart val survived by column() val home by column() val age by column() @@ -47,9 +52,11 @@ internal interface ApiLevels { .add(lastName) { name().split(",").last() } .dropNulls { age } .filter { survived() && home().endsWith("NY") && age()!! in 10..20 } + // SampleEnd } fun accessors3() { + // SampleStart val survived by column() val home by column() val age by column() @@ -60,9 +67,11 @@ internal interface ApiLevels { .add(lastName) { name().split(",").last() } .dropNulls { age } .filter { survived() && home().endsWith("NY") && age()!! in 10..20 } + // SampleEnd } fun kproperties1() { + // SampleStart data class Passenger( val survived: Boolean, val home: String, @@ -79,9 +88,11 @@ internal interface ApiLevels { it[Passenger::age] in 10..20 } .toListOf() + // SampleEnd } fun kproperties2() { + // SampleStart data class Passenger( @ColumnName("survived") val isAlive: Boolean, @ColumnName("home") val city: String, @@ -91,6 +102,7 @@ internal interface ApiLevels { val passengers = DataFrame.read("titanic.csv") .filter { it.get(Passenger::city).endsWith("NY") } .toListOf() + // SampleEnd } // @DataSchema @@ -102,11 +114,13 @@ internal interface ApiLevels { } fun extensionProperties2() { + // SampleStart val df = DataFrame.read("titanic.csv").cast() df.add("lastName") { name.split(",").last() } .dropNulls { age } .filter { survived && home.endsWith("NY") && age in 10..20 } + // SampleEnd } fun extensionProperties1() { From fdda5ba5ebf5019630a94277df3d878a38d9aabd Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 30 Jan 2023 15:33:47 +0100 Subject: [PATCH 11/50] renamed Column to AnyColumnReference, similar to AnyFrame. 
Continuing with KDoc example structure --- core/build.gradle.kts | 2 +- .../jetbrains/kotlinx/dataframe/DataFrame.kt | 2 +- .../jetbrains/kotlinx/dataframe/DataRow.kt | 2 +- .../jetbrains/kotlinx/dataframe/aliases.kt | 2 +- .../dataframe/api/ColumnsSelectionDsl.kt | 15 +-- .../kotlinx/dataframe/api/DataFrameGet.kt | 5 +- .../kotlinx/dataframe/api/DataRowApi.kt | 6 +- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 15 +-- .../jetbrains/kotlinx/dataframe/api/add.kt | 26 +++-- .../kotlinx/dataframe/api/countDistinct.kt | 5 +- .../jetbrains/kotlinx/dataframe/api/cumSum.kt | 6 +- .../kotlinx/dataframe/api/distinct.kt | 10 +- .../jetbrains/kotlinx/dataframe/api/group.kt | 6 +- .../kotlinx/dataframe/api/groupBy.kt | 17 ++-- .../jetbrains/kotlinx/dataframe/api/move.kt | 16 +-- .../jetbrains/kotlinx/dataframe/api/pivot.kt | 20 ++-- .../jetbrains/kotlinx/dataframe/api/remove.kt | 10 +- .../jetbrains/kotlinx/dataframe/api/select.kt | 32 ++++-- .../kotlinx/dataframe/api/toDataFrame.kt | 16 ++- .../jetbrains/kotlinx/dataframe/api/unfold.kt | 4 +- .../kotlinx/dataframe/api/ungroup.kt | 4 +- .../jetbrains/kotlinx/dataframe/api/update.kt | 98 +++++++++---------- .../kotlinx/dataframe/api/valueCounts.kt | 4 +- .../jetbrains/kotlinx/dataframe/api/values.kt | 14 +-- .../jetbrains/kotlinx/dataframe/api/with.kt | 4 +- .../dataframe/documentation/AccessApi.kt | 21 ++-- .../documentation/HighLevelOperations.kt | 70 +++++++++++++ .../impl/aggregation/aggregations.kt | 4 +- .../kotlinx/dataframe/impl/api/move.kt | 4 +- 29 files changed, 277 insertions(+), 163 deletions(-) create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 70c10d4d1..f1f48fe68 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - 
id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.6" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.7" // id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt index dd1c55636..9698f9630 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt @@ -57,7 +57,7 @@ public interface DataFrame : Aggregatable, ColumnsContainer { override operator fun get(columns: ColumnsSelector): List> = getColumnsImpl(UnresolvedColumnsPolicy.Fail, columns) - public operator fun get(first: Column, vararg other: Column): DataFrame = select(listOf(first) + other) + public operator fun get(first: AnyColumnReference, vararg other: AnyColumnReference): DataFrame = select(listOf(first) + other) public operator fun get(first: String, vararg other: String): DataFrame = select(listOf(first) + other) public operator fun get(columnRange: ClosedRange): DataFrame = select { columnRange.start..columnRange.endInclusive } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt index 989fb3df2..5a1375436 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt @@ -28,7 +28,7 @@ public interface DataRow { public operator fun get(column: ColumnReference): R public operator fun get(columns: List>): List = columns.map { get(it) } public operator fun get(property: KProperty): R = get(property.columnName) as R - public operator fun get(first: Column, vararg other: Column): DataRow = owner.get(first, *other)[index] + public operator fun get(first: AnyColumnReference, vararg other: AnyColumnReference): DataRow = owner.get(first, *other)[index] public 
operator fun get(first: String, vararg other: String): DataRow = owner.get(first, *other)[index] public operator fun get(path: ColumnPath): Any? = owner.get(path)[index] public operator fun get(name: String): Any? diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt index c7d63224e..74552bae8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt @@ -138,7 +138,7 @@ public typealias RowValueFilter = RowValueExpression // region columns -public typealias Column = ColumnReference<*> +public typealias AnyColumnReference = ColumnReference<*> public typealias ColumnGroupReference = ColumnReference public typealias ColumnGroupAccessor = ColumnAccessor> diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt index 66b47de17..88d62b4e3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt @@ -31,7 +31,6 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.top import org.jetbrains.kotlinx.dataframe.impl.columns.transform import org.jetbrains.kotlinx.dataframe.impl.columns.transformSingle import org.jetbrains.kotlinx.dataframe.impl.columns.tree.dfs -import org.jetbrains.kotlinx.dataframe.impl.getColumnsWithPaths import kotlin.reflect.KProperty import kotlin.reflect.KType import kotlin.reflect.typeOf @@ -51,6 +50,10 @@ public interface ColumnSelectionDsl : ColumnsContainer { public operator fun String.get(column: String): ColumnPath = pathOf(this, column) } + +/** [Columns Selection DSL][ColumnsSelectionDsl] */ +internal interface ColumnSelectionDslLink + public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColumn> { public fun 
ColumnSet.first(condition: ColumnFilter): SingleColumn = @@ -67,7 +70,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public operator fun String.rangeTo(endInclusive: String): ColumnSet<*> = toColumnAccessor().rangeTo(endInclusive.toColumnAccessor()) - public operator fun Column.rangeTo(endInclusive: Column): ColumnSet<*> = object : ColumnSet { + public operator fun AnyColumnReference.rangeTo(endInclusive: AnyColumnReference): ColumnSet<*> = object : ColumnSet { override fun resolve(context: ColumnResolutionContext): List> { val startPath = this@rangeTo.resolveSingle(context)!!.path val endPath = endInclusive.resolveSingle(context)!!.path @@ -161,7 +164,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } public fun SingleColumn<*>.allAfter(colName: String): ColumnSet = allAfter(pathOf(colName)) - public fun SingleColumn<*>.allAfter(column: Column): ColumnSet = allAfter(column.path()) + public fun SingleColumn<*>.allAfter(column: AnyColumnReference): ColumnSet = allAfter(column.path()) // endregion @@ -180,7 +183,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } public fun SingleColumn<*>.allSince(colName: String): ColumnSet = allSince(pathOf(colName)) - public fun SingleColumn<*>.allSince(column: Column): ColumnSet = allSince(column.path()) + public fun SingleColumn<*>.allSince(column: AnyColumnReference): ColumnSet = allSince(column.path()) // endregion @@ -199,7 +202,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } public fun SingleColumn<*>.allBefore(colName: String): ColumnSet = allBefore(pathOf(colName)) - public fun SingleColumn<*>.allBefore(column: Column): ColumnSet = allBefore(column.path()) + public fun SingleColumn<*>.allBefore(column: AnyColumnReference): ColumnSet = allBefore(column.path()) // endregion @@ -218,7 +221,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } public fun SingleColumn<*>.allUntil(colName: 
String): ColumnSet = allUntil(pathOf(colName)) - public fun SingleColumn<*>.allUntil(column: Column): ColumnSet = allUntil(column.path()) + public fun SingleColumn<*>.allUntil(column: AnyColumnReference): ColumnSet = allUntil(column.path()) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt index 8a49a3aa4..354d5990d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnSelector import org.jetbrains.kotlinx.dataframe.ColumnsContainer import org.jetbrains.kotlinx.dataframe.ColumnsSelector @@ -19,7 +19,6 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns import org.jetbrains.kotlinx.dataframe.impl.getColumnPaths import org.jetbrains.kotlinx.dataframe.impl.getColumnsWithPaths -import org.jetbrains.kotlinx.dataframe.indices import org.jetbrains.kotlinx.dataframe.ncol import org.jetbrains.kotlinx.dataframe.nrow import kotlin.reflect.KProperty @@ -89,7 +88,7 @@ public fun ColumnsContainer.getColumnGroupOrNull(column: KProperty<*>): C public fun ColumnsContainer<*>.containsColumn(column: ColumnReference): Boolean = getColumnOrNull(column) != null public fun ColumnsContainer<*>.containsColumn(column: KProperty<*>): Boolean = containsColumn(column.columnName) -public operator fun ColumnsContainer<*>.contains(column: Column): Boolean = containsColumn(column) +public operator fun ColumnsContainer<*>.contains(column: AnyColumnReference): Boolean = containsColumn(column) public operator fun 
ColumnsContainer<*>.contains(column: KProperty<*>): Boolean = containsColumn(column) // region rows diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt index 9cdc67e72..d7987f5c4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsContainer import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -57,10 +57,10 @@ public fun AnyRow.getValueOrNull(column: KProperty): T? = getValueOrNull< // region contains public fun AnyRow.containsKey(columnName: String): Boolean = owner.containsColumn(columnName) -public fun AnyRow.containsKey(column: Column): Boolean = owner.containsColumn(column) +public fun AnyRow.containsKey(column: AnyColumnReference): Boolean = owner.containsColumn(column) public fun AnyRow.containsKey(column: KProperty<*>): Boolean = owner.containsColumn(column) -public operator fun AnyRow.contains(column: Column): Boolean = containsKey(column) +public operator fun AnyRow.contains(column: AnyColumnReference): Boolean = containsKey(column) public operator fun AnyRow.contains(column: KProperty<*>): Boolean = containsKey(column) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index f9b87caaa..3b8a9c8f6 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -1,9 +1,9 @@ package org.jetbrains.kotlinx.dataframe.api import 
org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -20,7 +20,7 @@ import kotlin.reflect.KProperty /** * Replace `null` values with given value or expression. - * Specific case of [UpdateOperation]. + * Specific case of [Update]. * TODO samples * * @include [DocumentationUrls.Fill.FillNulls] @@ -160,10 +160,13 @@ public fun DataFrame.dropNulls(vararg columns: KProperty<*>, whereAllNull public fun DataFrame.dropNulls(vararg columns: String, whereAllNull: Boolean = false): DataFrame = dropNulls(whereAllNull) { columns.toColumns() } -public fun DataFrame.dropNulls(vararg columns: Column, whereAllNull: Boolean = false): DataFrame = +public fun DataFrame.dropNulls(vararg columns: AnyColumnReference, whereAllNull: Boolean = false): DataFrame = dropNulls(whereAllNull) { columns.toColumns() } -public fun DataFrame.dropNulls(columns: Iterable, whereAllNull: Boolean = false): DataFrame = +public fun DataFrame.dropNulls( + columns: Iterable, + whereAllNull: Boolean = false +): DataFrame = dropNulls(whereAllNull) { columns.toColumnSet() } public fun DataColumn.dropNulls(): DataColumn = @@ -186,10 +189,10 @@ public fun DataFrame.dropNA(vararg columns: KProperty<*>, whereAllNA: Boo public fun DataFrame.dropNA(vararg columns: String, whereAllNA: Boolean = false): DataFrame = dropNA(whereAllNA) { columns.toColumns() } -public fun DataFrame.dropNA(vararg columns: Column, whereAllNA: Boolean = false): DataFrame = +public fun DataFrame.dropNA(vararg columns: AnyColumnReference, whereAllNA: Boolean = false): DataFrame = dropNA(whereAllNA) { columns.toColumns() } -public fun DataFrame.dropNA(columns: Iterable, whereAllNA: Boolean = false): 
DataFrame = +public fun DataFrame.dropNA(columns: Iterable, whereAllNA: Boolean = false): DataFrame = dropNA(whereAllNA) { columns.toColumnSet() } public fun DataFrame.dropNA(whereAllNA: Boolean = false): DataFrame = diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt index 513840c08..4b045a455 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -3,9 +3,9 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyBaseCol import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyColumnGroupAccessor +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsContainer import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -50,7 +50,8 @@ public fun DataFrame.add(vararg columns: AnyBaseCol): DataFrame = addA * @throws [UnequalColumnSizesException] if columns in expected result have different sizes * @return new [DataFrame] with added columns */ -public fun DataFrame.addAll(columns: Iterable): DataFrame = dataFrameOf(columns() + columns).cast() +public fun DataFrame.addAll(columns: Iterable): DataFrame = + dataFrameOf(columns() + columns).cast() /** * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. 
@@ -75,7 +76,8 @@ public fun DataFrame.add(vararg dataFrames: AnyFrame): DataFrame = add * @return new [DataFrame] with added columns */ @JvmName("addAllFrames") -public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = addAll(dataFrames.flatMap { it.columns() }) +public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = + addAll(dataFrames.flatMap { it.columns() }) // endregion @@ -149,9 +151,9 @@ public class AddDsl(@PublishedApi internal val df: DataFrame) : ColumnsCon // TODO: support adding column into path internal val columns = mutableListOf() - public fun add(column: Column): Boolean = columns.add(column.resolveSingle(df)!!.data) + public fun add(column: AnyColumnReference): Boolean = columns.add(column.resolveSingle(df)!!.data) - public operator fun Column.unaryPlus(): Boolean = add(this) + public operator fun AnyColumnReference.unaryPlus(): Boolean = add(this) public operator fun String.unaryPlus(): Boolean = add(df[this]) @@ -166,17 +168,21 @@ public class AddDsl(@PublishedApi internal val df: DataFrame) : ColumnsCon return df.mapToColumn("", Infer.Nulls, expression) } - public inline infix fun String.from(noinline expression: RowExpression): Boolean = add(this, Infer.Nulls, expression) + public inline infix fun String.from(noinline expression: RowExpression): Boolean = + add(this, Infer.Nulls, expression) // TODO: use path instead of name - public inline infix fun ColumnAccessor.from(noinline expression: RowExpression): Boolean = name().from(expression) - public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = add(name, Infer.Nulls, expression) + public inline infix fun ColumnAccessor.from(noinline expression: RowExpression): Boolean = + name().from(expression) - public infix fun String.from(column: Column): Boolean = add(column.rename(this)) + public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = + add(name, Infer.Nulls, expression) + + public infix fun String.from(column: 
AnyColumnReference): Boolean = add(column.rename(this)) public inline infix fun ColumnAccessor.from(column: ColumnReference): Boolean = name() from column public inline infix fun KProperty.from(column: ColumnReference): Boolean = name from column - public infix fun Column.into(name: String): Boolean = add(rename(name)) + public infix fun AnyColumnReference.into(name: String): Boolean = add(rename(name)) public infix fun ColumnReference.into(column: ColumnAccessor): Boolean = into(column.name()) public infix fun ColumnReference.into(column: KProperty): Boolean = into(column.name) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt index 37cfdb6c9..cc4b04301 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns @@ -19,6 +19,7 @@ public fun DataFrame.countDistinct(columns: ColumnsSelector): In public fun DataFrame.countDistinct(vararg columns: String): Int = countDistinct { columns.toColumns() } public fun DataFrame.countDistinct(vararg columns: KProperty): Int = countDistinct { columns.toColumns() } -public fun DataFrame.countDistinct(vararg columns: Column): Int = countDistinct { columns.toColumns() } +public fun DataFrame.countDistinct(vararg columns: AnyColumnReference): Int = + countDistinct { columns.toColumns() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt index be60508cd..f67760cf3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -38,7 +38,7 @@ private val supportedClasses = setOf(Double::class, Float::class, Int::class, Lo public fun DataFrame.cumSum(skipNA: Boolean = defaultCumSumSkipNA, columns: ColumnsSelector): DataFrame = convert(columns).to { if (it.typeClass in supportedClasses) it.cast().cumSum(skipNA) else it } public fun DataFrame.cumSum(vararg columns: String, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumns() } -public fun DataFrame.cumSum(vararg columns: Column, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumns() } +public fun DataFrame.cumSum(vararg columns: AnyColumnReference, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumns() } public fun DataFrame.cumSum(vararg columns: KProperty<*>, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumns() } public fun DataFrame.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { allDfs() } @@ -50,7 +50,7 @@ public fun DataFrame.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataF public fun GroupBy.cumSum(skipNA: Boolean = defaultCumSumSkipNA, columns: ColumnsSelector): GroupBy = updateGroups { cumSum(skipNA, columns) } public fun GroupBy.cumSum(vararg columns: String, skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { columns.toColumns() } -public fun GroupBy.cumSum(vararg columns: Column, 
skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { columns.toColumns() } +public fun GroupBy.cumSum(vararg columns: AnyColumnReference, skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { columns.toColumns() } public fun GroupBy.cumSum(vararg columns: KProperty<*>, skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { columns.toColumns() } public fun GroupBy.cumSum(skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { allDfs() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt index c0c072c33..7c72af4e6 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet @@ -18,19 +18,19 @@ public fun DataFrame.distinct(vararg columns: KProperty<*>): DataFrame set } public fun DataFrame.distinct(vararg columns: String): DataFrame = distinct { columns.toColumns() } -public fun DataFrame.distinct(vararg columns: Column): DataFrame = distinct { columns.toColumns() } +public fun DataFrame.distinct(vararg columns: AnyColumnReference): DataFrame = distinct { columns.toColumns() } @JvmName("distinctT") public fun DataFrame.distinct(columns: Iterable): DataFrame = distinct { columns.toColumns() } -public fun DataFrame.distinct(columns: Iterable): DataFrame = distinct { columns.toColumnSet() } +public fun DataFrame.distinct(columns: Iterable): DataFrame = distinct { columns.toColumnSet() } public fun DataFrame.distinctBy(vararg columns: KProperty<*>): DataFrame = distinctBy { columns.toColumns() } 
public fun DataFrame.distinctBy(vararg columns: String): DataFrame = distinctBy { columns.toColumns() } -public fun DataFrame.distinctBy(vararg columns: Column): DataFrame = distinctBy { columns.toColumns() } +public fun DataFrame.distinctBy(vararg columns: AnyColumnReference): DataFrame = distinctBy { columns.toColumns() } @JvmName("distinctByT") public fun DataFrame.distinctBy(columns: Iterable): DataFrame = distinctBy { columns.toColumns() } -public fun DataFrame.distinctBy(columns: Iterable): DataFrame = distinctBy { columns.toColumnSet() } +public fun DataFrame.distinctBy(columns: Iterable): DataFrame = distinctBy { columns.toColumnSet() } public fun DataFrame.distinctBy(columns: ColumnsSelector): DataFrame { val cols = get(columns) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt index 2c75681d3..c83f2db3f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyColumnGroupAccessor -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath @@ -16,7 +16,7 @@ public data class GroupClause(val df: DataFrame, val columns: ColumnsSe public fun DataFrame.group(columns: ColumnsSelector): GroupClause = GroupClause(this, columns) public fun DataFrame.group(vararg columns: String): GroupClause = group { columns.toColumns() } -public fun DataFrame.group(vararg columns: Column): GroupClause = group { columns.toColumns() } +public fun DataFrame.group(vararg columns: AnyColumnReference): GroupClause = group { columns.toColumns() } public fun DataFrame.group(vararg columns: 
KProperty<*>): GroupClause = group { columns.toColumns() } @JvmName("intoString") @@ -25,7 +25,7 @@ public fun DataFrame.group(vararg columns: KProperty<*>): GroupClause GroupClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> String): DataFrame = df.move(columns).under { column(it).toColumnAccessor() } @JvmName("intoColumn") -public infix fun GroupClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> Column): DataFrame = df.move(columns).under(column) +public infix fun GroupClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> AnyColumnReference): DataFrame = df.move(columns).under(column) public infix fun GroupClause.into(column: String): DataFrame = into(columnGroup().named(column)) public infix fun GroupClause.into(column: AnyColumnGroupAccessor): DataFrame = df.move(columns).under(column) public infix fun GroupClause.into(column: KProperty<*>): DataFrame = into(column.columnName) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt index e5f8032be..8796681dd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow @@ -18,18 +18,23 @@ import kotlin.reflect.KProperty // region DataFrame -public fun DataFrame.groupBy(moveToTop: Boolean = true, cols: ColumnsSelector): GroupBy = groupByImpl(moveToTop, cols) -public fun DataFrame.groupBy(cols: Iterable): GroupBy = groupBy { cols.toColumnSet() } +public fun DataFrame.groupBy(moveToTop: Boolean = true, cols: ColumnsSelector): GroupBy = + 
groupByImpl(moveToTop, cols) + +public fun DataFrame.groupBy(cols: Iterable): GroupBy = groupBy { cols.toColumnSet() } public fun DataFrame.groupBy(vararg cols: KProperty<*>): GroupBy = groupBy { cols.toColumns() } public fun DataFrame.groupBy(vararg cols: String): GroupBy = groupBy { cols.toColumns() } -public fun DataFrame.groupBy(vararg cols: Column, moveToTop: Boolean = true): GroupBy = groupBy(moveToTop) { cols.toColumns() } +public fun DataFrame.groupBy(vararg cols: AnyColumnReference, moveToTop: Boolean = true): GroupBy = + groupBy(moveToTop) { cols.toColumns() } // endregion // region Pivot -public fun Pivot.groupBy(moveToTop: Boolean = true, columns: ColumnsSelector): PivotGroupBy = (this as PivotImpl).toGroupedPivot(moveToTop, columns) -public fun Pivot.groupBy(vararg columns: Column): PivotGroupBy = groupBy { columns.toColumns() } +public fun Pivot.groupBy(moveToTop: Boolean = true, columns: ColumnsSelector): PivotGroupBy = + (this as PivotImpl).toGroupedPivot(moveToTop, columns) + +public fun Pivot.groupBy(vararg columns: AnyColumnReference): PivotGroupBy = groupBy { columns.toColumns() } public fun Pivot.groupBy(vararg columns: String): PivotGroupBy = groupBy { columns.toColumns() } public fun Pivot.groupBy(vararg columns: KProperty<*>): PivotGroupBy = groupBy { columns.toColumns() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt index 5a01944c5..90ec36cf3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyColumnGroupAccessor -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnSelector import org.jetbrains.kotlinx.dataframe.ColumnsSelector import 
org.jetbrains.kotlinx.dataframe.DataFrame @@ -21,20 +21,20 @@ public fun DataFrame.move(vararg cols: KProperty): MoveClause public fun DataFrame.moveTo(newColumnIndex: Int, columns: ColumnsSelector): DataFrame = move(columns).to(newColumnIndex) public fun DataFrame.moveTo(newColumnIndex: Int, vararg columns: String): DataFrame = moveTo(newColumnIndex) { columns.toColumns() } -public fun DataFrame.moveTo(newColumnIndex: Int, vararg columns: Column): DataFrame = moveTo(newColumnIndex) { columns.toColumns() } +public fun DataFrame.moveTo(newColumnIndex: Int, vararg columns: AnyColumnReference): DataFrame = moveTo(newColumnIndex) { columns.toColumns() } public fun DataFrame.moveTo(newColumnIndex: Int, vararg columns: KProperty<*>): DataFrame = moveTo(newColumnIndex) { columns.toColumns() } public fun DataFrame.moveToLeft(columns: ColumnsSelector): DataFrame = move(columns).toLeft() public fun DataFrame.moveToLeft(vararg columns: String): DataFrame = moveToLeft { columns.toColumns() } -public fun DataFrame.moveToLeft(vararg columns: Column): DataFrame = moveToLeft { columns.toColumns() } +public fun DataFrame.moveToLeft(vararg columns: AnyColumnReference): DataFrame = moveToLeft { columns.toColumns() } public fun DataFrame.moveToLeft(vararg columns: KProperty<*>): DataFrame = moveToLeft { columns.toColumns() } public fun DataFrame.moveToRight(columns: ColumnsSelector): DataFrame = move(columns).toRight() public fun DataFrame.moveToRight(vararg columns: String): DataFrame = moveToRight { columns.toColumns() } -public fun DataFrame.moveToRight(vararg columns: Column): DataFrame = moveToRight { columns.toColumns() } +public fun DataFrame.moveToRight(vararg columns: AnyColumnReference): DataFrame = moveToRight { columns.toColumns() } public fun DataFrame.moveToRight(vararg columns: KProperty<*>): DataFrame = moveToRight { columns.toColumns() } -public fun MoveClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> Column): DataFrame = moveImpl( +public fun 
MoveClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> AnyColumnReference): DataFrame = moveImpl( under = false, column ) @@ -42,7 +42,7 @@ public fun MoveClause.into(column: ColumnsSelectionDsl.(ColumnWi public fun MoveClause.into(column: String): DataFrame = pathOf(column).let { path -> into { path } } public fun MoveClause.intoIndexed( - newPathExpression: ColumnsSelectionDsl.(ColumnWithPath, Int) -> Column + newPathExpression: ColumnsSelectionDsl.(ColumnWithPath, Int) -> AnyColumnReference ): DataFrame { var counter = 0 return into { col -> @@ -52,7 +52,7 @@ public fun MoveClause.intoIndexed( public fun MoveClause.under(column: String): DataFrame = pathOf(column).let { path -> under { path } } public fun MoveClause.under(column: AnyColumnGroupAccessor): DataFrame = column.path().let { path -> under { path } } -public fun MoveClause.under(column: ColumnsSelectionDsl.(ColumnWithPath) -> Column): DataFrame = moveImpl( +public fun MoveClause.under(column: ColumnsSelectionDsl.(ColumnWithPath) -> AnyColumnReference): DataFrame = moveImpl( under = true, column ) @@ -66,7 +66,7 @@ public fun MoveClause.toTop( public fun MoveClause.after(column: ColumnSelector): DataFrame = afterOrBefore(column, true) public fun MoveClause.after(column: String): DataFrame = after { column.toColumnAccessor() } -public fun MoveClause.after(column: Column): DataFrame = after { column } +public fun MoveClause.after(column: AnyColumnReference): DataFrame = after { column } public fun MoveClause.after(column: KProperty<*>): DataFrame = after { column.toColumnAccessor() } // TODO: implement 'before' diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt index c4023430c..ed99ff907 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import 
org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow @@ -41,17 +41,17 @@ public interface PivotDsl : ColumnsSelectionDsl { public fun DataFrame.pivot(inward: Boolean? = null, columns: PivotColumnsSelector): Pivot = PivotImpl(this, columns, inward) public fun DataFrame.pivot(vararg columns: String, inward: Boolean? = null): Pivot = pivot(inward) { columns.toColumns() } -public fun DataFrame.pivot(vararg columns: Column, inward: Boolean? = null): Pivot = pivot(inward) { columns.toColumns() } +public fun DataFrame.pivot(vararg columns: AnyColumnReference, inward: Boolean? = null): Pivot = pivot(inward) { columns.toColumns() } public fun DataFrame.pivot(vararg columns: KProperty<*>, inward: Boolean? = null): Pivot = pivot(inward) { columns.toColumns() } public fun DataFrame.pivotMatches(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).groupByOther().matches() public fun DataFrame.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } -public fun DataFrame.pivotMatches(vararg columns: Column, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun DataFrame.pivotMatches(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun DataFrame.pivotMatches(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun DataFrame.pivotCounts(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).groupByOther().count() public fun DataFrame.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } -public fun 
DataFrame.pivotCounts(vararg columns: Column, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun DataFrame.pivotCounts(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } public fun DataFrame.pivotCounts(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } // endregion @@ -59,18 +59,18 @@ public fun DataFrame.pivotCounts(vararg columns: KProperty<*>, inward: Bo // region GroupBy public fun GroupBy<*, G>.pivot(inward: Boolean = true, columns: ColumnsSelector): PivotGroupBy = PivotGroupByImpl(this, columns, inward) -public fun GroupBy<*, G>.pivot(vararg columns: Column, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivot(vararg columns: AnyColumnReference, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } public fun GroupBy<*, G>.pivot(vararg columns: String, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } public fun GroupBy<*, G>.pivot(vararg columns: KProperty<*>, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } public fun GroupBy<*, G>.pivotMatches(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).matches() public fun GroupBy<*, G>.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } -public fun GroupBy<*, G>.pivotMatches(vararg columns: Column, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivotMatches(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun GroupBy<*, G>.pivotMatches(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun GroupBy<*, 
G>.pivotCounts(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).count() public fun GroupBy<*, G>.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } -public fun GroupBy<*, G>.pivotCounts(vararg columns: Column, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivotCounts(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } public fun GroupBy<*, G>.pivotCounts(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } // endregion @@ -80,17 +80,17 @@ public fun GroupBy<*, G>.pivotCounts(vararg columns: KProperty<*>, inward: B public fun AggregateGroupedDsl.pivot(inward: Boolean = true, columns: ColumnsSelector): PivotGroupBy = PivotInAggregateImpl(this, columns, inward) public fun AggregateGroupedDsl.pivot(vararg columns: String, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } -public fun AggregateGroupedDsl.pivot(vararg columns: Column, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } +public fun AggregateGroupedDsl.pivot(vararg columns: AnyColumnReference, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } public fun AggregateGroupedDsl.pivot(vararg columns: KProperty<*>, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } public fun AggregateGroupedDsl.pivotMatches(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).matches() public fun AggregateGroupedDsl.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } -public fun AggregateGroupedDsl.pivotMatches(vararg columns: Column, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun 
AggregateGroupedDsl.pivotMatches(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun AggregateGroupedDsl.pivotMatches(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun AggregateGroupedDsl.pivotCounts(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).matches() public fun AggregateGroupedDsl.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } -public fun AggregateGroupedDsl.pivotCounts(vararg columns: Column, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun AggregateGroupedDsl.pivotCounts(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } public fun AggregateGroupedDsl.pivotCounts(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt index b252b7ed0..85226f1c3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.impl.api.removeImpl @@ -13,12 +13,12 @@ import kotlin.reflect.KProperty public fun DataFrame.remove(columns: ColumnsSelector): DataFrame = removeImpl(allowMissingColumns = true, columns = columns).df public fun DataFrame.remove(vararg columns: KProperty<*>): DataFrame = remove { 
columns.toColumns() } public fun DataFrame.remove(vararg columns: String): DataFrame = remove { columns.toColumns() } -public fun DataFrame.remove(vararg columns: Column): DataFrame = remove { columns.toColumns() } -public fun DataFrame.remove(columns: Iterable): DataFrame = remove { columns.toColumnSet() } +public fun DataFrame.remove(vararg columns: AnyColumnReference): DataFrame = remove { columns.toColumns() } +public fun DataFrame.remove(columns: Iterable): DataFrame = remove { columns.toColumnSet() } public infix operator fun DataFrame.minus(columns: ColumnsSelector): DataFrame = remove(columns) public infix operator fun DataFrame.minus(column: String): DataFrame = remove(column) -public infix operator fun DataFrame.minus(column: Column): DataFrame = remove(column) -public infix operator fun DataFrame.minus(columns: Iterable): DataFrame = remove(columns) +public infix operator fun DataFrame.minus(column: AnyColumnReference): DataFrame = remove(column) +public infix operator fun DataFrame.minus(columns: Iterable): DataFrame = remove(columns) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt index 5f9b8a8bb..4a91e748f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.impl.columnName @@ -10,12 +10,28 @@ import kotlin.reflect.KProperty // region DataFrame -public fun DataFrame.select(columns: ColumnsSelector): DataFrame = get(columns).toDataFrame().cast() -public fun DataFrame.select(vararg columns: KProperty<*>): DataFrame = select(columns.map { 
it.columnName }) -public fun DataFrame.select(vararg columns: String): DataFrame = select(columns.asIterable()) -public fun DataFrame.select(vararg columns: Column): DataFrame = select { columns.toColumns() } -@JvmName("selectT") -public fun DataFrame.select(columns: Iterable): DataFrame = columns.map { get(it) }.toDataFrame().cast() -public fun DataFrame.select(columns: Iterable): DataFrame = select { columns.toColumnSet() } +public fun DataFrame.select(columns: ColumnsSelector): DataFrame = + get(columns).toDataFrame().cast() + +public fun DataFrame.select(vararg columns: KProperty<*>): DataFrame = + select(columns.asIterable()) + +@JvmName("selectKPropertyIterable") +public fun DataFrame.select(columns: Iterable>): DataFrame = + select(columns.map { it.columnName }) + +public fun DataFrame.select(vararg columns: String): DataFrame = + select(columns.asIterable()) + +@JvmName("selectStringIterable") +public fun DataFrame.select(columns: Iterable): DataFrame = + columns.map { get(it) }.toDataFrame().cast() + +public fun DataFrame.select(vararg columns: AnyColumnReference): DataFrame = + select { columns.toColumns() } + +@JvmName("selectAnyColumnReferenceIterable") +public fun DataFrame.select(columns: Iterable): DataFrame = + select { columns.toColumnSet() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index 6c79cf73a..89d7581a2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -1,8 +1,8 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyBaseCol +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import 
org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -23,7 +23,8 @@ public inline fun Iterable.toDataFrame(): DataFrame = toDataFr properties() } -public inline fun Iterable.toDataFrame(noinline body: CreateDataFrameDsl.() -> Unit): DataFrame = createDataFrameImpl(T::class, body) +public inline fun Iterable.toDataFrame(noinline body: CreateDataFrameDsl.() -> Unit): DataFrame = + createDataFrameImpl(T::class, body) public inline fun Iterable.toDataFrame(vararg props: KProperty<*>, maxDepth: Int = 0): DataFrame = toDataFrame { @@ -40,7 +41,7 @@ public fun DataFrame.read(vararg columns: String): DataFrame = unfold( public fun DataFrame.read(vararg columns: KProperty<*>): DataFrame = unfold(*columns) @Deprecated("Replaced with `unfold` operation.", ReplaceWith("this.unfold(*columns)"), DeprecationLevel.ERROR) -public fun DataFrame.read(vararg columns: Column): DataFrame = unfold(*columns) +public fun DataFrame.read(vararg columns: AnyColumnReference): DataFrame = unfold(*columns) @JvmName("toDataFrameT") public fun Iterable>.toDataFrame(): DataFrame { @@ -76,6 +77,7 @@ public fun Iterable>.toDataFrameFromPairs(): Da when (path.size) { 0 -> { } + 1 -> { val name = path[0] val uniqueName = nameGenerator.addUnique(name) @@ -85,6 +87,7 @@ public fun Iterable>.toDataFrameFromPairs(): Da columns.add(col.rename(uniqueName)) columnIndices[uniqueName] = index } + else -> { val name = path[0] val uniqueName = columnGroupName.getOrPut(name) { @@ -195,7 +198,12 @@ public fun Map>.toDataFrame(): AnyFrame { @JvmName("toDataFrameColumnPathAnyNullable") public fun Map>.toDataFrame(): AnyFrame { - return map { it.key to DataColumn.createWithTypeInference(it.key.last(), it.value.asList()) }.toDataFrameFromPairs() + return map { + it.key to DataColumn.createWithTypeInference( + it.key.last(), + it.value.asList() + ) + }.toDataFrameFromPairs() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt index 54f59bf95..1ad347213 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -25,4 +25,4 @@ public inline fun DataColumn.unfold(): AnyCol = public fun DataFrame.unfold(columns: ColumnsSelector): DataFrame = replace(columns).with { it.unfold() } public fun DataFrame.unfold(vararg columns: String): DataFrame = unfold { columns.toColumns() } public fun DataFrame.unfold(vararg columns: KProperty<*>): DataFrame = unfold { columns.toColumns() } -public fun DataFrame.unfold(vararg columns: Column): DataFrame = unfold { columns.toColumns() } +public fun DataFrame.unfold(vararg columns: AnyColumnReference): DataFrame = unfold { columns.toColumns() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt index 65e0f820c..7633687ba 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns @@ -15,7 +15,7 @@ public fun DataFrame.ungroup(columns: ColumnsSelector): DataFram } public fun DataFrame.ungroup(vararg columns: String): 
DataFrame = ungroup { columns.toColumns() } -public fun DataFrame.ungroup(vararg columns: Column): DataFrame = ungroup { columns.toColumns() } +public fun DataFrame.ungroup(vararg columns: AnyColumnReference): DataFrame = ungroup { columns.toColumns() } public fun DataFrame.ungroup(vararg columns: KProperty<*>): DataFrame = ungroup { columns.toColumns() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 651f9a3d9..53302c2ed 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -17,6 +17,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.updateWithValuePerColumnImpl import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns import org.jetbrains.kotlinx.dataframe.impl.headPlusArray +import org.jetbrains.kotlinx.dataframe.documentation.* import org.jetbrains.kotlinx.dataframe.index import kotlin.reflect.KProperty @@ -25,49 +26,57 @@ import kotlin.reflect.KProperty * * Column types can not be changed. 
* - * Usage: + * Update operation usage: * - * [update] { columns } + * [update] { [columns][Update.Columns] } * - * [.[where] { [rowCondition][UpdateOperation.Where.Predicate] } ] + * [.[where] { [rowCondition][Update.Where.Predicate] } ] * - * [.[at] ([rowIndices][UpdateOperation.At.RowIndices]) ] + * [.[at] ([rowIndices][Update.At.RowIndices]) ] * - * .[with][Update.with] { [rowExpression][UpdateOperation.With.Expression] } | .[notNull] { rowExpression } | .[perCol] { colExpression } | .[perRowCol] { rowColExpression } | .[withValue] (value) | .[withNull] () | .[withZero] () | .[asFrame] { frameExpression } + * .[with][Update.with] { [rowExpression][Update.With.Expression] } | .[notNull] { rowExpression } | .[perCol] { colExpression } | .[perRowCol] { rowColExpression } | .[withValue] (value) | .[withNull] () | .[withZero] () | .[asFrame] { frameExpression } * - * @comment TODO + * {@comment TODO * rowExpression: DataRow.(OldValue) -> NewValue * colExpression: DataColumn.(DataColumn) -> NewValue * rowColExpression: DataRow.(DataColumn) -> NewValue - * frameExpression: DataFrame.(DataFrame) -> DataFrame - * + * frameExpression: DataFrame.(DataFrame) -> DataFrame} */ -internal interface UpdateOperation { +public data class Update( + val df: DataFrame, + val filter: RowValueFilter?, + val columns: ColumnsSelector, +) { + public fun cast(): Update = + Update(df, filter as RowValueFilter?, columns as ColumnsSelector) + + /** Select the columns to update. See {@include [SelectingColumnsLink]}. */ + internal interface Columns /** @param columns The [ColumnsSelector] used to select the columns of this [DataFrame] to update. */ - interface ColumnsSelectorParam + internal interface ColumnsSelectorParam /** @param columns An [Iterable] of [ColumnReference]s of this [DataFrame] to update. */ - interface ColumnReferenceIterableParam + internal interface ColumnReferenceIterableParam /** @param columns The [ColumnReference]s of this [DataFrame] to update. 
*/ - interface ColumnReferencesParam + internal interface ColumnReferencesParam /** @param columns The [KProperty] values corresponding to columns of this [DataFrame] to update. */ - interface KPropertyColumnsParam + internal interface KPropertyColumnsParam /** @param columns The column names belonging to this [DataFrame] to update. */ - interface StringColumnsParam + internal interface StringColumnsParam /** - * Only update the columns that pass a certain [predicate][UpdateOperation.Where.Predicate]. + * Only update the columns that pass a certain [predicate][Update.Where.Predicate]. * * For example: * ```kotlin * df.update { city }.where { name.firstName == "Alice" }.withValue("Paris") * ``` */ - interface Where { + internal interface Where { /** The condition for rows to be included. A filter if you will. * @@ -76,7 +85,7 @@ internal interface UpdateOperation { } /** - * Only update the columns at certain given [row indices][UpdateOperation.At.RowIndices]: + * Only update the columns at certain given [row indices][Update.At.RowIndices]: * * Either a [Collection]<[Int]>, an [IntRange], or just `vararg` indices. * @@ -85,7 +94,7 @@ internal interface UpdateOperation { * df.update { city }.at(5..10).withValue("Paris") * ``` */ - interface At { + internal interface At { /** The indices of the rows to update. */ interface RowIndices @@ -99,7 +108,7 @@ internal interface UpdateOperation { * df.update { city }.with { name.firstName + " from " + it } * ``` */ - interface With { + internal interface With { /** The expression to update the selected columns with. 
* @@ -110,86 +119,77 @@ internal interface UpdateOperation { } /** - * @include [UpdateOperation] - * @include [AccessApi.AnyApiLink] - * @include [UpdateOperation.ColumnsSelectorParam] + * @include [Update] + * @include [AccessApi.AnyApiLinks] + * @include [Update.ColumnsSelectorParam] */ public fun DataFrame.update(columns: ColumnsSelector): Update = Update(this, null, columns) /** - * @include [UpdateOperation] + * @include [Update] * API: * - {@include [AccessApi.StringApiLink]} * - * @include [UpdateOperation.StringColumnsParam] + * @include [Update.StringColumnsParam] */ public fun DataFrame.update(vararg columns: String): Update = update { columns.toColumns() } /** - * @include [UpdateOperation] + * @include [Update] * API: * - {@include [AccessApi.KPropertiesApiLink]} * - * @include [UpdateOperation.KPropertyColumnsParam] + * @include [Update.KPropertyColumnsParam] */ public fun DataFrame.update(vararg columns: KProperty): Update = update { columns.toColumns() } /** - * @include [UpdateOperation] + * @include [Update] * API: * - {@include [AccessApi.ColumnAccessorsApiLink]} * - * @include [UpdateOperation.ColumnReferencesParam] + * @include [Update.ColumnReferencesParam] */ public fun DataFrame.update(vararg columns: ColumnReference): Update = update { columns.toColumns() } /** - * @include [UpdateOperation] + * @include [Update] * API: * - {@include [AccessApi.ColumnAccessorsApiLink]} * - * @include [UpdateOperation.ColumnReferenceIterableParam] + * @include [Update.ColumnReferenceIterableParam] */ public fun DataFrame.update(columns: Iterable>): Update = update { columns.toColumnSet() } -public data class Update( - val df: DataFrame, - val filter: RowValueFilter?, - val columns: ColumnsSelector -) { - public fun cast(): Update = - Update(df, filter as RowValueFilter?, columns as ColumnsSelector) -} - /** - * @include [UpdateOperation.Where] + * @include [Update.Where] * - * @param predicate {@include [UpdateOperation.Where.Predicate]} + * @param predicate 
{@include [Update.Where.Predicate]} */ public fun Update.where(predicate: RowValueFilter): Update = copy(filter = filter and predicate) /** - * @include [UpdateOperation.At] + * @include [Update.At] * - * @param rowIndices {@include [UpdateOperation.At.RowIndices]} + * @param rowIndices {@include [Update.At.RowIndices]} */ public fun Update.at(rowIndices: Collection): Update = where { index in rowIndices } /** - * @include [UpdateOperation.At] + * @include [Update.At] * - * @param rowIndices {@include [UpdateOperation.At.RowIndices]} + * @param rowIndices {@include [Update.At.RowIndices]} */ public fun Update.at(vararg rowIndices: Int): Update = at(rowIndices.toSet()) /** - * @include [UpdateOperation.At] + * @include [Update.At] * - * @param rowRange {@include [UpdateOperation.At.RowIndices]} + * @param rowRange {@include [Update.At.RowIndices]} */ public fun Update.at(rowRange: IntRange): Update = where { index in rowRange } @@ -199,9 +199,9 @@ public infix fun Update.perRowCol(expression: RowColumnExpression = AddDataRow.(C) -> R /** - * @include [UpdateOperation.With] + * @include [Update.With] * - * @param expression {@include [UpdateOperation.With.Expression]} + * @param expression {@include [Update.With.Expression]} */ public infix fun Update.with(expression: UpdateExpression): DataFrame = updateImpl { row, _, value -> diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt index 7de0675a8..fcfef0e04 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import 
org.jetbrains.kotlinx.dataframe.DataFrame @@ -70,7 +70,7 @@ public fun DataFrame.valueCounts( resultColumn: String = defaultCountColumnName ): DataFrame = valueCounts(sort, ascending, dropNA, resultColumn) { columns.toColumns() } public fun DataFrame.valueCounts( - vararg columns: Column, + vararg columns: AnyColumnReference, sort: Boolean = true, ascending: Boolean = false, dropNA: Boolean = true, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt index 417487f73..a6e949a9e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow @@ -24,7 +24,7 @@ public fun DataFrame.valuesNotNull(byRow: Boolean = false): Sequence // region GroupBy -public fun Grouped.values(vararg columns: Column, dropNA: Boolean = false, distinct: Boolean = false): DataFrame = values(dropNA, distinct) { columns.toColumns() } +public fun Grouped.values(vararg columns: AnyColumnReference, dropNA: Boolean = false, distinct: Boolean = false): DataFrame = values(dropNA, distinct) { columns.toColumns() } public fun Grouped.values(vararg columns: String, dropNA: Boolean = false, distinct: Boolean = false): DataFrame = values(dropNA, distinct) { columns.toColumns() } public fun Grouped.values( dropNA: Boolean = false, @@ -40,7 +40,7 @@ public fun Grouped.values(dropNA: Boolean = false, distinct: Boolean = fa public fun ReducedGroupBy.values(): DataFrame = values(groupBy.remainingColumnsSelector()) public fun ReducedGroupBy.values( - vararg columns: Column + vararg columns: AnyColumnReference ): 
DataFrame = values { columns.toColumns() } public fun ReducedGroupBy.values( @@ -66,7 +66,7 @@ public fun Pivot.values( columns: ColumnsForAggregateSelector ): DataRow = delegate { values(dropNA, distinct, separate, columns) } public fun Pivot.values( - vararg columns: Column, + vararg columns: AnyColumnReference, dropNA: Boolean = false, distinct: Boolean = false, separate: Boolean = false @@ -95,7 +95,7 @@ public fun ReducedPivot.values( ): DataRow = pivot.delegate { reduce(reducer).values(separate = separate) } public fun ReducedPivot.values( - vararg columns: Column, + vararg columns: AnyColumnReference, separate: Boolean = false ): DataRow = values(separate) { columns.toColumns() } @@ -121,7 +121,7 @@ public fun ReducedPivot.values( public fun PivotGroupBy.values(dropNA: Boolean = false, distinct: Boolean = false, separate: Boolean = false): DataFrame = values(dropNA, distinct, separate, remainingColumnsSelector()) public fun PivotGroupBy.values( - vararg columns: Column, + vararg columns: AnyColumnReference, dropNA: Boolean = false, distinct: Boolean = false, separate: Boolean = false @@ -155,7 +155,7 @@ public fun ReducedPivotGroupBy.values( ): DataFrame = values(separate, pivot.remainingColumnsSelector()) public fun ReducedPivotGroupBy.values( - vararg columns: Column, + vararg columns: AnyColumnReference, separate: Boolean = false ): DataFrame = values(separate) { columns.toColumns() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt index 7b5942b83..04ae27ae3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.DataFrame import 
org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowExpression @@ -37,7 +37,7 @@ public inline fun ReducedPivotGroupBy.with(noinline expression return pivot.aggregate { val value = reducer(this)?.let { val value = expression(it, it) - if (value is Column) it[value] + if (value is AnyColumnReference) it[value] else value } internal().yield(emptyPath(), value, type) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt index 3217ddde7..073ea65c2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt @@ -21,12 +21,12 @@ import org.jetbrains.kotlinx.dataframe.documentation.samples.ApiLevels as ApiLev internal interface AccessApi { /** API: - * - {@include [AccessApi.ExtensionPropertiesApiLink]} - * - {@include [AccessApi.KPropertiesApiLink]} - * - {@include [AccessApi.ColumnAccessorsApiLink]} - * - {@include [AccessApi.StringApiLink]} + * - {@include [ExtensionPropertiesApiLink]} + * - {@include [KPropertiesApiLink]} + * - {@include [ColumnAccessorsApiLink]} + * - {@include [StringApiLink]} * */ - interface AnyApiLink + interface AnyApiLinks /** * String API. @@ -40,7 +40,7 @@ internal interface AccessApi { */ interface StringApi - /** String API [AccessApi.StringApi]. */ + /** [String API][StringApi] */ interface StringApiLink /** @@ -55,7 +55,7 @@ internal interface AccessApi { */ interface ColumnAccessorsApi - /** Column Accessors API [AccessApi.ColumnAccessorsApi]. */ + /** [Column Accessors API][AccessApi.ColumnAccessorsApi] */ interface ColumnAccessorsApiLink /** @@ -72,7 +72,7 @@ internal interface AccessApi { */ interface KPropertiesApi - /** KProperties API [AccessApi.KPropertiesApi]. 
*/ + /** [KProperties API][KPropertiesApi] */ interface KPropertiesApiLink /** @@ -87,6 +87,9 @@ internal interface AccessApi { */ interface ExtensionPropertiesApi - /** Extension Properties API [AccessApi.ExtensionPropertiesApi]. */ + /** [Extension Properties API][ExtensionPropertiesApi] */ interface ExtensionPropertiesApiLink } + +/** [Access Api][AccessApi] */ +internal interface AccessApiLink diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt new file mode 100644 index 000000000..dc6a989b8 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt @@ -0,0 +1,70 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.* + +/** [Selecting Columns][SelectingColumns] */ +internal interface SelectingColumnsLink + +/** + * Selecting columns for various operations (including but not limited to + * [DataFrame.select], [DataFrame.update], [DataFrame.gather], and [DataFrame.fillNulls]) + * can be done in the following ways: + * - {@include [Dsl]} + * - {@include [ColumnNames]} + * - {@include [ColumnAccessors]} + * - {@include [KProperties]} + */ +internal interface SelectingColumns { + + /** + * The column selector DSL (Any {@include [AccessApiLink]}). + * See {@include [ColumnSelectionDslLink]} for more details. + * + * For example: + * ```kotlin + * df.select { length and age } + * df.select { cols(1..5) } + * df.select { colsOf() } + * ``` + */ + interface Dsl + + /** + * Column names ({@include [AccessApi.StringApiLink]}). + * + * For example: + * ```kotlin + * df.select("length", "age") + * df.select(listOf("length", "age")) + * ``` + */ + interface ColumnNames + + /** + * Column accessors ({@include [AccessApi.ColumnAccessorsApiLink]}). 
+ * + * For example: + * ```kotlin + * val length by column() + * val age by column() + * df.select(length, age) + * df.select(listOf(length, age)) + * ``` + */ + interface ColumnAccessors + + /** + * KProperties ({@include [AccessApi.KPropertiesApiLink]}). + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * + * df.select(Person::length, Person::age) + * df.select(listOf(Person::length, Person::age)) + * ``` + */ + interface KProperties + +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregations.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregations.kt index 925a70fdd..f18c558ca 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregations.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregations.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation -import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowExpression @@ -90,7 +90,7 @@ internal fun AggregateInternalDsl.columnValues( internal fun AggregateInternalDsl.withExpr(type: KType, path: ColumnPath, expression: RowExpression) { val values = df.rows().map { val value = expression(it, it) - if (value is Column) it[value] + if (value is AnyColumnReference) it[value] else value } yieldOneOrMany(path, values, type) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/move.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/move.kt index 0e51a7362..e70c945ef 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/move.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/move.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.impl.api -import 
org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -43,7 +43,7 @@ internal fun MoveClause.afterOrBefore(column: ColumnSelector, internal fun MoveClause.moveImpl( under: Boolean = false, - newPathExpression: ColumnsSelectionDsl.(ColumnWithPath) -> Column + newPathExpression: ColumnsSelectionDsl.(ColumnWithPath) -> AnyColumnReference ): DataFrame { val receiver = object : DataFrameReceiver(df, UnresolvedColumnsPolicy.Fail), ColumnsSelectionDsl {} val removeResult = df.removeImpl(columns = columns) From 9ec93b43dab298a26db0d9572975b82f06b8bc31 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 1 Feb 2023 18:31:03 +0100 Subject: [PATCH 12/50] continuing with kdocs with updated processor --- core/build.gradle.kts | 5 +- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 105 +++++++++++++----- .../jetbrains/kotlinx/dataframe/api/update.kt | 95 +++++++++------- .../documentation/DocumentationUrls.kt | 3 + .../documentation/HighLevelOperations.kt | 42 ++++--- 5 files changed, 158 insertions(+), 92 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index f1f48fe68..3e54374f3 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.7" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.10" // id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } @@ -66,9 +66,10 @@ val processKdocIncludeMain by creatingProcessDocTask( ) { processors = listOf( INCLUDE_DOC_PROCESSOR, - SAMPLE_DOC_PROCESSOR, INCLUDE_FILE_DOC_PROCESSOR, + INCLUDE_ARG_DOC_PROCESSOR, COMMENT_DOC_PROCESSOR, + SAMPLE_DOC_PROCESSOR, ) debug = true } diff --git 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index 3b8a9c8f6..afa6c3770 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.Update.Usage import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.documentation.* @@ -21,64 +22,61 @@ import kotlin.reflect.KProperty /** * Replace `null` values with given value or expression. * Specific case of [Update]. - * TODO samples * - * @include [DocumentationUrls.Fill.FillNulls] + * Check out [how to use `fillNulls`][Usage]. + * + * For more information: {@include [DocumentationUrls.Fill.FillNulls]} */ -internal interface FillNullsOperation +internal interface FillNulls { + + /** @include [Update.Usage] {@arg [Update.UpdateOperationArg] [fillNulls]} */ + interface Usage +} + +/** {@arg [OperationArg] fillNulls} */ +internal interface SetFillNullsOperationArg /** - * @include [FillNullsOperation] + * @include [FillNulls] * - * @param columns The [ColumnsSelector] used to select columns to update. 
+ * @include [Update.DslParam] + * @include [SetFillNullsOperationArg] */ public fun DataFrame.fillNulls(columns: ColumnsSelector): Update = update(columns).where { it == null } /** - * @include [FillNullsOperation] + * @include [FillNulls] * - * @receiver [DataFrame] - * @param T T - * @param C C - * @param columns Cols - * @return [Update] + * @include [Update.ColumnNamesParam] + * @include [SetFillNullsOperationArg] */ public fun DataFrame.fillNulls(vararg columns: String): Update = fillNulls { columns.toColumns() } /** - * @include [FillNullsOperation] + * @include [FillNulls] * - * @receiver [DataFrame] - * @param T T - * @param C C - * @param columns Cols - * @return [Update] + * @include [Update.KPropertiesParam] + * @include [SetFillNullsOperationArg] */ public fun DataFrame.fillNulls(vararg columns: KProperty): Update = fillNulls { columns.toColumns() } /** - * @include [FillNullsOperation] + * @include [FillNulls] * - * @receiver [DataFrame] - * @param T T - * @param C C - * @param columns Cols - * @return [Update] + * @include [Update.ColumnAccessorsParam] + * @include [SetFillNullsOperationArg] */ public fun DataFrame.fillNulls(vararg columns: ColumnReference): Update = fillNulls { columns.toColumns() } /** - * @include [FillNullsOperation] + * @include [FillNulls] * - * @receiver [DataFrame] - * @param T T - * @param C C - * @param columns Cols - * @return [Update] + * @include [Update.ColumnAccessorsParam] + * @include [SetFillNullsOperationArg] */ public fun DataFrame.fillNulls(columns: Iterable>): Update = fillNulls { columns.toColumnSet() } @@ -107,18 +105,67 @@ internal inline val Float?.isNA: Boolean get() = this == null || this.isNaN() // region fillNaNs + +/** + * Replace `NaN` values with given value or expression. + * Specific case of [Update]. + * + * Check out [how to use `fillNaNs`][Usage]. 
+ * + * For more information: {@include [DocumentationUrls.Fill.FillNaNs]} + */ +internal interface FillNaNs { + + /** @include [Update.Usage] {@arg [Update.UpdateOperationArg] [fillNaNs]} */ + interface Usage +} + + +/** {@arg [OperationArg] fillNaNs} */ +internal interface SetFillNaNsOperationArg + +/** + * @include [FillNaNs] + * + * @include [Update.DslParam] + * @include [SetFillNaNsOperationArg] + */ public fun DataFrame.fillNaNs(columns: ColumnsSelector): Update = update(columns).where { it.isNaN } +/** + * @include [FillNaNs] + * + * @include [Update.ColumnNamesParam] + * @include [SetFillNaNsOperationArg] + */ public fun DataFrame.fillNaNs(vararg columns: String): Update = fillNaNs { columns.toColumns() } +/** + * @include [FillNaNs] + * + * @include [Update.KPropertiesParam] + * @include [SetFillNaNsOperationArg] + */ public fun DataFrame.fillNaNs(vararg columns: KProperty): Update = fillNaNs { columns.toColumns() } +/** + * @include [FillNaNs] + * + * @include [Update.ColumnAccessorsParam] + * @include [SetFillNaNsOperationArg] + */ public fun DataFrame.fillNaNs(vararg columns: ColumnReference): Update = fillNaNs { columns.toColumns() } +/** + * @include [FillNaNs] + * + * @include [Update.ColumnAccessorsParam] + * @include [SetFillNaNsOperationArg] + */ public fun DataFrame.fillNaNs(columns: Iterable>): Update = fillNaNs { columns.toColumnSet() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 53302c2ed..7b6a0dab5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -11,7 +11,6 @@ import org.jetbrains.kotlinx.dataframe.RowValueExpression import org.jetbrains.kotlinx.dataframe.RowValueFilter import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import 
org.jetbrains.kotlinx.dataframe.documentation.AccessApi import org.jetbrains.kotlinx.dataframe.impl.api.updateImpl import org.jetbrains.kotlinx.dataframe.impl.api.updateWithValuePerColumnImpl import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet @@ -22,25 +21,12 @@ import org.jetbrains.kotlinx.dataframe.index import kotlin.reflect.KProperty /** - * Returns [DataFrame] with changed values in some cells. + * Returns the [DataFrame] with changed values in some cells + * (column types can not be changed). * - * Column types can not be changed. + * Check out [how to use `update`][Usage]. * - * Update operation usage: - * - * [update] { [columns][Update.Columns] } - * - * [.[where] { [rowCondition][Update.Where.Predicate] } ] - * - * [.[at] ([rowIndices][Update.At.RowIndices]) ] - * - * .[with][Update.with] { [rowExpression][Update.With.Expression] } | .[notNull] { rowExpression } | .[perCol] { colExpression } | .[perRowCol] { rowColExpression } | .[withValue] (value) | .[withNull] () | .[withZero] () | .[asFrame] { frameExpression } - * - * {@comment TODO - * rowExpression: DataRow.(OldValue) -> NewValue - * colExpression: DataColumn.(DataColumn) -> NewValue - * rowColExpression: DataRow.(DataColumn) -> NewValue - * frameExpression: DataFrame.(DataFrame) -> DataFrame} + * For more information: {@include [DocumentationUrls.Update]} */ public data class Update( val df: DataFrame, @@ -50,23 +36,46 @@ public data class Update( public fun cast(): Update = Update(df, filter as RowValueFilter?, columns as ColumnsSelector) + internal interface UpdateOperationArg + + /** + * {@includeArg [UpdateOperationArg]} operation usage: + * + * {@includeArg [UpdateOperationArg]} { [columns][Columns] } + * + * [.[where] { [rowCondition][Where.Predicate] } ] + * + * [.[at] ([rowIndices][At.RowIndices]) ] + * + * .[with] { [rowExpression][With.Expression] } | .[notNull] { rowExpression } | .[perCol] { colExpression } | .[perRowCol] { rowColExpression } | .[withValue] (value) | 
.[withNull] () | .[withZero] () | .[asFrame] { frameExpression } + * + * {@comment TODO + * rowExpression: DataRow.(OldValue) -> NewValue + * colExpression: DataColumn.(DataColumn) -> NewValue + * rowColExpression: DataRow.(DataColumn) -> NewValue + * frameExpression: DataFrame.(DataFrame) -> DataFrame} + * {@arg [UpdateOperationArg] [update]} + */ + internal interface Usage + /** Select the columns to update. See {@include [SelectingColumnsLink]}. */ internal interface Columns - /** @param columns The [ColumnsSelector] used to select the columns of this [DataFrame] to update. */ - internal interface ColumnsSelectorParam - - /** @param columns An [Iterable] of [ColumnReference]s of this [DataFrame] to update. */ - internal interface ColumnReferenceIterableParam + /** {@include [SelectingColumns.Dsl]} + * @param columns The [ColumnsSelector] used to select the columns of this [DataFrame] to update. */ + internal interface DslParam - /** @param columns The [ColumnReference]s of this [DataFrame] to update. */ - internal interface ColumnReferencesParam + /** {@include [SelectingColumns.ColumnAccessors]} + * @param columns An [Iterable] of [ColumnReference]s of this [DataFrame] to update. */ + internal interface ColumnAccessorsParam - /** @param columns The [KProperty] values corresponding to columns of this [DataFrame] to update. */ - internal interface KPropertyColumnsParam + /** {@include [SelectingColumns.KProperties]} + * @param columns The [KProperty] values corresponding to columns of this [DataFrame] to update. */ + internal interface KPropertiesParam - /** @param columns The column names belonging to this [DataFrame] to update. */ - internal interface StringColumnsParam + /** {@include [SelectingColumns.ColumnNames]} + * @param columns The column names belonging to this [DataFrame] to update. */ + internal interface ColumnNamesParam /** * Only update the columns that pass a certain [predicate][Update.Where.Predicate]. 
@@ -118,48 +127,48 @@ public data class Update( } } +/** {@arg [OperationArg] update} */ +internal interface SetUpdateOperationArg + /** * @include [Update] - * @include [AccessApi.AnyApiLinks] - * @include [Update.ColumnsSelectorParam] + * + * @include [Update.DslParam] + * @include [SetUpdateOperationArg] */ public fun DataFrame.update(columns: ColumnsSelector): Update = Update(this, null, columns) /** * @include [Update] - * API: - * - {@include [AccessApi.StringApiLink]} * - * @include [Update.StringColumnsParam] + * @include [Update.ColumnNamesParam] + * @include [SetUpdateOperationArg] */ public fun DataFrame.update(vararg columns: String): Update = update { columns.toColumns() } /** * @include [Update] - * API: - * - {@include [AccessApi.KPropertiesApiLink]} * - * @include [Update.KPropertyColumnsParam] + * @include [Update.KPropertiesParam] + * @include [SetUpdateOperationArg] */ public fun DataFrame.update(vararg columns: KProperty): Update = update { columns.toColumns() } /** * @include [Update] - * API: - * - {@include [AccessApi.ColumnAccessorsApiLink]} * - * @include [Update.ColumnReferencesParam] + * @include [Update.ColumnAccessorsParam] + * @include [SetUpdateOperationArg] */ public fun DataFrame.update(vararg columns: ColumnReference): Update = update { columns.toColumns() } /** * @include [Update] - * API: - * - {@include [AccessApi.ColumnAccessorsApiLink]} * - * @include [Update.ColumnReferenceIterableParam] + * @include [Update.ColumnAccessorsParam] + * @include [SetUpdateOperationArg] */ public fun DataFrame.update(columns: Iterable>): Update = update { columns.toColumnSet() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index 011579998..7ab0acb26 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -2,6 +2,9 @@ package org.jetbrains.kotlinx.dataframe.documentation private interface DocumentationUrls { + /** [See `update` documentation.](https://kotlin.github.io/dataframe/update.html) */ + interface Update + /** [See `fill` documentation.](https://kotlin.github.io/dataframe/fill.html) */ interface Fill { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt index dc6a989b8..66e1f0724 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt @@ -2,6 +2,10 @@ package org.jetbrains.kotlinx.dataframe.documentation import org.jetbrains.kotlinx.dataframe.* import org.jetbrains.kotlinx.dataframe.api.* +import kotlin.reflect.KProperty + +/** The key for an @arg that will define the operation name for the examples below. */ +internal interface OperationArg /** [Selecting Columns][SelectingColumns] */ internal interface SelectingColumnsLink @@ -17,53 +21,55 @@ internal interface SelectingColumnsLink */ internal interface SelectingColumns { - /** - * The column selector DSL (Any {@include [AccessApiLink]}). - * See {@include [ColumnSelectionDslLink]} for more details. + /** {@arg [OperationArg] } */ + interface SetDefaultOperationArg + + /** Select columns using the {@include [ColumnSelectionDslLink]} (Any {@include [AccessApiLink]}). 
* * For example: * ```kotlin - * df.select { length and age } - * df.select { cols(1..5) } - * df.select { colsOf() } + * df.{@includeArg [OperationArg]} { length and age } + * df.{@includeArg [OperationArg]} { cols(1..5) } + * df.{@includeArg [OperationArg]} { colsOf() } * ``` + * @include [SetDefaultOperationArg] */ interface Dsl - /** - * Column names ({@include [AccessApi.StringApiLink]}). + /** Select columns using their column names ({@include [AccessApi.StringApiLink]}). * * For example: * ```kotlin - * df.select("length", "age") - * df.select(listOf("length", "age")) + * df.{@includeArg [OperationArg]}("length", "age") + * df.{@includeArg [OperationArg]}(listOf("length", "age")) * ``` + * @include [SetDefaultOperationArg] */ interface ColumnNames - /** - * Column accessors ({@include [AccessApi.ColumnAccessorsApiLink]}). + /** Select columns using column accessors ({@include [AccessApi.ColumnAccessorsApiLink]}). * * For example: * ```kotlin * val length by column() * val age by column() - * df.select(length, age) - * df.select(listOf(length, age)) + * df.{@includeArg [OperationArg]}(length, age) + * df.{@includeArg [OperationArg]}(listOf(length, age)) * ``` + * @include [SetDefaultOperationArg] */ interface ColumnAccessors - /** - * KProperties ({@include [AccessApi.KPropertiesApiLink]}). + /** Select columns using [KProperty]'s ({@include [AccessApi.KPropertiesApiLink]}). 
* * For example: * ```kotlin * data class Person(val length: Double, val age: Double) * - * df.select(Person::length, Person::age) - * df.select(listOf(Person::length, Person::age)) + * df.{@includeArg [OperationArg]}(Person::length, Person::age) + * df.{@includeArg [OperationArg]}(listOf(Person::length, Person::age)) * ``` + * @include [SetDefaultOperationArg] */ interface KProperties From 09da9b67f8b4a0e3b9f42f2c1880694aa7d711dc Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 2 Feb 2023 15:27:54 +0100 Subject: [PATCH 13/50] continuing with kdocs --- core/build.gradle.kts | 2 +- .../dataframe/api/ColumnsSelectionDsl.kt | 8 +++- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 24 ++++++------ .../jetbrains/kotlinx/dataframe/api/update.kt | 37 ++++++++++--------- .../documentation/HighLevelOperations.kt | 32 +++++++++++----- .../kotlinx/dataframe/impl/api/convertTo.kt | 5 ++- .../dataframe/impl/columns/constructors.kt | 2 +- 7 files changed, 63 insertions(+), 47 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 3e54374f3..3ac6a0e85 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.10" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.11" // id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt index 88d62b4e3..448b7bf36 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt @@ -35,6 +35,10 @@ import kotlin.reflect.KProperty import kotlin.reflect.KType import kotlin.reflect.typeOf +/** [Column Selection 
DSL][ColumnSelectionDsl] */ +internal interface ColumnSelectionDslLink + +/** TODO: Put examples and explanations here */ public interface ColumnSelectionDsl : ColumnsContainer { public operator fun ColumnReference.invoke(): DataColumn = get(this) @@ -50,10 +54,10 @@ public interface ColumnSelectionDsl : ColumnsContainer { public operator fun String.get(column: String): ColumnPath = pathOf(this, column) } - /** [Columns Selection DSL][ColumnsSelectionDsl] */ -internal interface ColumnSelectionDslLink +internal interface ColumnsSelectionDslLink +/** TODO: Put examples and explanations here */ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColumn> { public fun ColumnSet.first(condition: ColumnFilter): SingleColumn = diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index afa6c3770..0d17a890d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -39,8 +39,8 @@ internal interface SetFillNullsOperationArg /** * @include [FillNulls] * + * @include [SelectingColumns.Dsl] {@include [SetFillNullsOperationArg]} * @include [Update.DslParam] - * @include [SetFillNullsOperationArg] */ public fun DataFrame.fillNulls(columns: ColumnsSelector): Update = update(columns).where { it == null } @@ -48,8 +48,8 @@ public fun DataFrame.fillNulls(columns: ColumnsSelector): Updat /** * @include [FillNulls] * + * @include [SelectingColumns.ColumnNames] {@include [SetFillNullsOperationArg]} * @include [Update.ColumnNamesParam] - * @include [SetFillNullsOperationArg] */ public fun DataFrame.fillNulls(vararg columns: String): Update = fillNulls { columns.toColumns() } @@ -57,8 +57,8 @@ public fun DataFrame.fillNulls(vararg columns: String): Update = /** * @include [FillNulls] * + * @include [SelectingColumns.KProperties] {@include [SetFillNullsOperationArg]} * @include 
[Update.KPropertiesParam] - * @include [SetFillNullsOperationArg] */ public fun DataFrame.fillNulls(vararg columns: KProperty): Update = fillNulls { columns.toColumns() } @@ -66,8 +66,8 @@ public fun DataFrame.fillNulls(vararg columns: KProperty): Update DataFrame.fillNulls(vararg columns: ColumnReference): Update = fillNulls { columns.toColumns() } @@ -75,8 +75,8 @@ public fun DataFrame.fillNulls(vararg columns: ColumnReference): Up /** * @include [FillNulls] * + * @include [SelectingColumns.ColumnAccessors] {@include [SetFillNullsOperationArg]} * @include [Update.ColumnAccessorsParam] - * @include [SetFillNullsOperationArg] */ public fun DataFrame.fillNulls(columns: Iterable>): Update = fillNulls { columns.toColumnSet() } @@ -105,7 +105,6 @@ internal inline val Float?.isNA: Boolean get() = this == null || this.isNaN() // region fillNaNs - /** * Replace `NaN` values with given value or expression. * Specific case of [Update]. @@ -120,15 +119,14 @@ internal interface FillNaNs { interface Usage } - /** {@arg [OperationArg] fillNaNs} */ internal interface SetFillNaNsOperationArg /** * @include [FillNaNs] - * + * @include [SelectingColumns.Dsl] {@include [SetFillNaNsOperationArg]} * @include [Update.DslParam] - * @include [SetFillNaNsOperationArg] + * */ public fun DataFrame.fillNaNs(columns: ColumnsSelector): Update = update(columns).where { it.isNaN } @@ -136,8 +134,8 @@ public fun DataFrame.fillNaNs(columns: ColumnsSelector): Update< /** * @include [FillNaNs] * + * @include [SelectingColumns.ColumnNames] {@include [SetFillNaNsOperationArg]} * @include [Update.ColumnNamesParam] - * @include [SetFillNaNsOperationArg] */ public fun DataFrame.fillNaNs(vararg columns: String): Update = fillNaNs { columns.toColumns() } @@ -145,8 +143,8 @@ public fun DataFrame.fillNaNs(vararg columns: String): Update = /** * @include [FillNaNs] * + * @include [SelectingColumns.KProperties] {@include [SetFillNaNsOperationArg]} * @include [Update.KPropertiesParam] - * @include 
[SetFillNaNsOperationArg] */ public fun DataFrame.fillNaNs(vararg columns: KProperty): Update = fillNaNs { columns.toColumns() } @@ -154,8 +152,8 @@ public fun DataFrame.fillNaNs(vararg columns: KProperty): Update DataFrame.fillNaNs(vararg columns: ColumnReference): Update = fillNaNs { columns.toColumns() } @@ -163,8 +161,8 @@ public fun DataFrame.fillNaNs(vararg columns: ColumnReference): Upd /** * @include [FillNaNs] * + * @include [SelectingColumns.ColumnAccessors] {@include [SetFillNaNsOperationArg]} * @include [Update.ColumnAccessorsParam] - * @include [SetFillNaNsOperationArg] */ public fun DataFrame.fillNaNs(columns: Iterable>): Update = fillNaNs { columns.toColumnSet() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 7b6a0dab5..668a1d077 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -10,13 +10,14 @@ import org.jetbrains.kotlinx.dataframe.RowColumnExpression import org.jetbrains.kotlinx.dataframe.RowValueExpression import org.jetbrains.kotlinx.dataframe.RowValueFilter import org.jetbrains.kotlinx.dataframe.Selector +import org.jetbrains.kotlinx.dataframe.api.Update.Usage import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.documentation.* import org.jetbrains.kotlinx.dataframe.impl.api.updateImpl import org.jetbrains.kotlinx.dataframe.impl.api.updateWithValuePerColumnImpl import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns import org.jetbrains.kotlinx.dataframe.impl.headPlusArray -import org.jetbrains.kotlinx.dataframe.documentation.* import org.jetbrains.kotlinx.dataframe.index import kotlin.reflect.KProperty @@ -40,14 +41,14 @@ public data class Update( /** * {@includeArg [UpdateOperationArg]} operation usage: - * + * 
___ * {@includeArg [UpdateOperationArg]} { [columns][Columns] } * - * [.[where] { [rowCondition][Where.Predicate] } ] + * - [.[where] { [rowCondition][Where.Predicate] } ] * - * [.[at] ([rowIndices][At.RowIndices]) ] + * - [.[at] ([rowIndices][At.RowIndices]) ] * - * .[with] { [rowExpression][With.Expression] } | .[notNull] { rowExpression } | .[perCol] { colExpression } | .[perRowCol] { rowColExpression } | .[withValue] (value) | .[withNull] () | .[withZero] () | .[asFrame] { frameExpression } + * - .[with] { [rowExpression][With.Expression] } | .[notNull] { rowExpression } | .[perCol] { colExpression } | .[perRowCol] { rowColExpression } | .[withValue] (value) | .[withNull] () | .[withZero] () | .[asFrame] { frameExpression } * * {@comment TODO * rowExpression: DataRow.(OldValue) -> NewValue @@ -61,20 +62,16 @@ public data class Update( /** Select the columns to update. See {@include [SelectingColumnsLink]}. */ internal interface Columns - /** {@include [SelectingColumns.Dsl]} - * @param columns The [ColumnsSelector] used to select the columns of this [DataFrame] to update. */ + /** @param columns The [ColumnsSelector] used to select the columns of this [DataFrame] to update. */ internal interface DslParam - /** {@include [SelectingColumns.ColumnAccessors]} - * @param columns An [Iterable] of [ColumnReference]s of this [DataFrame] to update. */ + /** @param columns The [ColumnReference]s of this [DataFrame] to update. */ internal interface ColumnAccessorsParam - /** {@include [SelectingColumns.KProperties]} - * @param columns The [KProperty] values corresponding to columns of this [DataFrame] to update. */ + /** @param columns The [KProperty] values corresponding to columns of this [DataFrame] to update. */ internal interface KPropertiesParam - /** {@include [SelectingColumns.ColumnNames]} - * @param columns The column names belonging to this [DataFrame] to update. */ + /** @param columns The column names belonging to this [DataFrame] to update. 
*/ internal interface ColumnNamesParam /** @@ -127,14 +124,16 @@ public data class Update( } } +// region update + /** {@arg [OperationArg] update} */ internal interface SetUpdateOperationArg /** * @include [Update] * + * @include [SelectingColumns.Dsl] {@include [SetUpdateOperationArg]} * @include [Update.DslParam] - * @include [SetUpdateOperationArg] */ public fun DataFrame.update(columns: ColumnsSelector): Update = Update(this, null, columns) @@ -142,24 +141,24 @@ public fun DataFrame.update(columns: ColumnsSelector): Update DataFrame.update(vararg columns: String): Update = update { columns.toColumns() } /** * @include [Update] * + * @include [SelectingColumns.KProperties] {@include [SetUpdateOperationArg]} * @include [Update.KPropertiesParam] - * @include [SetUpdateOperationArg] */ public fun DataFrame.update(vararg columns: KProperty): Update = update { columns.toColumns() } /** * @include [Update] * + * @include [SelectingColumns.ColumnAccessors] {@include [SetUpdateOperationArg]} * @include [Update.ColumnAccessorsParam] - * @include [SetUpdateOperationArg] */ public fun DataFrame.update(vararg columns: ColumnReference): Update = update { columns.toColumns() } @@ -167,12 +166,14 @@ public fun DataFrame.update(vararg columns: ColumnReference): Updat /** * @include [Update] * + * @include [SelectingColumns.ColumnAccessors] {@include [SetUpdateOperationArg]} * @include [Update.ColumnAccessorsParam] - * @include [SetUpdateOperationArg] */ public fun DataFrame.update(columns: Iterable>): Update = update { columns.toColumnSet() } +// endregion + /** * @include [Update.Where] * diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt index 66e1f0724..00f52803f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt +++ 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt @@ -2,16 +2,24 @@ package org.jetbrains.kotlinx.dataframe.documentation import org.jetbrains.kotlinx.dataframe.* import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.SingleColumn import kotlin.reflect.KProperty +/** {@comment + * In this file we provide documentation for high-level operations such as + * the operation of selecting columns. + * } + */ + + /** The key for an @arg that will define the operation name for the examples below. */ internal interface OperationArg /** [Selecting Columns][SelectingColumns] */ internal interface SelectingColumnsLink -/** - * Selecting columns for various operations (including but not limited to +/** Selecting columns for various operations (including but not limited to * [DataFrame.select], [DataFrame.update], [DataFrame.gather], and [DataFrame.fillNulls]) * can be done in the following ways: * - {@include [Dsl]} @@ -24,7 +32,12 @@ internal interface SelectingColumns { /** {@arg [OperationArg] } */ interface SetDefaultOperationArg - /** Select columns using the {@include [ColumnSelectionDslLink]} (Any {@include [AccessApiLink]}). + /** Select or express columns using the Column(s) Selection DSL. + * (Any {@include [AccessApiLink]}). + * + * The DSL comes in the form of either a [ColumnSelector]- or [ColumnsSelector] lambda, + * which operate in the {@include [ColumnSelectionDslLink]} or the {@include [ColumnsSelectionDslLink]} and + * expect you to return a [SingleColumn] or [ColumnSet], respectively. * * For example: * ```kotlin @@ -36,41 +49,40 @@ internal interface SelectingColumns { */ interface Dsl - /** Select columns using their column names ({@include [AccessApi.StringApiLink]}). + /** Select columns using their column names + * ({@include [AccessApi.StringApiLink]}). 
* * For example: * ```kotlin * df.{@includeArg [OperationArg]}("length", "age") - * df.{@includeArg [OperationArg]}(listOf("length", "age")) * ``` * @include [SetDefaultOperationArg] */ interface ColumnNames - /** Select columns using column accessors ({@include [AccessApi.ColumnAccessorsApiLink]}). + /** Select columns using column accessors + * ({@include [AccessApi.ColumnAccessorsApiLink]}). * * For example: * ```kotlin * val length by column() * val age by column() * df.{@includeArg [OperationArg]}(length, age) - * df.{@includeArg [OperationArg]}(listOf(length, age)) * ``` * @include [SetDefaultOperationArg] */ interface ColumnAccessors - /** Select columns using [KProperty]'s ({@include [AccessApi.KPropertiesApiLink]}). + /** Select columns using [KProperty]'s + * ({@include [AccessApi.KPropertiesApiLink]}). * * For example: * ```kotlin * data class Person(val length: Double, val age: Double) * * df.{@includeArg [OperationArg]}(Person::length, Person::age) - * df.{@includeArg [OperationArg]}(listOf(Person::length, Person::age)) * ``` * @include [SetDefaultOperationArg] */ interface KProperties - } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt index 405078255..e11e8bc0b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt @@ -31,6 +31,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.emptyPath import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyColumn import 
org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame @@ -265,8 +266,8 @@ internal fun AnyFrame.convertToImpl( dsl.fillers.forEach { filler -> val paths = result.getColumnPaths(filler.columns) - missingPaths.removeAll(paths) - result = result.update(paths).with { + missingPaths.removeAll(paths.toSet()) + result = result.update { paths.toColumnSet() }.with { filler.expr(this, this) } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index 5ba0ffaaf..68f50e0e4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -189,7 +189,7 @@ internal fun Array>.toColumns(): ColumnSet = map { it.to @PublishedApi internal fun Array>.toColumns(): ColumnSet = asIterable().toColumnSet() -internal fun Iterable.toColumns() = map { it.toColumnAccessor() }.toColumnSet() +internal fun Iterable.toColumns(): ColumnSet = map { it.toColumnAccessor() }.toColumnSet() // endregion From f87819e0b7677adb16bb3a71d388fc8c4841eac3 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Sun, 5 Feb 2023 13:26:16 +0100 Subject: [PATCH 14/50] gather.where now uses RowValueFilter instead of Predicate, similar to other where-occurrences --- .../jetbrains/kotlinx/dataframe/api/gather.kt | 21 ++++++++++++------- .../kotlinx/dataframe/impl/api/gather.kt | 8 +++---- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt index a65dfcc1f..c65604ae1 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt @@ -2,10 +2,9 @@ package org.jetbrains.kotlinx.dataframe.api import 
org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.Predicate +import org.jetbrains.kotlinx.dataframe.RowValueFilter import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.impl.and import org.jetbrains.kotlinx.dataframe.impl.api.gatherImpl import org.jetbrains.kotlinx.dataframe.impl.columnName import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns @@ -17,11 +16,19 @@ public fun DataFrame.gather(selector: ColumnsSelector): Gather(), { it }, null ) -public fun DataFrame.gather(vararg columns: String): Gather = gather { columns.toColumns() } -public fun DataFrame.gather(vararg columns: ColumnReference): Gather = gather { columns.toColumns() } -public fun DataFrame.gather(vararg columns: KProperty): Gather = gather { columns.toColumns() } -public fun Gather.where(filter: Predicate): Gather = copy(filter = this.filter and filter) +public fun DataFrame.gather(vararg columns: String): Gather = + gather { columns.toColumns() } + +public fun DataFrame.gather(vararg columns: ColumnReference): Gather = + gather { columns.toColumns() } + +public fun DataFrame.gather(vararg columns: KProperty): Gather = + gather { columns.toColumns() } + +public fun Gather.where(filter: RowValueFilter): Gather = + copy(filter = this.filter and filter) + public fun Gather.notNull(): Gather = where { it != null } as Gather public fun Gather.explodeLists(): Gather = copy(explode = true) @@ -35,7 +42,7 @@ public fun Gather.mapValues(transform: (C) -> R): Gathe public data class Gather( internal val df: DataFrame, internal val columns: ColumnsSelector, - internal val filter: ((C) -> Boolean)? = null, + internal val filter: RowValueFilter? = null, internal val keyType: KType? = null, internal val keyTransform: ((String) -> K), internal val valueTransform: ((C) -> R)? 
= null, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt index af046e5ef..4844853dd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt @@ -60,9 +60,9 @@ internal fun Gather.gatherImpl( } // explode keys and values - when { - keysColumn != null && valuesColumn != null -> df = df.explode(keysColumn, valuesColumn) - else -> df = df.explode(keysColumn ?: valuesColumn!!) + df = when { + keysColumn != null && valuesColumn != null -> df.explode(keysColumn, valuesColumn) + else -> df.explode(keysColumn ?: valuesColumn!!) } // explode values in lists @@ -76,7 +76,7 @@ internal fun Gather.gatherImpl( val value = col[row] when { explode && value is List<*> -> { - val filtered = (value as List).filter(filter) + val filtered = (value as List).filter { filter(it) } val transformed = valueTransform?.let { filtered.map(it) } ?: filtered keys[colIndex] to transformed } From 005e11d58d1d4330c7a10fa3a3ce1da75ba4ed0f Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Sun, 5 Feb 2023 18:28:56 +0100 Subject: [PATCH 15/50] wip update documentation --- core/build.gradle.kts | 4 +- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 4 +- .../jetbrains/kotlinx/dataframe/api/update.kt | 70 +++++++++++-------- .../documentation/DocumentationUrls.kt | 34 ++++++--- ...LevelOperations.kt => SelectingColumns.kt} | 35 +++++----- .../dataframe/documentation/SelectingRows.kt | 47 +++++++++++++ 6 files changed, 135 insertions(+), 59 deletions(-) rename core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/{HighLevelOperations.kt => SelectingColumns.kt} (78%) create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 3ac6a0e85..4135f0e4f 100644 --- 
a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,8 +14,8 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.11" -// id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" +// id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.11" + id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } group = "org.jetbrains.kotlinx" diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index 0d17a890d..44f1de548 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -33,7 +33,7 @@ internal interface FillNulls { interface Usage } -/** {@arg [OperationArg] fillNulls} */ +/** {@arg [OperationArg] [fillNulls][fillNulls]} */ internal interface SetFillNullsOperationArg /** @@ -119,7 +119,7 @@ internal interface FillNaNs { interface Usage } -/** {@arg [OperationArg] fillNaNs} */ +/** {@arg [OperationArg] [fillNaNs][fillNaNs]} */ internal interface SetFillNaNsOperationArg /** diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 668a1d077..0d32ef3ab 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -22,11 +22,12 @@ import org.jetbrains.kotlinx.dataframe.index import kotlin.reflect.KProperty /** + * ## The Update Operation + * * Returns the [DataFrame] with changed values in some cells * (column types can not be changed). * - * Check out [how to use `update`][Usage]. - * + * Check out [how to use the update operation fully][Usage]. 
* For more information: {@include [DocumentationUrls.Update]} */ public data class Update( @@ -37,29 +38,38 @@ public data class Update( public fun cast(): Update = Update(df, filter as RowValueFilter?, columns as ColumnsSelector) + /** This argument providing the (clickable) name of the update-like function. + * Note: If clickable, make sure to [alias][your type]. + */ internal interface UpdateOperationArg /** * {@includeArg [UpdateOperationArg]} operation usage: * ___ - * {@includeArg [UpdateOperationArg]} { [columns][Columns] } + * {@includeArg [UpdateOperationArg]} `{ `[columns][Columns]` }` * - * - [.[where] { [rowCondition][Where.Predicate] } ] + * - `[.`[where][where]` { `[rowCondition][SelectingRows.RowValueCondition]` } ]` * - * - [.[at] ([rowIndices][At.RowIndices]) ] + * - `[.`[at][at]` (`[rowIndices][At.RowIndices]`) ]` * - * - .[with] { [rowExpression][With.Expression] } | .[notNull] { rowExpression } | .[perCol] { colExpression } | .[perRowCol] { rowColExpression } | .[withValue] (value) | .[withNull] () | .[withZero] () | .[asFrame] { frameExpression } + * - `.`[with][with]` { `[rowExpression][With.Expression]` } + * | .`[notNull][notNull]` { rowExpression } + * | .`[perCol][perCol]` { colExpression } + * | .`[perRowCol][perRowCol]` { rowColExpression } + * | .`[withValue][withValue]`(value) | .`[withNull]`() + * | .`[withZero][withZero]`() + * | .`[asFrame][asFrame]` { frameExpression }` * * {@comment TODO * rowExpression: DataRow.(OldValue) -> NewValue * colExpression: DataColumn.(DataColumn) -> NewValue * rowColExpression: DataRow.(DataColumn) -> NewValue * frameExpression: DataFrame.(DataFrame) -> DataFrame} - * {@arg [UpdateOperationArg] [update]} + * {@arg [UpdateOperationArg] [update][update]} */ internal interface Usage - /** Select the columns to update. See {@include [SelectingColumnsLink]}. */ + /** Select the columns to update. See {@include [SelectingColumnsLink]} for all the options. 
*/ internal interface Columns /** @param columns The [ColumnsSelector] used to select the columns of this [DataFrame] to update. */ @@ -74,22 +84,6 @@ public data class Update( /** @param columns The column names belonging to this [DataFrame] to update. */ internal interface ColumnNamesParam - /** - * Only update the columns that pass a certain [predicate][Update.Where.Predicate]. - * - * For example: - * ```kotlin - * df.update { city }.where { name.firstName == "Alice" }.withValue("Paris") - * ``` - */ - internal interface Where { - - /** The condition for rows to be included. A filter if you will. - * - * Can be seen as [DataRow].(oldValue: [C]) -> [Boolean] */ - interface Predicate - } - /** * Only update the columns at certain given [row indices][Update.At.RowIndices]: * @@ -119,19 +113,23 @@ public data class Update( /** The expression to update the selected columns with. * * Can be seen as [DataRow].(oldValue: [C]) -> newValue: [C]? - * */ + */ interface Expression } } // region update -/** {@arg [OperationArg] update} */ +/** {@arg [OperationArg] [update][update]} */ internal interface SetUpdateOperationArg /** * @include [Update] * + * @include [Update.Columns] + * + * ## This overload: + * * @include [SelectingColumns.Dsl] {@include [SetUpdateOperationArg]} * @include [Update.DslParam] */ @@ -141,6 +139,10 @@ public fun DataFrame.update(columns: ColumnsSelector): Update DataFrame.update(vararg columns: String): Update = up /** * @include [Update] * + * @include [Update.Columns] + * + * ## This overload: + * * @include [SelectingColumns.KProperties] {@include [SetUpdateOperationArg]} * @include [Update.KPropertiesParam] */ @@ -157,6 +163,10 @@ public fun DataFrame.update(vararg columns: KProperty): Update DataFrame.update(vararg columns: ColumnReference): Updat /** * @include [Update] * + * @include [Update.Columns] + * + * ## This overload: + * * @include [SelectingColumns.ColumnAccessors] {@include [SetUpdateOperationArg]} * @include 
[Update.ColumnAccessorsParam] */ @@ -175,9 +189,9 @@ public fun DataFrame.update(columns: Iterable>): Up // endregion /** - * @include [Update.Where] + * @include [SelectingRows.RowValueCondition] {@include [SetUpdateOperationArg]} * - * @param predicate {@include [Update.Where.Predicate]} + * @param predicate The [row value filter][RowValueFilter] to select the rows to update. */ public fun Update.where(predicate: RowValueFilter): Update = copy(filter = filter and predicate) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index 7ab0acb26..b448ec1fe 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -2,35 +2,49 @@ package org.jetbrains.kotlinx.dataframe.documentation private interface DocumentationUrls { - /** [See `update` documentation.](https://kotlin.github.io/dataframe/update.html) */ + interface NameArg + + /** See {@includeArg [NameArg]} on the documentation website. 
*/ + interface Text + + interface DataRow { + + /** [{@include [Text]}{@arg [NameArg] Row Expressions}](https://kotlin.github.io/dataframe/datarow.html#row-expressions) */ + interface RowExpressions + + /** [{@include [Text]}{@arg [NameArg] Row Conditions}](https://kotlin.github.io/dataframe/datarow.html#row-conditions) */ + interface RowConditions + } + + /** [{@include [Text]}{@arg [NameArg] `update`}](https://kotlin.github.io/dataframe/update.html) */ interface Update - /** [See `fill` documentation.](https://kotlin.github.io/dataframe/fill.html) */ + /** [{@include [Text]}{@arg [NameArg] `fill`}](https://kotlin.github.io/dataframe/fill.html) */ interface Fill { - /** [See `fillNulls` documentation.](https://kotlin.github.io/dataframe/fill.html#fillnulls) */ + /** [{@include [Text]}{@arg [NameArg] `fillNulls`}](https://kotlin.github.io/dataframe/fill.html#fillnulls) */ interface FillNulls - /** [See `fillNaNs` documentation.](https://kotlin.github.io/dataframe/fill.html#fillnans) */ + /** [{@include [Text]}{@arg [NameArg] `fillNaNs`}](https://kotlin.github.io/dataframe/fill.html#fillnans) */ interface FillNaNs - /** [See `fillNA` documentation.](https://kotlin.github.io/dataframe/fill.html#fillna) */ + /** [{@include [Text]}{@arg [NameArg] `fillNA`}](https://kotlin.github.io/dataframe/fill.html#fillna) */ interface FillNA } - /** [See Access APIs documentation.](https://kotlin.github.io/dataframe/apilevels.html) */ + /** [{@include [Text]}{@arg [NameArg] Access APIs}](https://kotlin.github.io/dataframe/apilevels.html) */ interface AccessApis { - /** [See String API documentation.](https://kotlin.github.io/dataframe/stringapi.html) */ + /** [{@include [Text]}{@arg [NameArg] String API}](https://kotlin.github.io/dataframe/stringapi.html) */ interface StringApi - /** [See Column Accessors API documentation.](https://kotlin.github.io/dataframe/columnaccessorsapi.html) */ + /** [{@include [Text]}{@arg [NameArg] Column Accessors 
API}](https://kotlin.github.io/dataframe/columnaccessorsapi.html) */ interface ColumnAccessorsApi - /** [See KProperties API documentation.](https://kotlin.github.io/dataframe/kpropertiesapi.html) */ + /** [{@include [Text]}{@arg [NameArg] KProperties API}](https://kotlin.github.io/dataframe/kpropertiesapi.html) */ interface KPropertiesApi - /** [See Extension Properties API documentation.](https://kotlin.github.io/dataframe/extensionpropertiesapi.html) */ + /** [{@include [Text]}{@arg [NameArg] Extension Properties API}](https://kotlin.github.io/dataframe/extensionpropertiesapi.html) */ interface ExtensionPropertiesApi } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt similarity index 78% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt rename to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt index 00f52803f..2dcc75786 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/HighLevelOperations.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -12,7 +12,6 @@ import kotlin.reflect.KProperty * } */ - /** The key for an @arg that will define the operation name for the examples below. */ internal interface OperationArg @@ -29,7 +28,7 @@ internal interface SelectingColumnsLink */ internal interface SelectingColumns { - /** {@arg [OperationArg] } */ + /** {@arg [OperationArg] operation} */ interface SetDefaultOperationArg /** Select or express columns using the Column(s) Selection DSL. @@ -40,11 +39,13 @@ internal interface SelectingColumns { * expect you to return a [SingleColumn] or [ColumnSet], respectively. 
* * For example: - * ```kotlin - * df.{@includeArg [OperationArg]} { length and age } - * df.{@includeArg [OperationArg]} { cols(1..5) } - * df.{@includeArg [OperationArg]} { colsOf() } - * ``` + * + * `df.`{@includeArg [OperationArg]}` { length `[and][ColumnsSelectionDsl.and]` age }` + * + * `df.`{@includeArg [OperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`{@includeArg [OperationArg]}` { `[colsOf][colsOf]`() }` + * * @include [SetDefaultOperationArg] */ interface Dsl @@ -53,9 +54,8 @@ internal interface SelectingColumns { * ({@include [AccessApi.StringApiLink]}). * * For example: - * ```kotlin - * df.{@includeArg [OperationArg]}("length", "age") - * ``` + * + * `df.`{@includeArg [OperationArg]}`("length", "age")` * @include [SetDefaultOperationArg] */ interface ColumnNames @@ -64,11 +64,13 @@ internal interface SelectingColumns { * ({@include [AccessApi.ColumnAccessorsApiLink]}). * * For example: - * ```kotlin - * val length by column() - * val age by column() - * df.{@includeArg [OperationArg]}(length, age) - * ``` + * + * `val length by `[column]`()` + * + * `val age by `[column]`()` + * + * `df.`{@includeArg [OperationArg]}`(length, age)` + * * @include [SetDefaultOperationArg] */ interface ColumnAccessors @@ -79,9 +81,8 @@ internal interface SelectingColumns { * For example: * ```kotlin * data class Person(val length: Double, val age: Double) - * - * df.{@includeArg [OperationArg]}(Person::length, Person::age) * ``` + * `df.`{@includeArg [OperationArg]}`(Person::length, Person::age)` * @include [SetDefaultOperationArg] */ interface KProperties diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt new file mode 100644 index 000000000..fbb66ffd2 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt @@ -0,0 +1,47 @@ +package 
org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.* + +/** + * Selecting rows that satisfy a "Row Condition" ({@include [DocumentationUrls.DataRow.RowConditions]}) + * can occur in the following two types of operations: + * - Selecting entire rows ({@include [RowConditionLink]}), for instance in [filter], [drop], [first], and [count]. + * - Selecting parts of rows using a `where` operation after selecting columns ({@include [RowValueConditionLink]}), + * such as with [update], [gather], and [format] ([RowValueFilter]). + * {@comment TODO Is where present everywhere it should be?} + */ +internal interface SelectingRows { + + /** {@arg [OperationArg] } */ + interface SetDefaultOperationArg + + /** [Entire-Row Condition][EntireRowCondition] */ + interface RowConditionLink + + /** Filter or find rows to operate on using a [row filter][RowFilter]. + * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { `[index][index]`() % 2 == 0 }` + * + * `df.`{@includeArg [OperationArg]}` { `[diff][diff]` { age } == 0 }` + */ + interface EntireRowCondition + + /** [Row-Value Condition][RowValueCondition] */ + interface RowValueConditionLink + + /** Filter or find rows to operate on after [selecting columns][SelectingColumns] using a + * [row value filter][RowValueFilter]. 
+ * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { length }.`[where][where]` { it > 10.0 }` + * + * `df.`{@includeArg [OperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }.`[where][where]` { `[index][index]`() > 4 && city != "Paris" }` + * + * @include [SetDefaultOperationArg] + */ + interface RowValueCondition +} From 1db9d70052c09aa83ed808e1c5961e051679722b Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 7 Feb 2023 14:46:56 +0100 Subject: [PATCH 16/50] working on docs, found bug in kdoc highlighting --- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 2 +- .../jetbrains/kotlinx/dataframe/api/update.kt | 66 +++++++------------ .../documentation/SelectingColumns.kt | 2 +- .../dataframe/documentation/SelectingRows.kt | 2 +- 4 files changed, 28 insertions(+), 44 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index 44f1de548..3821af2d0 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -29,7 +29,7 @@ import kotlin.reflect.KProperty */ internal interface FillNulls { - /** @include [Update.Usage] {@arg [Update.UpdateOperationArg] [fillNulls]} */ + /** @include [Update.Usage] {@arg [Update.UpdateOperationArg] [fillNulls][fillNulls]} */ interface Usage } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 0d32ef3ab..acf6232d4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -27,7 +27,7 @@ import kotlin.reflect.KProperty * Returns the [DataFrame] with changed values in some cells * (column types can not be changed). * - * Check out [how to use the update operation fully][Usage]. 
+ * Check out the [Update Operation Usage][Usage]. * For more information: {@include [DocumentationUrls.Update]} */ public data class Update( @@ -44,21 +44,22 @@ public data class Update( internal interface UpdateOperationArg /** - * {@includeArg [UpdateOperationArg]} operation usage: - * ___ + * ## {@includeArg [UpdateOperationArg]} Operation Usage + * * {@includeArg [UpdateOperationArg]} `{ `[columns][Columns]` }` * - * - `[.`[where][where]` { `[rowCondition][SelectingRows.RowValueCondition]` } ]` + * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowCondition][SelectingRows.RowValueCondition]` } ]` * - * - `[.`[at][at]` (`[rowIndices][At.RowIndices]`) ]` + * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][At.RowIndices]`) ]` * - * - `.`[with][with]` { `[rowExpression][With.Expression]` } - * | .`[notNull][notNull]` { rowExpression } - * | .`[perCol][perCol]` { colExpression } - * | .`[perRowCol][perRowCol]` { rowColExpression } - * | .`[withValue][withValue]`(value) | .`[withNull]`() - * | .`[withZero][withZero]`() - * | .`[asFrame][asFrame]` { frameExpression }` + * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][With.Expression]` } + * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { rowExpression } + * | .`[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol]` { colExpression } + * | .`[perRowCol][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol]` { rowColExpression } + * | .`[withValue][org.jetbrains.kotlinx.dataframe.api.Update.withValue]`(value) + * | .`[withNull][org.jetbrains.kotlinx.dataframe.api.Update.withNull]`() + * | .`[withZero][org.jetbrains.kotlinx.dataframe.api.Update.withZero]`() + * | .`[asFrame][org.jetbrains.kotlinx.dataframe.api.Update.asFrame]` { frameExpression }` * * {@comment TODO * rowExpression: DataRow.(OldValue) -> NewValue @@ -124,12 +125,15 @@ public data class Update( internal interface SetUpdateOperationArg /** - * 
@include [Update] - * - * @include [Update.Columns] - * - * ## This overload: + * @include [Update] {@comment Description of the update operation.} * + * @include [Update.Columns] {@comment Description of what this function expects the user to do.} + * ## This Update Overload: + */ +internal interface CommonUpdateFunctionDoc + +/** + * @include [CommonUpdateFunctionDoc] * @include [SelectingColumns.Dsl] {@include [SetUpdateOperationArg]} * @include [Update.DslParam] */ @@ -137,36 +141,21 @@ public fun DataFrame.update(columns: ColumnsSelector): Update DataFrame.update(vararg columns: String): Update = update { columns.toColumns() } /** - * @include [Update] - * - * @include [Update.Columns] - * - * ## This overload: - * + * @include [CommonUpdateFunctionDoc] * @include [SelectingColumns.KProperties] {@include [SetUpdateOperationArg]} * @include [Update.KPropertiesParam] */ public fun DataFrame.update(vararg columns: KProperty): Update = update { columns.toColumns() } /** - * @include [Update] - * - * @include [Update.Columns] - * - * ## This overload: - * + * @include [CommonUpdateFunctionDoc] * @include [SelectingColumns.ColumnAccessors] {@include [SetUpdateOperationArg]} * @include [Update.ColumnAccessorsParam] */ @@ -174,12 +163,7 @@ public fun DataFrame.update(vararg columns: ColumnReference): Updat update { columns.toColumns() } /** - * @include [Update] - * - * @include [Update.Columns] - * - * ## This overload: - * + * @include [CommonUpdateFunctionDoc] * @include [SelectingColumns.ColumnAccessors] {@include [SetUpdateOperationArg]} * @include [Update.ColumnAccessorsParam] */ diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt index 2dcc75786..78462658a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt +++ 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -34,7 +34,7 @@ internal interface SelectingColumns { /** Select or express columns using the Column(s) Selection DSL. * (Any {@include [AccessApiLink]}). * - * The DSL comes in the form of either a [ColumnSelector]- or [ColumnsSelector] lambda, + * This DSL comes in the form of either a [Column Selector][ColumnSelector]- or [Columns Selector][ColumnsSelector] lambda, * which operate in the {@include [ColumnSelectionDslLink]} or the {@include [ColumnsSelectionDslLink]} and * expect you to return a [SingleColumn] or [ColumnSet], respectively. * diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt index fbb66ffd2..29c62b755 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt @@ -9,7 +9,7 @@ import org.jetbrains.kotlinx.dataframe.api.* * - Selecting entire rows ({@include [RowConditionLink]}), for instance in [filter], [drop], [first], and [count]. * - Selecting parts of rows using a `where` operation after selecting columns ({@include [RowValueConditionLink]}), * such as with [update], [gather], and [format] ([RowValueFilter]). 
- * {@comment TODO Is where present everywhere it should be?} + * {@comment TODO Is `where` present everywhere it should be?} */ internal interface SelectingRows { From a7a4b1b00e8279f58107f0f935b9f75e8915c0b1 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 8 Feb 2023 16:09:54 +0100 Subject: [PATCH 17/50] row expressions and updating docs --- core/build.gradle.kts | 4 +- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 9 +- .../jetbrains/kotlinx/dataframe/api/add.kt | 11 +- .../jetbrains/kotlinx/dataframe/api/update.kt | 163 +++++++++++------- .../dataframe/documentation/RowCondition.kt | 69 ++++++++ .../dataframe/documentation/RowExpressions.kt | 82 +++++++++ .../documentation/SelectingColumns.kt | 9 +- .../dataframe/documentation/SelectingRows.kt | 47 ----- 8 files changed, 280 insertions(+), 114 deletions(-) create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowCondition.kt create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt delete mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 4135f0e4f..a0372c9f8 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,8 +14,8 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") -// id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.11" - id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.13" +// id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } group = "org.jetbrains.kotlinx" diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index 3821af2d0..5bd6c86e1 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -7,7 +7,7 @@ import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.api.Update.Usage +import org.jetbrains.kotlinx.dataframe.api.Update.UpdateOperationArg import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.documentation.* @@ -29,11 +29,14 @@ import kotlin.reflect.KProperty */ internal interface FillNulls { - /** @include [Update.Usage] {@arg [Update.UpdateOperationArg] [fillNulls][fillNulls]} */ + /** @include [Update.Usage] + * {@arg [UpdateOperationArg] [fillNulls][fillNulls]} + * {@arg [RowCondition.FirstOperationArg] [fillNulls][fillNulls]} + * {@arg [RowExpressions.OperationArg] [fillNulls][fillNulls]} */ interface Usage } -/** {@arg [OperationArg] [fillNulls][fillNulls]} */ +/** {@arg [SelectingColumns.OperationArg] [fillNulls][fillNulls]} */ internal interface SetFillNullsOperationArg /** diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt index 4b045a455..2a9375869 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -98,7 +98,16 @@ public interface AddDataRow : DataRow { public fun AnyRow.newValue(): C } -public typealias AddExpression = Selector, C> +/** + * [AddExpression] is used to express or select any instance of `R` using the given instance of [AddDataRow]`<T>` as + * `this` and `it`.
+ * + * Shorthand for: + * ```kotlin + * AddDataRow<T>.(it: AddDataRow<T>) -> R + * ``` + */ +public typealias AddExpression = Selector, R> /** * Creates new column using row [expression] and adds it to the end of [DataFrame] diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index acf6232d4..fb5d9c34d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -48,12 +48,12 @@ public data class Update( * * {@includeArg [UpdateOperationArg]} `{ `[columns][Columns]` }` * - * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowCondition][SelectingRows.RowValueCondition]` } ]` + * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowValueCondition][RowCondition.RowValueCondition]` } ]` * * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][At.RowIndices]`) ]` * - * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][With.Expression]` } - * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { rowExpression } + * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][RowExpressions.RowValueExpression]` } + * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { `[rowExpression][RowExpressions.RowValueExpression]` } * | .`[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol]` { colExpression } * | .`[perRowCol][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol]` { rowColExpression } * | .`[withValue][org.jetbrains.kotlinx.dataframe.api.Update.withValue]`(value) @@ -62,7 +62,6 @@ public data class Update( * | .`[asFrame][org.jetbrains.kotlinx.dataframe.api.Update.asFrame]` { frameExpression }` * * {@comment TODO - * rowExpression: DataRow.(OldValue) -> NewValue * colExpression: DataColumn.(DataColumn) -> NewValue * rowColExpression:
DataRow.(DataColumn) -> NewValue * frameExpression: DataFrame.(DataFrame) -> DataFrame} @@ -84,57 +83,24 @@ public data class Update( /** @param columns The column names belonging to this [DataFrame] to update. */ internal interface ColumnNamesParam - - /** - * Only update the columns at certain given [row indices][Update.At.RowIndices]: - * - * Either a [Collection]<[Int]>, an [IntRange], or just `vararg` indices. - * - * For example: - * ```kotlin - * df.update { city }.at(5..10).withValue("Paris") - * ``` - */ - internal interface At { - - /** The indices of the rows to update. */ - interface RowIndices - } - - /** - * Update the selected columns using the given [expression][With.Expression]. - * - * For example: - * ```kotlin - * df.update { city }.with { name.firstName + " from " + it } - * ``` - */ - internal interface With { - - /** The expression to update the selected columns with. - * - * Can be seen as [DataRow].(oldValue: [C]) -> newValue: [C]? - */ - interface Expression - } } // region update -/** {@arg [OperationArg] [update][update]} */ -internal interface SetUpdateOperationArg +/** {@arg [SelectingColumns.OperationArg] [update][update]} */ +private interface SetSelectingColumnsOperationArg /** * @include [Update] {@comment Description of the update operation.} * * @include [Update.Columns] {@comment Description of what this function expects the user to do.} - * ## This Update Overload: + * ## This Update Overload */ internal interface CommonUpdateFunctionDoc /** * @include [CommonUpdateFunctionDoc] - * @include [SelectingColumns.Dsl] {@include [SetUpdateOperationArg]} + * @include [SelectingColumns.Dsl] {@include [SetSelectingColumnsOperationArg]} * @include [Update.DslParam] */ public fun DataFrame.update(columns: ColumnsSelector): Update = @@ -142,21 +108,21 @@ public fun DataFrame.update(columns: ColumnsSelector): Update DataFrame.update(vararg columns: String): Update = update { columns.toColumns() } /** * @include [CommonUpdateFunctionDoc] - * 
@include [SelectingColumns.KProperties] {@include [SetUpdateOperationArg]} + * @include [SelectingColumns.KProperties] {@include [SetSelectingColumnsOperationArg]} * @include [Update.KPropertiesParam] */ public fun DataFrame.update(vararg columns: KProperty): Update = update { columns.toColumns() } /** * @include [CommonUpdateFunctionDoc] - * @include [SelectingColumns.ColumnAccessors] {@include [SetUpdateOperationArg]} + * @include [SelectingColumns.ColumnAccessors] {@include [SetSelectingColumnsOperationArg]} * @include [Update.ColumnAccessorsParam] */ public fun DataFrame.update(vararg columns: ColumnReference): Update = @@ -164,7 +130,7 @@ public fun DataFrame.update(vararg columns: ColumnReference): Updat /** * @include [CommonUpdateFunctionDoc] - * @include [SelectingColumns.ColumnAccessors] {@include [SetUpdateOperationArg]} + * @include [SelectingColumns.ColumnAccessors] {@include [SetSelectingColumnsOperationArg]} * @include [Update.ColumnAccessorsParam] */ public fun DataFrame.update(columns: Iterable>): Update = @@ -172,78 +138,121 @@ public fun DataFrame.update(columns: Iterable>): Up // endregion -/** - * @include [SelectingRows.RowValueCondition] {@include [SetUpdateOperationArg]} +/** ## Where + * @include [RowCondition.RowValueCondition] + * {@arg [RowCondition.FirstOperationArg] [update][update]} + * {@arg [RowCondition.SecondOperationArg] [where][where]} * * @param predicate The [row value filter][RowValueFilter] to select the rows to update. */ public fun Update.where(predicate: RowValueFilter): Update = copy(filter = filter and predicate) +/** ## At + * Only update the columns at certain given [row indices][CommonUpdateAtFunctionDoc.RowIndicesParam]: + * + * Either a [Collection]<[Int]>, an [IntRange], or just `vararg` indices. 
+ * + * For example: + * + * `df.`[update][update]` { city }.`[at][at]`(5..10).`[with][with]` { "Paris" }` + * + * `df.`[update][update]` { name }.`[at][at]`(1, 2, 3, 4).`[with][with]` { "Empty" }` + * + * ## This At Overload + */ +private interface CommonUpdateAtFunctionDoc { + + /** The indices of the rows to update. */ + interface RowIndicesParam +} + /** - * @include [Update.At] + * @include [CommonUpdateAtFunctionDoc] + * + * Provide a [Collection]<[Int]> of row indices to update. * - * @param rowIndices {@include [Update.At.RowIndices]} + * @param rowIndices {@include [CommonUpdateAtFunctionDoc.RowIndicesParam]} */ public fun Update.at(rowIndices: Collection): Update = where { index in rowIndices } /** - * @include [Update.At] + * @include [CommonUpdateAtFunctionDoc] * - * @param rowIndices {@include [Update.At.RowIndices]} + * Provide a `vararg` of [Ints][Int] of row indices to update. + * + * @param rowIndices {@include [CommonUpdateAtFunctionDoc.RowIndicesParam]} */ public fun Update.at(vararg rowIndices: Int): Update = at(rowIndices.toSet()) /** - * @include [Update.At] + * @include [CommonUpdateAtFunctionDoc] + * + * Provide an [IntRange] of row indices to update. 
* - * @param rowRange {@include [Update.At.RowIndices]} + * @param rowRange {@include [CommonUpdateAtFunctionDoc.RowIndicesParam]} */ public fun Update.at(rowRange: IntRange): Update = where { index in rowRange } +/** TODO */ public infix fun Update.perRowCol(expression: RowColumnExpression): DataFrame = updateImpl { row, column, _ -> expression(row, column) } public typealias UpdateExpression = AddDataRow.(C) -> R -/** - * @include [Update.With] +/** ## With + * {@include [RowExpressions.RowValueExpression]} + * {@arg [RowExpressions.OperationArg] [update][update]` { city \}.`[with][with]} * - * @param expression {@include [Update.With.Expression]} + * ## Note + * @include [RowExpressions.AddDataRowNote] + * @param expression The {@include [RowExpressions.RowValueExpressionLink]} to update the rows with. */ public infix fun Update.with(expression: UpdateExpression): DataFrame = updateImpl { row, _, value -> expression(row, value) } +/** TODO */ public infix fun Update>.asFrame(expression: DataFrameExpression>): DataFrame = df.replace(columns).with { it.asColumnGroup().let { expression(it, it) }.asColumnGroup(it.name()) } +@Deprecated( + "Useless unless in combination with `withValue(null)`, but then users can just use `with { null }`...", + ReplaceWith("this as Update") +) public fun Update.asNullable(): Update = this as Update +/** TODO */ public fun Update.perCol(values: Map): DataFrame = updateWithValuePerColumnImpl { values[it.name()] ?: throw IllegalArgumentException("Update value for column ${it.name()} is not defined") } +/** TODO */ public fun Update.perCol(values: AnyRow): DataFrame = perCol(values.toMap() as Map) +/** TODO */ public fun Update.perCol(valueSelector: Selector, C>): DataFrame = updateWithValuePerColumnImpl(valueSelector) +/** TODO */ internal infix fun RowValueFilter?.and(other: RowValueFilter): RowValueFilter { if (this == null) return other val thisExp = this return { thisExp(this, it) && other(this, it) } } +/** TODO */ public fun 
Update.notNull(): Update = copy(filter = filter and { it != null }) as Update +/** TODO */ public fun Update.notNull(expression: RowValueExpression): DataFrame = notNull().updateImpl { row, column, value -> expression(row, value) } +/** TODO */ public fun DataFrame.update( firstCol: ColumnReference, vararg cols: ColumnReference, @@ -251,6 +260,7 @@ public fun DataFrame.update( ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) +/** TODO */ public fun DataFrame.update( firstCol: KProperty, vararg cols: KProperty, @@ -258,6 +268,7 @@ public fun DataFrame.update( ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) +/** TODO */ public fun DataFrame.update( firstCol: String, vararg cols: String, @@ -265,8 +276,44 @@ public fun DataFrame.update( ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) -public fun Update.withNull(): DataFrame = asNullable().withValue(null) -public fun Update.withZero(): DataFrame = updateWithValuePerColumnImpl { 0 as C } +/** + * Specific version of [with] that simply sets the value of each selected row to {@includeArg [CommonSpecificWithDocFirstArg]}. 
+ * + * For example: + * + * `df.`[update][update]` { id }.`[where][Update.where]` { it < 0 }.`{@includeArg [CommonSpecificWithDocSecondArg]}` + */ +private interface CommonSpecificWithDoc + +/** Arg for the resulting value */ +private interface CommonSpecificWithDocFirstArg +/** Arg for the function call */ +private interface CommonSpecificWithDocSecondArg + +/** + * ## With Null + * @include [CommonSpecificWithDoc] + * {@arg [CommonSpecificWithDocFirstArg] `null`} + * {@arg [CommonSpecificWithDocSecondArg] [withNull][withNull]`()} + */ +public fun Update.withNull(): DataFrame = with { null } + +/** + * ## With Zero + * @include [CommonSpecificWithDoc] + * {@arg [CommonSpecificWithDocFirstArg] `0`} + * {@arg [CommonSpecificWithDocSecondArg] [withZero][withZero]`()} + */ +public fun Update.withZero(): DataFrame = updateWithValuePerColumnImpl { 0 as C } + +/** + * ## With Value + * @include [CommonSpecificWithDoc] + * {@arg [CommonSpecificWithDocFirstArg] [value]} + * {@arg [CommonSpecificWithDocSecondArg] [withValue][withValue]`(-1)} + * + * @param value The value to set the selected rows to. In contrast to [with][Update.with], this must be the same exact type. 
+ */ public infix fun Update.withValue(value: C): DataFrame = with { value } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowCondition.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowCondition.kt new file mode 100644 index 000000000..7eeabe4b9 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowCondition.kt @@ -0,0 +1,69 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.* + +/** + * ## Row Condition + * Selecting rows that satisfy a "Row Condition" ({@include [DocumentationUrls.DataRow.RowConditions]}) + * can occur in the following two types of operations: + * - Selecting entire rows ({@include [RowConditionLink]}), for instance in [filter], [drop], [first], and [count] + * (using [RowFilter]). + * - Selecting parts of rows using a `where` operation after selecting columns ({@include [RowValueConditionLink]}), + * such as with [update], [gather], and [format] + * (using [RowValueFilter]). + * + * A Row Condition is similar to a {@include [RowExpressionsLink]} but expects a [Boolean] as result. + * {@comment TODO Is `where` present everywhere it should be?} + */ +internal interface RowCondition { + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface FirstOperationArg + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface SecondOperationArg + + /** {@arg [FirstOperationArg] operation}{@arg [SecondOperationArg] where} */ + interface SetDefaultOperationArg + + /** [Entire-Row Condition][EntireRowCondition] */ + interface RowConditionLink + + /** Filter or find rows to operate on using a [row filter][RowFilter]. 
+ * + * For example: + * + * `df.`{@includeArg [FirstOperationArg]}` { `[index][index]`() % 2 == 0 }` + * + * `df.`{@includeArg [FirstOperationArg]}` { `[diff][diff]` { age } == 0 }` + * + * @include [SetDefaultOperationArg] + */ + interface EntireRowCondition + + /** [Row-Value Condition][RowValueCondition] */ + interface RowValueConditionLink + + /** Filter or find rows to operate on after [selecting columns][SelectingColumns] using a + * [row value filter][RowValueFilter]. + * + * For example: + * + * `df.`{@includeArg [FirstOperationArg]}` { length }.`{@includeArg [SecondOperationArg]}` { it > 10.0 }` + * + * `df.`{@includeArg [FirstOperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }.`{@includeArg [SecondOperationArg]}` { `[index][index]`() > 4 && city != "Paris" }` + * + * @include [SetDefaultOperationArg] + */ + interface RowValueCondition +} + +/** [Row Condition][RowCondition] */ +internal interface RowConditionLink diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt new file mode 100644 index 000000000..d8c6cfedd --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt @@ -0,0 +1,82 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.RowExpressions.AddDataRowNote +import org.jetbrains.kotlinx.dataframe.documentation.RowExpressions.RowExpressionLink +import org.jetbrains.kotlinx.dataframe.documentation.RowExpressions.RowValueExpressionLink +import org.jetbrains.kotlinx.dataframe.RowExpression as DfRowExpression +import org.jetbrains.kotlinx.dataframe.RowValueExpression as DfRowValueExpression + +/** + * ## Row expressions + * Expressing values using a "Row Expression" 
({@include [DocumentationUrls.DataRow.RowExpressions]}) + * can occur in the following two types of operations: + * + * - Providing a new value for every selected row given the row ({@include [RowExpressionLink]}), + * for instance in [map][DataFrame.map], [add][DataFrame.add], and [insert][DataFrame.insert] + * (using [RowExpression][DfRowExpression]). + * + * - Providing a new value for every selected row given the row and the previous value ({@include [RowValueExpressionLink]}), + * for instance in [update.with][Update.with] and [convert.notNull][Convert.notNull] + * (using [RowValueExpression][DfRowValueExpression]). + * + * Note: + * @include [AddDataRowNote] + * + * A Row Expression is similar to a {@include [RowConditionLink]}, but the latter expects a [Boolean] as result. + */ +internal interface RowExpressions { + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface OperationArg + + /** {@arg [OperationArg] operation} */ + interface SetDefaultOperationArg + + /** + * [with][org.jetbrains.kotlinx.dataframe.api.Update.with] and [add][org.jetbrains.kotlinx.dataframe.api.add] use [AddDataRow] instead of [DataRow] as the DSL's receiver type. + * This is an extension to [RowValueExpression][DfRowValueExpression] and + * [RowExpression][DfRowExpression] that provides access to + * the modified/generated value of the preceding row ([AddDataRow.newValue]). + */ + interface AddDataRowNote + + /** Provide a new value for every selected row given the row using a + * [row expression][DfRowExpression].
+ * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { name.firstName + " " + name.lastName }` + * + * `df.`{@includeArg [OperationArg]}` { 2021 - age }` + * + * @include [SetDefaultOperationArg] + */ + interface RowExpression + + /** [Row Expression][RowExpression] */ + interface RowExpressionLink + + /** Provide a new value for every selected row given the row and the previous value using a + * [row value expression][DfRowValueExpression]. + * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { name.firstName + " from " + it }` + * + * `df.`{@includeArg [OperationArg]}` { it.uppercase() }` + * {@include [SetDefaultOperationArg]} + */ + interface RowValueExpression + + /** [Row Value Expression][RowValueExpression] */ + interface RowValueExpressionLink +} + +/** [Row Expressions][RowExpressions] */ +internal interface RowExpressionsLink diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt index 78462658a..6c1c77c54 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -12,9 +12,6 @@ import kotlin.reflect.KProperty * } */ -/** The key for an @arg that will define the operation name for the examples below. */ -internal interface OperationArg - /** [Selecting Columns][SelectingColumns] */ internal interface SelectingColumnsLink @@ -28,6 +25,12 @@ internal interface SelectingColumnsLink */ internal interface SelectingColumns { + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. 
+ */ + interface OperationArg + /** {@arg [OperationArg] operation} */ interface SetDefaultOperationArg diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt deleted file mode 100644 index 29c62b755..000000000 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt +++ /dev/null @@ -1,47 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.documentation - -import org.jetbrains.kotlinx.dataframe.* -import org.jetbrains.kotlinx.dataframe.api.* - -/** - * Selecting rows that satisfy a "Row Condition" ({@include [DocumentationUrls.DataRow.RowConditions]}) - * can occur in the following two types of operations: - * - Selecting entire rows ({@include [RowConditionLink]}), for instance in [filter], [drop], [first], and [count]. - * - Selecting parts of rows using a `where` operation after selecting columns ({@include [RowValueConditionLink]}), - * such as with [update], [gather], and [format] ([RowValueFilter]). - * {@comment TODO Is `where` present everywhere it should be?} - */ -internal interface SelectingRows { - - /** {@arg [OperationArg] } */ - interface SetDefaultOperationArg - - /** [Entire-Row Condition][EntireRowCondition] */ - interface RowConditionLink - - /** Filter or find rows to operate on using a [row filter][RowFilter]. - * - * For example: - * - * `df.`{@includeArg [OperationArg]}` { `[index][index]`() % 2 == 0 }` - * - * `df.`{@includeArg [OperationArg]}` { `[diff][diff]` { age } == 0 }` - */ - interface EntireRowCondition - - /** [Row-Value Condition][RowValueCondition] */ - interface RowValueConditionLink - - /** Filter or find rows to operate on after [selecting columns][SelectingColumns] using a - * [row value filter][RowValueFilter]. 
- * - * For example: - * - * `df.`{@includeArg [OperationArg]}` { length }.`[where][where]` { it > 10.0 }` - * - * `df.`{@includeArg [OperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }.`[where][where]` { `[index][index]`() > 4 && city != "Paris" }` - * - * @include [SetDefaultOperationArg] - */ - interface RowValueCondition -} From 394a795efe618f95b0426af54e18e6cd33189659 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 8 Feb 2023 19:58:32 +0100 Subject: [PATCH 18/50] refactored examples separate from docs, working on update overloads docs --- core/build.gradle.kts | 2 +- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 20 ++-- .../jetbrains/kotlinx/dataframe/api/update.kt | 51 ++++++--- .../dataframe/documentation/AccessApi.kt | 8 +- .../dataframe/documentation/RowCondition.kt | 53 +++++---- .../dataframe/documentation/RowExpressions.kt | 53 +++++---- .../documentation/SelectingColumns.kt | 106 +++++++++++------- 7 files changed, 177 insertions(+), 116 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index a0372c9f8..b9f725f90 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.13" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.14" // id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index 5bd6c86e1..b5d6670ba 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -42,7 +42,7 @@ internal interface SetFillNullsOperationArg /** * @include [FillNulls] * - * @include [SelectingColumns.Dsl] {@include [SetFillNullsOperationArg]} + * @include 
[SelectingColumns.Dsl.WithExample] {@include [SetFillNullsOperationArg]} * @include [Update.DslParam] */ public fun DataFrame.fillNulls(columns: ColumnsSelector): Update = @@ -51,7 +51,7 @@ public fun DataFrame.fillNulls(columns: ColumnsSelector): Updat /** * @include [FillNulls] * - * @include [SelectingColumns.ColumnNames] {@include [SetFillNullsOperationArg]} + * @include [SelectingColumns.ColumnNames.WithExample] {@include [SetFillNullsOperationArg]} * @include [Update.ColumnNamesParam] */ public fun DataFrame.fillNulls(vararg columns: String): Update = @@ -60,7 +60,7 @@ public fun DataFrame.fillNulls(vararg columns: String): Update = /** * @include [FillNulls] * - * @include [SelectingColumns.KProperties] {@include [SetFillNullsOperationArg]} + * @include [SelectingColumns.KProperties.WithExample] {@include [SetFillNullsOperationArg]} * @include [Update.KPropertiesParam] */ public fun DataFrame.fillNulls(vararg columns: KProperty): Update = @@ -69,7 +69,7 @@ public fun DataFrame.fillNulls(vararg columns: KProperty): Update DataFrame.fillNulls(vararg columns: ColumnReference): Update = @@ -78,7 +78,7 @@ public fun DataFrame.fillNulls(vararg columns: ColumnReference): Up /** * @include [FillNulls] * - * @include [SelectingColumns.ColumnAccessors] {@include [SetFillNullsOperationArg]} + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNullsOperationArg]} * @include [Update.ColumnAccessorsParam] */ public fun DataFrame.fillNulls(columns: Iterable>): Update = @@ -127,7 +127,7 @@ internal interface SetFillNaNsOperationArg /** * @include [FillNaNs] - * @include [SelectingColumns.Dsl] {@include [SetFillNaNsOperationArg]} + * @include [SelectingColumns.Dsl.WithExample] {@include [SetFillNaNsOperationArg]} * @include [Update.DslParam] * */ @@ -137,7 +137,7 @@ public fun DataFrame.fillNaNs(columns: ColumnsSelector): Update< /** * @include [FillNaNs] * - * @include [SelectingColumns.ColumnNames] {@include [SetFillNaNsOperationArg]} + * @include 
[SelectingColumns.ColumnNames.WithExample] {@include [SetFillNaNsOperationArg]} * @include [Update.ColumnNamesParam] */ public fun DataFrame.fillNaNs(vararg columns: String): Update = @@ -146,7 +146,7 @@ public fun DataFrame.fillNaNs(vararg columns: String): Update = /** * @include [FillNaNs] * - * @include [SelectingColumns.KProperties] {@include [SetFillNaNsOperationArg]} + * @include [SelectingColumns.KProperties.WithExample] {@include [SetFillNaNsOperationArg]} * @include [Update.KPropertiesParam] */ public fun DataFrame.fillNaNs(vararg columns: KProperty): Update = @@ -155,7 +155,7 @@ public fun DataFrame.fillNaNs(vararg columns: KProperty): Update DataFrame.fillNaNs(vararg columns: ColumnReference): Update = @@ -164,7 +164,7 @@ public fun DataFrame.fillNaNs(vararg columns: ColumnReference): Upd /** * @include [FillNaNs] * - * @include [SelectingColumns.ColumnAccessors] {@include [SetFillNaNsOperationArg]} + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNaNsOperationArg]} * @include [Update.ColumnAccessorsParam] */ public fun DataFrame.fillNaNs(columns: Iterable>): Update = diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index fb5d9c34d..9e2161893 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -28,6 +28,7 @@ import kotlin.reflect.KProperty * (column types can not be changed). * * Check out the [Update Operation Usage][Usage]. 
+ * * For more information: {@include [DocumentationUrls.Update]} */ public data class Update( @@ -48,12 +49,12 @@ public data class Update( * * {@includeArg [UpdateOperationArg]} `{ `[columns][Columns]` }` * - * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowValueCondition][RowCondition.RowValueCondition]` } ]` + * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowValueCondition][RowCondition.RowValueCondition.WithExample]` } ]` * - * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][At.RowIndices]`) ]` + * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][CommonUpdateAtFunctionDoc]`) ]` * - * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][RowExpressions.RowValueExpression]` } - * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { `[rowExpression][RowExpressions.RowValueExpression]` } + * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } + * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } * | .`[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol]` { colExpression } * | .`[perRowCol][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol]` { rowColExpression } * | .`[withValue][org.jetbrains.kotlinx.dataframe.api.Update.withValue]`(value) @@ -69,7 +70,7 @@ public data class Update( */ internal interface Usage - /** Select the columns to update. See {@include [SelectingColumnsLink]} for all the options. */ + /** The columns to update need to be selected. See {@include [SelectingColumnsLink]} for all the selecting options. */ internal interface Columns /** @param columns The [ColumnsSelector] used to select the columns of this [DataFrame] to update. 
*/ @@ -92,15 +93,22 @@ private interface SetSelectingColumnsOperationArg /** * @include [Update] {@comment Description of the update operation.} - * + * ## {@comment Line break} * @include [Update.Columns] {@comment Description of what this function expects the user to do.} * ## This Update Overload */ internal interface CommonUpdateFunctionDoc +/** + * ## Optional + * Combine `df.`[update][update]`(...).`[with][Update.with]` { ... }` + * into `df.`[update][update]`(...) { ... }` + */ +private interface UpdatePlusWithNote + /** * @include [CommonUpdateFunctionDoc] - * @include [SelectingColumns.Dsl] {@include [SetSelectingColumnsOperationArg]} + * @include [SelectingColumns.Dsl.WithExample] {@include [SetSelectingColumnsOperationArg]} * @include [Update.DslParam] */ public fun DataFrame.update(columns: ColumnsSelector): Update = @@ -108,21 +116,24 @@ public fun DataFrame.update(columns: ColumnsSelector): Update DataFrame.update(vararg columns: String): Update = update { columns.toColumns() } /** * @include [CommonUpdateFunctionDoc] - * @include [SelectingColumns.KProperties] {@include [SetSelectingColumnsOperationArg]} + * @include [SelectingColumns.KProperties.WithExample] {@include [SetSelectingColumnsOperationArg]} + * @include [UpdatePlusWithNote] * @include [Update.KPropertiesParam] */ public fun DataFrame.update(vararg columns: KProperty): Update = update { columns.toColumns() } /** * @include [CommonUpdateFunctionDoc] - * @include [SelectingColumns.ColumnAccessors] {@include [SetSelectingColumnsOperationArg]} + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetSelectingColumnsOperationArg]} + * @include [UpdatePlusWithNote] * @include [Update.ColumnAccessorsParam] */ public fun DataFrame.update(vararg columns: ColumnReference): Update = @@ -130,8 +141,9 @@ public fun DataFrame.update(vararg columns: ColumnReference): Updat /** * @include [CommonUpdateFunctionDoc] - * @include [SelectingColumns.ColumnAccessors] {@include 
[SetSelectingColumnsOperationArg]} + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetSelectingColumnsOperationArg]} * @include [Update.ColumnAccessorsParam] + * TODO this will be deprecated */ public fun DataFrame.update(columns: Iterable>): Update = update { columns.toColumnSet() } @@ -139,7 +151,7 @@ public fun DataFrame.update(columns: Iterable>): Up // endregion /** ## Where - * @include [RowCondition.RowValueCondition] + * @include [RowCondition.RowValueCondition.WithExample] * {@arg [RowCondition.FirstOperationArg] [update][update]} * {@arg [RowCondition.SecondOperationArg] [where][where]} * @@ -201,7 +213,7 @@ public infix fun Update.perRowCol(expression: RowColumnExpression = AddDataRow.(C) -> R /** ## With - * {@include [RowExpressions.RowValueExpression]} + * {@include [RowExpressions.RowValueExpression.WithExample]} * {@arg [RowExpressions.OperationArg] [update][update]` { city \}.`[with][with]} * * ## Note @@ -252,7 +264,18 @@ public fun Update.notNull(expression: RowValueExpression) expression(row, value) } -/** TODO */ +/** + * @include [CommonUpdateFunctionDoc] + * ### A combination of [update] and [with][Update.with]. + * + * @include [SelectingColumns.ColumnAccessors] + * + * {@include [RowExpressions.RowValueExpression.WithExample]} + * {@arg [RowExpressions.OperationArg] [update][update]`("city")` } + * + * @include [Update.ColumnAccessorsParam] + * @param expression The {@include [RowExpressions.RowValueExpressionLink]} to update the rows with. 
+ */ public fun DataFrame.update( firstCol: ColumnReference, vararg cols: ColumnReference, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt index 073ea65c2..02f5ae125 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt @@ -9,10 +9,10 @@ import org.jetbrains.kotlinx.dataframe.documentation.samples.ApiLevels as ApiLev * * In `Kotlin DataFrame` we provide four different ways to access columns, and, while they are essentially different, they * look pretty similar in the data wrangling DSL. These include: - * - [StringApi] - * - [ColumnAccessorsApi] - * - [KPropertiesApi] - * - [ExtensionPropertiesApi] + * - {@include [ExtensionPropertiesApiLink]} + * - {@include [KPropertiesApiLink]} + * - {@include [ColumnAccessorsApiLink]} + * - {@include [StringApiLink]} * * For more information: {@include [DocumentationUrls.AccessApis]} * diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowCondition.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowCondition.kt index 7eeabe4b9..3158a0232 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowCondition.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowCondition.kt @@ -33,36 +33,45 @@ internal interface RowCondition { /** {@arg [FirstOperationArg] operation}{@arg [SecondOperationArg] where} */ interface SetDefaultOperationArg - /** [Entire-Row Condition][EntireRowCondition] */ + /** [Entire-Row Condition][EntireRowCondition.WithExample] */ interface RowConditionLink - /** Filter or find rows to operate on using a [row filter][RowFilter]. 
- * - * For example: - * - * `df.`{@includeArg [FirstOperationArg]}` { `[index][index]`() % 2 == 0 }` - * - * `df.`{@includeArg [FirstOperationArg]}` { `[diff][diff]` { age } == 0 }` - * - * @include [SetDefaultOperationArg] - */ - interface EntireRowCondition + /** Filter or find rows to operate on using a [row filter][RowFilter]. */ + interface EntireRowCondition { + + /** + * {@include [EntireRowCondition]} + * + * For example: + * + * `df.`{@includeArg [FirstOperationArg]}` { `[index][index]`() % 2 == 0 }` + * + * `df.`{@includeArg [FirstOperationArg]}` { `[diff][diff]` { age } == 0 }` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } - /** [Row-Value Condition][RowValueCondition] */ + /** [Row-Value Condition][RowValueCondition.WithExample] */ interface RowValueConditionLink /** Filter or find rows to operate on after [selecting columns][SelectingColumns] using a * [row value filter][RowValueFilter]. - * - * For example: - * - * `df.`{@includeArg [FirstOperationArg]}` { length }.`{@includeArg [SecondOperationArg]}` { it > 10.0 }` - * - * `df.`{@includeArg [FirstOperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }.`{@includeArg [SecondOperationArg]}` { `[index][index]`() > 4 && city != "Paris" }` - * - * @include [SetDefaultOperationArg] */ - interface RowValueCondition + interface RowValueCondition { + + /** + * {@include [RowValueCondition]} + * + * For example: + * + * `df.`{@includeArg [FirstOperationArg]}` { length }.`{@includeArg [SecondOperationArg]}` { it > 10.0 }` + * + * `df.`{@includeArg [FirstOperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }.`{@includeArg [SecondOperationArg]}` { `[index][index]`() > 4 && city != "Paris" }` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } } /** [Row Condition][RowCondition] */ diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt index d8c6cfedd..e2b60f624 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt @@ -46,35 +46,44 @@ internal interface RowExpressions { */ interface AddDataRowNote - /** Provide a new value for every selected row given the row using a - * [row expression][DfRowExpression]. - * - * For example: - * - * `df.`{@includeArg [OperationArg]}` { name.firstName + " " + name.lastName }` - * - * `df.`{@includeArg [OperationArg]}` { 2021 - age }` - * - * @include [SetDefaultOperationArg] - */ - interface RowExpression + /** Provide a new value for every selected row given the row using a [row expression][DfRowExpression]. */ + interface RowExpression { + + /** + * {@include [RowExpression]} + * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { name.firstName + " " + name.lastName }` + * + * `df.`{@includeArg [OperationArg]}` { 2021 - age }` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } - /** [Row Expression][RowExpression] */ + /** [Row Expression][RowExpression.WithExample] */ interface RowExpressionLink /** Provide a new value for every selected row given the row and the previous value using a * [row value expression][DfRowValueExpression]. 
- * - * For example: - * - * `df.`{@includeArg [OperationArg]}` { name.firstName + " from " + it }` - * - * `df.`{@includeArg [OperationArg]}` { it.uppercase() }` - * {@include [SetDefaultOperationArg]} */ - interface RowValueExpression + interface RowValueExpression { + + /** + * {@include [RowValueExpression]} + * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { name.firstName + " from " + it }` + * + * `df.`{@includeArg [OperationArg]}` { it.uppercase() }` + * {@include [SetDefaultOperationArg]} + */ + interface WithExample + } - /** [Row Value Expression][RowValueExpression] */ + /** [Row Value Expression][RowValueExpression.WithExample] */ interface RowValueExpressionLink } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt index 6c1c77c54..d857e7966 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -18,10 +18,10 @@ internal interface SelectingColumnsLink /** Selecting columns for various operations (including but not limited to * [DataFrame.select], [DataFrame.update], [DataFrame.gather], and [DataFrame.fillNulls]) * can be done in the following ways: - * - {@include [Dsl]} - * - {@include [ColumnNames]} - * - {@include [ColumnAccessors]} - * - {@include [KProperties]} + * - {@include [Dsl.WithExample]} + * - {@include [ColumnNames.WithExample]} + * - {@include [ColumnAccessors.WithExample]} + * - {@include [KProperties.WithExample]} */ internal interface SelectingColumns { @@ -40,53 +40,73 @@ internal interface SelectingColumns { * This DSL comes in the form of either a [Column Selector][ColumnSelector]- or [Columns Selector][ColumnsSelector] lambda, * which operate in the {@include [ColumnSelectionDslLink]} or the {@include [ColumnsSelectionDslLink]} and * expect 
you to return a [SingleColumn] or [ColumnSet], respectively. - * - * For example: - * - * `df.`{@includeArg [OperationArg]}` { length `[and][ColumnsSelectionDsl.and]` age }` - * - * `df.`{@includeArg [OperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }` - * - * `df.`{@includeArg [OperationArg]}` { `[colsOf][colsOf]`() }` - * - * @include [SetDefaultOperationArg] */ - interface Dsl + interface Dsl { + + /** + * {@include [Dsl]} + * + * For example: + * `df.`{@includeArg [OperationArg]}` { length `[and][ColumnsSelectionDsl.and]` age }` + * + * `df.`{@includeArg [OperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`{@includeArg [OperationArg]}` { `[colsOf][colsOf]`() }` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } /** Select columns using their column names * ({@include [AccessApi.StringApiLink]}). - * - * For example: - * - * `df.`{@includeArg [OperationArg]}`("length", "age")` - * @include [SetDefaultOperationArg] */ - interface ColumnNames + interface ColumnNames { + + /** + * {@include [ColumnNames]} + * + * For example: + * + * `df.`{@includeArg [OperationArg]}`("length", "age")` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } /** Select columns using column accessors * ({@include [AccessApi.ColumnAccessorsApiLink]}). - * - * For example: - * - * `val length by `[column]`()` - * - * `val age by `[column]`()` - * - * `df.`{@includeArg [OperationArg]}`(length, age)` - * - * @include [SetDefaultOperationArg] */ - interface ColumnAccessors + interface ColumnAccessors { - /** Select columns using [KProperty]'s - * ({@include [AccessApi.KPropertiesApiLink]}). 
- * - * For example: - * ```kotlin - * data class Person(val length: Double, val age: Double) - * ``` - * `df.`{@includeArg [OperationArg]}`(Person::length, Person::age)` - * @include [SetDefaultOperationArg] - */ - interface KProperties + /** + * {@include [ColumnAccessors]} + * + * For example: + * + * `val length by `[column]`()` + * + * `val age by `[column]`()` + * + * `df.`{@includeArg [OperationArg]}`(length, age)` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } + + /** Select columns using [KProperties][KProperty] ({@include [AccessApi.KPropertiesApiLink]}). */ + interface KProperties { + + /** + * {@include [KProperties]} + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`{@includeArg [OperationArg]}`(Person::length, Person::age)` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } } From 83cfa31a0669fc90596ca2e62e044b52a0b6e819 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 9 Feb 2023 14:21:34 +0100 Subject: [PATCH 19/50] final update overloads, fillNA and NA/NaN info --- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 152 ++++++++++++++---- .../jetbrains/kotlinx/dataframe/api/update.kt | 39 +++-- 2 files changed, 149 insertions(+), 42 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index b5d6670ba..f090b0659 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -20,28 +20,34 @@ import kotlin.reflect.KProperty // region fillNulls /** - * Replace `null` values with given value or expression. - * Specific case of [Update]. + * ## The Fill Nulls Operation * - * Check out [how to use `fillNulls`][Usage]. + * Replaces `null` values with given value or expression. + * Specific case of [update]. 
+ * + * Check out the [`fillNulls` Operation Usage][Usage]. * * For more information: {@include [DocumentationUrls.Fill.FillNulls]} */ internal interface FillNulls { - /** @include [Update.Usage] - * {@arg [UpdateOperationArg] [fillNulls][fillNulls]} - * {@arg [RowCondition.FirstOperationArg] [fillNulls][fillNulls]} - * {@arg [RowExpressions.OperationArg] [fillNulls][fillNulls]} */ + /** @include [Update.Usage] {@arg [UpdateOperationArg] [fillNulls][fillNulls]} */ interface Usage } /** {@arg [SelectingColumns.OperationArg] [fillNulls][fillNulls]} */ -internal interface SetFillNullsOperationArg +private interface SetFillNullsOperationArg /** - * @include [FillNulls] - * + * @include [FillNulls] {@comment Description of the fillNulls operation.} + * ## {@comment Line break} + * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} + * ## This Fill Nulls Overload + */ +private interface CommonFillNullsFunctionDoc + +/** + * @include [CommonFillNullsFunctionDoc] * @include [SelectingColumns.Dsl.WithExample] {@include [SetFillNullsOperationArg]} * @include [Update.DslParam] */ @@ -49,8 +55,7 @@ public fun DataFrame.fillNulls(columns: ColumnsSelector): Updat update(columns).where { it == null } /** - * @include [FillNulls] - * + * @include [CommonFillNullsFunctionDoc] * @include [SelectingColumns.ColumnNames.WithExample] {@include [SetFillNullsOperationArg]} * @include [Update.ColumnNamesParam] */ @@ -58,8 +63,7 @@ public fun DataFrame.fillNulls(vararg columns: String): Update = fillNulls { columns.toColumns() } /** - * @include [FillNulls] - * + * @include [CommonFillNullsFunctionDoc] * @include [SelectingColumns.KProperties.WithExample] {@include [SetFillNullsOperationArg]} * @include [Update.KPropertiesParam] */ @@ -67,8 +71,7 @@ public fun DataFrame.fillNulls(vararg columns: KProperty): Update DataFrame.fillNulls(vararg columns: ColumnReference): Up fillNulls { columns.toColumns() } /** - * @include [FillNulls] 
- * + * @include [CommonFillNullsFunctionDoc] * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNullsOperationArg]} * @include [Update.ColumnAccessorsParam] + * TODO this will be deprecated */ public fun DataFrame.fillNulls(columns: Iterable>): Update = fillNulls { columns.toColumnSet() } // endregion +/** + * [Floats][Float] or [Doubles][Double] can be represented as [Float.NaN] or [Double.NaN], respectively, + * in cases where a mathematical operation is undefined, such as dividing by zero. + * In Dataframe we have helper functions to check for `NaNs`, such as [Any?.isNaN][Any.isNaN] and + * [column.canHaveNaN][DataColumn.canHaveNaN]. + * You can also use [fillNaNs][fillNaNs] to replace `NaNs` in certain columns with a given value or expression. + * + * @see NA + */ +internal interface NaN + +/** + * `NA` in Dataframe can be seen as "[NaN] or `null`". + * + * [Floats][Float] or [Doubles][Double] can be represented as [Float.NaN] or [Double.NaN], respectively, + * in cases where a mathematical operation is undefined, such as dividing by zero. + * + * In Dataframe we have helper functions to check for `NAs`, such as [Any?.isNA][Any.isNA] and + * [column.canHaveNA][DataColumn.canHaveNA]. + * You can also use [fillNA][fillNA] to replace `NAs` in certain columns with a given value or expression. + * @see NaN + */ +internal interface NA + internal inline val Any?.isNaN: Boolean get() = (this is Double && isNaN()) || (this is Float && isNaN()) internal inline val Any?.isNA: Boolean @@ -109,34 +136,42 @@ internal inline val Float?.isNA: Boolean get() = this == null || this.isNaN() // region fillNaNs /** - * Replace `NaN` values with given value or expression. - * Specific case of [Update]. + * ## The Fill NaNs Operation * - * Check out [how to use `fillNaNs`][Usage]. + * Replaces [`NaN`][NaN] values with given value or expression. + * Specific case of [update]. + * + * Check out the [`fillNaNs` Operation Usage][Usage]. 
* * For more information: {@include [DocumentationUrls.Fill.FillNaNs]} */ internal interface FillNaNs { - /** @include [Update.Usage] {@arg [Update.UpdateOperationArg] [fillNaNs]} */ + /** @include [Update.Usage] {@arg [Update.UpdateOperationArg] [fillNaNs][fillNaNs]} */ interface Usage } -/** {@arg [OperationArg] [fillNaNs][fillNaNs]} */ +/** {@arg [SelectingColumns.OperationArg] [fillNaNs][fillNaNs]} */ internal interface SetFillNaNsOperationArg /** - * @include [FillNaNs] + * @include [FillNaNs] {@comment Description of the fillNaNs operation.} + * ## {@comment Line break} + * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} + * ## This Fill NaNs Overload + */ +private interface CommonFillNaNsFunctionDoc + +/** + * @include [CommonFillNaNsFunctionDoc] * @include [SelectingColumns.Dsl.WithExample] {@include [SetFillNaNsOperationArg]} * @include [Update.DslParam] - * */ public fun DataFrame.fillNaNs(columns: ColumnsSelector): Update = update(columns).where { it.isNaN } /** - * @include [FillNaNs] - * + * @include [CommonFillNaNsFunctionDoc] * @include [SelectingColumns.ColumnNames.WithExample] {@include [SetFillNaNsOperationArg]} * @include [Update.ColumnNamesParam] */ @@ -144,8 +179,7 @@ public fun DataFrame.fillNaNs(vararg columns: String): Update = fillNaNs { columns.toColumns() } /** - * @include [FillNaNs] - * + * @include [CommonFillNaNsFunctionDoc] * @include [SelectingColumns.KProperties.WithExample] {@include [SetFillNaNsOperationArg]} * @include [Update.KPropertiesParam] */ @@ -153,8 +187,7 @@ public fun DataFrame.fillNaNs(vararg columns: KProperty): Update DataFrame.fillNaNs(vararg columns: ColumnReference): Upd fillNaNs { columns.toColumns() } /** - * @include [FillNaNs] - * + * @include [CommonFillNaNsFunctionDoc] * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNaNsOperationArg]} * @include [Update.ColumnAccessorsParam] + * TODO this will be deprecated */ public 
fun DataFrame.fillNaNs(columns: Iterable>): Update = fillNaNs { columns.toColumnSet() } @@ -174,18 +207,71 @@ public fun DataFrame.fillNaNs(columns: Iterable>): // region fillNA +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][NA] values with given value or expression. + * Specific case of [update]. + * + * Check out the [`fillNA` Operation Usage][Usage]. + * + * For more information: {@include [DocumentationUrls.Fill.FillNA]} + */ +internal interface FillNA { + + /** @include [Update.Usage] {@arg [Update.UpdateOperationArg] [fillNA][fillNA]} */ + interface Usage +} + +/** {@arg [SelectingColumns.OperationArg] [fillNA][fillNA]} */ +internal interface SetFillNAOperationArg + +/** + * @include [FillNA] {@comment Description of the fillNA operation.} + * ## {@comment Line break} + * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} + * ## This Fill NA Overload + */ +private interface CommonFillNAFunctionDoc + +/** + * @include [CommonFillNAFunctionDoc] + * @include [SelectingColumns.Dsl.WithExample] {@include [SetFillNAOperationArg]} + * @include [Update.DslParam] + */ public fun DataFrame.fillNA(columns: ColumnsSelector): Update = update(columns).where { it.isNA } +/** + * @include [CommonFillNAFunctionDoc] + * @include [SelectingColumns.ColumnNames.WithExample] {@include [SetFillNAOperationArg]} + * @include [Update.ColumnNamesParam] + */ public fun DataFrame.fillNA(vararg columns: String): Update = fillNA { columns.toColumns() } +/** + * @include [CommonFillNAFunctionDoc] + * @include [SelectingColumns.KProperties.WithExample] {@include [SetFillNAOperationArg]} + * @include [Update.KPropertiesParam] + */ public fun DataFrame.fillNA(vararg columns: KProperty): Update = fillNA { columns.toColumns() } +/** + * @include [CommonFillNAFunctionDoc] + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNAOperationArg]} + * @include [Update.ColumnAccessorsParam] + */ public fun 
DataFrame.fillNA(vararg columns: ColumnReference): Update = fillNA { columns.toColumns() } +/** + * @include [CommonFillNAFunctionDoc] + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNAOperationArg]} + * @include [Update.ColumnAccessorsParam] + * TODO this will be deprecated + */ public fun DataFrame.fillNA(columns: Iterable>): Update = fillNA { columns.toColumnSet() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 9e2161893..34481d31f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -27,7 +27,7 @@ import kotlin.reflect.KProperty * Returns the [DataFrame] with changed values in some cells * (column types can not be changed). * - * Check out the [Update Operation Usage][Usage]. + * Check out the [`update` Operation Usage][Usage]. * * For more information: {@include [DocumentationUrls.Update]} */ @@ -51,7 +51,7 @@ public data class Update( * * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowValueCondition][RowCondition.RowValueCondition.WithExample]` } ]` * - * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][CommonUpdateAtFunctionDoc]`) ]` + * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` * * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } @@ -94,10 +94,10 @@ private interface SetSelectingColumnsOperationArg /** * @include [Update] {@comment Description of the update operation.} * ## {@comment Line break} - * @include [Update.Columns] {@comment Description of what this function expects 
the user to do.} + * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} * ## This Update Overload */ -internal interface CommonUpdateFunctionDoc +private interface CommonUpdateFunctionDoc /** * ## Optional @@ -175,7 +175,7 @@ public fun Update.where(predicate: RowValueFilter): Update, an [IntRange], or just `vararg` indices. */ interface RowIndicesParam } @@ -266,7 +266,7 @@ public fun Update.notNull(expression: RowValueExpression) /** * @include [CommonUpdateFunctionDoc] - * ### A combination of [update] and [with][Update.with]. + * ### This overload is a combination of [update] and [with][Update.with]. * * @include [SelectingColumns.ColumnAccessors] * @@ -283,7 +283,18 @@ public fun DataFrame.update( ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) -/** TODO */ +/** + * @include [CommonUpdateFunctionDoc] + * ### This overload is a combination of [update] and [with][Update.with]. + * + * @include [SelectingColumns.KProperties] + * + * {@include [RowExpressions.RowValueExpression.WithExample]} + * {@arg [RowExpressions.OperationArg] [update][update]`("city")` } + * + * @include [Update.KPropertiesParam] + * @param expression The {@include [RowExpressions.RowValueExpressionLink]} to update the rows with. + */ public fun DataFrame.update( firstCol: KProperty, vararg cols: KProperty, @@ -291,7 +302,18 @@ public fun DataFrame.update( ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) -/** TODO */ +/** + * @include [CommonUpdateFunctionDoc] + * ### This overload is a combination of [update] and [with][Update.with]. + * + * @include [SelectingColumns.ColumnNames] + * + * {@include [RowExpressions.RowValueExpression.WithExample]} + * {@arg [RowExpressions.OperationArg] [update][update]`("city")` } + * + * @include [Update.ColumnNamesParam] + * @param expression The {@include [RowExpressions.RowValueExpressionLink]} to update the rows with. 
+ */ public fun DataFrame.update( firstCol: String, vararg cols: String, @@ -299,7 +321,6 @@ public fun DataFrame.update( ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) - /** * Specific version of [with] that simply sets the value of each selected row to {@includeArg [CommonSpecificWithDocFirstArg]}. * From 28b1df9f992313c7c006df9beb65b39a78b12896 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 10 Feb 2023 18:01:53 +0100 Subject: [PATCH 20/50] slight refactoring and updated gradle plugin --- core/build.gradle.kts | 14 +++++++++- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 6 ++-- .../jetbrains/kotlinx/dataframe/api/update.kt | 28 +++++++++---------- .../kotlinx/dataframe/documentation/utils.kt | 4 +++ 4 files changed, 34 insertions(+), 18 deletions(-) create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt diff --git a/core/build.gradle.kts b/core/build.gradle.kts index b9f725f90..41c8854d3 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.14" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.15" // id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } @@ -74,6 +74,18 @@ val processKdocIncludeMain by creatingProcessDocTask( debug = true } +// As a bonus, this will update dokka if you use that +tasks.withType { + dependsOn(processKdocIncludeMain) + dokkaSourceSets { + all { + sourceRoot(processKdocIncludeMain.target.get()) +// for (root in processKdocIncludeMain.targets) +// sourceRoot(root) + } + } +} + // Modify all Jar tasks such that before running the Kotlin sources are set to // the target of processKdocIncludeMain and they are returned back to normal afterwards. 
tasks.withType { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index f090b0659..49a99d8b5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -40,7 +40,7 @@ private interface SetFillNullsOperationArg /** * @include [FillNulls] {@comment Description of the fillNulls operation.} - * ## {@comment Line break} + * @include [LineBreak] * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} * ## This Fill Nulls Overload */ @@ -156,7 +156,7 @@ internal interface SetFillNaNsOperationArg /** * @include [FillNaNs] {@comment Description of the fillNaNs operation.} - * ## {@comment Line break} + * @include [LineBreak] * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} * ## This Fill NaNs Overload */ @@ -228,7 +228,7 @@ internal interface SetFillNAOperationArg /** * @include [FillNA] {@comment Description of the fillNA operation.} - * ## {@comment Line break} + * @include [LineBreak] * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} * ## This Fill NA Overload */ diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 34481d31f..3630201f5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -49,18 +49,18 @@ public data class Update( * * {@includeArg [UpdateOperationArg]} `{ `[columns][Columns]` }` * - * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowValueCondition][RowCondition.RowValueCondition.WithExample]` } ]` + * - `[.`[where][Update.where]` { 
`[rowValueCondition][RowCondition.RowValueCondition.WithExample]` } ]` * - * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` + * - `[.`[at][Update.at]` (`[rowIndices][CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` * - * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } - * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } - * | .`[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol]` { colExpression } - * | .`[perRowCol][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol]` { rowColExpression } - * | .`[withValue][org.jetbrains.kotlinx.dataframe.api.Update.withValue]`(value) - * | .`[withNull][org.jetbrains.kotlinx.dataframe.api.Update.withNull]`() - * | .`[withZero][org.jetbrains.kotlinx.dataframe.api.Update.withZero]`() - * | .`[asFrame][org.jetbrains.kotlinx.dataframe.api.Update.asFrame]` { frameExpression }` + * - `.`[with][Update.with]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } + * | .`[notNull][Update.notNull]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } + * | .`[perCol][Update.perCol]` { colExpression } + * | .`[perRowCol][Update.perRowCol]` { rowColExpression } + * | .`[withValue][Update.withValue]`(value) + * | .`[withNull][Update.withNull]`() + * | .`[withZero][Update.withZero]`() + * | .`[asFrame][Update.asFrame]` { frameExpression }` * * {@comment TODO * colExpression: DataColumn.(DataColumn) -> NewValue @@ -68,10 +68,10 @@ public data class Update( * frameExpression: DataFrame.(DataFrame) -> DataFrame} * {@arg [UpdateOperationArg] [update][update]} */ - internal interface Usage + public interface Usage /** The columns to update need to be selected. See {@include [SelectingColumnsLink]} for all the selecting options. 
*/ - internal interface Columns + public interface Columns /** @param columns The [ColumnsSelector] used to select the columns of this [DataFrame] to update. */ internal interface DslParam @@ -93,7 +93,7 @@ private interface SetSelectingColumnsOperationArg /** * @include [Update] {@comment Description of the update operation.} - * ## {@comment Line break} + * @include [LineBreak] * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} * ## This Update Overload */ @@ -350,7 +350,7 @@ public fun Update.withNull(): DataFrame = with { null } * {@arg [CommonSpecificWithDocFirstArg] `0`} * {@arg [CommonSpecificWithDocSecondArg] [withZero][withZero]`()} */ -public fun Update.withZero(): DataFrame = updateWithValuePerColumnImpl { 0 as C } +public fun Update.withZero(): DataFrame = updateWithValuePerColumnImpl { 0 as C } /** * ## With Value diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt new file mode 100644 index 000000000..3660dd539 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt @@ -0,0 +1,4 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +/** ## ‎ */ +internal interface LineBreak From f70f0bc128226dd77f2cdb306b54bfaec89df02f Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 14 Feb 2023 17:43:13 +0100 Subject: [PATCH 21/50] added ColumnExpression typealias and uses across the api. 
--- .../kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt | 11 +++++++++++ .../org/jetbrains/kotlinx/dataframe/api/reorder.kt | 5 +++-- .../org/jetbrains/kotlinx/dataframe/api/update.kt | 5 ++--- .../jetbrains/kotlinx/dataframe/impl/api/reorder.kt | 4 ++-- .../jetbrains/kotlinx/dataframe/impl/api/update.kt | 4 ++-- 5 files changed, 20 insertions(+), 9 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt index 74552bae8..064bbeb02 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt @@ -74,6 +74,17 @@ public typealias RowValueExpression = DataRow.(it: C) -> R */ public typealias RowColumnExpression = (row: DataRow, col: DataColumn) -> R +/** + * [ColumnExpression] is used to express or select any instance of `R` using the given instance of [DataColumn]`` as + * `this` and `it`. + * + * Shorthand for: + * ```kotlin + * DataColumn.(it: DataColumn) -> R + * ``` + */ +public typealias ColumnExpression = Selector, R> + /** * [ColumnSelector] is used to express or select a single column, represented by [SingleColumn]``, using the * context of [ColumnsSelectionDsl]`` as `this` and `it`. 
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt index 31e8ee869..64c2e5b63 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt @@ -1,6 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.ColumnExpression import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -25,11 +26,11 @@ public fun DataFrame.reorder(vararg columns: ColumnReference): Reor public fun DataFrame.reorder(vararg columns: KProperty): Reorder = reorder { columns.toColumns() } public fun DataFrame.reorder(vararg columns: String): Reorder = reorder { columns.toColumns() } -public fun > Reorder.by(expression: Selector, V>): DataFrame = reorderImpl(false, expression) +public fun > Reorder.by(expression: ColumnExpression): DataFrame = reorderImpl(false, expression) public fun Reorder.byName(desc: Boolean = false): DataFrame = if (desc) byDesc { it.name } else by { it.name } -public fun > Reorder.byDesc(expression: Selector, V>): DataFrame = reorderImpl(true, expression) +public fun > Reorder.byDesc(expression: ColumnExpression): DataFrame = reorderImpl(true, expression) public fun > DataFrame.reorderColumnsBy( dfs: Boolean = true, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 3630201f5..9a71b2125 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -1,15 +1,14 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnExpression 
import org.jetbrains.kotlinx.dataframe.ColumnsSelector -import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataFrameExpression import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowColumnExpression import org.jetbrains.kotlinx.dataframe.RowValueExpression import org.jetbrains.kotlinx.dataframe.RowValueFilter -import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.api.Update.Usage import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.documentation.* @@ -244,7 +243,7 @@ public fun Update.perCol(values: Map): DataFrame = up public fun Update.perCol(values: AnyRow): DataFrame = perCol(values.toMap() as Map) /** TODO */ -public fun Update.perCol(valueSelector: Selector, C>): DataFrame = +public fun Update.perCol(valueSelector: ColumnExpression): DataFrame = updateWithValuePerColumnImpl(valueSelector) /** TODO */ diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt index db04a39c1..2ce9f4db8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt @@ -1,9 +1,9 @@ package org.jetbrains.kotlinx.dataframe.impl.api import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.ColumnExpression import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.api.Reorder import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.cast @@ -23,7 +23,7 @@ import kotlin.reflect.typeOf internal fun > Reorder.reorderImpl( desc: Boolean, - expression: Selector, V> + expression: 
ColumnExpression ): DataFrame { data class ColumnInfo( val treeNode: TreeNode, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt index 082ccc11a..b3fca01b3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt @@ -2,10 +2,10 @@ package org.jetbrains.kotlinx.dataframe.impl.api import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnExpression import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.RowValueFilter -import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.api.AddDataRow import org.jetbrains.kotlinx.dataframe.api.Update import org.jetbrains.kotlinx.dataframe.api.cast @@ -30,7 +30,7 @@ internal fun Update.updateImpl(expression: (AddDataRow, DataColu if (df.isEmpty()) df else df.replace(columns).with { it.updateImpl(df, filter, expression) } -internal fun Update.updateWithValuePerColumnImpl(selector: Selector, C>) = +internal fun Update.updateWithValuePerColumnImpl(selector: ColumnExpression) = if (df.isEmpty()) df else { df.replace(columns).with { From 428cff3b0159f01f5a48de44b4052e51a4d39020 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 14 Feb 2023 18:16:10 +0100 Subject: [PATCH 22/50] created stubs for columnExpression and dataframeexpression --- .../jetbrains/kotlinx/dataframe/api/update.kt | 15 +++---- .../documentation/ColumnExpressions.kt | 39 +++++++++++++++++++ .../documentation/DataFrameExpressions.kt | 24 ++++++++++++ .../dataframe/documentation/RowExpressions.kt | 3 ++ .../documentation/SelectingColumns.kt | 1 + 5 files changed, 72 insertions(+), 10 deletions(-) create mode 100644 
core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ColumnExpressions.kt create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DataFrameExpressions.kt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 9a71b2125..6f1a2987d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -46,7 +46,7 @@ public data class Update( /** * ## {@includeArg [UpdateOperationArg]} Operation Usage * - * {@includeArg [UpdateOperationArg]} `{ `[columns][Columns]` }` + * {@includeArg [UpdateOperationArg]} `{ `[columns][SelectingColumns]` }` * * - `[.`[where][Update.where]` { `[rowValueCondition][RowCondition.RowValueCondition.WithExample]` } ]` * @@ -54,18 +54,13 @@ public data class Update( * * - `.`[with][Update.with]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } * | .`[notNull][Update.notNull]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } - * | .`[perCol][Update.perCol]` { colExpression } - * | .`[perRowCol][Update.perRowCol]` { rowColExpression } + * | .`[perCol][Update.perCol]` { [colExpression][ColumnExpressions.ColumnExpression.WithExample] } + * | .`[perRowCol][Update.perRowCol]` { [rowColExpression][ColumnExpressions.RowColumnExpression.WithExample] } * | .`[withValue][Update.withValue]`(value) * | .`[withNull][Update.withNull]`() * | .`[withZero][Update.withZero]`() - * | .`[asFrame][Update.asFrame]` { frameExpression }` - * - * {@comment TODO - * colExpression: DataColumn.(DataColumn) -> NewValue - * rowColExpression: DataRow.(DataColumn) -> NewValue - * frameExpression: DataFrame.(DataFrame) -> DataFrame} - * {@arg [UpdateOperationArg] [update][update]} + * | .`[asFrame][Update.asFrame]` { [dataFrameExpression][DataFrameExpressions.DataFrameExpression.WithExample] }` + * {@arg 
[UpdateOperationArg] [update][update]}{@comment The default name of the `update` operation function name.} */ public interface Usage diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ColumnExpressions.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ColumnExpressions.kt new file mode 100644 index 000000000..7226e8e16 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ColumnExpressions.kt @@ -0,0 +1,39 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +/** + * Column expressions. + */ +internal interface ColumnExpressions { + + + /** + * TODO + */ + interface ColumnExpression { + + /** + * TODO + * @include [ColumnExpression] + */ + interface WithExample + } + + /** [Column Expression][ColumnExpression] */ + interface ColumnExpressionLink + + /** + * TODO + */ + interface RowColumnExpression { + + /** + * TODO + * @include [RowColumnExpression] + */ + interface WithExample + } + + /** [Row Column Expression][RowColumnExpression] */ + interface RowColumnExpressionLink + +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DataFrameExpressions.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DataFrameExpressions.kt new file mode 100644 index 000000000..be4f81151 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DataFrameExpressions.kt @@ -0,0 +1,24 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +/** + * TODO + */ +internal interface DataFrameExpressions { + + /** + * TODO + */ + interface DataFrameExpression { + + /** + * TODO + * @include [DataFrameExpression] + */ + interface WithExample + } + + /** [Data Frame Expression][DataFrameExpression] */ + interface DataFrameExpressionLink + + +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt index e2b60f624..62bef20f8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt @@ -85,6 +85,9 @@ internal interface RowExpressions { /** [Row Value Expression][RowValueExpression.WithExample] */ interface RowValueExpressionLink + + /** @include [ColumnExpressions.RowColumnExpression] */ + interface RowColumnExpression } /** [Row Expressions][RowExpressions] */ diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt index d857e7966..92ce06e7e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -47,6 +47,7 @@ internal interface SelectingColumns { * {@include [Dsl]} * * For example: + * * `df.`{@includeArg [OperationArg]}` { length `[and][ColumnsSelectionDsl.and]` age }` * * `df.`{@includeArg [OperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }` From 6ff4176b4e4d37fd8bcfa19f6f2bd776df5bdc3f Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 17 Feb 2023 15:19:25 +0100 Subject: [PATCH 23/50] small refactorings and renaming --- .../kotlinx/dataframe/api/reorder.kt | 17 +++++---- .../jetbrains/kotlinx/dataframe/api/update.kt | 36 +++++++++---------- .../documentation/DocumentationUrls.kt | 27 +++++++------- ...umnExpressions.kt => ExpressingColumns.kt} | 4 +-- ...Expressions.kt => ExpressingDataFrames.kt} | 4 +-- .../{RowExpressions.kt => ExpressingRows.kt} | 14 ++++---- .../documentation/SelectingColumns.kt | 4 ++- 7 files changed, 56 insertions(+), 50 deletions(-) rename core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/{ColumnExpressions.kt => 
ExpressingColumns.kt} (93%) rename core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/{DataFrameExpressions.kt => ExpressingDataFrames.kt} (88%) rename core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/{RowExpressions.kt => ExpressingRows.kt} (88%) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt index 64c2e5b63..f6eb6cdfd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt @@ -3,7 +3,6 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.ColumnExpression import org.jetbrains.kotlinx.dataframe.ColumnsSelector -import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.columns.ColumnReference @@ -22,15 +21,20 @@ public data class Reorder( } public fun DataFrame.reorder(selector: ColumnsSelector): Reorder = Reorder(this, selector, false) -public fun DataFrame.reorder(vararg columns: ColumnReference): Reorder = reorder { columns.toColumns() } +public fun DataFrame.reorder(vararg columns: ColumnReference): Reorder = + reorder { columns.toColumns() } + public fun DataFrame.reorder(vararg columns: KProperty): Reorder = reorder { columns.toColumns() } public fun DataFrame.reorder(vararg columns: String): Reorder = reorder { columns.toColumns() } -public fun > Reorder.by(expression: ColumnExpression): DataFrame = reorderImpl(false, expression) +public fun > Reorder.by(expression: ColumnExpression): DataFrame = + reorderImpl(false, expression) -public fun Reorder.byName(desc: Boolean = false): DataFrame = if (desc) byDesc { it.name } else by { it.name } +public fun Reorder.byName(desc: Boolean = false): DataFrame = + if (desc) byDesc { 
it.name } else by { it.name } -public fun > Reorder.byDesc(expression: ColumnExpression): DataFrame = reorderImpl(true, expression) +public fun > Reorder.byDesc(expression: ColumnExpression): DataFrame = + reorderImpl(true, expression) public fun > DataFrame.reorderColumnsBy( dfs: Boolean = true, @@ -38,6 +42,7 @@ public fun > DataFrame.reorderColumnsBy( expression: Selector ): DataFrame = Reorder(this, { if (dfs) allDfs(true) else all() }, dfs).reorderImpl(desc, expression) -public fun DataFrame.reorderColumnsByName(dfs: Boolean = true, desc: Boolean = false): DataFrame = reorderColumnsBy(dfs, desc) { name() } +public fun DataFrame.reorderColumnsByName(dfs: Boolean = true, desc: Boolean = false): DataFrame = + reorderColumnsBy(dfs, desc) { name() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 6f1a2987d..b0ed4e3b2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -52,14 +52,14 @@ public data class Update( * * - `[.`[at][Update.at]` (`[rowIndices][CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` * - * - `.`[with][Update.with]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } - * | .`[notNull][Update.notNull]` { `[rowExpression][RowExpressions.RowValueExpression.WithExample]` } - * | .`[perCol][Update.perCol]` { [colExpression][ColumnExpressions.ColumnExpression.WithExample] } - * | .`[perRowCol][Update.perRowCol]` { [rowColExpression][ColumnExpressions.RowColumnExpression.WithExample] } + * - `.`[with][Update.with]` { `[rowExpression][ExpressingRows.RowValueExpression.WithExample]` } + * | .`[notNull][Update.notNull]` { `[rowExpression][ExpressingRows.RowValueExpression.WithExample]` } + * | .`[perCol][Update.perCol]` { [colExpression][ExpressingColumns.ColumnExpression.WithExample] } + * | .`[perRowCol][Update.perRowCol]` 
{ [rowColExpression][ExpressingColumns.RowColumnExpression.WithExample] } * | .`[withValue][Update.withValue]`(value) * | .`[withNull][Update.withNull]`() * | .`[withZero][Update.withZero]`() - * | .`[asFrame][Update.asFrame]` { [dataFrameExpression][DataFrameExpressions.DataFrameExpression.WithExample] }` + * | .`[asFrame][Update.asFrame]` { [dataFrameExpression][ExpressingDataFrames.DataFrameExpression.WithExample] }` * {@arg [UpdateOperationArg] [update][update]}{@comment The default name of the `update` operation function name.} */ public interface Usage @@ -207,12 +207,12 @@ public infix fun Update.perRowCol(expression: RowColumnExpression = AddDataRow.(C) -> R /** ## With - * {@include [RowExpressions.RowValueExpression.WithExample]} - * {@arg [RowExpressions.OperationArg] [update][update]` { city \}.`[with][with]} + * {@include [ExpressingRows.RowValueExpression.WithExample]} + * {@arg [ExpressingRows.OperationArg] [update][update]` { city \}.`[with][with]} * * ## Note - * @include [RowExpressions.AddDataRowNote] - * @param expression The {@include [RowExpressions.RowValueExpressionLink]} to update the rows with. + * @include [ExpressingRows.AddDataRowNote] + * @param expression The {@include [ExpressingRows.RowValueExpressionLink]} to update the rows with. */ public infix fun Update.with(expression: UpdateExpression): DataFrame = updateImpl { row, _, value -> @@ -264,11 +264,11 @@ public fun Update.notNull(expression: RowValueExpression) * * @include [SelectingColumns.ColumnAccessors] * - * {@include [RowExpressions.RowValueExpression.WithExample]} - * {@arg [RowExpressions.OperationArg] [update][update]`("city")` } + * {@include [ExpressingRows.RowValueExpression.WithExample]} + * {@arg [ExpressingRows.OperationArg] [update][update]`("city")` } * * @include [Update.ColumnAccessorsParam] - * @param expression The {@include [RowExpressions.RowValueExpressionLink]} to update the rows with. 
+ * @param expression The {@include [ExpressingRows.RowValueExpressionLink]} to update the rows with. */ public fun DataFrame.update( firstCol: ColumnReference, @@ -283,11 +283,11 @@ public fun DataFrame.update( * * @include [SelectingColumns.KProperties] * - * {@include [RowExpressions.RowValueExpression.WithExample]} - * {@arg [RowExpressions.OperationArg] [update][update]`("city")` } + * {@include [ExpressingRows.RowValueExpression.WithExample]} + * {@arg [ExpressingRows.OperationArg] [update][update]`("city")` } * * @include [Update.KPropertiesParam] - * @param expression The {@include [RowExpressions.RowValueExpressionLink]} to update the rows with. + * @param expression The {@include [ExpressingRows.RowValueExpressionLink]} to update the rows with. */ public fun DataFrame.update( firstCol: KProperty, @@ -302,11 +302,11 @@ public fun DataFrame.update( * * @include [SelectingColumns.ColumnNames] * - * {@include [RowExpressions.RowValueExpression.WithExample]} - * {@arg [RowExpressions.OperationArg] [update][update]`("city")` } + * {@include [ExpressingRows.RowValueExpression.WithExample]} + * {@arg [ExpressingRows.OperationArg] [update][update]`("city")` } * * @include [Update.ColumnNamesParam] - * @param expression The {@include [RowExpressions.RowValueExpressionLink]} to update the rows with. + * @param expression The {@include [ExpressingRows.RowValueExpressionLink]} to update the rows with. 
*/ public fun DataFrame.update( firstCol: String, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index b448ec1fe..01d072f10 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -7,44 +7,47 @@ private interface DocumentationUrls { /** See {@includeArg [NameArg]} on the documentation website. */ interface Text + /** https://kotlin.github.io/dataframe */ + interface Url + interface DataRow { - /** [{@include [Text]}{@arg [NameArg] Row Expressions}](https://kotlin.github.io/dataframe/datarow.html#row-expressions) */ + /** [{@include [Text]}{@arg [NameArg] Row Expressions}]({@include [Url]}/datarow.html#row-expressions) */ interface RowExpressions - /** [{@include [Text]}{@arg [NameArg] Row Conditions}](https://kotlin.github.io/dataframe/datarow.html#row-conditions) */ + /** [{@include [Text]}{@arg [NameArg] Row Conditions}]({@include [Url]}/datarow.html#row-conditions) */ interface RowConditions } - /** [{@include [Text]}{@arg [NameArg] `update`}](https://kotlin.github.io/dataframe/update.html) */ + /** [{@include [Text]}{@arg [NameArg] `update`}]({@include [Url]}/update.html) */ interface Update - /** [{@include [Text]}{@arg [NameArg] `fill`}](https://kotlin.github.io/dataframe/fill.html) */ + /** [{@include [Text]}{@arg [NameArg] `fill`}]({@include [Url]}/fill.html) */ interface Fill { - /** [{@include [Text]}{@arg [NameArg] `fillNulls`}](https://kotlin.github.io/dataframe/fill.html#fillnulls) */ + /** [{@include [Text]}{@arg [NameArg] `fillNulls`}]({@include [Url]}/fill.html#fillnulls) */ interface FillNulls - /** [{@include [Text]}{@arg [NameArg] `fillNaNs`}](https://kotlin.github.io/dataframe/fill.html#fillnans) */ + /** [{@include [Text]}{@arg [NameArg] `fillNaNs`}]({@include 
[Url]}/fill.html#fillnans) */ interface FillNaNs - /** [{@include [Text]}{@arg [NameArg] `fillNA`}](https://kotlin.github.io/dataframe/fill.html#fillna) */ + /** [{@include [Text]}{@arg [NameArg] `fillNA`}]({@include [Url]}/fill.html#fillna) */ interface FillNA } - /** [{@include [Text]}{@arg [NameArg] Access APIs}](https://kotlin.github.io/dataframe/apilevels.html) */ + /** [{@include [Text]}{@arg [NameArg] Access APIs}]({@include [Url]}/apilevels.html) */ interface AccessApis { - /** [{@include [Text]}{@arg [NameArg] String API}](https://kotlin.github.io/dataframe/stringapi.html) */ + /** [{@include [Text]}{@arg [NameArg] String API}]({@include [Url]}/stringapi.html) */ interface StringApi - /** [{@include [Text]}{@arg [NameArg] Column Accessors API}](https://kotlin.github.io/dataframe/columnaccessorsapi.html) */ + /** [{@include [Text]}{@arg [NameArg] Column Accessors API}]({@include [Url]}/columnaccessorsapi.html) */ interface ColumnAccessorsApi - /** [{@include [Text]}{@arg [NameArg] KProperties API}](https://kotlin.github.io/dataframe/kpropertiesapi.html) */ + /** [{@include [Text]}{@arg [NameArg] KProperties API}]({@include [Url]}/kpropertiesapi.html) */ interface KPropertiesApi - /** [{@include [Text]}{@arg [NameArg] Extension Properties API}](https://kotlin.github.io/dataframe/extensionpropertiesapi.html) */ + /** [{@include [Text]}{@arg [NameArg] Extension Properties API}]({@include [Url]}/extensionpropertiesapi.html) */ interface ExtensionPropertiesApi } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ColumnExpressions.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingColumns.kt similarity index 93% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ColumnExpressions.kt rename to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingColumns.kt index 7226e8e16..c30536a42 100644 --- 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ColumnExpressions.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingColumns.kt @@ -3,8 +3,7 @@ package org.jetbrains.kotlinx.dataframe.documentation /** * Column expressions. */ -internal interface ColumnExpressions { - +internal interface ExpressingColumns { /** * TODO @@ -35,5 +34,4 @@ internal interface ColumnExpressions { /** [Row Column Expression][RowColumnExpression] */ interface RowColumnExpressionLink - } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DataFrameExpressions.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingDataFrames.kt similarity index 88% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DataFrameExpressions.kt rename to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingDataFrames.kt index be4f81151..7dea513de 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DataFrameExpressions.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingDataFrames.kt @@ -3,7 +3,7 @@ package org.jetbrains.kotlinx.dataframe.documentation /** * TODO */ -internal interface DataFrameExpressions { +internal interface ExpressingDataFrames { /** * TODO @@ -19,6 +19,4 @@ internal interface DataFrameExpressions { /** [Data Frame Expression][DataFrameExpression] */ interface DataFrameExpressionLink - - } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingRows.kt similarity index 88% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt rename to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingRows.kt index 62bef20f8..4425b821e 100644 --- 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowExpressions.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingRows.kt @@ -3,14 +3,14 @@ package org.jetbrains.kotlinx.dataframe.documentation import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.* -import org.jetbrains.kotlinx.dataframe.documentation.RowExpressions.AddDataRowNote -import org.jetbrains.kotlinx.dataframe.documentation.RowExpressions.RowExpressionLink -import org.jetbrains.kotlinx.dataframe.documentation.RowExpressions.RowValueExpressionLink +import org.jetbrains.kotlinx.dataframe.documentation.ExpressingRows.AddDataRowNote +import org.jetbrains.kotlinx.dataframe.documentation.ExpressingRows.RowExpressionLink +import org.jetbrains.kotlinx.dataframe.documentation.ExpressingRows.RowValueExpressionLink import org.jetbrains.kotlinx.dataframe.RowExpression as DfRowExpression import org.jetbrains.kotlinx.dataframe.RowValueExpression as DfRowValueExpression /** - * ## Row expressions + * ## Expressing Rows * Expressing values using a "Row Expression" ({@include [DocumentationUrls.DataRow.RowExpressions]}) * can occur in the following two types of operations: * @@ -27,7 +27,7 @@ import org.jetbrains.kotlinx.dataframe.RowValueExpression as DfRowValueExpressio * * A Row Expression is similar to a {@include [RowConditionLink]} but that expects a [Boolean] as result. */ -internal interface RowExpressions { +internal interface ExpressingRows { /** * The key for an @arg that will define the operation name for the examples below. 
@@ -86,9 +86,9 @@ internal interface RowExpressions { /** [Row Value Expression][RowValueExpression.WithExample] */ interface RowValueExpressionLink - /** @include [ColumnExpressions.RowColumnExpression] */ + /** @include [ExpressingColumns.RowColumnExpression] */ interface RowColumnExpression } -/** [Row Expressions][RowExpressions] */ +/** [Row Expressions][ExpressingRows] */ internal interface RowExpressionsLink diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt index 92ce06e7e..d493f94bc 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -15,7 +15,9 @@ import kotlin.reflect.KProperty /** [Selecting Columns][SelectingColumns] */ internal interface SelectingColumnsLink -/** Selecting columns for various operations (including but not limited to +/** + * ## Selecting Columns + * Selecting columns for various operations (including but not limited to * [DataFrame.select], [DataFrame.update], [DataFrame.gather], and [DataFrame.fillNulls]) * can be done in the following ways: * - {@include [Dsl.WithExample]} From fd1e545cde97e6cc13fea5348ea9ae8c601d3550 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 20 Feb 2023 17:38:56 +0100 Subject: [PATCH 24/50] notNull, dataframe expression, refactoring --- core/build.gradle.kts | 10 +-- .../jetbrains/kotlinx/dataframe/api/update.kt | 63 ++++++++++++------- .../documentation/ExpressingDataFrames.kt | 21 +++++-- .../dataframe/documentation/ExpressingRows.kt | 2 +- 4 files changed, 64 insertions(+), 32 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 41c8854d3..bf73141b9 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -61,7 +61,7 @@ kotlin.sourceSets { // Backup the kotlin source files location val 
kotlinMainSources = kotlin.sourceSets.main.get().kotlin.sourceDirectories -val processKdocIncludeMain by creatingProcessDocTask( +val processKDocsMain by creatingProcessDocTask( sources = kotlinMainSources.filterNot { "build/generated" in it.path } // Exclude generated sources ) { processors = listOf( @@ -76,10 +76,10 @@ val processKdocIncludeMain by creatingProcessDocTask( // As a bonus, this will update dokka if you use that tasks.withType { - dependsOn(processKdocIncludeMain) + dependsOn(processKDocsMain) dokkaSourceSets { all { - sourceRoot(processKdocIncludeMain.target.get()) + sourceRoot(processKDocsMain.target.get()) // for (root in processKdocIncludeMain.targets) // sourceRoot(root) } @@ -89,7 +89,7 @@ tasks.withType { // Modify all Jar tasks such that before running the Kotlin sources are set to // the target of processKdocIncludeMain and they are returned back to normal afterwards. tasks.withType { - dependsOn(processKdocIncludeMain) + dependsOn(processKDocsMain) outputs.upToDateWhen { false } doFirst { @@ -97,7 +97,7 @@ tasks.withType { sourceSets { main { kotlin.setSrcDirs( - processKdocIncludeMain.targets + + processKDocsMain.targets + kotlinMainSources.filter { "build/generated" in it.path } // Include generated sources (which were excluded above) ) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index b0ed4e3b2..6b88e5d6f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -1,15 +1,8 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.ColumnExpression -import org.jetbrains.kotlinx.dataframe.ColumnsSelector -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataFrameExpression -import org.jetbrains.kotlinx.dataframe.DataRow 
-import org.jetbrains.kotlinx.dataframe.RowColumnExpression -import org.jetbrains.kotlinx.dataframe.RowValueExpression -import org.jetbrains.kotlinx.dataframe.RowValueFilter +import org.jetbrains.kotlinx.dataframe.* import org.jetbrains.kotlinx.dataframe.api.Update.Usage +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.documentation.* import org.jetbrains.kotlinx.dataframe.impl.api.updateImpl @@ -17,7 +10,6 @@ import org.jetbrains.kotlinx.dataframe.impl.api.updateWithValuePerColumnImpl import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns import org.jetbrains.kotlinx.dataframe.impl.headPlusArray -import org.jetbrains.kotlinx.dataframe.index import kotlin.reflect.KProperty /** @@ -204,6 +196,10 @@ public fun Update.at(rowRange: IntRange): Update = where { in public infix fun Update.perRowCol(expression: RowColumnExpression): DataFrame = updateImpl { row, column, _ -> expression(row, column) } +/** ## Update Expression + * @see ExpressingRows.RowValueExpression.WithExample + * @see ExpressingRows.AddDataRowNote + */ // doc processor plugin does not work with type aliases yet public typealias UpdateExpression = AddDataRow.(C) -> R /** ## With @@ -219,7 +215,14 @@ public infix fun Update.with(expression: UpdateExpression expression(row, value) } -/** TODO */ +/** ## As Frame + * + * Updates selected [column group][ColumnGroup] as a [DataFrame] with the given [expression]. + * + * {@include [ExpressingDataFrames.DataFrameExpression.WithExample]} + * {@arg [ExpressingDataFrames.OperationArg] `df.`[update][update]` { name \}.`[asFrame][asFrame]} + * @param expression The [DataFrameExpression] to replace the selected column group with. 
+ */ public infix fun Update>.asFrame(expression: DataFrameExpression>): DataFrame = df.replace(columns).with { it.asColumnGroup().let { expression(it, it) }.asColumnGroup(it.name()) } @@ -248,15 +251,33 @@ internal infix fun RowValueFilter?.and(other: RowValueFilter) return { thisExp(this, it) && other(this, it) } } -/** TODO */ +/** @include [Update.notNull] */ public fun Update.notNull(): Update = - copy(filter = filter and { it != null }) as Update + where { it != null } as Update -/** TODO */ -public fun Update.notNull(expression: RowValueExpression): DataFrame = - notNull().updateImpl { row, column, value -> - expression(row, value) - } +/** + * ## Not Null + * + * Selects only the rows where the values in the selected columns are not null. + * + * Shorthand for: [update][update]` { ... }.`[where][Update.where]` { it != null }` + * + * For example: + * + * `df.`[update][update]` { `[colsOf][colsOf]`<`[Number][Number]`?>() }.`[notNull][notNull]`()`.[perCol][Update.perCol] `{ `[mean][mean]`() }` + * + * ### Optional + * Provide an [expression] to update the rows with. + * This combines [with][Update.with] with [notNull]. + * + * For example: + * + * `df.`[notNull][Update.notNull]` { city }.{ it.`[toUpperCase][String.toUpperCase]`() }` + * + * @param expression Optional {@include [ExpressingRows.RowExpressionLink]} to update the rows with. 
+ */ +public fun Update.notNull(expression: UpdateExpression): DataFrame = + notNull().with(expression) /** * @include [CommonUpdateFunctionDoc] @@ -273,7 +294,7 @@ public fun Update.notNull(expression: RowValueExpression) public fun DataFrame.update( firstCol: ColumnReference, vararg cols: ColumnReference, - expression: RowValueExpression + expression: UpdateExpression ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) @@ -292,7 +313,7 @@ public fun DataFrame.update( public fun DataFrame.update( firstCol: KProperty, vararg cols: KProperty, - expression: RowValueExpression + expression: UpdateExpression ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) @@ -311,7 +332,7 @@ public fun DataFrame.update( public fun DataFrame.update( firstCol: String, vararg cols: String, - expression: RowValueExpression + expression: UpdateExpression ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingDataFrames.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingDataFrames.kt index 7dea513de..f4df459eb 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingDataFrames.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingDataFrames.kt @@ -1,18 +1,29 @@ package org.jetbrains.kotlinx.dataframe.documentation +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.ExpressingDataFrames.DataFrameExpressionLink +import org.jetbrains.kotlinx.dataframe.DataFrameExpression as DfDataFrameExpression + /** - * TODO + * ## Expressing Data Frames + * Expressing values using a "Data Frame Expression" can occur exclusively in a + * {@include [DataFrameExpressionLink]} + * */ internal interface ExpressingDataFrames { - /** - * TODO - */ + interface OperationArg + + /** 
Provide a new value for every selected data frame using a [dataframe expression][DfDataFrameExpression]. */ interface DataFrameExpression { /** - * TODO * @include [DataFrameExpression] + * + * For example: + * + * {@includeArg [OperationArg]}` { `[select][DataFrame.select]` { lastName } }` */ interface WithExample } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingRows.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingRows.kt index 4425b821e..da6a3f707 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingRows.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingRows.kt @@ -39,7 +39,7 @@ internal interface ExpressingRows { interface SetDefaultOperationArg /** - * [with][org.jetbrains.kotlinx.dataframe.api.Update.with] and [add][org.jetbrains.kotlinx.dataframe.api.add] use [AddDataRow] instead of [DataRow] as the DSL's receiver type. + * [update with][org.jetbrains.kotlinx.dataframe.api.Update.with]- and [add][org.jetbrains.kotlinx.dataframe.api.add]-like expressions use [AddDataRow] instead of [DataRow] as the DSL's receiver type. * This is an extension to [RowValueExpression][DfRowValueExpression] and * [RowExpression][DfRowExpression] that provides access to * the modified/generated value of the preceding row ([AddDataRow.newValue]). 
From a795a9d663454052d4892c02d3544c36564428e4 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 20 Feb 2023 21:50:42 +0100 Subject: [PATCH 25/50] refactoring --- core/build.gradle.kts | 4 +--- .../org/jetbrains/kotlinx/dataframe/api/Nulls.kt | 9 --------- .../org/jetbrains/kotlinx/dataframe/api/update.kt | 3 --- .../dataframe/documentation/SelectingColumns.kt | 12 ++++-------- 4 files changed, 5 insertions(+), 23 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index bf73141b9..7c57203f5 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.15" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.16" // id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } @@ -80,8 +80,6 @@ tasks.withType { dokkaSourceSets { all { sourceRoot(processKDocsMain.target.get()) -// for (root in processKdocIncludeMain.targets) -// sourceRoot(root) } } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index 49a99d8b5..1e20fd6ab 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -79,9 +79,6 @@ public fun DataFrame.fillNulls(vararg columns: ColumnReference): Up fillNulls { columns.toColumns() } /** - * @include [CommonFillNullsFunctionDoc] - * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNullsOperationArg]} - * @include [Update.ColumnAccessorsParam] * TODO this will be deprecated */ public fun DataFrame.fillNulls(columns: Iterable>): Update = @@ -195,9 +192,6 @@ public fun DataFrame.fillNaNs(vararg columns: ColumnReference): Upd fillNaNs { columns.toColumns() } /** - * @include [CommonFillNaNsFunctionDoc] - * 
@include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNaNsOperationArg]} - * @include [Update.ColumnAccessorsParam] * TODO this will be deprecated */ public fun DataFrame.fillNaNs(columns: Iterable>): Update = @@ -267,9 +261,6 @@ public fun DataFrame.fillNA(vararg columns: ColumnReference): Updat fillNA { columns.toColumns() } /** - * @include [CommonFillNAFunctionDoc] - * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNAOperationArg]} - * @include [Update.ColumnAccessorsParam] * TODO this will be deprecated */ public fun DataFrame.fillNA(columns: Iterable>): Update = diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 6b88e5d6f..1974d8a9e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -126,9 +126,6 @@ public fun DataFrame.update(vararg columns: ColumnReference): Updat update { columns.toColumns() } /** - * @include [CommonUpdateFunctionDoc] - * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetSelectingColumnsOperationArg]} - * @include [Update.ColumnAccessorsParam] * TODO this will be deprecated */ public fun DataFrame.update(columns: Iterable>): Update = diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt index d493f94bc..aea0fa8e5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -45,8 +45,7 @@ internal interface SelectingColumns { */ interface Dsl { - /** - * {@include [Dsl]} + /** {@include [Dsl]} * * For example: * @@ -65,8 +64,7 @@ internal interface SelectingColumns { */ interface ColumnNames { - /** - * 
{@include [ColumnNames]} + /** {@include [ColumnNames]} * * For example: * @@ -81,8 +79,7 @@ internal interface SelectingColumns { */ interface ColumnAccessors { - /** - * {@include [ColumnAccessors]} + /** {@include [ColumnAccessors]} * * For example: * @@ -99,8 +96,7 @@ internal interface SelectingColumns { /** Select columns using [KProperties][KProperty] ({@include [AccessApi.KPropertiesApiLink]}). */ interface KProperties { - /** - * {@include [KProperties]} + /** {@include [KProperties]} * * For example: * ```kotlin From e9f7fd0d5959d0268899353eba4a99fd70c8e64e Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 21 Feb 2023 14:06:42 +0100 Subject: [PATCH 26/50] fixed some rendering issues --- .../kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 1974d8a9e..ece8835a4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -46,12 +46,12 @@ public data class Update( * * - `.`[with][Update.with]` { `[rowExpression][ExpressingRows.RowValueExpression.WithExample]` } * | .`[notNull][Update.notNull]` { `[rowExpression][ExpressingRows.RowValueExpression.WithExample]` } - * | .`[perCol][Update.perCol]` { [colExpression][ExpressingColumns.ColumnExpression.WithExample] } - * | .`[perRowCol][Update.perRowCol]` { [rowColExpression][ExpressingColumns.RowColumnExpression.WithExample] } + * | .`[perCol][Update.perCol]` { `[colExpression][ExpressingColumns.ColumnExpression.WithExample]` } + * | .`[perRowCol][Update.perRowCol]` { `[rowColExpression][ExpressingColumns.RowColumnExpression.WithExample]` } * | .`[withValue][Update.withValue]`(value) * | .`[withNull][Update.withNull]`() * | .`[withZero][Update.withZero]`() - * | .`[asFrame][Update.asFrame]` { 
[dataFrameExpression][ExpressingDataFrames.DataFrameExpression.WithExample] }` + * | .`[asFrame][Update.asFrame]` { `[dataFrameExpression][ExpressingDataFrames.DataFrameExpression.WithExample]` }` * {@arg [UpdateOperationArg] [update][update]}{@comment The default name of the `update` operation function name.} */ public interface Usage From d6c5f1bbc0318716b9552c2364ad18030be3b3da Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 21 Feb 2023 16:29:25 +0100 Subject: [PATCH 27/50] refactoring --- .../org/jetbrains/kotlinx/dataframe/api/update.kt | 12 ++++++------ .../dataframe/documentation/SelectingColumns.kt | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index ece8835a4..88d0679ed 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -90,7 +90,7 @@ private interface CommonUpdateFunctionDoc * Combine `df.`[update][update]`(...).`[with][Update.with]` { ... }` * into `df.`[update][update]`(...) { ... 
}` */ -private interface UpdatePlusWithNote +private interface UpdateWithNote /** * @include [CommonUpdateFunctionDoc] @@ -103,7 +103,7 @@ public fun DataFrame.update(columns: ColumnsSelector): Update DataFrame.update(vararg columns: String): Update = update { columns.toColumns() } @@ -111,7 +111,7 @@ public fun DataFrame.update(vararg columns: String): Update = up /** * @include [CommonUpdateFunctionDoc] * @include [SelectingColumns.KProperties.WithExample] {@include [SetSelectingColumnsOperationArg]} - * @include [UpdatePlusWithNote] + * @include [UpdateWithNote] * @include [Update.KPropertiesParam] */ public fun DataFrame.update(vararg columns: KProperty): Update = update { columns.toColumns() } @@ -119,7 +119,7 @@ public fun DataFrame.update(vararg columns: KProperty): Update DataFrame.update(vararg columns: ColumnReference): Update = @@ -241,7 +241,7 @@ public fun Update.perCol(values: AnyRow): DataFrame = perCol(val public fun Update.perCol(valueSelector: ColumnExpression): DataFrame = updateWithValuePerColumnImpl(valueSelector) -/** TODO */ +/** Chains up two row value filters together. */ internal infix fun RowValueFilter?.and(other: RowValueFilter): RowValueFilter { if (this == null) return other val thisExp = this @@ -269,7 +269,7 @@ public fun Update.notNull(): Update = * * For example: * - * `df.`[notNull][Update.notNull]` { city }.{ it.`[toUpperCase][String.toUpperCase]`() }` + * `df.`[update][update]` { city }.`[notNull][Update.notNull]` { it.`[toUpperCase][String.toUpperCase]`() }` * * @param expression Optional {@include [ExpressingRows.RowExpressionLink]} to update the rows with. 
*/ diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt index aea0fa8e5..36108f6d5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -49,7 +49,7 @@ internal interface SelectingColumns { * * For example: * - * `df.`{@includeArg [OperationArg]}` { length `[and][ColumnsSelectionDsl.and]` age }` + * `df.`{@includeArg [OperationArg]}` { length `[and][ColumnsSelectionDsl.and]` age }`{@comment TODO this links up like "kotlin.String.and"} * * `df.`{@includeArg [OperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }` * From 2c2a6109f2b7d9f0712cd0388acf068bfe78c8b3 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 21 Feb 2023 17:51:30 +0100 Subject: [PATCH 28/50] updated doc processor plugin --- core/build.gradle.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 7c57203f5..089fd3803 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.16" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.17" // id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } From eeb36c0f81988ce31200a61346cb5cc3071b7dfa Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 22 Feb 2023 14:02:52 +0100 Subject: [PATCH 29/50] updating doc processor plugin --- core/build.gradle.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 089fd3803..2a4c7ff51 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,7 @@ plugins { 
id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.17" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.18" // id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } From aa311a4d10a1a7f0eeaa707a1b9d5440aab8a409 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 22 Feb 2023 14:10:16 +0100 Subject: [PATCH 30/50] reverted Column -> AnyColumnReference to not clog up the PR --- .../jetbrains/kotlinx/dataframe/DataFrame.kt | 2 +- .../jetbrains/kotlinx/dataframe/DataRow.kt | 2 +- .../jetbrains/kotlinx/dataframe/aliases.kt | 2 +- .../dataframe/api/ColumnsSelectionDsl.kt | 10 +++++----- .../kotlinx/dataframe/api/DataFrameGet.kt | 4 ++-- .../kotlinx/dataframe/api/DataRowApi.kt | 6 +++--- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 10 +++++----- .../jetbrains/kotlinx/dataframe/api/add.kt | 10 +++++----- .../kotlinx/dataframe/api/countDistinct.kt | 4 ++-- .../jetbrains/kotlinx/dataframe/api/cumSum.kt | 6 +++--- .../kotlinx/dataframe/api/distinct.kt | 10 +++++----- .../jetbrains/kotlinx/dataframe/api/group.kt | 6 +++--- .../kotlinx/dataframe/api/groupBy.kt | 8 ++++---- .../jetbrains/kotlinx/dataframe/api/move.kt | 16 +++++++-------- .../jetbrains/kotlinx/dataframe/api/pivot.kt | 20 +++++++++---------- .../jetbrains/kotlinx/dataframe/api/remove.kt | 10 +++++----- .../jetbrains/kotlinx/dataframe/api/select.kt | 6 +++--- .../kotlinx/dataframe/api/toDataFrame.kt | 4 ++-- .../jetbrains/kotlinx/dataframe/api/unfold.kt | 4 ++-- .../kotlinx/dataframe/api/ungroup.kt | 4 ++-- .../kotlinx/dataframe/api/valueCounts.kt | 4 ++-- .../jetbrains/kotlinx/dataframe/api/values.kt | 14 ++++++------- .../jetbrains/kotlinx/dataframe/api/with.kt | 4 ++-- .../impl/aggregation/aggregations.kt | 4 ++-- .../kotlinx/dataframe/impl/api/move.kt | 4 ++-- 25 files changed, 87 insertions(+), 87 deletions(-) diff --git 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt index 9698f9630..dd1c55636 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt @@ -57,7 +57,7 @@ public interface DataFrame : Aggregatable, ColumnsContainer { override operator fun get(columns: ColumnsSelector): List> = getColumnsImpl(UnresolvedColumnsPolicy.Fail, columns) - public operator fun get(first: AnyColumnReference, vararg other: AnyColumnReference): DataFrame = select(listOf(first) + other) + public operator fun get(first: Column, vararg other: Column): DataFrame = select(listOf(first) + other) public operator fun get(first: String, vararg other: String): DataFrame = select(listOf(first) + other) public operator fun get(columnRange: ClosedRange): DataFrame = select { columnRange.start..columnRange.endInclusive } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt index 5a1375436..989fb3df2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt @@ -28,7 +28,7 @@ public interface DataRow { public operator fun get(column: ColumnReference): R public operator fun get(columns: List>): List = columns.map { get(it) } public operator fun get(property: KProperty): R = get(property.columnName) as R - public operator fun get(first: AnyColumnReference, vararg other: AnyColumnReference): DataRow = owner.get(first, *other)[index] + public operator fun get(first: Column, vararg other: Column): DataRow = owner.get(first, *other)[index] public operator fun get(first: String, vararg other: String): DataRow = owner.get(first, *other)[index] public operator fun get(path: ColumnPath): Any? = owner.get(path)[index] public operator fun get(name: String): Any? 
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt index 064bbeb02..de7a58df5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt @@ -149,7 +149,7 @@ public typealias RowValueFilter = RowValueExpression // region columns -public typealias AnyColumnReference = ColumnReference<*> +public typealias Column = ColumnReference<*> public typealias ColumnGroupReference = ColumnReference public typealias ColumnGroupAccessor = ColumnAccessor> diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt index 448b7bf36..43ffc0908 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt @@ -74,7 +74,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public operator fun String.rangeTo(endInclusive: String): ColumnSet<*> = toColumnAccessor().rangeTo(endInclusive.toColumnAccessor()) - public operator fun AnyColumnReference.rangeTo(endInclusive: AnyColumnReference): ColumnSet<*> = object : ColumnSet { + public operator fun Column.rangeTo(endInclusive: Column): ColumnSet<*> = object : ColumnSet { override fun resolve(context: ColumnResolutionContext): List> { val startPath = this@rangeTo.resolveSingle(context)!!.path val endPath = endInclusive.resolveSingle(context)!!.path @@ -168,7 +168,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } public fun SingleColumn<*>.allAfter(colName: String): ColumnSet = allAfter(pathOf(colName)) - public fun SingleColumn<*>.allAfter(column: AnyColumnReference): ColumnSet = allAfter(column.path()) + public fun SingleColumn<*>.allAfter(column: Column): ColumnSet = 
allAfter(column.path()) // endregion @@ -187,7 +187,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } public fun SingleColumn<*>.allSince(colName: String): ColumnSet = allSince(pathOf(colName)) - public fun SingleColumn<*>.allSince(column: AnyColumnReference): ColumnSet = allSince(column.path()) + public fun SingleColumn<*>.allSince(column: Column): ColumnSet = allSince(column.path()) // endregion @@ -206,7 +206,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } public fun SingleColumn<*>.allBefore(colName: String): ColumnSet = allBefore(pathOf(colName)) - public fun SingleColumn<*>.allBefore(column: AnyColumnReference): ColumnSet = allBefore(column.path()) + public fun SingleColumn<*>.allBefore(column: Column): ColumnSet = allBefore(column.path()) // endregion @@ -225,7 +225,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } public fun SingleColumn<*>.allUntil(colName: String): ColumnSet = allUntil(pathOf(colName)) - public fun SingleColumn<*>.allUntil(column: AnyColumnReference): ColumnSet = allUntil(column.path()) + public fun SingleColumn<*>.allUntil(column: Column): ColumnSet = allUntil(column.path()) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt index 354d5990d..63588c4e9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnSelector import org.jetbrains.kotlinx.dataframe.ColumnsContainer import org.jetbrains.kotlinx.dataframe.ColumnsSelector @@ -88,7 +88,7 @@ public fun 
ColumnsContainer.getColumnGroupOrNull(column: KProperty<*>): C public fun ColumnsContainer<*>.containsColumn(column: ColumnReference): Boolean = getColumnOrNull(column) != null public fun ColumnsContainer<*>.containsColumn(column: KProperty<*>): Boolean = containsColumn(column.columnName) -public operator fun ColumnsContainer<*>.contains(column: AnyColumnReference): Boolean = containsColumn(column) +public operator fun ColumnsContainer<*>.contains(column: Column): Boolean = containsColumn(column) public operator fun ColumnsContainer<*>.contains(column: KProperty<*>): Boolean = containsColumn(column) // region rows diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt index d7987f5c4..2860c274e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.ColumnsContainer import org.jetbrains.kotlinx.dataframe.DataColumn @@ -57,10 +57,10 @@ public fun AnyRow.getValueOrNull(column: KProperty): T? 
= getValueOrNull< // region contains public fun AnyRow.containsKey(columnName: String): Boolean = owner.containsColumn(columnName) -public fun AnyRow.containsKey(column: AnyColumnReference): Boolean = owner.containsColumn(column) +public fun AnyRow.containsKey(column: Column): Boolean = owner.containsColumn(column) public fun AnyRow.containsKey(column: KProperty<*>): Boolean = owner.containsColumn(column) -public operator fun AnyRow.contains(column: AnyColumnReference): Boolean = containsKey(column) +public operator fun AnyRow.contains(column: Column): Boolean = containsKey(column) public operator fun AnyRow.contains(column: KProperty<*>): Boolean = containsKey(column) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index 1e20fd6ab..bc6477885 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.ColumnsSelector @@ -285,11 +285,11 @@ public fun DataFrame.dropNulls(vararg columns: KProperty<*>, whereAllNull public fun DataFrame.dropNulls(vararg columns: String, whereAllNull: Boolean = false): DataFrame = dropNulls(whereAllNull) { columns.toColumns() } -public fun DataFrame.dropNulls(vararg columns: AnyColumnReference, whereAllNull: Boolean = false): DataFrame = +public fun DataFrame.dropNulls(vararg columns: Column, whereAllNull: Boolean = false): DataFrame = dropNulls(whereAllNull) { columns.toColumns() } public fun DataFrame.dropNulls( - columns: Iterable, + columns: Iterable, whereAllNull: Boolean = false ): DataFrame = 
dropNulls(whereAllNull) { columns.toColumnSet() } @@ -314,10 +314,10 @@ public fun DataFrame.dropNA(vararg columns: KProperty<*>, whereAllNA: Boo public fun DataFrame.dropNA(vararg columns: String, whereAllNA: Boolean = false): DataFrame = dropNA(whereAllNA) { columns.toColumns() } -public fun DataFrame.dropNA(vararg columns: AnyColumnReference, whereAllNA: Boolean = false): DataFrame = +public fun DataFrame.dropNA(vararg columns: Column, whereAllNA: Boolean = false): DataFrame = dropNA(whereAllNA) { columns.toColumns() } -public fun DataFrame.dropNA(columns: Iterable, whereAllNA: Boolean = false): DataFrame = +public fun DataFrame.dropNA(columns: Iterable, whereAllNA: Boolean = false): DataFrame = dropNA(whereAllNA) { columns.toColumnSet() } public fun DataFrame.dropNA(whereAllNA: Boolean = false): DataFrame = diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt index 2a9375869..c33d5b947 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -3,7 +3,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyBaseCol import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyColumnGroupAccessor -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.ColumnsContainer @@ -160,9 +160,9 @@ public class AddDsl(@PublishedApi internal val df: DataFrame) : ColumnsCon // TODO: support adding column into path internal val columns = mutableListOf() - public fun add(column: AnyColumnReference): Boolean = columns.add(column.resolveSingle(df)!!.data) + public fun add(column: Column): Boolean = columns.add(column.resolveSingle(df)!!.data) - public operator 
fun AnyColumnReference.unaryPlus(): Boolean = add(this) + public operator fun Column.unaryPlus(): Boolean = add(this) public operator fun String.unaryPlus(): Boolean = add(df[this]) @@ -187,11 +187,11 @@ public class AddDsl(@PublishedApi internal val df: DataFrame) : ColumnsCon public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = add(name, Infer.Nulls, expression) - public infix fun String.from(column: AnyColumnReference): Boolean = add(column.rename(this)) + public infix fun String.from(column: Column): Boolean = add(column.rename(this)) public inline infix fun ColumnAccessor.from(column: ColumnReference): Boolean = name() from column public inline infix fun KProperty.from(column: ColumnReference): Boolean = name from column - public infix fun AnyColumnReference.into(name: String): Boolean = add(rename(name)) + public infix fun Column.into(name: String): Boolean = add(rename(name)) public infix fun ColumnReference.into(column: ColumnAccessor): Boolean = into(column.name()) public infix fun ColumnReference.into(column: KProperty): Boolean = into(column.name) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt index cc4b04301..931983e76 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame @@ -19,7 +19,7 @@ public fun DataFrame.countDistinct(columns: ColumnsSelector): In public fun DataFrame.countDistinct(vararg columns: String): Int = countDistinct { columns.toColumns() } public fun 
DataFrame.countDistinct(vararg columns: KProperty): Int = countDistinct { columns.toColumns() } -public fun DataFrame.countDistinct(vararg columns: AnyColumnReference): Int = +public fun DataFrame.countDistinct(vararg columns: Column): Int = countDistinct { columns.toColumns() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt index f67760cf3..be60508cd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -38,7 +38,7 @@ private val supportedClasses = setOf(Double::class, Float::class, Int::class, Lo public fun DataFrame.cumSum(skipNA: Boolean = defaultCumSumSkipNA, columns: ColumnsSelector): DataFrame = convert(columns).to { if (it.typeClass in supportedClasses) it.cast().cumSum(skipNA) else it } public fun DataFrame.cumSum(vararg columns: String, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumns() } -public fun DataFrame.cumSum(vararg columns: AnyColumnReference, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumns() } +public fun DataFrame.cumSum(vararg columns: Column, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumns() } public fun DataFrame.cumSum(vararg columns: KProperty<*>, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumns() } public fun DataFrame.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { allDfs() } @@ -50,7 +50,7 @@ public fun DataFrame.cumSum(skipNA: 
Boolean = defaultCumSumSkipNA): DataF public fun GroupBy.cumSum(skipNA: Boolean = defaultCumSumSkipNA, columns: ColumnsSelector): GroupBy = updateGroups { cumSum(skipNA, columns) } public fun GroupBy.cumSum(vararg columns: String, skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { columns.toColumns() } -public fun GroupBy.cumSum(vararg columns: AnyColumnReference, skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { columns.toColumns() } +public fun GroupBy.cumSum(vararg columns: Column, skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { columns.toColumns() } public fun GroupBy.cumSum(vararg columns: KProperty<*>, skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { columns.toColumns() } public fun GroupBy.cumSum(skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { allDfs() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt index 7c72af4e6..c0c072c33 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet @@ -18,19 +18,19 @@ public fun DataFrame.distinct(vararg columns: KProperty<*>): DataFrame set } public fun DataFrame.distinct(vararg columns: String): DataFrame = distinct { columns.toColumns() } -public fun DataFrame.distinct(vararg columns: AnyColumnReference): DataFrame = distinct { columns.toColumns() } +public fun DataFrame.distinct(vararg columns: Column): DataFrame = distinct { columns.toColumns() } @JvmName("distinctT") public fun 
DataFrame.distinct(columns: Iterable): DataFrame = distinct { columns.toColumns() } -public fun DataFrame.distinct(columns: Iterable): DataFrame = distinct { columns.toColumnSet() } +public fun DataFrame.distinct(columns: Iterable): DataFrame = distinct { columns.toColumnSet() } public fun DataFrame.distinctBy(vararg columns: KProperty<*>): DataFrame = distinctBy { columns.toColumns() } public fun DataFrame.distinctBy(vararg columns: String): DataFrame = distinctBy { columns.toColumns() } -public fun DataFrame.distinctBy(vararg columns: AnyColumnReference): DataFrame = distinctBy { columns.toColumns() } +public fun DataFrame.distinctBy(vararg columns: Column): DataFrame = distinctBy { columns.toColumns() } @JvmName("distinctByT") public fun DataFrame.distinctBy(columns: Iterable): DataFrame = distinctBy { columns.toColumns() } -public fun DataFrame.distinctBy(columns: Iterable): DataFrame = distinctBy { columns.toColumnSet() } +public fun DataFrame.distinctBy(columns: Iterable): DataFrame = distinctBy { columns.toColumnSet() } public fun DataFrame.distinctBy(columns: ColumnsSelector): DataFrame { val cols = get(columns) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt index c83f2db3f..2c75681d3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyColumnGroupAccessor -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath @@ -16,7 +16,7 @@ public data class GroupClause(val df: DataFrame, val columns: ColumnsSe public fun DataFrame.group(columns: 
ColumnsSelector): GroupClause = GroupClause(this, columns) public fun DataFrame.group(vararg columns: String): GroupClause = group { columns.toColumns() } -public fun DataFrame.group(vararg columns: AnyColumnReference): GroupClause = group { columns.toColumns() } +public fun DataFrame.group(vararg columns: Column): GroupClause = group { columns.toColumns() } public fun DataFrame.group(vararg columns: KProperty<*>): GroupClause = group { columns.toColumns() } @JvmName("intoString") @@ -25,7 +25,7 @@ public fun DataFrame.group(vararg columns: KProperty<*>): GroupClause GroupClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> String): DataFrame = df.move(columns).under { column(it).toColumnAccessor() } @JvmName("intoColumn") -public infix fun GroupClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> AnyColumnReference): DataFrame = df.move(columns).under(column) +public infix fun GroupClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> Column): DataFrame = df.move(columns).under(column) public infix fun GroupClause.into(column: String): DataFrame = into(columnGroup().named(column)) public infix fun GroupClause.into(column: AnyColumnGroupAccessor): DataFrame = df.move(columns).under(column) public infix fun GroupClause.into(column: KProperty<*>): DataFrame = into(column.columnName) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt index 8796681dd..8bd1d05f5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame @@ -21,10 +21,10 @@ import 
kotlin.reflect.KProperty public fun DataFrame.groupBy(moveToTop: Boolean = true, cols: ColumnsSelector): GroupBy = groupByImpl(moveToTop, cols) -public fun DataFrame.groupBy(cols: Iterable): GroupBy = groupBy { cols.toColumnSet() } +public fun DataFrame.groupBy(cols: Iterable): GroupBy = groupBy { cols.toColumnSet() } public fun DataFrame.groupBy(vararg cols: KProperty<*>): GroupBy = groupBy { cols.toColumns() } public fun DataFrame.groupBy(vararg cols: String): GroupBy = groupBy { cols.toColumns() } -public fun DataFrame.groupBy(vararg cols: AnyColumnReference, moveToTop: Boolean = true): GroupBy = +public fun DataFrame.groupBy(vararg cols: Column, moveToTop: Boolean = true): GroupBy = groupBy(moveToTop) { cols.toColumns() } // endregion @@ -34,7 +34,7 @@ public fun DataFrame.groupBy(vararg cols: AnyColumnReference, moveToTop: public fun Pivot.groupBy(moveToTop: Boolean = true, columns: ColumnsSelector): PivotGroupBy = (this as PivotImpl).toGroupedPivot(moveToTop, columns) -public fun Pivot.groupBy(vararg columns: AnyColumnReference): PivotGroupBy = groupBy { columns.toColumns() } +public fun Pivot.groupBy(vararg columns: Column): PivotGroupBy = groupBy { columns.toColumns() } public fun Pivot.groupBy(vararg columns: String): PivotGroupBy = groupBy { columns.toColumns() } public fun Pivot.groupBy(vararg columns: KProperty<*>): PivotGroupBy = groupBy { columns.toColumns() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt index 90ec36cf3..5a01944c5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyColumnGroupAccessor -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnSelector 
import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame @@ -21,20 +21,20 @@ public fun DataFrame.move(vararg cols: KProperty): MoveClause public fun DataFrame.moveTo(newColumnIndex: Int, columns: ColumnsSelector): DataFrame = move(columns).to(newColumnIndex) public fun DataFrame.moveTo(newColumnIndex: Int, vararg columns: String): DataFrame = moveTo(newColumnIndex) { columns.toColumns() } -public fun DataFrame.moveTo(newColumnIndex: Int, vararg columns: AnyColumnReference): DataFrame = moveTo(newColumnIndex) { columns.toColumns() } +public fun DataFrame.moveTo(newColumnIndex: Int, vararg columns: Column): DataFrame = moveTo(newColumnIndex) { columns.toColumns() } public fun DataFrame.moveTo(newColumnIndex: Int, vararg columns: KProperty<*>): DataFrame = moveTo(newColumnIndex) { columns.toColumns() } public fun DataFrame.moveToLeft(columns: ColumnsSelector): DataFrame = move(columns).toLeft() public fun DataFrame.moveToLeft(vararg columns: String): DataFrame = moveToLeft { columns.toColumns() } -public fun DataFrame.moveToLeft(vararg columns: AnyColumnReference): DataFrame = moveToLeft { columns.toColumns() } +public fun DataFrame.moveToLeft(vararg columns: Column): DataFrame = moveToLeft { columns.toColumns() } public fun DataFrame.moveToLeft(vararg columns: KProperty<*>): DataFrame = moveToLeft { columns.toColumns() } public fun DataFrame.moveToRight(columns: ColumnsSelector): DataFrame = move(columns).toRight() public fun DataFrame.moveToRight(vararg columns: String): DataFrame = moveToRight { columns.toColumns() } -public fun DataFrame.moveToRight(vararg columns: AnyColumnReference): DataFrame = moveToRight { columns.toColumns() } +public fun DataFrame.moveToRight(vararg columns: Column): DataFrame = moveToRight { columns.toColumns() } public fun DataFrame.moveToRight(vararg columns: KProperty<*>): DataFrame = moveToRight { columns.toColumns() } -public fun MoveClause.into(column: 
ColumnsSelectionDsl.(ColumnWithPath) -> AnyColumnReference): DataFrame = moveImpl( +public fun MoveClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> Column): DataFrame = moveImpl( under = false, column ) @@ -42,7 +42,7 @@ public fun MoveClause.into(column: ColumnsSelectionDsl.(ColumnWi public fun MoveClause.into(column: String): DataFrame = pathOf(column).let { path -> into { path } } public fun MoveClause.intoIndexed( - newPathExpression: ColumnsSelectionDsl.(ColumnWithPath, Int) -> AnyColumnReference + newPathExpression: ColumnsSelectionDsl.(ColumnWithPath, Int) -> Column ): DataFrame { var counter = 0 return into { col -> @@ -52,7 +52,7 @@ public fun MoveClause.intoIndexed( public fun MoveClause.under(column: String): DataFrame = pathOf(column).let { path -> under { path } } public fun MoveClause.under(column: AnyColumnGroupAccessor): DataFrame = column.path().let { path -> under { path } } -public fun MoveClause.under(column: ColumnsSelectionDsl.(ColumnWithPath) -> AnyColumnReference): DataFrame = moveImpl( +public fun MoveClause.under(column: ColumnsSelectionDsl.(ColumnWithPath) -> Column): DataFrame = moveImpl( under = true, column ) @@ -66,7 +66,7 @@ public fun MoveClause.toTop( public fun MoveClause.after(column: ColumnSelector): DataFrame = afterOrBefore(column, true) public fun MoveClause.after(column: String): DataFrame = after { column.toColumnAccessor() } -public fun MoveClause.after(column: AnyColumnReference): DataFrame = after { column } +public fun MoveClause.after(column: Column): DataFrame = after { column } public fun MoveClause.after(column: KProperty<*>): DataFrame = after { column.toColumnAccessor() } // TODO: implement 'before' diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt index ed99ff907..c4023430c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt +++ 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow @@ -41,17 +41,17 @@ public interface PivotDsl : ColumnsSelectionDsl { public fun DataFrame.pivot(inward: Boolean? = null, columns: PivotColumnsSelector): Pivot = PivotImpl(this, columns, inward) public fun DataFrame.pivot(vararg columns: String, inward: Boolean? = null): Pivot = pivot(inward) { columns.toColumns() } -public fun DataFrame.pivot(vararg columns: AnyColumnReference, inward: Boolean? = null): Pivot = pivot(inward) { columns.toColumns() } +public fun DataFrame.pivot(vararg columns: Column, inward: Boolean? = null): Pivot = pivot(inward) { columns.toColumns() } public fun DataFrame.pivot(vararg columns: KProperty<*>, inward: Boolean? 
= null): Pivot = pivot(inward) { columns.toColumns() } public fun DataFrame.pivotMatches(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).groupByOther().matches() public fun DataFrame.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } -public fun DataFrame.pivotMatches(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun DataFrame.pivotMatches(vararg columns: Column, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun DataFrame.pivotMatches(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun DataFrame.pivotCounts(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).groupByOther().count() public fun DataFrame.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } -public fun DataFrame.pivotCounts(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun DataFrame.pivotCounts(vararg columns: Column, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } public fun DataFrame.pivotCounts(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } // endregion @@ -59,18 +59,18 @@ public fun DataFrame.pivotCounts(vararg columns: KProperty<*>, inward: Bo // region GroupBy public fun GroupBy<*, G>.pivot(inward: Boolean = true, columns: ColumnsSelector): PivotGroupBy = PivotGroupByImpl(this, columns, inward) -public fun GroupBy<*, G>.pivot(vararg columns: AnyColumnReference, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivot(vararg columns: Column, inward: Boolean = true): PivotGroupBy = 
pivot(inward) { columns.toColumns() } public fun GroupBy<*, G>.pivot(vararg columns: String, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } public fun GroupBy<*, G>.pivot(vararg columns: KProperty<*>, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } public fun GroupBy<*, G>.pivotMatches(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).matches() public fun GroupBy<*, G>.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } -public fun GroupBy<*, G>.pivotMatches(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivotMatches(vararg columns: Column, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun GroupBy<*, G>.pivotMatches(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun GroupBy<*, G>.pivotCounts(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).count() public fun GroupBy<*, G>.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } -public fun GroupBy<*, G>.pivotCounts(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivotCounts(vararg columns: Column, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } public fun GroupBy<*, G>.pivotCounts(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } // endregion @@ -80,17 +80,17 @@ public fun GroupBy<*, G>.pivotCounts(vararg columns: KProperty<*>, inward: B public fun AggregateGroupedDsl.pivot(inward: Boolean = true, columns: ColumnsSelector): PivotGroupBy = PivotInAggregateImpl(this, 
columns, inward) public fun AggregateGroupedDsl.pivot(vararg columns: String, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } -public fun AggregateGroupedDsl.pivot(vararg columns: AnyColumnReference, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } +public fun AggregateGroupedDsl.pivot(vararg columns: Column, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } public fun AggregateGroupedDsl.pivot(vararg columns: KProperty<*>, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } public fun AggregateGroupedDsl.pivotMatches(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).matches() public fun AggregateGroupedDsl.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } -public fun AggregateGroupedDsl.pivotMatches(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun AggregateGroupedDsl.pivotMatches(vararg columns: Column, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun AggregateGroupedDsl.pivotMatches(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } public fun AggregateGroupedDsl.pivotCounts(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).matches() public fun AggregateGroupedDsl.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } -public fun AggregateGroupedDsl.pivotCounts(vararg columns: AnyColumnReference, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun AggregateGroupedDsl.pivotCounts(vararg columns: Column, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } public fun AggregateGroupedDsl.pivotCounts(vararg 
columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt index 85226f1c3..b252b7ed0 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.impl.api.removeImpl @@ -13,12 +13,12 @@ import kotlin.reflect.KProperty public fun DataFrame.remove(columns: ColumnsSelector): DataFrame = removeImpl(allowMissingColumns = true, columns = columns).df public fun DataFrame.remove(vararg columns: KProperty<*>): DataFrame = remove { columns.toColumns() } public fun DataFrame.remove(vararg columns: String): DataFrame = remove { columns.toColumns() } -public fun DataFrame.remove(vararg columns: AnyColumnReference): DataFrame = remove { columns.toColumns() } -public fun DataFrame.remove(columns: Iterable): DataFrame = remove { columns.toColumnSet() } +public fun DataFrame.remove(vararg columns: Column): DataFrame = remove { columns.toColumns() } +public fun DataFrame.remove(columns: Iterable): DataFrame = remove { columns.toColumnSet() } public infix operator fun DataFrame.minus(columns: ColumnsSelector): DataFrame = remove(columns) public infix operator fun DataFrame.minus(column: String): DataFrame = remove(column) -public infix operator fun DataFrame.minus(column: AnyColumnReference): DataFrame = remove(column) -public infix operator fun DataFrame.minus(columns: Iterable): DataFrame = remove(columns) +public infix operator fun DataFrame.minus(column: Column): DataFrame = 
remove(column) +public infix operator fun DataFrame.minus(columns: Iterable): DataFrame = remove(columns) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt index 4a91e748f..3277ec595 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.impl.columnName @@ -27,11 +27,11 @@ public fun DataFrame.select(vararg columns: String): DataFrame = public fun DataFrame.select(columns: Iterable): DataFrame = columns.map { get(it) }.toDataFrame().cast() -public fun DataFrame.select(vararg columns: AnyColumnReference): DataFrame = +public fun DataFrame.select(vararg columns: Column): DataFrame = select { columns.toColumns() } @JvmName("selectAnyColumnReferenceIterable") -public fun DataFrame.select(columns: Iterable): DataFrame = +public fun DataFrame.select(columns: Iterable): DataFrame = select { columns.toColumnSet() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index 89d7581a2..7d06eb151 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyBaseCol -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyFrame import 
org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn @@ -41,7 +41,7 @@ public fun DataFrame.read(vararg columns: String): DataFrame = unfold( public fun DataFrame.read(vararg columns: KProperty<*>): DataFrame = unfold(*columns) @Deprecated("Replaced with `unfold` operation.", ReplaceWith("this.unfold(*columns)"), DeprecationLevel.ERROR) -public fun DataFrame.read(vararg columns: AnyColumnReference): DataFrame = unfold(*columns) +public fun DataFrame.read(vararg columns: Column): DataFrame = unfold(*columns) @JvmName("toDataFrameT") public fun Iterable>.toDataFrame(): DataFrame { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt index 1ad347213..54f59bf95 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -25,4 +25,4 @@ public inline fun DataColumn.unfold(): AnyCol = public fun DataFrame.unfold(columns: ColumnsSelector): DataFrame = replace(columns).with { it.unfold() } public fun DataFrame.unfold(vararg columns: String): DataFrame = unfold { columns.toColumns() } public fun DataFrame.unfold(vararg columns: KProperty<*>): DataFrame = unfold { columns.toColumns() } -public fun DataFrame.unfold(vararg columns: AnyColumnReference): DataFrame = unfold { columns.toColumns() } +public fun DataFrame.unfold(vararg columns: Column): DataFrame = unfold { columns.toColumns() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt index 7633687ba..65e0f820c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns @@ -15,7 +15,7 @@ public fun DataFrame.ungroup(columns: ColumnsSelector): DataFram } public fun DataFrame.ungroup(vararg columns: String): DataFrame = ungroup { columns.toColumns() } -public fun DataFrame.ungroup(vararg columns: AnyColumnReference): DataFrame = ungroup { columns.toColumns() } +public fun DataFrame.ungroup(vararg columns: Column): DataFrame = ungroup { columns.toColumns() } public fun DataFrame.ungroup(vararg columns: KProperty<*>): DataFrame = ungroup { columns.toColumns() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt index fcfef0e04..7de0675a8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -70,7 +70,7 @@ public fun DataFrame.valueCounts( resultColumn: String = defaultCountColumnName ): DataFrame = valueCounts(sort, ascending, dropNA, resultColumn) { columns.toColumns() } public fun DataFrame.valueCounts( - vararg 
columns: AnyColumnReference, + vararg columns: Column, sort: Boolean = true, ascending: Boolean = false, dropNA: Boolean = true, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt index a6e949a9e..417487f73 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow @@ -24,7 +24,7 @@ public fun DataFrame.valuesNotNull(byRow: Boolean = false): Sequence // region GroupBy -public fun Grouped.values(vararg columns: AnyColumnReference, dropNA: Boolean = false, distinct: Boolean = false): DataFrame = values(dropNA, distinct) { columns.toColumns() } +public fun Grouped.values(vararg columns: Column, dropNA: Boolean = false, distinct: Boolean = false): DataFrame = values(dropNA, distinct) { columns.toColumns() } public fun Grouped.values(vararg columns: String, dropNA: Boolean = false, distinct: Boolean = false): DataFrame = values(dropNA, distinct) { columns.toColumns() } public fun Grouped.values( dropNA: Boolean = false, @@ -40,7 +40,7 @@ public fun Grouped.values(dropNA: Boolean = false, distinct: Boolean = fa public fun ReducedGroupBy.values(): DataFrame = values(groupBy.remainingColumnsSelector()) public fun ReducedGroupBy.values( - vararg columns: AnyColumnReference + vararg columns: Column ): DataFrame = values { columns.toColumns() } public fun ReducedGroupBy.values( @@ -66,7 +66,7 @@ public fun Pivot.values( columns: ColumnsForAggregateSelector ): DataRow = delegate { values(dropNA, distinct, separate, columns) } public fun Pivot.values( - vararg columns: 
AnyColumnReference, + vararg columns: Column, dropNA: Boolean = false, distinct: Boolean = false, separate: Boolean = false @@ -95,7 +95,7 @@ public fun ReducedPivot.values( ): DataRow = pivot.delegate { reduce(reducer).values(separate = separate) } public fun ReducedPivot.values( - vararg columns: AnyColumnReference, + vararg columns: Column, separate: Boolean = false ): DataRow = values(separate) { columns.toColumns() } @@ -121,7 +121,7 @@ public fun ReducedPivot.values( public fun PivotGroupBy.values(dropNA: Boolean = false, distinct: Boolean = false, separate: Boolean = false): DataFrame = values(dropNA, distinct, separate, remainingColumnsSelector()) public fun PivotGroupBy.values( - vararg columns: AnyColumnReference, + vararg columns: Column, dropNA: Boolean = false, distinct: Boolean = false, separate: Boolean = false @@ -155,7 +155,7 @@ public fun ReducedPivotGroupBy.values( ): DataFrame = values(separate, pivot.remainingColumnsSelector()) public fun ReducedPivotGroupBy.values( - vararg columns: AnyColumnReference, + vararg columns: Column, separate: Boolean = false ): DataFrame = values(separate) { columns.toColumns() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt index 04ae27ae3..7b5942b83 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowExpression @@ -37,7 +37,7 @@ public inline fun ReducedPivotGroupBy.with(noinline expression return pivot.aggregate { val value = reducer(this)?.let { val value = expression(it, it) - if (value is AnyColumnReference) it[value] 
+ if (value is Column) it[value] else value } internal().yield(emptyPath(), value, type) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregations.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregations.kt index f18c558ca..925a70fdd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregations.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregations.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowExpression @@ -90,7 +90,7 @@ internal fun AggregateInternalDsl.columnValues( internal fun AggregateInternalDsl.withExpr(type: KType, path: ColumnPath, expression: RowExpression) { val values = df.rows().map { val value = expression(it, it) - if (value is AnyColumnReference) it[value] + if (value is Column) it[value] else value } yieldOneOrMany(path, values, type) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/move.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/move.kt index e70c945ef..0e51a7362 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/move.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/move.kt @@ -1,6 +1,6 @@ package org.jetbrains.kotlinx.dataframe.impl.api -import org.jetbrains.kotlinx.dataframe.AnyColumnReference +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame @@ -43,7 +43,7 @@ internal fun MoveClause.afterOrBefore(column: ColumnSelector, internal fun MoveClause.moveImpl( under: Boolean = false, - 
newPathExpression: ColumnsSelectionDsl.(ColumnWithPath) -> AnyColumnReference + newPathExpression: ColumnsSelectionDsl.(ColumnWithPath) -> Column ): DataFrame { val receiver = object : DataFrameReceiver(df, UnresolvedColumnsPolicy.Fail), ColumnsSelectionDsl {} val removeResult = df.removeImpl(columns = columns) From 341da804e48bb260a91c5ea2a36e17d09f801271 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 22 Feb 2023 17:11:04 +0100 Subject: [PATCH 31/50] refactored and renamed some of the general docs --- .../kotlinx/dataframe/api/DataRowApi.kt | 2 +- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 2 +- .../jetbrains/kotlinx/dataframe/api/add.kt | 2 +- .../kotlinx/dataframe/api/countDistinct.kt | 2 +- .../kotlinx/dataframe/api/groupBy.kt | 2 +- .../kotlinx/dataframe/api/toDataFrame.kt | 2 +- .../jetbrains/kotlinx/dataframe/api/update.kt | 57 ++++++++++--------- ...ngColumns.kt => ExpressionsGivenColumn.kt} | 6 +- ...Frames.kt => ExpressionsGivenDataFrame.kt} | 9 ++- ...pressingRows.kt => ExpressionsGivenRow.kt} | 22 +++---- .../{RowCondition.kt => SelectingRows.kt} | 6 +- 11 files changed, 58 insertions(+), 54 deletions(-) rename core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/{ExpressingColumns.kt => ExpressionsGivenColumn.kt} (87%) rename core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/{ExpressingDataFrames.kt => ExpressionsGivenDataFrame.kt} (78%) rename core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/{ExpressingRows.kt => ExpressionsGivenRow.kt} (75%) rename core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/{RowCondition.kt => SelectingRows.kt} (96%) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt index 2860c274e..9cdc67e72 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt +++ 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsContainer import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index bc6477885..d187efef9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -1,9 +1,9 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol -import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt index c33d5b947..a81da1c9a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -3,9 +3,9 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyBaseCol import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyColumnGroupAccessor -import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsContainer import 
org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt index 931983e76..4239b8da3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt index 8bd1d05f5..b02c37e7b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt @@ -1,7 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index 7d06eb151..3bf5ec6d9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -1,8 +1,8 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyBaseCol -import 
org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.Column import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 88d0679ed..13ce718b4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -40,18 +40,18 @@ public data class Update( * * {@includeArg [UpdateOperationArg]} `{ `[columns][SelectingColumns]` }` * - * - `[.`[where][Update.where]` { `[rowValueCondition][RowCondition.RowValueCondition.WithExample]` } ]` + * - `[.`[where][Update.where]` { `[rowValueCondition][SelectingRows.RowValueCondition.WithExample]` } ]` * * - `[.`[at][Update.at]` (`[rowIndices][CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` * - * - `.`[with][Update.with]` { `[rowExpression][ExpressingRows.RowValueExpression.WithExample]` } - * | .`[notNull][Update.notNull]` { `[rowExpression][ExpressingRows.RowValueExpression.WithExample]` } - * | .`[perCol][Update.perCol]` { `[colExpression][ExpressingColumns.ColumnExpression.WithExample]` } - * | .`[perRowCol][Update.perRowCol]` { `[rowColExpression][ExpressingColumns.RowColumnExpression.WithExample]` } + * - `.`[with][Update.with]` { `[rowExpression][ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[notNull][Update.notNull]` { `[rowExpression][ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[perCol][Update.perCol]` { `[colExpression][ExpressionsGivenColumn.ColumnExpression.WithExample]` } + * | .`[perRowCol][Update.perRowCol]` { `[rowColExpression][ExpressionsGivenColumn.RowColumnExpression.WithExample]` } * | .`[withValue][Update.withValue]`(value) * | .`[withNull][Update.withNull]`() * | 
.`[withZero][Update.withZero]`() - * | .`[asFrame][Update.asFrame]` { `[dataFrameExpression][ExpressingDataFrames.DataFrameExpression.WithExample]` }` + * | .`[asFrame][Update.asFrame]` { `[dataFrameExpression][ExpressionsGivenDataFrame.DataFrameExpression.WithExample]` }` * {@arg [UpdateOperationArg] [update][update]}{@comment The default name of the `update` operation function name.} */ public interface Usage @@ -134,9 +134,9 @@ public fun DataFrame.update(columns: Iterable>): Up // endregion /** ## Where - * @include [RowCondition.RowValueCondition.WithExample] - * {@arg [RowCondition.FirstOperationArg] [update][update]} - * {@arg [RowCondition.SecondOperationArg] [where][where]} + * @include [SelectingRows.RowValueCondition.WithExample] + * {@arg [SelectingRows.FirstOperationArg] [update][update]} + * {@arg [SelectingRows.SecondOperationArg] [where][where]} * * @param predicate The [row value filter][RowValueFilter] to select the rows to update. */ @@ -194,18 +194,21 @@ public infix fun Update.perRowCol(expression: RowColumnExpression expression(row, column) } /** ## Update Expression - * @see ExpressingRows.RowValueExpression.WithExample - * @see ExpressingRows.AddDataRowNote + * @see ExpressionsGivenRow.RowValueExpression.WithExample + * @see ExpressionsGivenRow.AddDataRowNote */ // doc processor plugin does not work with type aliases yet public typealias UpdateExpression = AddDataRow.(C) -> R /** ## With - * {@include [ExpressingRows.RowValueExpression.WithExample]} - * {@arg [ExpressingRows.OperationArg] [update][update]` { city \}.`[with][with]} + * {@include [ExpressionsGivenRow.RowValueExpression.WithExample]} + * {@arg [ExpressionsGivenRow.OperationArg] [update][update]` { city \}.`[with][with]} * * ## Note - * @include [ExpressingRows.AddDataRowNote] - * @param expression The {@include [ExpressingRows.RowValueExpressionLink]} to update the rows with. 
+ * @include [ExpressionsGivenRow.AddDataRowNote] + * ## See Also + * - [Update per col][Update.perCol] to provide a new value for every selected row giving the column. + * - [Update per row col][Update.perRowCol] to provide a new value for every selected row giving the row and the column. + * @param expression The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with. */ public infix fun Update.with(expression: UpdateExpression): DataFrame = updateImpl { row, _, value -> @@ -216,8 +219,8 @@ public infix fun Update.with(expression: UpdateExpression * * Updates selected [column group][ColumnGroup] as a [DataFrame] with the given [expression]. * - * {@include [ExpressingDataFrames.DataFrameExpression.WithExample]} - * {@arg [ExpressingDataFrames.OperationArg] `df.`[update][update]` { name \}.`[asFrame][asFrame]} + * {@include [ExpressionsGivenDataFrame.DataFrameExpression.WithExample]} + * {@arg [ExpressionsGivenDataFrame.OperationArg] `df.`[update][update]` { name \}.`[asFrame][asFrame]} * @param expression The [DataFrameExpression] to replace the selected column group with. */ public infix fun Update>.asFrame(expression: DataFrameExpression>): DataFrame = @@ -271,7 +274,7 @@ public fun Update.notNull(): Update = * * `df.`[update][update]` { city }.`[notNull][Update.notNull]` { it.`[toUpperCase][String.toUpperCase]`() }` * - * @param expression Optional {@include [ExpressingRows.RowExpressionLink]} to update the rows with. + * @param expression Optional {@include [ExpressionsGivenRow.RowExpressionLink]} to update the rows with. 
*/ public fun Update.notNull(expression: UpdateExpression): DataFrame = notNull().with(expression) @@ -282,11 +285,11 @@ public fun Update.notNull(expression: UpdateExpression): * * @include [SelectingColumns.ColumnAccessors] * - * {@include [ExpressingRows.RowValueExpression.WithExample]} - * {@arg [ExpressingRows.OperationArg] [update][update]`("city")` } + * {@include [ExpressionsGivenRow.RowValueExpression.WithExample]} + * {@arg [ExpressionsGivenRow.OperationArg] [update][update]`("city")` } * * @include [Update.ColumnAccessorsParam] - * @param expression The {@include [ExpressingRows.RowValueExpressionLink]} to update the rows with. + * @param expression The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with. */ public fun DataFrame.update( firstCol: ColumnReference, @@ -301,11 +304,11 @@ public fun DataFrame.update( * * @include [SelectingColumns.KProperties] * - * {@include [ExpressingRows.RowValueExpression.WithExample]} - * {@arg [ExpressingRows.OperationArg] [update][update]`("city")` } + * {@include [ExpressionsGivenRow.RowValueExpression.WithExample]} + * {@arg [ExpressionsGivenRow.OperationArg] [update][update]`("city")` } * * @include [Update.KPropertiesParam] - * @param expression The {@include [ExpressingRows.RowValueExpressionLink]} to update the rows with. + * @param expression The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with. */ public fun DataFrame.update( firstCol: KProperty, @@ -320,11 +323,11 @@ public fun DataFrame.update( * * @include [SelectingColumns.ColumnNames] * - * {@include [ExpressingRows.RowValueExpression.WithExample]} - * {@arg [ExpressingRows.OperationArg] [update][update]`("city")` } + * {@include [ExpressionsGivenRow.RowValueExpression.WithExample]} + * {@arg [ExpressionsGivenRow.OperationArg] [update][update]`("city")` } * * @include [Update.ColumnNamesParam] - * @param expression The {@include [ExpressingRows.RowValueExpressionLink]} to update the rows with. 
+ * @param expression The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with. */ public fun DataFrame.update( firstCol: String, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt similarity index 87% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingColumns.kt rename to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt index c30536a42..0bc4c4285 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt @@ -1,9 +1,11 @@ package org.jetbrains.kotlinx.dataframe.documentation /** - * Column expressions. + * ## Expressions Given Column + * + * TODO */ -internal interface ExpressingColumns { +internal interface ExpressionsGivenColumn { /** * TODO diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingDataFrames.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenDataFrame.kt similarity index 78% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingDataFrames.kt rename to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenDataFrame.kt index f4df459eb..eba50e165 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingDataFrames.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenDataFrame.kt @@ -2,16 +2,15 @@ package org.jetbrains.kotlinx.dataframe.documentation import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.* -import org.jetbrains.kotlinx.dataframe.documentation.ExpressingDataFrames.DataFrameExpressionLink +import 
org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpressionLink import org.jetbrains.kotlinx.dataframe.DataFrameExpression as DfDataFrameExpression /** - * ## Expressing Data Frames + * ## Expressions Given DataFrame * Expressing values using a "Data Frame Expression" can occur exclusively in a - * {@include [DataFrameExpressionLink]} - * + * {@include [DataFrameExpressionLink]}. */ -internal interface ExpressingDataFrames { +internal interface ExpressionsGivenDataFrame { interface OperationArg diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingRows.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt similarity index 75% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingRows.kt rename to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt index da6a3f707..078697015 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressingRows.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt @@ -3,22 +3,22 @@ package org.jetbrains.kotlinx.dataframe.documentation import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.* -import org.jetbrains.kotlinx.dataframe.documentation.ExpressingRows.AddDataRowNote -import org.jetbrains.kotlinx.dataframe.documentation.ExpressingRows.RowExpressionLink -import org.jetbrains.kotlinx.dataframe.documentation.ExpressingRows.RowValueExpressionLink +import org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.AddDataRowNote +import org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowExpressionLink +import org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpressionLink import org.jetbrains.kotlinx.dataframe.RowExpression as DfRowExpression import 
org.jetbrains.kotlinx.dataframe.RowValueExpression as DfRowValueExpression /** - * ## Expressing Rows + * ## Expressions Given Row * Expressing values using a "Row Expression" ({@include [DocumentationUrls.DataRow.RowExpressions]}) * can occur in the following two types of operations: * - * - Providing a new value for every selected row given the row ({@include [RowExpressionLink]}), + * - Providing a new value for every selected cell given the row of that cell ({@include [RowExpressionLink]}), * for instance in [map][DataFrame.map], [add][DataFrame.add], and [insert][DataFrame.insert] * (using [RowExpression][DfRowExpression]). * - * - Providing a new value for every selected row given the row and the previous value ({@include [RowValueExpressionLink]}), + * - Providing a new value for every selected cell given the row of that cell and its previous value ({@include [RowValueExpressionLink]}), * for instance in [update.with][Update.with], and [convert.notNull][Convert.notNull] * (using [RowValueExpression][DfRowValueExpression]). * @@ -27,7 +27,7 @@ import org.jetbrains.kotlinx.dataframe.RowValueExpression as DfRowValueExpressio * * A Row Expression is similar to a {@include [RowConditionLink]} but that expects a [Boolean] as result. */ -internal interface ExpressingRows { +internal interface ExpressionsGivenRow { /** * The key for an @arg that will define the operation name for the examples below. @@ -46,7 +46,7 @@ internal interface ExpressingRows { */ interface AddDataRowNote - /** Provide a new value for every selected row given the row using a [row expression][DfRowExpression]. */ + /** Provide a new value for every selected cell given its row using a [row expression][DfRowExpression]. 
*/ interface RowExpression { /** @@ -65,7 +65,7 @@ internal interface ExpressingRows { /** [Row Expression][RowExpression.WithExample] */ interface RowExpressionLink - /** Provide a new value for every selected row given the row and the previous value using a + /** Provide a new value for every selected cell given its row and its previous value using a * [row value expression][DfRowValueExpression]. */ interface RowValueExpression { @@ -86,9 +86,9 @@ internal interface ExpressingRows { /** [Row Value Expression][RowValueExpression.WithExample] */ interface RowValueExpressionLink - /** @include [ExpressingColumns.RowColumnExpression] */ + /** @include [ExpressionsGivenColumn.RowColumnExpression] */ interface RowColumnExpression } -/** [Row Expressions][ExpressingRows] */ +/** [Row Expression][ExpressionsGivenRow] */ internal interface RowExpressionsLink diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowCondition.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt similarity index 96% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowCondition.kt rename to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt index 3158a0232..6438b05aa 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowCondition.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt @@ -4,7 +4,7 @@ import org.jetbrains.kotlinx.dataframe.* import org.jetbrains.kotlinx.dataframe.api.* /** - * ## Row Condition + * ## Selecting Rows * Selecting rows that satisfy a "Row Condition" ({@include [DocumentationUrls.DataRow.RowConditions]}) * can occur in the following two types of operations: * - Selecting entire rows ({@include [RowConditionLink]}), for instance in [filter], [drop], [first], and [count] @@ -16,7 +16,7 @@ import org.jetbrains.kotlinx.dataframe.api.* * A Row Condition is similar to a {@include 
[RowExpressionsLink]} but expects a [Boolean] as result. * {@comment TODO Is `where` present everywhere it should be?} */ -internal interface RowCondition { +internal interface SelectingRows { /** * The key for an @arg that will define the operation name for the examples below. @@ -74,5 +74,5 @@ internal interface RowCondition { } } -/** [Row Condition][RowCondition] */ +/** [Row Condition][SelectingRows] */ internal interface RowConditionLink From 87710c16c6edf1d436931341da63e4ee65bdc38f Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 23 Feb 2023 17:46:40 +0100 Subject: [PATCH 32/50] docs and examples for ColumnExpression, links for SelectingColumns, docs for perCol with examples, updated gradle plugin --- core/build.gradle.kts | 2 +- .../jetbrains/kotlinx/dataframe/api/update.kt | 83 ++++++++++++++++--- .../documentation/ExpressionsGivenColumn.kt | 23 ++++- .../documentation/SelectingColumns.kt | 19 ++++- 4 files changed, 109 insertions(+), 18 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 2a4c7ff51..0b001588d 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.18" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.19" // id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 13ce718b4..80d0f70c4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -59,16 +59,16 @@ public data class Update( /** The columns to update need to be selected. See {@include [SelectingColumnsLink]} for all the selecting options. 
*/ public interface Columns - /** @param columns The [ColumnsSelector] used to select the columns of this [DataFrame] to update. */ + /** @param columns The {@include [SelectingColumns.DslLink]} used to select the columns of this [DataFrame] to update. */ internal interface DslParam - /** @param columns The [ColumnReference]s of this [DataFrame] to update. */ + /** @param columns The {@include [SelectingColumns.ColumnAccessorsLink]} of this [DataFrame] to update. */ internal interface ColumnAccessorsParam - /** @param columns The [KProperty] values corresponding to columns of this [DataFrame] to update. */ + /** @param columns The {@include [SelectingColumns.KPropertiesLink]} corresponding to columns of this [DataFrame] to update. */ internal interface KPropertiesParam - /** @param columns The column names belonging to this [DataFrame] to update. */ + /** @param columns The {@include [SelectingColumns.ColumnNamesLink]} belonging to this [DataFrame] to update. */ internal interface ColumnNamesParam } @@ -193,6 +193,9 @@ public fun Update.at(rowRange: IntRange): Update = where { in public infix fun Update.perRowCol(expression: RowColumnExpression): DataFrame = updateImpl { row, column, _ -> expression(row, column) } +/** [Update per row col][Update.perRowCol] to provide a new value for every selected cell giving its row and column. */ +private interface SeeAlsoPerRowCol + /** ## Update Expression * @see ExpressionsGivenRow.RowValueExpression.WithExample * @see ExpressionsGivenRow.AddDataRowNote @@ -206,8 +209,8 @@ public typealias UpdateExpression = AddDataRow.(C) -> R * ## Note * @include [ExpressionsGivenRow.AddDataRowNote] * ## See Also - * - [Update per col][Update.perCol] to provide a new value for every selected row giving the column. - * - [Update per row col][Update.perRowCol] to provide a new value for every selected row giving the row and the column. 
+ * - {@include [SeeAlsoPerCol]} + * - {@include [SeeAlsoPerRowCol]} * @param expression The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with. */ public infix fun Update.with(expression: UpdateExpression): DataFrame = @@ -215,13 +218,16 @@ public infix fun Update.with(expression: UpdateExpression expression(row, value) } +/** [Update with][Update.with] to provide a new value for every selected cell giving its row. */ +private interface SeeAlsoWith + /** ## As Frame * * Updates selected [column group][ColumnGroup] as a [DataFrame] with the given [expression]. * * {@include [ExpressionsGivenDataFrame.DataFrameExpression.WithExample]} * {@arg [ExpressionsGivenDataFrame.OperationArg] `df.`[update][update]` { name \}.`[asFrame][asFrame]} - * @param expression The [DataFrameExpression] to replace the selected column group with. + * @param expression The {@include [ExpressionsGivenDataFrame.DataFrameExpressionLink]} to replace the selected column group with. */ public infix fun Update>.asFrame(expression: DataFrameExpression>): DataFrame = df.replace(columns).with { it.asColumnGroup().let { expression(it, it) }.asColumnGroup(it.name()) } @@ -232,18 +238,75 @@ public infix fun Update>.asFrame(expression: DataFrameEx ) public fun Update.asNullable(): Update = this as Update -/** TODO */ +/** ## Per Col + * + * Per Col can be used for two different types of operations: + * - {@include [ExpressionsGivenColumn.ColumnExpression]} + * - {@include [UpdatePerColMap]} + * + * ## See Also + * - {@include [SeeAlsoWith]} + * - {@include [SeeAlsoPerRowCol]} + * ## This Per Col Overload + */ +private interface CommonUpdatePerColDoc + +/** Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][DataRow] as Map. 
*/ +private interface UpdatePerColMap + +/** + * @include [CommonUpdatePerColDoc] + * @include [UpdatePerColMap] + * + * For example: + * + * `val defaults = {@includeArg [CommonUpdatePerColMapDoc]}` + * + * `df.`[update][update]` { name and age }.`[where][Update.where]` { ... }.`[perCol][perCol]`(defaults)` + * + * @throws IllegalArgumentException if a value for a selected cell's column is not defined in [values\]. + */ +private interface CommonUpdatePerColMapDoc + +/** + * @include [CommonUpdatePerColMapDoc] + * {@arg [CommonUpdatePerColMapDoc] `[mapOf][mapOf]`("name" to "Empty", "age" to 0)} + * + * @param values The [Map]<[String], Value> to provide a new value for every selected cell. + * For each selected column, there must be a value in the map with the same name. + */ public fun Update.perCol(values: Map): DataFrame = updateWithValuePerColumnImpl { values[it.name()] ?: throw IllegalArgumentException("Update value for column ${it.name()} is not defined") } -/** TODO */ +/** + * @include [CommonUpdatePerColMapDoc] + * {@arg [CommonUpdatePerColMapDoc] df.`[getRows][DataFrame.getRows]`(`[listOf][listOf]`(0))` + * + * `.`[update][update]` { name \}.`[with][Update.with]` { "Empty" \}` + * + * `.`[update][update]` { age \}.`[with][Update.with]` { 0 \}` + * + * `.first()} + * + * @param values The [DataRow] to provide a new value for every selected cell. + */ public fun Update.perCol(values: AnyRow): DataFrame = perCol(values.toMap() as Map) -/** TODO */ +/** + * @include [CommonUpdatePerColDoc] + * @include [ExpressionsGivenColumn.ColumnExpression.WithExample] + * {@arg [ExpressionsGivenColumn.OperationArg] [update][update]` { age \}.`[perCol][perCol]} + * + * @param valueSelector The {@include [ExpressionsGivenColumn.ColumnExpressionLink]} to provide a new value for every selected cell giving its column. 
+ */ public fun Update.perCol(valueSelector: ColumnExpression): DataFrame = updateWithValuePerColumnImpl(valueSelector) +/** [Update per col][Update.perCol] to provide a new value for every selected cell giving its column. */ +private interface SeeAlsoPerCol + /** Chains up two row value filters together. */ internal infix fun RowValueFilter?.and(other: RowValueFilter): RowValueFilter { if (this == null) return other diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt index 0bc4c4285..b02417b83 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt @@ -1,5 +1,9 @@ package org.jetbrains.kotlinx.dataframe.documentation +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.ColumnExpression as DfColumnExpression + /** * ## Expressions Given Column * @@ -8,13 +12,26 @@ package org.jetbrains.kotlinx.dataframe.documentation internal interface ExpressionsGivenColumn { /** - * TODO + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. */ + interface OperationArg + + /** {@arg [OperationArg] operation} */ + interface SetDefaultOperationArg + + /** Provide a new value for every selected cell given its column using a [column expression][DfColumnExpression]. 
*/ interface ColumnExpression { /** - * TODO - * @include [ColumnExpression] + * {@include [ColumnExpression]} + * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { it / `[mean][DataColumn.mean]`(skipNA = true) }` + * + * `df.`{@includeArg [OperationArg]}` { `[count][DataColumn.count]` { it > 10 } }` + * @include [SetDefaultOperationArg] */ interface WithExample } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt index 36108f6d5..c87c94827 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -2,8 +2,7 @@ package org.jetbrains.kotlinx.dataframe.documentation import org.jetbrains.kotlinx.dataframe.* import org.jetbrains.kotlinx.dataframe.api.* -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.SingleColumn +import org.jetbrains.kotlinx.dataframe.columns.* import kotlin.reflect.KProperty /** {@comment @@ -59,7 +58,10 @@ internal interface SelectingColumns { interface WithExample } - /** Select columns using their column names + /** [Columns selector DSL][Dsl.WithExample] */ + interface DslLink + + /** Select columns using their [column names][String] * ({@include [AccessApi.StringApiLink]}). */ interface ColumnNames { @@ -74,7 +76,10 @@ internal interface SelectingColumns { interface WithExample } - /** Select columns using column accessors + /** [Column names][ColumnNames.WithExample] */ + interface ColumnNamesLink + + /** Select columns using [column accessors][ColumnReference] * ({@include [AccessApi.ColumnAccessorsApiLink]}). 
*/ interface ColumnAccessors { @@ -93,6 +98,9 @@ internal interface SelectingColumns { interface WithExample } + /** [Column references][ColumnAccessors.WithExample] */ + interface ColumnAccessorsLink + /** Select columns using [KProperties][KProperty] ({@include [AccessApi.KPropertiesApiLink]}). */ interface KProperties { @@ -108,4 +116,7 @@ internal interface SelectingColumns { */ interface WithExample } + + /** [KProperties][KProperties.WithExample] */ + interface KPropertiesLink } From 03c398ffa694de133b73959c225870ae5f47f717 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 23 Feb 2023 18:15:52 +0100 Subject: [PATCH 33/50] per row col. update.kt finished! --- .../jetbrains/kotlinx/dataframe/api/update.kt | 10 +++++++++- .../documentation/ExpressionsGivenColumn.kt | 18 +++++++++++++----- .../documentation/ExpressionsGivenRow.kt | 16 ++++++++++++++-- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 80d0f70c4..58f81fd31 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -189,7 +189,15 @@ public fun Update.at(vararg rowIndices: Int): Update = at(row */ public fun Update.at(rowRange: IntRange): Update = where { index in rowRange } -/** TODO */ +/** ## Per Row Col + * @include [ExpressionsGivenColumn.RowColumnExpression.WithExample] + * {@arg [ExpressionsGivenColumn.OperationArg] [update][update]` { age \}.`[perRowCol][perRowCol]} + * + * ## See Also + * - {@include [SeeAlsoWith]} + * - {@include [SeeAlsoPerCol]} + * @param expression The {@include [ExpressionsGivenColumn.RowColumnExpressionLink]} to provide a new value for every selected cell giving its row and column. 
+ */ public infix fun Update.perRowCol(expression: RowColumnExpression): DataFrame = updateImpl { row, column, _ -> expression(row, column) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt index b02417b83..e2fa5f4d8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt @@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.documentation import org.jetbrains.kotlinx.dataframe.* import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.ColumnExpression as DfColumnExpression +import org.jetbrains.kotlinx.dataframe.RowColumnExpression as DfRowColumnExpression /** * ## Expressions Given Column @@ -28,7 +29,7 @@ internal interface ExpressionsGivenColumn { * * For example: * - * `df.`{@includeArg [OperationArg]}` { it / `[mean][DataColumn.mean]`(skipNA = true) }` + * `df.`{@includeArg [OperationArg]}` { `[mean][DataColumn.mean]`(skipNA = true) }` * * `df.`{@includeArg [OperationArg]}` { `[count][DataColumn.count]` { it > 10 } }` * @include [SetDefaultOperationArg] @@ -39,14 +40,21 @@ internal interface ExpressionsGivenColumn { /** [Column Expression][ColumnExpression] */ interface ColumnExpressionLink - /** - * TODO - */ + /** Provide a new value for every selected cell given both its row and column using a [row-column expression][DfRowColumnExpression]. 
*/ interface RowColumnExpression { /** - * TODO * @include [RowColumnExpression] + * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { row, col ->` + * + * `row.age / col.`[mean][DataColumn.mean]`(skipNA = true)` + * + * `}` + * + * @include [SetDefaultOperationArg] */ interface WithExample } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt index 078697015..74db05625 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt @@ -86,8 +86,20 @@ internal interface ExpressionsGivenRow { /** [Row Value Expression][RowValueExpression.WithExample] */ interface RowValueExpressionLink - /** @include [ExpressionsGivenColumn.RowColumnExpression] */ - interface RowColumnExpression +// @Deprecated( +// "This is located in another file", +// ReplaceWith("ExpressionsGivenColumn.RowColumnExpression"), +// DeprecationLevel.ERROR, +// ) +// interface RowColumnExpression { +// +// @Deprecated( +// "This is located in another file", +// ReplaceWith("ExpressionsGivenColumn.RowColumnExpression.WithExample"), +// DeprecationLevel.ERROR, +// ) +// interface WithExample +// } } /** [Row Expression][ExpressionsGivenRow] */ From 15026b7e0264e276f4a1f72c28220d7503ef5692 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 24 Feb 2023 12:48:30 +0100 Subject: [PATCH 34/50] updated doc processor gradle plugin --- core/build.gradle.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 0b001588d..23e4accd9 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -14,7 +14,7 @@ plugins { id("org.jetbrains.kotlinx.kover") id("org.jmailen.kotlinter") id("org.jetbrains.kotlinx.dataframe") - 
id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.19" + id("com.github.jolanrensen.docProcessorGradlePlugin") version "v0.0.20" // id("nl.jolanrensen.docProcessor") version "1.0-SNAPSHOT" } From 91e97548c70b489990d7e9f28f8f7625b29cb70a Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 27 Feb 2023 14:36:16 +0100 Subject: [PATCH 35/50] cleaning PR a bit and added new target folder with generated sources --- core/build.gradle.kts | 1 + .../kotlinx/dataframe/api/countDistinct.kt | 3 +-- .../jetbrains/kotlinx/dataframe/api/gather.kt | 19 +++++++++++++------ 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 23e4accd9..c37074534 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -64,6 +64,7 @@ val kotlinMainSources = kotlin.sourceSets.main.get().kotlin.sourceDirectories val processKDocsMain by creatingProcessDocTask( sources = kotlinMainSources.filterNot { "build/generated" in it.path } // Exclude generated sources ) { + target = file("generated-sources") processors = listOf( INCLUDE_DOC_PROCESSOR, INCLUDE_FILE_DOC_PROCESSOR, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt index 4239b8da3..37cfdb6c9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt @@ -19,7 +19,6 @@ public fun DataFrame.countDistinct(columns: ColumnsSelector): In public fun DataFrame.countDistinct(vararg columns: String): Int = countDistinct { columns.toColumns() } public fun DataFrame.countDistinct(vararg columns: KProperty): Int = countDistinct { columns.toColumns() } -public fun DataFrame.countDistinct(vararg columns: Column): Int = - countDistinct { columns.toColumns() } +public fun DataFrame.countDistinct(vararg columns: Column): Int = countDistinct { columns.toColumns() 
} // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt index c65604ae1..119ab658a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt @@ -12,10 +12,15 @@ import kotlin.reflect.KProperty import kotlin.reflect.KType import kotlin.reflect.typeOf -public fun DataFrame.gather(selector: ColumnsSelector): Gather = Gather( - this, selector, null, typeOf(), - { it }, null -) +public fun DataFrame.gather(selector: ColumnsSelector): Gather = + Gather( + df = this, + columns = selector, + filter = null, + keyType = typeOf(), + keyTransform = { it }, + valueTransform = null, + ) public fun DataFrame.gather(vararg columns: String): Gather = gather { columns.toColumns() } @@ -29,9 +34,11 @@ public fun DataFrame.gather(vararg columns: KProperty): Gather Gather.where(filter: RowValueFilter): Gather = copy(filter = this.filter and filter) -public fun Gather.notNull(): Gather = where { it != null } as Gather +public fun Gather.notNull(): Gather = + where { it != null } as Gather -public fun Gather.explodeLists(): Gather = copy(explode = true) +public fun Gather.explodeLists(): Gather = + copy(explode = true) public inline fun Gather.mapKeys(noinline transform: (String) -> K): Gather = copy(keyTransform = transform as ((String) -> Nothing), keyType = typeOf()) as Gather From 91c9e8e917e9c8e6832a1b95615025c59e80993a Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 27 Feb 2023 14:40:29 +0100 Subject: [PATCH 36/50] added generated sources with docs. Do we want to do it like this? 
--- .../kotlinx/dataframe/ColumnsContainer.kt | 66 ++ .../jetbrains/kotlinx/dataframe/DataColumn.kt | 138 +++ .../jetbrains/kotlinx/dataframe/DataFrame.kt | 93 ++ .../jetbrains/kotlinx/dataframe/DataRow.kt | 100 ++ .../dataframe/aggregation/Aggregatable.kt | 3 + .../dataframe/aggregation/AggregateDsl.kt | 25 + .../aggregation/AggregateGroupedDsl.kt | 3 + .../ColumnsForAggregateSelectionDsl.kt | 19 + .../dataframe/aggregation/NamedValue.kt | 26 + .../kotlinx/dataframe/aggregation/aliases.kt | 10 + .../jetbrains/kotlinx/dataframe/aliases.kt | 175 ++++ .../dataframe/annotations/ColumnName.kt | 4 + .../dataframe/annotations/DataSchema.kt | 4 + .../dataframe/annotations/ImportDataSchema.kt | 61 ++ .../dataframe/api/ColumnAccessorApi.kt | 5 + .../dataframe/api/ColumnReferenceApi.kt | 21 + .../dataframe/api/ColumnsSelectionDsl.kt | 376 +++++++ .../kotlinx/dataframe/api/Cumulative.kt | 1 + .../dataframe/api/DataColumnArithmetics.kt | 111 ++ .../kotlinx/dataframe/api/DataColumnType.kt | 27 + .../kotlinx/dataframe/api/DataFrameGet.kt | 126 +++ .../kotlinx/dataframe/api/DataRowApi.kt | 119 +++ .../kotlinx/dataframe/api/Defaults.kt | 7 + .../kotlinx/dataframe/api/JsonPath.kt | 69 ++ .../kotlinx/dataframe/api/KeyValueProperty.kt | 15 + .../jetbrains/kotlinx/dataframe/api/Misc.kt | 1 + .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 638 ++++++++++++ .../kotlinx/dataframe/api/TypeConversions.kt | 312 ++++++ .../jetbrains/kotlinx/dataframe/api/add.kt | 236 +++++ .../jetbrains/kotlinx/dataframe/api/addId.kt | 22 + .../kotlinx/dataframe/api/aggregate.kt | 11 + .../jetbrains/kotlinx/dataframe/api/all.kt | 34 + .../jetbrains/kotlinx/dataframe/api/any.kt | 19 + .../jetbrains/kotlinx/dataframe/api/append.kt | 30 + .../kotlinx/dataframe/api/asIterable.kt | 9 + .../kotlinx/dataframe/api/asSequence.kt | 17 + .../kotlinx/dataframe/api/associate.kt | 15 + .../kotlinx/dataframe/api/between.kt | 11 + .../jetbrains/kotlinx/dataframe/api/cast.kt | 49 + .../kotlinx/dataframe/api/chunked.kt | 25 + 
.../jetbrains/kotlinx/dataframe/api/concat.kt | 58 ++ .../kotlinx/dataframe/api/constructors.kt | 332 ++++++ .../kotlinx/dataframe/api/convert.kt | 387 +++++++ .../kotlinx/dataframe/api/convertTo.kt | 200 ++++ .../jetbrains/kotlinx/dataframe/api/copy.kt | 9 + .../jetbrains/kotlinx/dataframe/api/corr.kt | 35 + .../jetbrains/kotlinx/dataframe/api/count.kt | 54 + .../kotlinx/dataframe/api/countDistinct.kt | 24 + .../jetbrains/kotlinx/dataframe/api/cumSum.kt | 57 + .../kotlinx/dataframe/api/describe.kt | 48 + .../kotlinx/dataframe/api/digitize.kt | 29 + .../kotlinx/dataframe/api/distinct.kt | 41 + .../jetbrains/kotlinx/dataframe/api/drop.kt | 57 + .../kotlinx/dataframe/api/duplicate.kt | 18 + .../jetbrains/kotlinx/dataframe/api/enum.kt | 11 + .../kotlinx/dataframe/api/explode.kt | 59 ++ .../jetbrains/kotlinx/dataframe/api/filter.kt | 34 + .../jetbrains/kotlinx/dataframe/api/first.kt | 57 + .../kotlinx/dataframe/api/flatten.kt | 15 + .../kotlinx/dataframe/api/forEach.kt | 29 + .../jetbrains/kotlinx/dataframe/api/format.kt | 118 +++ .../jetbrains/kotlinx/dataframe/api/frames.kt | 16 + .../jetbrains/kotlinx/dataframe/api/gather.kt | 102 ++ .../jetbrains/kotlinx/dataframe/api/group.kt | 33 + .../kotlinx/dataframe/api/groupBy.kt | 91 ++ .../jetbrains/kotlinx/dataframe/api/head.kt | 9 + .../kotlinx/dataframe/api/implode.kt | 24 + .../kotlinx/dataframe/api/indices.kt | 17 + .../kotlinx/dataframe/api/inferType.kt | 19 + .../jetbrains/kotlinx/dataframe/api/insert.kt | 51 + .../jetbrains/kotlinx/dataframe/api/into.kt | 68 ++ .../kotlinx/dataframe/api/isEmpty.kt | 12 + .../jetbrains/kotlinx/dataframe/api/join.kt | 128 +++ .../jetbrains/kotlinx/dataframe/api/last.kt | 58 ++ .../jetbrains/kotlinx/dataframe/api/length.kt | 10 + .../kotlinx/dataframe/api/lowercase.kt | 9 + .../jetbrains/kotlinx/dataframe/api/map.kt | 130 +++ .../kotlinx/dataframe/api/matches.kt | 10 + .../jetbrains/kotlinx/dataframe/api/max.kt | 170 +++ .../jetbrains/kotlinx/dataframe/api/mean.kt | 183 ++++ 
.../jetbrains/kotlinx/dataframe/api/median.kt | 150 +++ .../jetbrains/kotlinx/dataframe/api/merge.kt | 104 ++ .../jetbrains/kotlinx/dataframe/api/min.kt | 170 +++ .../jetbrains/kotlinx/dataframe/api/move.kt | 84 ++ .../jetbrains/kotlinx/dataframe/api/parse.kt | 65 ++ .../jetbrains/kotlinx/dataframe/api/pivot.kt | 125 +++ .../jetbrains/kotlinx/dataframe/api/print.kt | 44 + .../jetbrains/kotlinx/dataframe/api/remove.kt | 24 + .../jetbrains/kotlinx/dataframe/api/rename.kt | 77 ++ .../kotlinx/dataframe/api/reorder.kt | 48 + .../kotlinx/dataframe/api/replace.kt | 50 + .../kotlinx/dataframe/api/reverse.kt | 18 + .../jetbrains/kotlinx/dataframe/api/schema.kt | 25 + .../jetbrains/kotlinx/dataframe/api/select.kt | 37 + .../kotlinx/dataframe/api/shuffle.kt | 17 + .../jetbrains/kotlinx/dataframe/api/single.kt | 31 + .../jetbrains/kotlinx/dataframe/api/sort.kt | 141 +++ .../jetbrains/kotlinx/dataframe/api/split.kt | 300 ++++++ .../jetbrains/kotlinx/dataframe/api/std.kt | 252 +++++ .../jetbrains/kotlinx/dataframe/api/sum.kt | 143 +++ .../jetbrains/kotlinx/dataframe/api/tail.kt | 9 + .../jetbrains/kotlinx/dataframe/api/take.kt | 49 + .../kotlinx/dataframe/api/toDataFrame.kt | 209 ++++ .../jetbrains/kotlinx/dataframe/api/toList.kt | 14 + .../kotlinx/dataframe/api/transpose.kt | 29 + .../jetbrains/kotlinx/dataframe/api/unfold.kt | 28 + .../kotlinx/dataframe/api/ungroup.kt | 21 + .../jetbrains/kotlinx/dataframe/api/update.kt | 778 ++++++++++++++ .../kotlinx/dataframe/api/uppercase.kt | 9 + .../kotlinx/dataframe/api/valueCounts.kt | 87 ++ .../jetbrains/kotlinx/dataframe/api/values.kt | 177 ++++ .../jetbrains/kotlinx/dataframe/api/with.kt | 47 + .../org/jetbrains/kotlinx/dataframe/api/xs.kt | 21 + .../dataframe/codeGen/CodeGenerator.kt | 73 ++ .../dataframe/codeGen/CodeWithConverter.kt | 29 + .../dataframe/codeGen/DefaultReadDfMethods.kt | 103 ++ .../codeGen/ExtensionsCodeGenerator.kt | 12 + .../dataframe/codeGen/GeneratedField.kt | 145 +++ .../kotlinx/dataframe/codeGen/Marker.kt | 
119 +++ .../dataframe/codeGen/MarkersExtractor.kt | 91 ++ .../dataframe/codeGen/NameNormalizer.kt | 5 + .../dataframe/codeGen/ReplCodeGenerator.kt | 30 + .../dataframe/codeGen/SchemaProcessor.kt | 27 + .../kotlinx/dataframe/codeGen/generateCode.kt | 43 + .../kotlinx/dataframe/columns/BaseColumn.kt | 72 ++ .../dataframe/columns/ColumnAccessor.kt | 27 + .../kotlinx/dataframe/columns/ColumnGroup.kt | 42 + .../kotlinx/dataframe/columns/ColumnKind.kt | 13 + .../kotlinx/dataframe/columns/ColumnPath.kt | 57 + .../dataframe/columns/ColumnReference.kt | 44 + .../kotlinx/dataframe/columns/ColumnSet.kt | 26 + .../dataframe/columns/ColumnWithPath.kt | 27 + .../kotlinx/dataframe/columns/FrameColumn.kt | 23 + .../kotlinx/dataframe/columns/SingleColumn.kt | 15 + .../kotlinx/dataframe/columns/ValueColumn.kt | 24 + .../kotlinx/dataframe/dataTypes/IFRAME.kt | 16 + .../kotlinx/dataframe/dataTypes/IMG.kt | 22 + .../dataframe/documentation/AccessApi.kt | 135 +++ .../documentation/DocumentationUrls.kt | 53 + .../documentation/ExpressionsGivenColumn.kt | 61 ++ .../ExpressionsGivenDataFrame.kt | 32 + .../documentation/ExpressionsGivenRow.kt | 109 ++ .../documentation/SelectingColumns.kt | 169 +++ .../dataframe/documentation/SelectingRows.kt | 76 ++ .../documentation/samples/ApiLevels.kt | 146 +++ .../kotlinx/dataframe/documentation/utils.kt | 4 + .../exceptions/CellConversionException.kt | 16 + .../exceptions/ColumnNotFoundException.kt | 3 + .../DuplicateColumnNamesException.kt | 9 + .../exceptions/ExcessiveColumnsException.kt | 7 + .../exceptions/TypeConversionException.kt | 16 + .../TypeConverterNotFoundException.kt | 15 + .../exceptions/UnequalColumnSizesException.kt | 10 + .../jetbrains/kotlinx/dataframe/impl/Cache.kt | 8 + .../dataframe/impl/ColumnAccessTracker.kt | 33 + .../dataframe/impl/ColumnDataCollector.kt | 75 ++ .../dataframe/impl/ColumnNameGenerator.kt | 37 + .../kotlinx/dataframe/impl/DataFrameImpl.kt | 149 +++ .../dataframe/impl/DataFrameReceiver.kt | 73 ++ 
.../kotlinx/dataframe/impl/DataFrameSize.kt | 5 + .../kotlinx/dataframe/impl/DataRowImpl.kt | 62 ++ .../kotlinx/dataframe/impl/ExceptionUtils.kt | 6 + .../kotlinx/dataframe/impl/GroupByImpl.kt | 102 ++ .../kotlinx/dataframe/impl/MathUtils.kt | 28 + .../kotlinx/dataframe/impl/Rendering.kt | 90 ++ .../kotlinx/dataframe/impl/TypeUtils.kt | 430 ++++++++ .../jetbrains/kotlinx/dataframe/impl/Utils.kt | 288 ++++++ .../impl/aggregation/AggregatableInternal.kt | 21 + .../aggregation/AggregateColumnDescriptor.kt | 32 + .../aggregation/ConfiguredAggregateColumn.kt | 45 + .../impl/aggregation/GroupByReceiverImpl.kt | 88 ++ .../impl/aggregation/PivotGroupByImpl.kt | 32 + .../dataframe/impl/aggregation/PivotImpl.kt | 21 + .../impl/aggregation/PivotInAggregateImpl.kt | 30 + .../impl/aggregation/ValueWithDefault.kt | 3 + .../impl/aggregation/aggregations.kt | 97 ++ .../aggregation/aggregators/Aggregator.kt | 24 + .../aggregation/aggregators/AggregatorBase.kt | 18 + .../aggregators/AggregatorOptionSwitch.kt | 30 + .../aggregators/AggregatorProvider.kt | 10 + .../aggregation/aggregators/Aggregators.kt | 41 + .../aggregators/MergedValuesAggregator.kt | 41 + .../aggregators/NumbersAggregator.kt | 28 + .../aggregators/TwoStepAggregator.kt | 27 + .../dataframe/impl/aggregation/getColumns.kt | 25 + .../impl/aggregation/modes/aggregateBy.kt | 23 + .../impl/aggregation/modes/forEveryColumn.kt | 51 + .../impl/aggregation/modes/noAggregation.kt | 17 + .../impl/aggregation/modes/ofRowExpression.kt | 89 ++ .../aggregation/modes/withinAllColumns.kt | 60 ++ .../receivers/AggregateInternalDsl.kt | 36 + .../receivers/AggregatePivotDslImpl.kt | 24 + .../kotlinx/dataframe/impl/api/concat.kt | 91 ++ .../kotlinx/dataframe/impl/api/convert.kt | 446 ++++++++ .../kotlinx/dataframe/impl/api/convertTo.kt | 280 +++++ .../kotlinx/dataframe/impl/api/corr.kt | 75 ++ .../kotlinx/dataframe/impl/api/describe.kt | 76 ++ .../kotlinx/dataframe/impl/api/duplicate.kt | 73 ++ .../kotlinx/dataframe/impl/api/explode.kt | 
110 ++ .../kotlinx/dataframe/impl/api/flatten.kt | 36 + .../kotlinx/dataframe/impl/api/format.kt | 63 ++ .../kotlinx/dataframe/impl/api/gather.kt | 118 +++ .../kotlinx/dataframe/impl/api/groupBy.kt | 61 ++ .../kotlinx/dataframe/impl/api/implode.kt | 36 + .../kotlinx/dataframe/impl/api/insert.kt | 135 +++ .../kotlinx/dataframe/impl/api/join.kt | 229 +++++ .../kotlinx/dataframe/impl/api/map.kt | 18 + .../kotlinx/dataframe/impl/api/move.kt | 65 ++ .../kotlinx/dataframe/impl/api/parse.kt | 383 +++++++ .../kotlinx/dataframe/impl/api/pivot.kt | 123 +++ .../kotlinx/dataframe/impl/api/remove.kt | 65 ++ .../kotlinx/dataframe/impl/api/reorder.kt | 73 ++ .../kotlinx/dataframe/impl/api/sort.kt | 123 +++ .../kotlinx/dataframe/impl/api/split.kt | 67 ++ .../kotlinx/dataframe/impl/api/toDataFrame.kt | 229 +++++ .../kotlinx/dataframe/impl/api/toList.kt | 75 ++ .../kotlinx/dataframe/impl/api/update.kt | 104 ++ .../kotlinx/dataframe/impl/api/xs.kt | 42 + .../impl/codeGen/CodeGeneratorImpl.kt | 518 ++++++++++ .../impl/codeGen/NameNormalizerImpl.kt | 23 + .../impl/codeGen/ReplCodeGeneratorImpl.kt | 144 +++ .../impl/codeGen/SchemaProcessorImpl.kt | 159 +++ .../dataframe/impl/codeGen/SchemaReader.kt | 94 ++ .../impl/columns/ColumnAccessorImpl.kt | 42 + .../dataframe/impl/columns/ColumnGroupImpl.kt | 105 ++ .../impl/columns/ColumnGroupWithParent.kt | 60 ++ .../impl/columns/ColumnGroupWithPathImpl.kt | 26 + .../impl/columns/ColumnWithParent.kt | 34 + .../dataframe/impl/columns/ColumnsList.kt | 10 + .../impl/columns/ComputedColumnReference.kt | 33 + .../dataframe/impl/columns/DataColumnGroup.kt | 21 + .../dataframe/impl/columns/DataColumnImpl.kt | 43 + .../impl/columns/DataColumnInternal.kt | 20 + .../impl/columns/DistinctColumnSet.kt | 10 + .../impl/columns/ForceResolvedColumn.kt | 7 + .../dataframe/impl/columns/FrameColumnImpl.kt | 72 ++ .../impl/columns/FrameColumnWithParent.kt | 32 + .../impl/columns/FrameColumnWithPathImpl.kt | 19 + .../impl/columns/RenamedColumnReference.kt | 21 + 
.../kotlinx/dataframe/impl/columns/Utils.kt | 191 ++++ .../dataframe/impl/columns/ValueColumnImpl.kt | 71 ++ .../impl/columns/ValueColumnWithParent.kt | 27 + .../impl/columns/ValueColumnWithPathImpl.kt | 18 + .../dataframe/impl/columns/constructors.kt | 290 ++++++ .../columns/missing/MissingColumnGroup.kt | 69 ++ .../impl/columns/missing/MissingDataColumn.kt | 36 + .../impl/columns/tree/ColumnPosition.kt | 9 + .../impl/columns/tree/ReadonlyTreeNode.kt | 14 + .../impl/columns/tree/ReferenceData.kt | 6 + .../dataframe/impl/columns/tree/TreeNode.kt | 66 ++ .../dataframe/impl/columns/tree/Utils.kt | 132 +++ .../impl/schema/DataFrameSchemaImpl.kt | 81 ++ .../kotlinx/dataframe/impl/schema/Utils.kt | 145 +++ .../jetbrains/kotlinx/dataframe/io/common.kt | 60 ++ .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 355 +++++++ .../jetbrains/kotlinx/dataframe/io/guess.kt | 297 ++++++ .../jetbrains/kotlinx/dataframe/io/html.kt | 461 +++++++++ .../jetbrains/kotlinx/dataframe/io/json.kt | 971 ++++++++++++++++++ .../jetbrains/kotlinx/dataframe/io/string.kt | 187 ++++ .../org/jetbrains/kotlinx/dataframe/io/tsv.kt | 108 ++ .../kotlinx/dataframe/jupyter/CellRenderer.kt | 90 ++ .../kotlinx/dataframe/jupyter/Integration.kt | 267 +++++ .../dataframe/jupyter/JupyterConfiguration.kt | 10 + .../dataframe/jupyter/JupyterHtmlRenderer.kt | 34 + .../dataframe/jupyter/importDataSchema.kt | 46 + .../dataframe/jupyter/kernelUpdateMessages.kt | 26 + .../kotlinx/dataframe/math/cumsum.kt | 139 +++ .../jetbrains/kotlinx/dataframe/math/mean.kt | 111 ++ .../kotlinx/dataframe/math/median.kt | 68 ++ .../kotlinx/dataframe/math/minmax.kt | 23 + .../jetbrains/kotlinx/dataframe/math/std.kt | 42 + .../kotlinx/dataframe/math/stdMean.kt | 110 ++ .../jetbrains/kotlinx/dataframe/math/sum.kt | 58 ++ .../kotlinx/dataframe/schema/ColumnSchema.kt | 95 ++ .../kotlinx/dataframe/schema/CompareResult.kt | 28 + .../dataframe/schema/DataFrameSchema.kt | 8 + .../dataframe/test/sampleExtensions.kt | 17 + 
.../jetbrains/kotlinx/dataframe/test/usage.kt | 17 + 278 files changed, 22945 insertions(+) create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/ColumnsContainer.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/Aggregatable.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateDsl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateGroupedDsl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/ColumnsForAggregateSelectionDsl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/NamedValue.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/aliases.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ColumnName.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/DataSchema.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnAccessorApi.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnReferenceApi.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Cumulative.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnArithmetics.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Defaults.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/JsonPath.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/KeyValueProperty.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Misc.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/addId.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/aggregate.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/all.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/append.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asIterable.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asSequence.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/concat.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convertTo.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/copy.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/digitize.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/drop.kt create mode 
100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/duplicate.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/enum.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/first.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/forEach.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/format.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/frames.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/head.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/implode.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/indices.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/insert.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/into.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/isEmpty.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/last.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/length.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/lowercase.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/matches.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/mean.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/merge.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/print.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reverse.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/shuffle.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/single.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/split.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/std.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sum.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/take.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/transpose.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/uppercase.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeGenerator.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeWithConverter.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ExtensionsCodeGenerator.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/GeneratedField.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/Marker.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/MarkersExtractor.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/NameNormalizer.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ReplCodeGenerator.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/SchemaProcessor.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/generateCode.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/BaseColumn.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnAccessor.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnKind.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnPath.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/FrameColumn.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ValueColumn.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/dataTypes/IFRAME.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/dataTypes/IMG.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenDataFrame.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt create mode 
100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/samples/ApiLevels.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/exceptions/CellConversionException.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/exceptions/ColumnNotFoundException.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/exceptions/DuplicateColumnNamesException.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/exceptions/ExcessiveColumnsException.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/exceptions/TypeConversionException.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/exceptions/TypeConverterNotFoundException.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/exceptions/UnequalColumnSizesException.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Cache.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnAccessTracker.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnNameGenerator.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameReceiver.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameSize.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataRowImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ExceptionUtils.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/GroupByImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/MathUtils.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Rendering.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/AggregatableInternal.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/AggregateColumnDescriptor.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/ConfiguredAggregateColumn.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/GroupByReceiverImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/PivotGroupByImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/PivotImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/PivotInAggregateImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/ValueWithDefault.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregations.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorBase.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorOptionSwitch.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorProvider.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/MergedValuesAggregator.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/NumbersAggregator.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/TwoStepAggregator.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/modes/aggregateBy.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/modes/forEveryColumn.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/modes/noAggregation.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/modes/ofRowExpression.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/modes/withinAllColumns.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/receivers/AggregateInternalDsl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/receivers/AggregatePivotDslImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/corr.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/duplicate.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/explode.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/format.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/groupBy.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/implode.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/insert.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/join.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/map.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/move.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/pivot.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/remove.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/sort.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/split.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toList.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/xs.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/codeGen/CodeGeneratorImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/codeGen/NameNormalizerImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/codeGen/ReplCodeGeneratorImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/codeGen/SchemaProcessorImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/codeGen/SchemaReader.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnAccessorImpl.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnGroupImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnGroupWithParent.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnGroupWithPathImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnWithParent.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnsList.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ComputedColumnReference.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/DataColumnGroup.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/DataColumnImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/DataColumnInternal.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/DistinctColumnSet.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ForceResolvedColumn.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnWithParent.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnWithPathImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/RenamedColumnReference.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnWithParent.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnWithPathImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/missing/MissingColumnGroup.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/missing/MissingDataColumn.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/ColumnPosition.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/ReadonlyTreeNode.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/ReferenceData.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/TreeNode.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/Utils.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/DataFrameSchemaImpl.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/string.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/CellRenderer.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterConfiguration.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/importDataSchema.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/kernelUpdateMessages.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/minmax.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/std.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/stdMean.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/schema/ColumnSchema.kt create mode 100644 
core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/schema/CompareResult.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/schema/DataFrameSchema.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/test/sampleExtensions.kt create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/test/usage.kt diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/ColumnsContainer.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/ColumnsContainer.kt new file mode 100644 index 000000000..397ce223a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/ColumnsContainer.kt @@ -0,0 +1,66 @@ +package org.jetbrains.kotlinx.dataframe + +import org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl +import org.jetbrains.kotlinx.dataframe.api.asColumnGroup +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.castFrameColumn +import org.jetbrains.kotlinx.dataframe.api.getColumn +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn +import kotlin.reflect.KProperty + +/** + * Provides access to [columns][DataColumn]. + * + * Base interface for [DataFrame] and [ColumnSelectionDsl] + * + * @param T Schema marker. Used to generate extension properties for typed column access. 
+ */ +public interface ColumnsContainer { + + // region columns + + public fun columns(): List + public fun columnsCount(): Int + public fun containsColumn(name: String): Boolean + public fun containsColumn(path: ColumnPath): Boolean + public fun getColumnIndex(name: String): Int + + // endregion + + // region getColumnOrNull + + public fun getColumnOrNull(name: String): AnyCol? + public fun getColumnOrNull(index: Int): AnyCol? + public fun getColumnOrNull(column: ColumnReference): DataColumn? + public fun getColumnOrNull(path: ColumnPath): AnyCol? + public fun getColumnOrNull(column: ColumnSelector): DataColumn? + + // endregion + + // region get + + public operator fun get(columnName: String): AnyCol = getColumn(columnName) + public operator fun get(columnPath: ColumnPath): AnyCol = getColumn(columnPath) + + public operator fun get(column: DataColumn): DataColumn = getColumn(column.name()).cast() + public operator fun get(column: DataColumn>): ColumnGroup = getColumn(column) + public operator fun get(column: DataColumn>): FrameColumn = getColumn(column) + + public operator fun get(column: ColumnReference): DataColumn = getColumn(column) + public operator fun get(column: ColumnReference>): ColumnGroup = getColumn(column) + public operator fun get(column: ColumnReference>): FrameColumn = getColumn(column) + + public operator fun get(column: KProperty): DataColumn = get(column.columnName).cast() + public operator fun get(column: KProperty>): ColumnGroup = get(column.columnName).asColumnGroup().cast() + public operator fun get(column: KProperty>): FrameColumn = get(column.columnName).asAnyFrameColumn().castFrameColumn() + + public operator fun get(columns: ColumnsSelector): List> + public operator fun get(column: ColumnSelector): DataColumn = get(column as ColumnsSelector).single() + + // endregion +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt new file mode 100644 index 000000000..37d387567 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -0,0 +1,138 @@ +package org.jetbrains.kotlinx.dataframe + +import org.jetbrains.kotlinx.dataframe.api.Infer +import org.jetbrains.kotlinx.dataframe.api.asDataColumn +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.concat +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.api.take +import org.jetbrains.kotlinx.dataframe.columns.BaseColumn +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.addPath +import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind +import org.jetbrains.kotlinx.dataframe.impl.getValuesType +import org.jetbrains.kotlinx.dataframe.impl.splitByIndices +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema +import kotlin.reflect.KClass +import kotlin.reflect.KProperty +import kotlin.reflect.KType +import kotlin.reflect.typeOf + +/** + * Column with [name] and [values] of specific [type]. 
+ * + * Base interface for [ValueColumn] and [FrameColumn], but not for [ColumnGroup]. However, implementations for all three [column kinds][ColumnKind] derive from DataColumn and can cast to it safely. + * Column operations that have signature clash with [DataFrame] API ([filter], [take], [map] etc.) are defined for [DataColumn] and not for [BaseColumn]. + * + * @param T type of values in the column. + */ +public interface DataColumn : BaseColumn { + + public companion object { + + /** + * Creates [ValueColumn] using given [name], [values] and [type]. + * + * @param name name of the column + * @param values list of column values + * @param type type of the column + * @param infer column type inference mode + */ + public fun createValueColumn( + name: String, + values: List, + type: KType, + infer: Infer = Infer.None, + defaultValue: T? = null + ): ValueColumn = ValueColumnImpl(values, name, getValuesType(values, type, infer), defaultValue) + + /** + * Creates [ValueColumn] using given [name], [values] and reified column [type]. + * + * Note, that column [type] will be defined at compile-time using [T] argument + * + * @param T type of the column + * @param name name of the column + * @param values list of column values + * @param infer column type inference mode + */ + public inline fun createValueColumn(name: String, values: List, infer: Infer = Infer.None): ValueColumn = createValueColumn( + name, values, + getValuesType( + values, + typeOf(), + infer + ) + ) + + public fun createColumnGroup(name: String, df: DataFrame): ColumnGroup = ColumnGroupImpl(name, df) + + public fun createFrameColumn( + name: String, + df: DataFrame, + startIndices: Iterable + ): FrameColumn = + FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() }) + + public fun createFrameColumn( + name: String, + groups: List>, + schema: Lazy? 
= null + ): FrameColumn = FrameColumnImpl(name, groups, schema) + + public fun createWithTypeInference(name: String, values: List, nullable: Boolean? = null): DataColumn = guessColumnType(name, values, nullable = nullable) + + public fun create(name: String, values: List, type: KType, infer: Infer = Infer.None): DataColumn { + return when (type.toColumnKind()) { + ColumnKind.Value -> createValueColumn(name, values, type, infer) + ColumnKind.Group -> createColumnGroup(name, (values as List).concat()).asDataColumn().cast() + ColumnKind.Frame -> createFrameColumn(name, values as List).asDataColumn().cast() + } + } + + public inline fun create(name: String, values: List, infer: Infer = Infer.None): DataColumn = create(name, values, typeOf(), infer) + + public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList(), typeOf()) + } + + public fun hasNulls(): Boolean = type().isMarkedNullable + + override fun distinct(): DataColumn + + override fun get(indices: Iterable): DataColumn + + override fun rename(newName: String): DataColumn + + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? = this.addPath() + + override operator fun getValue(thisRef: Any?, property: KProperty<*>): DataColumn = super.getValue(thisRef, property) as DataColumn + + public operator fun iterator(): Iterator = values().iterator() + + public override operator fun get(range: IntRange): DataColumn +} + +public val AnyCol.name: String get() = name() +public val AnyCol.path: ColumnPath get() = path() + +public val DataColumn.values: Iterable get() = values() +public val AnyCol.hasNulls: Boolean get() = hasNulls() +public val AnyCol.size: Int get() = size() +public val AnyCol.indices: IntRange get() = indices() + +public val AnyCol.type: KType get() = type() +public val AnyCol.kind: ColumnKind get() = kind() +public val AnyCol.typeClass: KClass<*> get() = type.classifier as? KClass<*> ?: error("Cannot cast ${type.classifier?.javaClass} to a ${KClass::class}. 
Column $name: $type") + +public fun AnyBaseCol.indices(): IntRange = 0 until size() diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt new file mode 100644 index 000000000..dd1c55636 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt @@ -0,0 +1,93 @@ +package org.jetbrains.kotlinx.dataframe + +import org.jetbrains.kotlinx.dataframe.aggregation.Aggregatable +import org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedBody +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.getRows +import org.jetbrains.kotlinx.dataframe.api.indices +import org.jetbrains.kotlinx.dataframe.api.rows +import org.jetbrains.kotlinx.dataframe.api.select +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy +import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl +import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize +import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl +import org.jetbrains.kotlinx.dataframe.impl.headPlusArray +import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable +import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrameOf +import kotlin.reflect.KType + +/** + * Readonly interface for an ordered list of [columns][DataColumn]. + * + * Columns in `DataFrame` have distinct non-empty [names][DataColumn.name] and equal [sizes][DataColumn.size]. + * + * @param T Schema marker. It identifies column schema and is used to generate schema-specific extension properties for typed data access. It is covariant, so `DataFrame` is assignable to variable of type `DataFrame` if `A` is a subtype of `B`. 
+ */ +public interface DataFrame : Aggregatable, ColumnsContainer { + + public companion object { + public val Empty: AnyFrame = DataFrameImpl(emptyList(), 0) + public fun empty(nrow: Int = 0): AnyFrame = if (nrow == 0) Empty else DataFrameImpl(emptyList(), nrow) + + public inline fun emptyOf(): DataFrame = createEmptyDataFrameOf(T::class).cast() + } + + // region columns + + public fun columnNames(): List + + public fun columnTypes(): List + + // endregion + + // region rows + + public fun rowsCount(): Int + + public operator fun iterator(): Iterator> = rows().iterator() + + // endregion + + public fun aggregate(body: AggregateGroupedBody): DataRow + + // region get columns + + override operator fun get(columns: ColumnsSelector): List> = + getColumnsImpl(UnresolvedColumnsPolicy.Fail, columns) + + public operator fun get(first: Column, vararg other: Column): DataFrame = select(listOf(first) + other) + public operator fun get(first: String, vararg other: String): DataFrame = select(listOf(first) + other) + public operator fun get(columnRange: ClosedRange): DataFrame = + select { columnRange.start..columnRange.endInclusive } + + // endregion + + // region get rows + + public operator fun get(index: Int): DataRow + public operator fun get(indices: Iterable): DataFrame = getRows(indices) + public operator fun get(range: IntRange): DataFrame = getRows(range) + public operator fun get(first: IntRange, vararg ranges: IntRange): DataFrame = + getRows(headPlusArray(first, ranges).asSequence().flatMap { it.asSequence() }.asIterable()) + + public operator fun get(firstIndex: Int, vararg otherIndices: Int): DataFrame = + get(headPlusIterable(firstIndex, otherIndices.asIterable())) + + // endregion + + // region plus columns + + public operator fun plus(col: AnyBaseCol): DataFrame = add(col) + public operator fun plus(cols: Iterable): DataFrame = (columns() + cols).toDataFrame().cast() + + // endregion +} + +internal val ColumnsContainer<*>.ncol get() = columnsCount() +internal 
val AnyFrame.nrow get() = rowsCount() +internal val AnyFrame.indices get() = indices() +internal val AnyFrame.size: DataFrameSize get() = size() + +public fun AnyFrame.size(): DataFrameSize = DataFrameSize(ncol, nrow) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt new file mode 100644 index 000000000..989fb3df2 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataRow.kt @@ -0,0 +1,100 @@ +package org.jetbrains.kotlinx.dataframe + +import org.jetbrains.kotlinx.dataframe.api.next +import org.jetbrains.kotlinx.dataframe.api.prev +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.owner +import org.jetbrains.kotlinx.dataframe.impl.toIterable +import kotlin.reflect.KProperty + +/** + * Single row of a [DataFrame]. + * + * @param T Schema marker. See [DataFrame] for details + */ +public interface DataRow { + + public fun index(): Int + + public fun df(): DataFrame + + // region get cell value + + public operator fun get(columnIndex: Int): Any? + public operator fun get(expression: RowExpression): R = expression(this, this) + public operator fun get(column: ColumnReference): R + public operator fun get(columns: List>): List = columns.map { get(it) } + public operator fun get(property: KProperty): R = get(property.columnName) as R + public operator fun get(first: Column, vararg other: Column): DataRow = owner.get(first, *other)[index] + public operator fun get(first: String, vararg other: String): DataRow = owner.get(first, *other)[index] + public operator fun get(path: ColumnPath): Any? = owner.get(path)[index] + public operator fun get(name: String): Any? 
+ /** Returns the cell stored under [columnName] cast to [AnyRow]. A null cell in a column whose kind is not [ColumnKind.Group] is rejected through error, with the actual kind in the message. NOTE(review): a null cell in a Group column skips that check and still reaches the non-null cast on the last line; confirm whether that case can occur. */ public fun getColumnGroup(columnName: String): AnyRow { + val value = get(columnName) + if (value == null) { + /* the kind is looked up only to produce a descriptive failure for null cells */ val kind = df()[columnName].kind() + if (kind != ColumnKind.Group) { + error("Cannot cast null value of a $kind to a ${DataRow::class}") + } + } + return value as AnyRow + } + + /** Nullable lookups. Presumably null signals a missing column (TODO confirm against the implementation; only the nullable return types are visible here). */ public fun getOrNull(name: String): Any? + public fun getValueOrNull(column: ColumnReference): R? + + // endregion + + public fun values(): List + + /** Builds a [ColumnPath] whose first segment is the receiver string, followed by the segments in [path]. */ public operator fun String.get(vararg path: String): ColumnPath = ColumnPath(listOf(this) + path) + + /** Call-style shorthands for reading a cell of this row; the String and [ColumnPath] forms end in a cast to R. */ public operator fun ColumnReference.invoke(): R = get(this) + public operator fun String.invoke(): R = this@DataRow[this@invoke] as R + public operator fun ColumnPath.invoke(): R = this@DataRow.get(this) as R + + /** Both iterables are built by toIterable from a step function: next for forward, prev for backward. Whether the current row itself is included depends on toIterable, which is not visible here. */ public fun forwardIterable(): Iterable> = this.toIterable { it.next } + public fun backwardIterable(): Iterable> = this.toIterable { it.prev } + + /** Comparison and arithmetic shorthands: each operator reads this row's cell for the reference with get and applies the plain Kotlin operator to it. */ public operator fun > ColumnReference.compareTo(other: R): Int = get(this).compareTo(other) + public operator fun ColumnReference.plus(a: Int): Int = get(this) + a + public operator fun ColumnReference.plus(a: Long): Long = get(this) + a + public operator fun ColumnReference.plus(a: Double): Double = get(this) + a + public operator fun ColumnReference.plus(a: String): String = get(this) + a + public operator fun Int.plus(col: ColumnReference): Int = this + get(col) + public operator fun Long.plus(col: ColumnReference): Long = this + get(col) + public operator fun Double.plus(col: ColumnReference): Double = this + get(col) + + /** Subtraction counterparts of the plus overloads above, for Int, Long and Double in both operand orders. */ public operator fun ColumnReference.minus(a: Int): Int = get(this) - a + public operator fun ColumnReference.minus(a: Long): Long = get(this) - a + public operator fun ColumnReference.minus(a: Double): Double = get(this) - a + public operator fun Int.minus(col: ColumnReference): Int = this - get(col) + public operator fun Long.minus(col: ColumnReference): Long = this - get(col) + public operator fun Double.minus(col: ColumnReference): Double = this - get(col) 
+ + public operator fun ColumnReference.times(a: Int): Int = get(this) * a + public operator fun ColumnReference.times(a: Long): Long = get(this) * a + public operator fun ColumnReference.times(a: Double): Double = get(this) * a + public operator fun ColumnReference.times(a: Int): Double = get(this) * a + public operator fun ColumnReference.times(a: Int): Long = get(this) * a + public operator fun ColumnReference.times(a: Long): Double = get(this) * a + + public operator fun ColumnReference.div(a: Int): Int = get(this) / a + public operator fun ColumnReference.div(a: Long): Long = get(this) / a + public operator fun ColumnReference.div(a: Double): Double = get(this) / a + public operator fun ColumnReference.div(a: Int): Double = get(this) / a + public operator fun ColumnReference.div(a: Int): Long = get(this) / a + public operator fun ColumnReference.div(a: Long): Double = get(this) / a + + public companion object { + public val empty: AnyRow = DataFrame.empty(1)[0] + } +} + +internal val AnyRow.values: List get() = values() +internal val AnyRow.index: Int get() = index() +internal val DataRow.prev: DataRow? get() = this.prev() +internal val DataRow.next: DataRow? 
get() = this.next() diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/Aggregatable.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/Aggregatable.kt new file mode 100644 index 000000000..7cb74c05f --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/Aggregatable.kt @@ -0,0 +1,3 @@ +package org.jetbrains.kotlinx.dataframe.aggregation + +public interface Aggregatable diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateDsl.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateDsl.kt new file mode 100644 index 000000000..ae8388e3d --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateDsl.kt @@ -0,0 +1,25 @@ +package org.jetbrains.kotlinx.dataframe.aggregation + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl +import org.jetbrains.kotlinx.dataframe.api.pathOf +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.impl.aggregation.ValueWithDefault +import org.jetbrains.kotlinx.dataframe.impl.aggregation.receivers.internal +import org.jetbrains.kotlinx.dataframe.impl.columnName +import kotlin.reflect.KProperty +import kotlin.reflect.typeOf + +public abstract class AggregateDsl : DataFrame, ColumnSelectionDsl { + + public inline infix fun R.into(name: String): NamedValue = internal().yield(pathOf(name), this, typeOf()) + + public inline infix fun R.into(column: ColumnAccessor): NamedValue = internal().yield(pathOf(column.name()), this, typeOf()) + + public inline infix fun R.into(column: KProperty): NamedValue = internal().yield(pathOf(column.columnName), this, typeOf()) + + public infix fun R.default(defaultValue: R): Any = when (this) { + is NamedValue -> this.also { it.default 
= defaultValue } + else -> ValueWithDefault(this, defaultValue) + } +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateGroupedDsl.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateGroupedDsl.kt new file mode 100644 index 000000000..43b529522 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateGroupedDsl.kt @@ -0,0 +1,3 @@ +package org.jetbrains.kotlinx.dataframe.aggregation + +public abstract class AggregateGroupedDsl : AggregateDsl() diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/ColumnsForAggregateSelectionDsl.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/ColumnsForAggregateSelectionDsl.kt new file mode 100644 index 000000000..776e6166d --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/ColumnsForAggregateSelectionDsl.kt @@ -0,0 +1,19 @@ +package org.jetbrains.kotlinx.dataframe.aggregation + +import org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl +import org.jetbrains.kotlinx.dataframe.api.pathOf +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.impl.aggregation.ConfiguredAggregateColumn + +public interface ColumnsForAggregateSelectionDsl : ColumnsSelectionDsl { + + public infix fun ColumnSet.default(defaultValue: C): ColumnSet = + ConfiguredAggregateColumn.withDefault(this, defaultValue) + + public fun path(vararg names: String): ColumnPath = ColumnPath(names.asList()) + + public infix fun ColumnSet.into(name: String): ColumnSet = ConfiguredAggregateColumn.withPath(this, pathOf(name)) + + public infix fun ColumnSet.into(path: ColumnPath): ColumnSet = ConfiguredAggregateColumn.withPath(this, path) +} diff --git 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/NamedValue.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/NamedValue.kt new file mode 100644 index 000000000..b1d15f734 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/NamedValue.kt @@ -0,0 +1,26 @@ +package org.jetbrains.kotlinx.dataframe.aggregation + +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.impl.aggregation.ValueWithDefault +import org.jetbrains.kotlinx.dataframe.impl.emptyPath +import kotlin.reflect.KType + +@Suppress("DataClassPrivateConstructor") +public data class NamedValue private constructor( + val path: ColumnPath, + val value: Any?, + val type: KType?, + var default: Any?, + val guessType: Boolean = false +) { + public companion object { + internal fun create(path: ColumnPath, value: Any?, type: KType?, defaultValue: Any?, guessType: Boolean = false): NamedValue = when (value) { + is ValueWithDefault<*> -> create(path, value.value, type, value.default, guessType) + else -> NamedValue(path, value, type, defaultValue, guessType) + } + internal fun aggregator(builder: AggregateGroupedDsl<*>): NamedValue = + NamedValue(emptyPath(), builder, null, null, false) + } + + val name: String get() = path.last() +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/aliases.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/aliases.kt new file mode 100644 index 000000000..07c0d7708 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/aliases.kt @@ -0,0 +1,10 @@ +package org.jetbrains.kotlinx.dataframe.aggregation + +import org.jetbrains.kotlinx.dataframe.Selector +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet + +public typealias AggregateBody = Selector, R> + +public typealias 
AggregateGroupedBody = Selector, R> + +public typealias ColumnsForAggregateSelector = Selector, ColumnSet> diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt new file mode 100644 index 000000000..de7a58df5 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt @@ -0,0 +1,175 @@ +package org.jetbrains.kotlinx.dataframe + +import org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl +import org.jetbrains.kotlinx.dataframe.columns.BaseColumn +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.SingleColumn + +/** + * [Predicate] is used to reach a [Boolean] result using the given instance of `T` as `it`. + * + * Shorthand for: + * ```kotlin + * (it: T) -> Boolean + * ``` + */ +public typealias Predicate = (it: T) -> Boolean + +/** + * [Selector] is used to express or select any instance of `R` using the given instance of `T` as `this` and `it`. + * + * Shorthand for: + * ```kotlin + * T.(it: T) -> R + * ``` + */ +public typealias Selector = T.(it: T) -> R + +// region selectors + +/** + * [DataFrameExpression] is used to express or select any instance of `R` using the given instance of [DataFrame]`` + * as `this` and `it`. + * + * Shorthand for: + * ```kotlin + * DataFrame.(it: DataFrame) -> R + * ``` + */ +public typealias DataFrameExpression = Selector, R> + +/** + * [RowExpression] is used to express or select any instance of `R` using the given instance of [DataRow]`` as + * `this` and `it`. 
+ * + * Shorthand for: + * ```kotlin + * DataRow.(it: DataRow) -> R + * ``` + */ +public typealias RowExpression = Selector, R> + +/** + * [RowValueExpression] is used to express or select any instance of `R` using the given value `it: C` and the given + * instance of [DataRow]`` as `this`. + * + * Shorthand for: + * ```kotlin + * DataRow.(it: C) -> R + * ``` + */ +public typealias RowValueExpression = DataRow.(it: C) -> R + +/** + * [RowColumnExpression] is used to express or select any instance of `R` using the given instances of + * [DataRow]`` as `row` and [DataColumn]`` as `col`. + * + * Shorthand for: + * ```kotlin + * (row: DataRow, col: DataColumn) -> R + * ``` + */ +public typealias RowColumnExpression = (row: DataRow, col: DataColumn) -> R + +/** + * [ColumnExpression] is used to express or select any instance of `R` using the given instance of [DataColumn]`` as + * `this` and `it`. + * + * Shorthand for: + * ```kotlin + * DataColumn.(it: DataColumn) -> R + * ``` + */ +public typealias ColumnExpression = Selector, R> + +/** + * [ColumnSelector] is used to express or select a single column, represented by [SingleColumn]``, using the + * context of [ColumnsSelectionDsl]`` as `this` and `it`. + * + * Shorthand for: + * ```kotlin + * ColumnsSelectionDsl.(it: ColumnsSelectionDsl) -> SingleColumn + * ``` + */ +public typealias ColumnSelector = Selector, SingleColumn> + +/** + * [ColumnsSelector] is used to express or select multiple columns, represented by [ColumnSet]``, using the + * context of [ColumnsSelectionDsl]`` as `this` and `it`. + * + * Shorthand for: + * ```kotlin + * ColumnsSelectionDsl.(it: ColumnsSelectionDsl) -> ColumnSet + * ``` + */ +public typealias ColumnsSelector = Selector, ColumnSet> + +// endregion + +// region filters + +/** + * [RowFilter] is used to filter or find rows using the given instance of [DataRow]`` as `this` and `it`. + * Return `true` if the row should be included in the result. 
+ * + * Shorthand for: + * ```kotlin + * DataRow.(it: DataRow) -> Boolean + * ``` + */ +public typealias RowFilter = RowExpression + +/** + * [ColumnFilter] is used to filter or find columns using the given instance of [ColumnWithPath]`` as `it`. + * Return `true` if the column should be included in the result. + * + * Shorthand for: + * ```kotlin + * (it: ColumnWithPath) -> Boolean + * ``` + */ +public typealias ColumnFilter = Predicate> + +/** + * [RowValueFilter] is used to filter or find rows using the given value of `it: C` and the given instance of + * [DataRow]`` as `this`. + * Return `true` if the row should be included in the result. + * + * Shorthand for: + * ```kotlin + * DataRow.(it: C) -> Boolean + * ``` + */ +public typealias RowValueFilter = RowValueExpression + +// endregion + +// region columns + +public typealias Column = ColumnReference<*> + +public typealias ColumnGroupReference = ColumnReference +public typealias ColumnGroupAccessor = ColumnAccessor> +public typealias AnyColumnGroupAccessor = ColumnGroupAccessor<*> + +public typealias DoubleCol = DataColumn +public typealias BooleanCol = DataColumn +public typealias IntCol = DataColumn +public typealias NumberCol = DataColumn +public typealias StringCol = DataColumn +public typealias AnyCol = DataColumn<*> + +// endregion + +// region Any* + +public typealias AnyFrame = DataFrame<*> + +public typealias AnyRow = DataRow<*> + +public typealias AnyBaseCol = BaseColumn<*> + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ColumnName.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ColumnName.kt new file mode 100644 index 000000000..11ea05df3 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ColumnName.kt @@ -0,0 +1,4 @@ +package org.jetbrains.kotlinx.dataframe.annotations + +@Target(AnnotationTarget.PROPERTY) +public annotation class ColumnName(val 
name: String) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/DataSchema.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/DataSchema.kt new file mode 100644 index 000000000..2422c1d33 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/DataSchema.kt @@ -0,0 +1,4 @@ +package org.jetbrains.kotlinx.dataframe.annotations + +@Target(AnnotationTarget.CLASS) +public annotation class DataSchema(val isOpen: Boolean = true) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt new file mode 100644 index 000000000..462fd7b73 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt @@ -0,0 +1,61 @@ +package org.jetbrains.kotlinx.dataframe.annotations + +import org.jetbrains.kotlinx.dataframe.api.JsonPath +import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.io.JSON + +/** + * Annotation preprocessing will generate a DataSchema interface from the data at `path`. + * Data must be of supported format: CSV, JSON, Apache Arrow, Excel, OpenAPI (Swagger) in YAML/JSON. + * Generated data schema has properties inferred from data and a companion object with `read method`. + * `read method` is either `readCSV` or `readJson` that returns `DataFrame` + * + * @param name name of the generated interface + * @param path URL or relative path to data. + * if path starts with protocol (http, https, ftp), it's considered a URL. Otherwise, it's treated as relative path. + * By default, it will be resolved relatively to project dir, i.e. 
File(projectDir, path) + * You can configure it by passing `dataframe.resolutionDir` option to preprocessor, see https://kotlinlang.org/docs/ksp-quickstart.html#pass-options-to-processors + * @param visibility visibility of the generated interface. + * @param normalizationDelimiters if not empty, split property names by delimiters, + * lowercase parts and join to camel case. Set empty list to disable normalization + * @param withDefaultPath if `true`, generate `defaultPath` property to the data schema's companion object and make it default argument for a `read method` + * @param csvOptions options to parse CSV data. Not used when data is not Csv + * @param jsonOptions options to parse JSON data. Not used when data is not Json + */ +@Retention(AnnotationRetention.SOURCE) +@Target(AnnotationTarget.FILE) +@Repeatable +public annotation class ImportDataSchema( + val name: String, + val path: String, + val visibility: DataSchemaVisibility = DataSchemaVisibility.IMPLICIT_PUBLIC, + val normalizationDelimiters: CharArray = ['\t', ' ', '_'], + val withDefaultPath: Boolean = true, + val csvOptions: CsvOptions = CsvOptions(','), + val jsonOptions: JsonOptions = JsonOptions(), +) + +public enum class DataSchemaVisibility { + INTERNAL, IMPLICIT_PUBLIC, EXPLICIT_PUBLIC +} + +public annotation class CsvOptions( + public val delimiter: Char, +) + +public annotation class JsonOptions( + + /** Allows the choice of how to handle type clashes when reading a JSON file. */ + public val typeClashTactic: JSON.TypeClashTactic = JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS, + + /** + * List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> + * will be created. 
+ * + * Example: + * `["""$["store"]["book"][*]["author"]"""]` + */ + public val keyValuePaths: Array = [], +) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnAccessorApi.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnAccessorApi.kt new file mode 100644 index 000000000..834fbdd5f --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnAccessorApi.kt @@ -0,0 +1,5 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor + +public inline fun ColumnAccessor.nullable(): ColumnAccessor = cast() diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnReferenceApi.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnReferenceApi.kt new file mode 100644 index 000000000..7f8b85125 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnReferenceApi.kt @@ -0,0 +1,21 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.impl.asList +import kotlin.reflect.typeOf + +internal val ColumnReference<*>.name: String get() = name() +public inline fun ColumnReference.withValues(vararg values: T): ValueColumn = withValues(values.asIterable()) +public inline fun ColumnReference.withValues(values: Iterable): ValueColumn = + DataColumn.createValueColumn(name(), values.asList(), typeOf()) + +public infix fun > ColumnReference.gt(value: C): ColumnReference = map { it > value } +public infix fun > ColumnReference.lt(value: C): ColumnReference = map { it < value } +public infix fun ColumnReference.eq(value: C): ColumnReference = map { it == value } +public infix fun 
ColumnReference.neq(value: C): ColumnReference = map { it != value } + +public fun ColumnReference.length(): ColumnReference = map { it?.length ?: 0 } +public fun ColumnReference.lowercase(): ColumnReference = map { it?.lowercase() } +public fun ColumnReference.uppercase(): ColumnReference = map { it?.uppercase() } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt new file mode 100644 index 000000000..43ffc0908 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt @@ -0,0 +1,376 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.ColumnFilter +import org.jetbrains.kotlinx.dataframe.ColumnsContainer +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.Predicate +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.SingleColumn +import org.jetbrains.kotlinx.dataframe.columns.renamedReference +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnsList +import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.addPath +import 
org.jetbrains.kotlinx.dataframe.impl.columns.allColumnsExcept +import org.jetbrains.kotlinx.dataframe.impl.columns.changePath +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.getAt +import org.jetbrains.kotlinx.dataframe.impl.columns.getChildrenAt +import org.jetbrains.kotlinx.dataframe.impl.columns.single +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.columns.top +import org.jetbrains.kotlinx.dataframe.impl.columns.transform +import org.jetbrains.kotlinx.dataframe.impl.columns.transformSingle +import org.jetbrains.kotlinx.dataframe.impl.columns.tree.dfs +import kotlin.reflect.KProperty +import kotlin.reflect.KType +import kotlin.reflect.typeOf + +/** [Column Selection DSL][ColumnSelectionDsl] */ +internal interface ColumnSelectionDslLink + +/** TODO: Put examples and explanations here */ +public interface ColumnSelectionDsl : ColumnsContainer { + + public operator fun ColumnReference.invoke(): DataColumn = get(this) + + public operator fun ColumnReference>.invoke(): ColumnGroup = get(this) + + public operator fun ColumnReference>.invoke(): FrameColumn = get(this) + + public operator fun ColumnPath.invoke(): DataColumn = getColumn(this).cast() + + public operator fun String.invoke(): DataColumn = getColumn(this).cast() + + public operator fun String.get(column: String): ColumnPath = pathOf(this, column) +} + +/** [Columns Selection DSL][ColumnsSelectionDsl] */ +internal interface ColumnsSelectionDslLink + +/** TODO: Put examples and explanations here */ +public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColumn> { + + public fun ColumnSet.first(condition: ColumnFilter): SingleColumn = + transform { listOf(it.first(condition)) }.single() + + public fun ColumnSet.single(condition: ColumnFilter): SingleColumn = + transform { listOf(it.single(condition)) }.single() + + public fun SingleColumn.col(index: Int): SingleColumn = 
getChildrenAt(index).single() + + public operator fun ColumnSet.get(index: Int): SingleColumn = getAt(index) + + public fun ColumnsContainer<*>.group(name: String): ColumnGroupReference = name.toColumnOf() + + public operator fun String.rangeTo(endInclusive: String): ColumnSet<*> = toColumnAccessor().rangeTo(endInclusive.toColumnAccessor()) + + public operator fun Column.rangeTo(endInclusive: Column): ColumnSet<*> = object : ColumnSet { + override fun resolve(context: ColumnResolutionContext): List> { + val startPath = this@rangeTo.resolveSingle(context)!!.path + val endPath = endInclusive.resolveSingle(context)!!.path + val parentPath = startPath.parent()!! + require(parentPath == endPath.parent()) { "Start and end columns have different parent column paths" } + val parentCol = context.df.getColumnGroup(parentPath) + val startIndex = parentCol.getColumnIndex(startPath.name) + val endIndex = parentCol.getColumnIndex(endPath.name) + return (startIndex..endIndex).map { + parentCol.getColumn(it).let { + it.addPath(parentPath + it.name) + } + } + } + } + + public fun none(): ColumnSet<*> = ColumnsList(emptyList()) + + // region cols + + public fun ColumnSet<*>.cols(predicate: (AnyCol) -> Boolean = { true }): ColumnSet = colsInternal(predicate) + + public fun ColumnSet<*>.cols(firstCol: ColumnReference, vararg otherCols: ColumnReference): ColumnSet = + (listOf(firstCol) + otherCols).let { refs -> + transform { it.flatMap { col -> refs.mapNotNull { col.getChild(it) } } } + } + + public fun ColumnSet<*>.cols(firstCol: String, vararg otherCols: String): ColumnSet = + (listOf(firstCol) + otherCols).let { names -> + transform { it.flatMap { col -> names.mapNotNull { col.getChild(it) } } } + } + + public fun ColumnSet<*>.cols(vararg indices: Int): ColumnSet = + transform { it.flatMap { it.children().let { children -> indices.map { children[it] } } } } + + public fun ColumnSet<*>.cols(range: IntRange): ColumnSet = + transform { it.flatMap { it.children().subList(range.start, 
range.endInclusive + 1) } } + + // region select + + public fun ColumnSet>.select(vararg columns: String): ColumnSet<*> = select { columns.toColumns() } + + public fun ColumnSet>.select(vararg columns: KProperty): ColumnSet = select { columns.toColumns() } + + public fun ColumnSet>.select(selector: ColumnsSelector): ColumnSet = createColumnSet { + this@select.resolve(it).flatMap { group -> + group.asColumnGroup().getColumnsWithPaths(selector).map { + it.changePath(group.path + it.path) + } + } + } + + // endregion + + // endregion + + // region dfs + + public fun ColumnSet.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet = dfsInternal(predicate) + + public fun String.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet<*> = toColumnAccessor().dfs(predicate) + + // endregion + + // region all + + public fun SingleColumn<*>.all(): ColumnSet<*> = transformSingle { it.children() } + + public fun String.all(): ColumnSet<*> = toColumnAccessor().transformSingle { it.children() } + + // region allDfs + + public fun ColumnSet<*>.allDfs(includeGroups: Boolean = false): ColumnSet = if (includeGroups) dfs { true } else dfs { !it.isColumnGroup() } + + public fun String.allDfs(includeGroups: Boolean = false): ColumnSet = toColumnAccessor().allDfs(includeGroups) + + // endregion + + // region allAfter + + // excluding current + public fun SingleColumn<*>.allAfter(colPath: ColumnPath): ColumnSet { + var take = false + return children { + if (take) true + else { + take = colPath == it.path + false + } + } + } + + public fun SingleColumn<*>.allAfter(colName: String): ColumnSet = allAfter(pathOf(colName)) + public fun SingleColumn<*>.allAfter(column: Column): ColumnSet = allAfter(column.path()) + + // endregion + + // region allSince + + // including current + public fun SingleColumn<*>.allSince(colPath: ColumnPath): ColumnSet { + var take = false + return children { + if (take) true + else { + take = colPath == it.path + take + } + } + } + + public fun 
SingleColumn<*>.allSince(colName: String): ColumnSet = allSince(pathOf(colName)) + public fun SingleColumn<*>.allSince(column: Column): ColumnSet = allSince(column.path()) + + // endregion + + // region allBefore + + // excluding current + public fun SingleColumn<*>.allBefore(colPath: ColumnPath): ColumnSet { + var take = true + return children { + if (!take) false + else { + take = colPath != it.path + take + } + } + } + + public fun SingleColumn<*>.allBefore(colName: String): ColumnSet = allBefore(pathOf(colName)) + public fun SingleColumn<*>.allBefore(column: Column): ColumnSet = allBefore(column.path()) + + // endregion + + // region allUntil + + // including current + public fun SingleColumn<*>.allUntil(colPath: ColumnPath): ColumnSet { + var take = true + return children { + if (!take) false + else { + take = colPath != it.path + true + } + } + } + + public fun SingleColumn<*>.allUntil(colName: String): ColumnSet = allUntil(pathOf(colName)) + public fun SingleColumn<*>.allUntil(column: Column): ColumnSet = allUntil(column.path()) + + // endregion + + // endregion + + public fun SingleColumn<*>.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): ColumnSet = + children { it.isColumnGroup() && filter(it.asColumnGroup()) } as ColumnSet + + public fun ColumnSet.children(predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet = + transform { it.flatMap { it.children().filter { predicate(it) } } } + + public fun ColumnGroupReference.children(): ColumnSet = transformSingle { it.children() } + + public operator fun List>.get(range: IntRange): ColumnSet = + ColumnsList(subList(range.first, range.last + 1)) + + public fun col(property: KProperty): ColumnAccessor = property.toColumnAccessor() + + public operator fun ColumnSet<*>.get(colName: String): ColumnSet = transform { it.mapNotNull { it.getChild(colName) } } + public operator fun ColumnSet<*>.get(column: ColumnReference): ColumnSet = cols(column) + + public fun SingleColumn.take(n: Int): ColumnSet<*> = 
transformSingle { it.children().take(n) } + public fun SingleColumn.takeLast(n: Int): ColumnSet<*> = transformSingle { it.children().takeLast(n) } + public fun SingleColumn.drop(n: Int): ColumnSet<*> = transformSingle { it.children().drop(n) } + public fun SingleColumn.dropLast(n: Int = 1): ColumnSet<*> = transformSingle { it.children().dropLast(n) } + + public fun ColumnSet.drop(n: Int): ColumnSet = transform { it.drop(n) } + public fun ColumnSet.take(n: Int): ColumnSet = transform { it.take(n) } + public fun ColumnSet.dropLast(n: Int = 1): ColumnSet = transform { it.dropLast(n) } + public fun ColumnSet.takeLast(n: Int): ColumnSet = transform { it.takeLast(n) } + public fun ColumnSet.top(): ColumnSet = transform { it.top() } + public fun ColumnSet.takeWhile(predicate: Predicate>): ColumnSet = + transform { it.takeWhile(predicate) } + + public fun ColumnSet.takeLastWhile(predicate: Predicate>): ColumnSet = + transform { it.takeLastWhile(predicate) } + + public fun ColumnSet.filter(predicate: Predicate>): ColumnSet = + transform { it.filter(predicate) } + + public fun ColumnSet<*>.nameContains(text: CharSequence): ColumnSet = cols { it.name.contains(text) } + public fun ColumnSet<*>.nameContains(regex: Regex): ColumnSet = cols { it.name.contains(regex) } + public fun ColumnSet<*>.startsWith(prefix: CharSequence): ColumnSet = cols { it.name.startsWith(prefix) } + public fun ColumnSet<*>.endsWith(suffix: CharSequence): ColumnSet = cols { it.name.endsWith(suffix) } + + public fun ColumnSet.except(vararg other: ColumnSet<*>): ColumnSet<*> = except(other.toColumns()) + public fun ColumnSet.except(vararg other: String): ColumnSet<*> = except(other.toColumns()) + + public fun ColumnSet.withoutNulls(): ColumnSet = transform { it.filter { !it.hasNulls } } as ColumnSet + + public infix fun ColumnSet.except(other: ColumnSet<*>): ColumnSet<*> = + createColumnSet { resolve(it).allColumnsExcept(other.resolve(it)) } + + public infix fun ColumnSet.except(selector: ColumnsSelector): 
ColumnSet = + except(selector.toColumns()) as ColumnSet + + public operator fun ColumnsSelector.invoke(): ColumnSet = + this(this@ColumnsSelectionDsl, this@ColumnsSelectionDsl) + + public infix fun ColumnReference.into(newName: String): ColumnReference = named(newName) + public infix fun ColumnReference.into(column: ColumnAccessor<*>): ColumnReference = into(column.name()) + public infix fun ColumnReference.into(column: KProperty<*>): ColumnReference = named(column.columnName) + + public infix fun String.into(newName: String): ColumnReference = toColumnAccessor().into(newName) + public infix fun String.into(column: ColumnAccessor<*>): ColumnReference = toColumnAccessor().into(column.name()) + public infix fun String.into(column: KProperty<*>): ColumnReference = toColumnAccessor().into(column.columnName) + + public infix fun ColumnReference.named(newName: String): ColumnReference = renamedReference(newName) + public infix fun ColumnReference.named(name: KProperty<*>): ColumnReference = named(name.columnName) + + public infix fun String.named(newName: String): ColumnReference = toColumnAccessor().named(newName) + + // region and + + // region String + public infix fun String.and(other: String): ColumnSet = toColumnAccessor() and other.toColumnAccessor() + public infix fun String.and(other: ColumnSet): ColumnSet = toColumnAccessor() and other + public infix fun String.and(other: KProperty): ColumnSet = toColumnAccessor() and other + public infix fun String.and(other: ColumnsSelector): ColumnSet = toColumnAccessor() and other() + + // endregion + + // region KProperty + public infix fun KProperty.and(other: ColumnSet): ColumnSet = toColumnAccessor() and other + public infix fun KProperty.and(other: String): ColumnSet = toColumnAccessor() and other + public infix fun KProperty.and(other: KProperty): ColumnSet = + toColumnAccessor() and other.toColumnAccessor() + public infix fun KProperty.and(other: ColumnsSelector): ColumnSet = toColumnAccessor() and other() + + // 
endregion + + // region ColumnSet + + public infix fun ColumnSet.and(other: KProperty): ColumnSet = this and other.toColumnAccessor() + public infix fun ColumnSet.and(other: String): ColumnSet = this and other.toColumnAccessor() + public infix fun ColumnSet.and(other: ColumnSet): ColumnSet = ColumnsList(this, other) + public infix fun ColumnSet.and(other: ColumnsSelector): ColumnSet = this and other() + + // endregion + + // region ColumnsSelector + + public infix fun ColumnsSelector.and(other: KProperty): ColumnSet = this() and other + public infix fun ColumnsSelector.and(other: String): ColumnSet = this() and other + public infix fun ColumnsSelector.and(other: ColumnSet): ColumnSet = this() and other + public infix fun ColumnsSelector.and(other: ColumnsSelector): ColumnSet = this() and other + + // endregion + + public fun ColumnSet.distinct(): ColumnSet = DistinctColumnSet(this) +} + +public inline fun ColumnsSelectionDsl.expr( + name: String = "", + infer: Infer = Infer.Nulls, + noinline expression: AddExpression +): DataColumn = mapToColumn(name, infer, expression) + +internal fun ColumnsSelector.filter(predicate: (ColumnWithPath) -> Boolean): ColumnsSelector = + { this@filter(it, it).filter(predicate) } +// internal fun Columns<*>.filter(predicate: (AnyCol) -> Boolean) = transform { it.filter { predicate(it.data) } } + +internal fun ColumnSet<*>.colsInternal(predicate: (AnyCol) -> Boolean) = + transform { it.flatMap { it.children().filter { predicate(it.data) } } } + +internal fun ColumnSet<*>.dfsInternal(predicate: (ColumnWithPath<*>) -> Boolean) = + transform { it.filter { it.isColumnGroup() }.flatMap { it.children().dfs().filter(predicate) } } + +public fun ColumnSet<*>.dfsOf(type: KType, predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet<*> = + dfsInternal { it.isSubtypeOf(type) && predicate(it.cast()) } + +public inline fun ColumnSet<*>.dfsOf(noinline filter: (ColumnWithPath) -> Boolean = { true }): ColumnSet = + dfsOf( + typeOf(), + filter + 
) as ColumnSet + +public fun ColumnSet<*>.colsOf(type: KType): ColumnSet = colsOf(type) { true } + +public inline fun ColumnSet<*>.colsOf(): ColumnSet = colsOf(typeOf()) as ColumnSet + +public fun ColumnSet<*>.colsOf(type: KType, filter: (DataColumn) -> Boolean): ColumnSet = + colsInternal { it.isSubtypeOf(type) && filter(it.cast()) } as ColumnSet + +public inline fun ColumnSet<*>.colsOf(noinline filter: (DataColumn) -> Boolean = { true }): ColumnSet = + colsOf( + typeOf(), filter + ) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Cumulative.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Cumulative.kt new file mode 100644 index 000000000..5da628238 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Cumulative.kt @@ -0,0 +1 @@ +package org.jetbrains.kotlinx.dataframe.api diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnArithmetics.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnArithmetics.kt new file mode 100644 index 000000000..7d3531fc3 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnArithmetics.kt @@ -0,0 +1,111 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.Predicate +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import java.math.BigDecimal + +public operator fun DataColumn.plus(value: Int): DataColumn = map { it + value } +public operator fun DataColumn.minus(value: Int): DataColumn = map { it - value } +public operator fun Int.plus(column: DataColumn): DataColumn = column.map { this + it } +public operator fun Int.minus(column: DataColumn): DataColumn = column.map { this - it } +public operator fun DataColumn.unaryMinus(): DataColumn = map { 
-it } +public operator fun DataColumn.times(value: Int): DataColumn = map { it * value } +public operator fun DataColumn.div(value: Int): DataColumn = map { it / value } +public operator fun Int.div(column: DataColumn): DataColumn = column.map { this / it } +public operator fun AnyCol.plus(str: String): DataColumn = map { it.toString() + str } + +public operator fun ColumnReference.plus(value: Int): ColumnReference = map { it + value } +public operator fun ColumnReference.minus(value: Int): ColumnReference = map { it - value } +public operator fun Int.plus(column: ColumnReference): ColumnReference = column.map { this + it } +public operator fun Int.minus(column: ColumnReference): ColumnReference = column.map { this - it } +public operator fun ColumnReference.unaryMinus(): ColumnReference = map { -it } +public operator fun ColumnReference.times(value: Int): ColumnReference = map { it * value } +public operator fun ColumnReference.div(value: Int): ColumnReference = map { it / value } +public operator fun Int.div(column: ColumnReference): ColumnReference = column.map { this / it } +public operator fun ColumnReference.plus(str: String): ColumnReference = map { it.toString() + str } + +@JvmName("plusIntNullable") +public operator fun DataColumn.plus(value: Int): DataColumn = map { it?.plus(value) } +@JvmName("minusIntNullable") +public operator fun DataColumn.minus(value: Int): DataColumn = map { it?.minus(value) } +@JvmName("plusNullable") +public operator fun Int.plus(column: DataColumn): DataColumn = column.map { it?.plus(this) } +@JvmName("minusNullable") +public operator fun Int.minus(column: DataColumn): DataColumn = column.map { it?.let { this - it } } +@JvmName("unaryMinusIntNullable") +public operator fun DataColumn.unaryMinus(): DataColumn = map { it?.unaryMinus() } +@JvmName("timesIntNullable") +public operator fun DataColumn.times(value: Int): DataColumn = map { it?.times(value) } +@JvmName("divIntNullable") +public operator fun DataColumn.div(value: Int): 
DataColumn = map { it?.div(value) } +@JvmName("divNullable") +public operator fun Int.div(column: DataColumn): DataColumn = column.map { it?.let { this / it } } + +@JvmName("plusInt") +public operator fun DataColumn.plus(value: Double): DataColumn = map { it + value } +@JvmName("minusInt") +public operator fun DataColumn.minus(value: Double): DataColumn = map { it - value } +@JvmName("doublePlus") +public operator fun Double.plus(column: DataColumn): DataColumn = column.map { this + it } +@JvmName("doubleMinus") +public operator fun Double.minus(column: DataColumn): DataColumn = column.map { this - it } +@JvmName("timesInt") +public operator fun DataColumn.times(value: Double): DataColumn = map { it * value } +@JvmName("divInt") +public operator fun DataColumn.div(value: Double): DataColumn = map { it / value } +@JvmName("doubleDiv") +public operator fun Double.div(column: DataColumn): DataColumn = column.map { this / it } + +@JvmName("plusDouble") +public operator fun DataColumn.plus(value: Int): DataColumn = map { it + value } +@JvmName("minusDouble") +public operator fun DataColumn.minus(value: Int): DataColumn = map { it - value } +@JvmName("intPlus") +public operator fun Int.plus(column: DataColumn): DataColumn = column.map { this + it } +@JvmName("intMinus") +public operator fun Int.minus(column: DataColumn): DataColumn = column.map { this - it } +@JvmName("timesDouble") +public operator fun DataColumn.times(value: Int): DataColumn = map { it * value } +@JvmName("divDouble") +public operator fun DataColumn.div(value: Int): DataColumn = map { it / value } +@JvmName("intDiv") +public operator fun Int.div(column: DataColumn): DataColumn = column.map { this / it } + +public operator fun DataColumn.plus(value: Double): DataColumn = map { it + value } +public operator fun DataColumn.minus(value: Double): DataColumn = map { it - value } +public operator fun Double.plus(column: DataColumn): DataColumn = column.map { this + it } +public operator fun 
Double.minus(column: DataColumn): DataColumn = column.map { this - it } +@JvmName("unaryMinusDouble") +public operator fun DataColumn.unaryMinus(): DataColumn = map { -it } +public operator fun DataColumn.times(value: Double): DataColumn = map { it * value } +public operator fun DataColumn.div(value: Double): DataColumn = map { it / value } +public operator fun Double.div(column: DataColumn): DataColumn = column.map { this / it } + +public operator fun DataColumn.plus(value: Long): DataColumn = map { it + value } +public operator fun DataColumn.minus(value: Long): DataColumn = map { it - value } +public operator fun Long.plus(column: DataColumn): DataColumn = column.map { this + it } +public operator fun Long.minus(column: DataColumn): DataColumn = column.map { this - it } +@JvmName("unaryMinusLong") +public operator fun DataColumn.unaryMinus(): DataColumn = map { -it } +public operator fun DataColumn.times(value: Long): DataColumn = map { it * value } +public operator fun DataColumn.div(value: Long): DataColumn = map { it / value } +public operator fun Long.div(column: DataColumn): DataColumn = column.map { this / it } + +public operator fun DataColumn.plus(value: BigDecimal): DataColumn = map { it + value } +public operator fun DataColumn.minus(value: BigDecimal): DataColumn = map { it - value } +public operator fun BigDecimal.plus(column: DataColumn): DataColumn = column.map { this + it } +public operator fun BigDecimal.minus(column: DataColumn): DataColumn = column.map { this - it } +@JvmName("unaryMinusBigDecimal") +public operator fun DataColumn.unaryMinus(): DataColumn = map { -it } +public operator fun DataColumn.times(value: BigDecimal): DataColumn = map { it * value } +public operator fun DataColumn.div(value: BigDecimal): DataColumn = map { it / value } +public operator fun BigDecimal.div(column: DataColumn): DataColumn = column.map { this / it } + +public infix fun DataColumn.eq(value: T): DataColumn = isMatching { it == value } +public infix fun 
DataColumn.neq(value: T): DataColumn = isMatching { it != value } +public infix fun > DataColumn.gt(value: T): DataColumn = isMatching { it > value } +public infix fun > DataColumn.lt(value: T): DataColumn = isMatching { it < value } + +internal infix fun DataColumn.isMatching(predicate: Predicate): DataColumn = map { predicate(it) } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt new file mode 100644 index 000000000..3e3b2729c --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt @@ -0,0 +1,27 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.type +import org.jetbrains.kotlinx.dataframe.typeClass +import kotlin.reflect.KClass +import kotlin.reflect.KType +import kotlin.reflect.full.isSubclassOf +import kotlin.reflect.full.isSubtypeOf +import kotlin.reflect.typeOf + +public fun AnyCol.isColumnGroup(): Boolean = kind() == ColumnKind.Group +public fun AnyCol.isFrameColumn(): Boolean = kind() == ColumnKind.Frame +public fun AnyCol.isValueColumn(): Boolean = kind() == ColumnKind.Value + +public fun AnyCol.isSubtypeOf(type: KType): Boolean = this.type.isSubtypeOf(type) && (!this.type.isMarkedNullable || type.isMarkedNullable) +public inline fun AnyCol.isSubtypeOf(): Boolean = isSubtypeOf(typeOf()) +public inline fun AnyCol.isType(): Boolean = type() == typeOf() +public fun AnyCol.isNumber(): Boolean = isSubtypeOf() +public fun AnyCol.isList(): Boolean = typeClass == List::class +public fun AnyCol.isComparable(): Boolean = isSubtypeOf?>() + +@PublishedApi +internal fun AnyCol.isPrimitive(): Boolean = typeClass.isPrimitive() + +internal fun KClass<*>.isPrimitive(): Boolean = isSubclassOf(Number::class) || this == 
String::class || this == Char::class || this == Array::class || isSubclassOf(Collection::class) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt new file mode 100644 index 000000000..63588c4e9 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataFrameGet.kt @@ -0,0 +1,126 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnSelector +import org.jetbrains.kotlinx.dataframe.ColumnsContainer +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.getColumnPaths +import org.jetbrains.kotlinx.dataframe.impl.getColumnsWithPaths +import org.jetbrains.kotlinx.dataframe.ncol +import org.jetbrains.kotlinx.dataframe.nrow +import kotlin.reflect.KProperty + +public fun DataFrame.getColumnsWithPaths(selector: ColumnsSelector): List> = + getColumnsWithPaths(UnresolvedColumnsPolicy.Fail, selector) + +public fun DataFrame.getColumnPath(selector: ColumnSelector): ColumnPath = getColumnPaths(selector).single() 
+public fun DataFrame.getColumnPaths(selector: ColumnsSelector): List = + getColumnPaths(UnresolvedColumnsPolicy.Fail, selector) + +public fun DataFrame.getColumnWithPath(selector: ColumnSelector): ColumnWithPath = getColumnsWithPaths(selector).single() +public fun DataFrame.getColumns(selector: ColumnsSelector): List> = get(selector) +public fun DataFrame.getColumns(vararg columns: String): List = getColumns { columns.toColumns() } + +public fun DataFrame.getColumnIndex(col: AnyCol): Int = getColumnIndex(col.name()) +public fun DataFrame.getRows(range: IntRange): DataFrame = if (range == indices()) this else columns().map { col -> col[range] }.toDataFrame().cast() +public fun DataFrame.getRows(indices: Iterable): DataFrame = columns().map { col -> col[indices] }.toDataFrame().cast() +public fun DataFrame.getOrNull(index: Int): DataRow? = if (index < 0 || index >= nrow) null else get(index) + +public fun ColumnsContainer.getFrameColumn(columnPath: ColumnPath): FrameColumn<*> = get(columnPath).asAnyFrameColumn() +public fun ColumnsContainer.getFrameColumn(columnName: String): FrameColumn<*> = get(columnName).asAnyFrameColumn() +public fun ColumnsContainer.getColumnGroup(columnPath: ColumnPath): ColumnGroup<*> = get(columnPath).asColumnGroup() + +// region getColumn + +public fun ColumnsContainer.getColumn(name: String): AnyCol = getColumnOrNull(name) ?: throw IllegalArgumentException("Column not found: '$name'") + +public fun ColumnsContainer.getColumn(column: ColumnReference>): FrameColumn = getColumnOrNull(column)?.asFrameColumn() ?: throw IllegalArgumentException("FrameColumn not found: '$column'") + +public fun ColumnsContainer.getColumn(column: ColumnReference>): ColumnGroup = getColumnOrNull(column)?.asColumnGroup() ?: throw IllegalArgumentException("ColumnGroup not found: '$column'") + +public fun ColumnsContainer.getColumn(column: ColumnReference): DataColumn = getColumnOrNull(column) ?: throw IllegalArgumentException("Column not found: '$column'") + +public 
fun ColumnsContainer.getColumn(path: ColumnPath): AnyCol = getColumnOrNull(path) ?: throw IllegalArgumentException("Column not found: '$path'") + +public fun ColumnsContainer.getColumn(index: Int): AnyCol = getColumnOrNull(index) ?: throw IllegalArgumentException("Column index is out of bounds: $index. Columns count = $ncol") + +public fun ColumnsContainer.getColumn(selector: ColumnSelector): DataColumn = get(selector) + +// endregion + +// region getColumnGroup + +public fun ColumnsContainer.getColumnGroup(index: Int): ColumnGroup<*> = getColumn(index).asColumnGroup() + +public fun ColumnsContainer.getColumnGroup(name: String): ColumnGroup<*> = getColumn(name).asColumnGroup() + +public fun ColumnsContainer.getColumnGroup(column: KProperty<*>): ColumnGroup<*> = getColumnGroup(column.columnName) + +public fun ColumnsContainer.getColumnGroup(column: ColumnReference>): ColumnGroup = getColumn(column) + +public fun ColumnsContainer.getColumnGroup(column: ColumnSelector>): ColumnGroup = get(column).asColumnGroup() + +// endregion + +// region getColumnGroupOrNull + +public fun ColumnsContainer.getColumnGroupOrNull(name: String): ColumnGroup<*>? = getColumnOrNull(name)?.asColumnGroup() + +public fun ColumnsContainer.getColumnGroupOrNull(column: KProperty<*>): ColumnGroup<*>? 
= getColumnGroupOrNull(column.columnName) + +// endregion + +// region containsColumn + +public fun ColumnsContainer<*>.containsColumn(column: ColumnReference): Boolean = getColumnOrNull(column) != null +public fun ColumnsContainer<*>.containsColumn(column: KProperty<*>): Boolean = containsColumn(column.columnName) + +public operator fun ColumnsContainer<*>.contains(column: Column): Boolean = containsColumn(column) +public operator fun ColumnsContainer<*>.contains(column: KProperty<*>): Boolean = containsColumn(column) + +// region rows + +public fun DataFrame.rows(): Iterable> = object : Iterable> { + override fun iterator() = + + object : Iterator> { + var nextRow = 0 + + override fun hasNext(): Boolean = nextRow < nrow + + override fun next(): DataRow { + require(nextRow < nrow) + return get(nextRow++) + } + } +} + +public fun DataFrame.rowsReversed(): Iterable> = object : Iterable> { + override fun iterator() = + + object : Iterator> { + var nextRow = nrow - 1 + + override fun hasNext(): Boolean = nextRow >= 0 + + override fun next(): DataRow { + require(nextRow >= 0) + return get(nextRow--) + } + } +} + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt new file mode 100644 index 000000000..9cdc67e72 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt @@ -0,0 +1,119 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsContainer +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import 
org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.owner +import org.jetbrains.kotlinx.dataframe.index +import org.jetbrains.kotlinx.dataframe.indices +import org.jetbrains.kotlinx.dataframe.ncol +import org.jetbrains.kotlinx.dataframe.nrow +import kotlin.experimental.ExperimentalTypeInference +import kotlin.reflect.KProperty +import kotlin.reflect.KType + +public fun AnyRow.isEmpty(): Boolean = owner.columns().all { it[index] == null } +public fun AnyRow.isNotEmpty(): Boolean = !isEmpty() + +public inline fun AnyRow.valuesOf(): List = values().filterIsInstance() + +// region DataSchema +@DataSchema +public data class NameValuePair(val name: String, val value: V) + +// Without these overloads row.transpose().name or row.map { name } won't resolve +public val ColumnsContainer>.name: DataColumn @JvmName("NameValuePairAny_name") get() = this["name"] as DataColumn +public val DataRow>.name: String @JvmName("NameValuePairAny_name") get() = this["name"] as String + +public val ColumnsContainer>.value: DataColumn<*> @JvmName("NameValuePairAny_value") get() = this["value"] +public val DataRow>.value: Any? @JvmName("NameValuePairAny_value") get() = this["value"] + +// endregion + +public inline fun AnyRow.namedValuesOf(): List> = + values().zip(columnNames()).filter { it.first is R }.map { NameValuePair(it.second, it.first as R) } + +public fun AnyRow.namedValues(): List> = + values().zip(columnNames()) { value, name -> NameValuePair(name, value) } + +// region getValue + +public fun AnyRow.getValue(columnName: String): T = get(columnName) as T +public fun AnyRow.getValue(column: ColumnReference): T = get(column) +public fun AnyRow.getValue(column: KProperty): T = get(column) + +public fun AnyRow.getValueOrNull(columnName: String): T? = getOrNull(columnName) as T? +public fun AnyRow.getValueOrNull(column: KProperty): T? 
= getValueOrNull(column.columnName) + +// endregion + +// region contains + +public fun AnyRow.containsKey(columnName: String): Boolean = owner.containsColumn(columnName) +public fun AnyRow.containsKey(column: Column): Boolean = owner.containsColumn(column) +public fun AnyRow.containsKey(column: KProperty<*>): Boolean = owner.containsColumn(column) + +public operator fun AnyRow.contains(column: Column): Boolean = containsKey(column) +public operator fun AnyRow.contains(column: KProperty<*>): Boolean = containsKey(column) + +// endregion + +@OptIn(ExperimentalTypeInference::class) +@OverloadResolutionByLambdaReturnType +public fun DataRow.diff(expression: RowExpression): Double? = + prev()?.let { p -> expression(this, this) - expression(p, p) } + +public fun DataRow.diff(expression: RowExpression): Int? = + prev()?.let { p -> expression(this, this) - expression(p, p) } + +public fun DataRow.diff(expression: RowExpression): Long? = + prev()?.let { p -> expression(this, this) - expression(p, p) } + +public fun DataRow.diff(expression: RowExpression): Float? = + prev()?.let { p -> expression(this, this) - expression(p, p) } + +public fun AnyRow.columnsCount(): Int = df().ncol +public fun AnyRow.columnNames(): List = df().columnNames() +public fun AnyRow.columnTypes(): List = df().columnTypes() + +public fun DataRow.getRow(index: Int): DataRow = getRowOrNull(index)!! + +public fun DataRow.getRows(indices: Iterable): DataFrame = df().getRows(indices) +public fun DataRow.getRows(indices: IntRange): DataFrame = df().getRows(indices) + +public fun DataRow.getRowOrNull(index: Int): DataRow? { + val df = df() + return if (index >= 0 && index < df.nrow) df[index] else null +} + +public fun DataRow.prev(): DataRow? { + val index = index() + return if (index > 0) df()[index - 1] else null +} + +public fun DataRow.next(): DataRow? 
{ + val index = index() + val df = df() + return if (index < df.nrow - 1) df[index + 1] else null +} + +public fun DataRow.relative(relativeIndices: Iterable): DataFrame = + getRows(relativeIndices.mapNotNull { (index + it).let { if (it >= 0 && it < df().rowsCount()) it else null } }) + +public fun DataRow.relative(relativeIndices: IntRange): DataFrame = + getRows((relativeIndices.first + index).coerceIn(df().indices)..(relativeIndices.last + index).coerceIn(df().indices)) + +public fun DataRow.movingAverage(k: Int, expression: RowExpression): Double { + var count = 0 + return backwardIterable().take(k).sumOf { + count++ + expression(it).toDouble() + } / count +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Defaults.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Defaults.kt new file mode 100644 index 000000000..646954aa4 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Defaults.kt @@ -0,0 +1,7 @@ +package org.jetbrains.kotlinx.dataframe.api + +@PublishedApi +internal val skipNA_default: Boolean = false + +@PublishedApi +internal val ddof_default: Int = 1 diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/JsonPath.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/JsonPath.kt new file mode 100644 index 000000000..58a719da9 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/JsonPath.kt @@ -0,0 +1,69 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.intellij.lang.annotations.Language +import java.io.Serializable + +/** + * Simplistic JSON path implementation. + * Supports just keys (in bracket notation), double quotes, arrays and wildcards. 
+ * + * Examples: + * `$["store"]["book"][*]["author"]` + * + * `$[1]` will match `$[*]` + */ +@JvmInline +public value class JsonPath(@Language("jsonpath") public val path: String = "$") : Serializable { + + public fun append(name: String): JsonPath = JsonPath("$path[\"$name\"]") + + public fun appendWildcard(): JsonPath = JsonPath("$path[*]") + + public fun appendArrayWithIndex(index: Int): JsonPath = JsonPath("$path[$index]") + + public fun appendArrayWithWildcard(): JsonPath = JsonPath("$path[*]") + + public fun replaceLastWildcardWithIndex(index: Int): JsonPath = JsonPath( + path.toCharArray().let { chars -> + val lastStarIndex = chars.lastIndexOf('*') + chars.flatMapIndexed { i, c -> + if (i == lastStarIndex) index.toString().toCharArray().toList() + else listOf(c) + }.joinToString("") + } + ) + + public fun prepend(name: String): JsonPath = JsonPath( + "\$[\"$name\"]" + path.removePrefix("$") + ) + + public fun prependWildcard(): JsonPath = JsonPath( + "\$[*]" + path.removePrefix("$") + ) + + public fun prependArrayWithIndex(index: Int): JsonPath = JsonPath( + "\$[$index]" + path.removePrefix("$") + ) + + public fun prependArrayWithWildcard(): JsonPath = JsonPath( + "\$[*]" + path.removePrefix("$") + ) + + public fun erasedIndices(): JsonPath = JsonPath( + path.replace("""\[[0-9]+]""".toRegex(), "[*]") + ) + + private fun splitPath() = path.split("[", "]").filter { it.isNotBlank() } + + public fun matches(other: JsonPath): Boolean = + path == other.path || + run { + val path = splitPath() + val otherPath = other.splitPath() + + if (path.size != otherPath.size) false + else path.zip(otherPath).all { (p, o) -> + p == o || p == "*" || o == "*" + } + } +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/KeyValueProperty.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/KeyValueProperty.kt new file mode 100644 index 000000000..56a52c2f3 --- /dev/null +++ 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/KeyValueProperty.kt @@ -0,0 +1,15 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.annotations.ColumnName +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema + +/** A [DataSchema] interface / class can implement this if it represents a map-like data schema (so key: value). */ +@DataSchema +public interface KeyValueProperty { + // needs to be explicitly overridden in @DataSchema interface, otherwise extension functions won't generate (TODO) + public val key: String + + // needs to be explicitly overridden in @DataSchema interface, otherwise type will be read as `T` and extensions won't generate (TODO) + @ColumnName("value") + public val `value`: T +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Misc.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Misc.kt new file mode 100644 index 000000000..5da628238 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Misc.kt @@ -0,0 +1 @@ +package org.jetbrains.kotlinx.dataframe.api diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt new file mode 100644 index 000000000..6c04bc01a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -0,0 +1,638 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.Update.UpdateOperationArg 
+import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.documentation.* +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.kind +import org.jetbrains.kotlinx.dataframe.typeClass +import kotlin.reflect.KProperty + +// region fillNulls + +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update]. + * + * Check out the [`fillNulls` Operation Usage][Usage]. + * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + */ +internal interface FillNulls { + + /** + * ## [fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls] Operation Usage + * + * [fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls] `{ `[columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]` }` + * + * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowValueCondition][org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.RowValueCondition.WithExample]` } ]` + * + * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` + * + * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { `[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol]` { `[colExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.ColumnExpression.WithExample]` } + * | 
.`[perRowCol][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol]` { `[rowColExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.RowColumnExpression.WithExample]` } + * | .`[withValue][org.jetbrains.kotlinx.dataframe.api.Update.withValue]`(value) + * | .`[withNull][org.jetbrains.kotlinx.dataframe.api.Update.withNull]`() + * | .`[withZero][org.jetbrains.kotlinx.dataframe.api.Update.withZero]`() + * | .`[asFrame][org.jetbrains.kotlinx.dataframe.api.Update.asFrame]` { `[dataFrameExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpression.WithExample]` }` + */ + interface Usage +} + + +private interface SetFillNullsOperationArg + +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNulls` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNulls.Usage]. + * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill Nulls Overload + */ +private interface CommonFillNullsFunctionDoc + +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNulls` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNulls.Usage]. + * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. 
+ * ## This Fill Nulls Overload + * Select or express columns using the Column(s) Selection DSL. + * (Any [Access Api][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. + * + * For example: + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`() }` + * + * @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNulls(columns: ColumnsSelector): Update = + update(columns).where { it == null } + +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNulls` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNulls.Usage]. 
+ * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill Nulls Overload + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]`("length", "age")` + * + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNulls(vararg columns: String): Update = + fillNulls { columns.toColumns() } + +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNulls` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNulls.Usage]. + * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill Nulls Overload + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). 
+ * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]`(Person::length, Person::age)` + * + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNulls(vararg columns: KProperty): Update = + fillNulls { columns.toColumns() } + +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNulls` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNulls.Usage]. + * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill Nulls Overload + * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + * + * For example: + * + * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`()` + * + * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`()` + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]`(length, age)` + * + * @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. 
+ */ +public fun DataFrame.fillNulls(vararg columns: ColumnReference): Update = + fillNulls { columns.toColumns() } + +/** + * TODO this will be deprecated + */ +public fun DataFrame.fillNulls(columns: Iterable>): Update = + fillNulls { columns.toColumnSet() } + +// endregion + +/** + * [Floats][Float] or [Doubles][Double] can be represented as [Float.NaN] or [Double.NaN], respectively, + * in cases where a mathematical operation is undefined, such as dividing by zero. + * In Dataframe we have helper functions to check for `NaNs`, such as [Any?.isNaN][Any.isNaN] and + * [column.canHaveNaN][DataColumn.canHaveNaN]. + * You can also use [fillNaNs][fillNaNs] to replace `NaNs` in certain columns with a given value or expression. + * + * @see NA + */ +internal interface NaN + +/** + * `NA` in Dataframe can be seen as "[NaN] or `null`". + * + * [Floats][Float] or [Doubles][Double] can be represented as [Float.NaN] or [Double.NaN], respectively, + * in cases where a mathematical operation is undefined, such as dividing by zero. + * + * In Dataframe we have helper functions to check for `NAs`, such as [Any?.isNA][Any.isNA] and + * [column.canHaveNA][DataColumn.canHaveNA]. + * You can also use [fillNA][fillNA] to replace `NAs` in certain columns with a given value or expression. 
+ * @see NaN + */ +internal interface NA + +internal inline val Any?.isNaN: Boolean get() = (this is Double && isNaN()) || (this is Float && isNaN()) + +internal inline val Any?.isNA: Boolean + get() = when (this) { + null -> true + is Double -> isNaN() + is Float -> isNaN() + is AnyRow -> allNA() + is AnyFrame -> isEmpty() + else -> false + } + +internal inline val AnyCol.canHaveNaN: Boolean get() = typeClass.let { it == Double::class || it == Float::class } + +internal inline val AnyCol.canHaveNA: Boolean get() = hasNulls() || canHaveNaN || kind != ColumnKind.Value + +internal inline val Double?.isNA: Boolean get() = this == null || this.isNaN() + +internal inline val Float?.isNA: Boolean get() = this == null || this.isNaN() + +// region fillNaNs + +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][NaN] values with given value or expression. + * Specific case of [update]. + * + * Check out the [`fillNaNs` Operation Usage][Usage]. + * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + */ +internal interface FillNaNs { + + /** + * ## [fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs] Operation Usage + * + * [fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs] `{ `[columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]` }` + * + * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowValueCondition][org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.RowValueCondition.WithExample]` } ]` + * + * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` + * + * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { 
`[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol]` { `[colExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.ColumnExpression.WithExample]` } + * | .`[perRowCol][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol]` { `[rowColExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.RowColumnExpression.WithExample]` } + * | .`[withValue][org.jetbrains.kotlinx.dataframe.api.Update.withValue]`(value) + * | .`[withNull][org.jetbrains.kotlinx.dataframe.api.Update.withNull]`() + * | .`[withZero][org.jetbrains.kotlinx.dataframe.api.Update.withZero]`() + * | .`[asFrame][org.jetbrains.kotlinx.dataframe.api.Update.asFrame]` { `[dataFrameExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpression.WithExample]` }` + */ + interface Usage +} + + +internal interface SetFillNaNsOperationArg + +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][org.jetbrains.kotlinx.dataframe.api.NaN] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNaNs` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNaNs.Usage]. + * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NaNs Overload + */ +private interface CommonFillNaNsFunctionDoc + +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][org.jetbrains.kotlinx.dataframe.api.NaN] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. 
+ * + * Check out the [`fillNaNs` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNaNs.Usage]. + * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NaNs Overload + * Select or express columns using the Column(s) Selection DSL. + * (Any [Access Api][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. + * + * For example: + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`() }` + * + * @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. 
+ */ +public fun DataFrame.fillNaNs(columns: ColumnsSelector): Update = + update(columns).where { it.isNaN } + +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][org.jetbrains.kotlinx.dataframe.api.NaN] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNaNs` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNaNs.Usage]. + * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NaNs Overload + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]`("length", "age")` + * + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNaNs(vararg columns: String): Update = + fillNaNs { columns.toColumns() } + +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][org.jetbrains.kotlinx.dataframe.api.NaN] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNaNs` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNaNs.Usage]. + * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. 
+ * ## This Fill NaNs Overload + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]`(Person::length, Person::age)` + * + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNaNs(vararg columns: KProperty): Update = + fillNaNs { columns.toColumns() } + +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][org.jetbrains.kotlinx.dataframe.api.NaN] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNaNs` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNaNs.Usage]. + * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NaNs Overload + * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). 
+ * + * For example: + * + * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`()` + * + * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`()` + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]`(length, age)` + * + * @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNaNs(vararg columns: ColumnReference): Update = + fillNaNs { columns.toColumns() } + +/** + * TODO this will be deprecated + */ +public fun DataFrame.fillNaNs(columns: Iterable>): Update = + fillNaNs { columns.toColumnSet() } + +// endregion + +// region fillNA + +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][NA] values with given value or expression. + * Specific case of [update]. + * + * Check out the [`fillNA` Operation Usage][Usage]. + * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + */ +internal interface FillNA { + + /** + * ## [fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA] Operation Usage + * + * [fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA] `{ `[columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]` }` + * + * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowValueCondition][org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.RowValueCondition.WithExample]` } ]` + * + * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` + * + * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { 
`[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol]` { `[colExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.ColumnExpression.WithExample]` } + * | .`[perRowCol][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol]` { `[rowColExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.RowColumnExpression.WithExample]` } + * | .`[withValue][org.jetbrains.kotlinx.dataframe.api.Update.withValue]`(value) + * | .`[withNull][org.jetbrains.kotlinx.dataframe.api.Update.withNull]`() + * | .`[withZero][org.jetbrains.kotlinx.dataframe.api.Update.withZero]`() + * | .`[asFrame][org.jetbrains.kotlinx.dataframe.api.Update.asFrame]` { `[dataFrameExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpression.WithExample]` }` + */ + interface Usage +} + + +internal interface SetFillNAOperationArg + +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][org.jetbrains.kotlinx.dataframe.api.NA] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNA` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNA.Usage]. + * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NA Overload + */ +private interface CommonFillNAFunctionDoc + +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][org.jetbrains.kotlinx.dataframe.api.NA] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. 
+ * + * Check out the [`fillNA` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNA.Usage]. + * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NA Overload + * Select or express columns using the Column(s) Selection DSL. + * (Any [Access Api][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. + * + * For example: + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`() }` + * + * @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. 
+ */ +public fun DataFrame.fillNA(columns: ColumnsSelector): Update = + update(columns).where { it.isNA } + +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][org.jetbrains.kotlinx.dataframe.api.NA] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNA` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNA.Usage]. + * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NA Overload + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]`("length", "age")` + * + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNA(vararg columns: String): Update = + fillNA { columns.toColumns() } + +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][org.jetbrains.kotlinx.dataframe.api.NA] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNA` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNA.Usage]. + * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. 
+ * ## This Fill NA Overload + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]`(Person::length, Person::age)` + * + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNA(vararg columns: KProperty): Update = + fillNA { columns.toColumns() } + +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][org.jetbrains.kotlinx.dataframe.api.NA] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNA` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNA.Usage]. + * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NA Overload + * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). 
+ * + * For example: + * + * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`()` + * + * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`()` + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]`(length, age)` + * + * @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNA(vararg columns: ColumnReference): Update = + fillNA { columns.toColumns() } + +/** + * TODO this will be deprecated + */ +public fun DataFrame.fillNA(columns: Iterable>): Update = + fillNA { columns.toColumnSet() } + +// endregion + +// region dropNulls + +public fun DataFrame.dropNulls(whereAllNull: Boolean = false, selector: ColumnsSelector): DataFrame { + val columns = this[selector] + return if (whereAllNull) drop { row -> columns.all { col -> col[row] == null } } + else drop { row -> columns.any { col -> col[row] == null } } +} + +public fun DataFrame.dropNulls(whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { all() } + +public fun DataFrame.dropNulls(vararg columns: KProperty<*>, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } + +public fun DataFrame.dropNulls(vararg columns: String, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } + +public fun DataFrame.dropNulls(vararg columns: Column, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } + +public fun DataFrame.dropNulls( + columns: Iterable, + whereAllNull: Boolean = false +): DataFrame = + dropNulls(whereAllNull) { columns.toColumnSet() } + +public fun DataColumn.dropNulls(): DataColumn = + (if (!hasNulls()) this else filter { it != null }) as DataColumn + +// endregion + +// region dropNA + +public fun DataFrame.dropNA(whereAllNA: Boolean = false, 
selector: ColumnsSelector): DataFrame { + val columns = this[selector] + + return if (whereAllNA) drop { columns.all { this[it].isNA } } + else drop { columns.any { this[it].isNA } } +} + +public fun DataFrame.dropNA(vararg columns: KProperty<*>, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumns() } + +public fun DataFrame.dropNA(vararg columns: String, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumns() } + +public fun DataFrame.dropNA(vararg columns: Column, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumns() } + +public fun DataFrame.dropNA(columns: Iterable, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumnSet() } + +public fun DataFrame.dropNA(whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { all() } + +public fun DataColumn.dropNA(): DataColumn = + when (typeClass) { + Double::class, Float::class -> filter { !it.isNA }.cast() + else -> (if (!hasNulls()) this else filter { it != null }) as DataColumn + } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt new file mode 100644 index 000000000..7ad2921da --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt @@ -0,0 +1,312 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyBaseCol +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnGroupAccessor +import org.jetbrains.kotlinx.dataframe.ColumnSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import 
org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.impl.GroupByImpl +import org.jetbrains.kotlinx.dataframe.impl.anyNull +import org.jetbrains.kotlinx.dataframe.impl.asList +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnAccessorImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn +import org.jetbrains.kotlinx.dataframe.impl.columns.asValues +import org.jetbrains.kotlinx.dataframe.impl.columns.forceResolve +import org.jetbrains.kotlinx.dataframe.impl.owner +import org.jetbrains.kotlinx.dataframe.index +import kotlin.reflect.KProperty +import kotlin.reflect.typeOf + +// region String + +public fun String.toColumnAccessor(): ColumnAccessor = ColumnAccessorImpl(this) + +public fun String.toColumnOf(): ColumnAccessor = ColumnAccessorImpl(this) + +// endregion + +// region ColumnPath + +public fun ColumnPath.toColumnOf(): ColumnAccessor = ColumnAccessorImpl(this) + +public fun ColumnPath.toColumnAccessor(): ColumnAccessor = ColumnAccessorImpl(this) + +public fun ColumnPath.toColumnGroupAccessor(): ColumnAccessor = ColumnAccessorImpl(this) +public fun ColumnPath.toFrameColumnAccessor(): ColumnAccessor = ColumnAccessorImpl(this) + +// endregion + +// region ColumnReference + +public fun ColumnReference.toColumnAccessor(): ColumnAccessor = when (this) { + is ColumnAccessor -> this + else -> ColumnAccessorImpl(path()) +} + +// endregion + +// region KProperty + +public fun KProperty.toColumnAccessor(): ColumnAccessor = ColumnAccessorImpl(columnName) + +// endregion + +// region DataColumn 
+ +public fun AnyBaseCol.toDataFrame(): AnyFrame = dataFrameOf(listOf(this)) + +@JvmName("asNumberAnyNullable") +public fun DataColumn.asNumbers(): ValueColumn { + require(isNumber()) + return this.asValues() +} + +@JvmName("asNumberAny") +public fun DataColumn.asNumbers(): ValueColumn { + require(isNumber()) + return this as ValueColumn +} + +public fun DataColumn.asComparable(): DataColumn> { + require(isComparable()) + return this as DataColumn> +} + +public fun ColumnReference.castToNotNullable(): ColumnReference = cast() + +public fun DataColumn.castToNotNullable(): DataColumn { + require(!hasNulls()) { "Column `$name` has nulls" } + return this as DataColumn +} + +public fun DataColumn.castToNullable(): DataColumn = cast() + +public fun ColumnReference.castToNullable(): ColumnReference = cast() + +// region to array + +public inline fun DataColumn.toTypedArray(): Array = toList().toTypedArray() + +public fun DataColumn.toFloatArray(): FloatArray = convertToFloat().toList().toFloatArray() + +public fun DataColumn.toDoubleArray(): DoubleArray = convertToDouble().toList().toDoubleArray() + +public fun DataColumn.toIntArray(): IntArray = convertToInt().toList().toIntArray() + +public fun DataColumn.toLongArray(): LongArray = convertToLong().toList().toLongArray() + +public fun DataColumn.toShortArray(): ShortArray = convertTo().toList().toShortArray() + +public fun DataColumn.toByteArray(): ByteArray = convertTo().toList().toByteArray() + +// endregion + +public fun AnyCol.asColumnGroup(): ColumnGroup<*> = this as ColumnGroup<*> + +public fun DataColumn>.asFrameColumn(): FrameColumn = (this as AnyCol).asAnyFrameColumn().castFrameColumn() + +@JvmName("asGroupedT") +public fun DataColumn>.asColumnGroup(): ColumnGroup = (this as AnyCol).asColumnGroup().cast() + +public fun DataColumn>.asDataFrame(): DataFrame = asColumnGroup() + +// endregion + +// region ColumnGroup + +public fun ColumnGroup.asDataColumn(): DataColumn> = this as DataColumn> + +public fun 
ColumnGroup.asDataFrame(): DataFrame = this + +// endregion + +// region FrameColumn + +public fun FrameColumn.asDataColumn(): DataColumn?> = this + +public fun FrameColumn.toValueColumn(): ValueColumn?> = + DataColumn.createValueColumn(name, toList(), type()) + +// endregion + +// region ColumnSet + +@JvmName("asNumbersAny") +public fun ColumnSet.asNumbers(): ColumnSet = this as ColumnSet + +@JvmName("asNumbersAnyNullable") +public fun ColumnSet.asNumbers(): ColumnSet = this as ColumnSet + +public fun ColumnSet.asComparable(): ColumnSet> = this as ColumnSet> + +// endregion + +// region Iterable + +public fun Iterable>.toFrameColumn(name: String = ""): FrameColumn = + DataColumn.createFrameColumn(name, asList()).forceResolve() + +public inline fun Iterable.toValueColumn(name: String = ""): ValueColumn = + DataColumn.createValueColumn(name, asList()).forceResolve() + +public inline fun Iterable.toValueColumn(column: ColumnAccessor): ValueColumn = + toValueColumn(column.name()) + +public inline fun Iterable.toValueColumn(column: KProperty): ValueColumn = + toValueColumn(column.columnName) + +/** + * Indicates how [DataColumn.type] should be calculated. + * + * Used in [add], [insert], [convert], [map], [merge], [split] and other [DataFrame] operations + */ +public enum class Infer { + + /** + * Use reified type argument of an inline [DataFrame] operation as [DataColumn.type]. + */ + None, + + /** + * Use reified type argument of an inline [DataFrame] operation as [DataColumn.type], but compute [DataColumn.hasNulls] by checking column [DataColumn.values] for an actual presence of *null* values. + */ + Nulls, + + /** + * Infer [DataColumn.type] and [DataColumn.hasNulls] from actual [DataColumn.values] using optionally provided base type as an upper bound. 
+ */ + Type +} + +/** + * Indicates how [DataColumn.hasNulls] (or, more accurately, DataColumn.type.isMarkedNullable) should be initialized from + * expected schema and actual data when reading schema-defined data formats. + */ +public enum class NullabilityOptions { + /** + * Use only actual data, set [DataColumn.hasNulls] to true if and only if there are null values in the column. + * On empty dataset use False. + */ + Infer, + + /** + * Set [DataColumn.hasNulls] to expected value. Throw exception if column should be not nullable but there are null values. + */ + Checking, + + /** + * Set [DataColumn.hasNulls] to expected value by default. Change False to True if column should be not nullable but there are null values. + */ + Widening +} + +public class NullabilityException() : Exception() + +/** + * @return if column should be marked nullable for current [NullabilityOptions] value with actual [data] and [expectedNulls] per some schema/signature. + * @throws [NullabilityException] for [NullabilityOptions.Checking] if [expectedNulls] is false and [data] contains nulls. 
+ */ +public fun NullabilityOptions.applyNullability(data: List, expectedNulls: Boolean): Boolean { + val hasNulls = data.anyNull() + return when (this) { + NullabilityOptions.Infer -> hasNulls + NullabilityOptions.Checking -> { + if (!expectedNulls && hasNulls) { + throw NullabilityException() + } + expectedNulls + } + NullabilityOptions.Widening -> { + expectedNulls || hasNulls + } + } +} + +public inline fun Iterable.toColumn( + name: String = "", + infer: Infer = Infer.None +): DataColumn = + ( + if (infer == Infer.Type) DataColumn.createWithTypeInference(name, asList()) + else DataColumn.create(name, asList(), typeOf(), infer) + ).forceResolve() + +public inline fun Iterable<*>.toColumnOf(name: String = ""): DataColumn = + DataColumn.create(name, asList() as List, typeOf()).forceResolve() + +public inline fun Iterable.toColumn(ref: ColumnReference): DataColumn = + DataColumn.create(ref.name(), asList()).forceResolve() + +public inline fun Iterable.toColumn(property: KProperty): DataColumn = + DataColumn.create(property.columnName, asList()).forceResolve() + +public fun Iterable.toPath(): ColumnPath = ColumnPath(asList()) + +public fun Iterable.toColumnGroup(name: String): ColumnGroup<*> = dataFrameOf(this).asColumnGroup(name) +public fun Iterable.toColumnGroup(column: ColumnGroupAccessor): ColumnGroup = dataFrameOf(this).cast().asColumnGroup(column) + +public fun Iterable.toColumnGroupOf(name: String): ColumnGroup = toColumnGroup(name).cast() + +// endregion + +// region DataFrame + +public fun AnyFrame.toMap(): Map> = columns().associateBy({ it.name }, { it.toList() }) + +public fun DataFrame.asColumnGroup(name: String = ""): ColumnGroup = when (this) { + is ColumnGroup -> rename(name) + else -> DataColumn.createColumnGroup(name, this) +} + +public fun DataFrame.asColumnGroup(column: ColumnGroupAccessor): ColumnGroup = asColumnGroup(column.name) + +// region as GroupedDataFrame + +public fun DataFrame.asGroupBy(groupedColumnName: String): GroupBy = + 
GroupByImpl(this, getFrameColumn(groupedColumnName).castFrameColumn()) { none() } + +public fun DataFrame.asGroupBy(groupedColumn: ColumnReference>): GroupBy = + GroupByImpl(this, getFrameColumn(groupedColumn.name()).castFrameColumn()) { none() } + +public fun DataFrame.asGroupBy(): GroupBy { + val groupCol = columns().single { it.isFrameColumn() }.asAnyFrameColumn().castFrameColumn() + return asGroupBy { groupCol } +} + +public fun DataFrame.asGroupBy(selector: ColumnSelector>): GroupBy { + val column = getColumn(selector).asFrameColumn() + return GroupByImpl(this, column) { none() } +} + +// endregion + +// endregion + +// region DataRow + +public fun DataRow.toDataFrame(): DataFrame = owner[index..index] + +public fun AnyRow.toMap(): Map = df().columns().associate { it.name() to it[index] } + +// endregion + +// region Array + +public inline fun Array.toValueColumn(name: String): ValueColumn = + DataColumn.createValueColumn(name, this.asList(), typeOf()) + +public fun Array.toPath(): ColumnPath = ColumnPath(this.asList()) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt new file mode 100644 index 000000000..a81da1c9a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -0,0 +1,236 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyBaseCol +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.AnyColumnGroupAccessor +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsContainer +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import 
org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.Selector +import org.jetbrains.kotlinx.dataframe.columns.BaseColumn +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException +import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException +import org.jetbrains.kotlinx.dataframe.impl.api.insertImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.resolveSingle +import kotlin.reflect.KProperty + +/** + * `add` operation adds new columns to DataFrame. + */ + +// region Add existing columns + +/** + * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list. + * + * Original [DataFrame] is not modified. + * + * @param columns columns to add + * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names + * @throws [UnequalColumnSizesException] if columns in expected result have different sizes + * @return new [DataFrame] with added columns + */ +public fun DataFrame.add(vararg columns: AnyBaseCol): DataFrame = addAll(columns.asIterable()) + +/** + * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list. + * + * Original [DataFrame] is not modified. + * + * @param columns columns to add + * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names + * @throws [UnequalColumnSizesException] if columns in expected result have different sizes + * @return new [DataFrame] with added columns + */ +public fun DataFrame.addAll(columns: Iterable): DataFrame = + dataFrameOf(columns() + columns).cast() + +/** + * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. + * + * Original [DataFrame] is not modified. 
+ * + * @param dataFrames dataFrames to get columns from + * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names + * @throws [UnequalColumnSizesException] if columns in expected result have different sizes + * @return new [DataFrame] with added columns + */ +public fun DataFrame.add(vararg dataFrames: AnyFrame): DataFrame = addAll(dataFrames.asIterable()) + +/** + * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. + * + * Original [DataFrame] is not modified. + * + * @param dataFrames dataFrames to get columns from + * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names + * @throws [UnequalColumnSizesException] if columns in expected result have different sizes + * @return new [DataFrame] with added columns + */ +@JvmName("addAllFrames") +public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = + addAll(dataFrames.flatMap { it.columns() }) + +// endregion + +// region Create and add a single column + +/** + * Receiver that is used in [add] and [update] operations to access new (added or updated) column value in preceding row. + */ +public interface AddDataRow : DataRow { + + /** + * Returns a new value that was already computed for some preceding row during current [add] or [update] column operation. + * + * Can be used to compute series of values with recurrence relations, e.g. fibonacci. + * + * @throws IndexOutOfBoundsException when called on a successive row that doesn't have new value yet + */ + public fun AnyRow.newValue(): C +} + +/** + * [AddExpression] is used to express or select any instance of `R` using the given instance of [AddDataRow]`` as + * `this` and `it`. 
+ * + * Shorthand for: + * ```kotlin + * AddDataRow.(it: AddDataRow) -> R + * ``` + */ +public typealias AddExpression = Selector, R> + +/** + * Creates new column using row [expression] and adds it to the end of [DataFrame] + * + * Original [DataFrame] is not modified. + * + * @param name name for a new column. If it is empty, a unique column name will be generated. Otherwise, it should be unique for original [DataFrame]. + * @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column + * @param expression [AddExpression] that computes column value for every [DataRow] + * @return new [DataFrame] with added column + * @throws DuplicateColumnNamesException if [DataFrame] already contains a column with given [name] + */ +public inline fun DataFrame.add( + name: String, + infer: Infer = Infer.Nulls, + noinline expression: AddExpression +): DataFrame = + (this + mapToColumn(name, infer, expression)) + +public inline fun DataFrame.add( + property: KProperty, + infer: Infer = Infer.Nulls, + noinline expression: AddExpression +): DataFrame = + (this + mapToColumn(property, infer, expression)) + +public inline fun DataFrame.add( + column: ColumnAccessor, + infer: Infer = Infer.Nulls, + noinline expression: AddExpression +): DataFrame = + add(column.path(), infer, expression) + +public inline fun DataFrame.add( + path: ColumnPath, + infer: Infer = Infer.Nulls, + noinline expression: AddExpression +): DataFrame { + val col = mapToColumn(path.name(), infer, expression) + if (path.size == 1) return this + col + return insertImpl(path, col) +} + +// endregion + +// region Create and add several columns + +public class AddDsl(@PublishedApi internal val df: DataFrame) : ColumnsContainer by df, ColumnSelectionDsl { + + // TODO: support adding column into path + internal val columns = mutableListOf() + + public fun add(column: Column): Boolean = columns.add(column.resolveSingle(df)!!.data) + + public operator fun Column.unaryPlus(): 
Boolean = add(this) + + public operator fun String.unaryPlus(): Boolean = add(df[this]) + + @PublishedApi + internal inline fun add( + name: String, + infer: Infer = Infer.Nulls, + noinline expression: RowExpression + ): Boolean = add(df.mapToColumn(name, infer, expression)) + + public inline fun expr(noinline expression: RowExpression): DataColumn { + return df.mapToColumn("", Infer.Nulls, expression) + } + + public inline infix fun String.from(noinline expression: RowExpression): Boolean = + add(this, Infer.Nulls, expression) + + // TODO: use path instead of name + public inline infix fun ColumnAccessor.from(noinline expression: RowExpression): Boolean = + name().from(expression) + + public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = + add(name, Infer.Nulls, expression) + + public infix fun String.from(column: Column): Boolean = add(column.rename(this)) + public inline infix fun ColumnAccessor.from(column: ColumnReference): Boolean = name() from column + public inline infix fun KProperty.from(column: ColumnReference): Boolean = name from column + + public infix fun Column.into(name: String): Boolean = add(rename(name)) + public infix fun ColumnReference.into(column: ColumnAccessor): Boolean = into(column.name()) + public infix fun ColumnReference.into(column: KProperty): Boolean = into(column.name) + + public operator fun String.invoke(body: AddDsl.() -> Unit): Unit = group(this, body) + public infix fun AnyColumnGroupAccessor.from(body: AddDsl.() -> Unit): Unit = group(this, body) + + public fun group(column: AnyColumnGroupAccessor, body: AddDsl.() -> Unit): Unit = group(column.name(), body) + public fun group(name: String, body: AddDsl.() -> Unit) { + val dsl = AddDsl(df) + body(dsl) + add(dsl.columns.toColumnGroup(name)) + } + + public fun group(body: AddDsl.() -> Unit): AddGroup = AddGroup(body) + + public infix fun AddGroup.into(groupName: String): Unit = group(groupName, body) + public infix fun AddGroup.into(column: 
AnyColumnGroupAccessor): Unit = into(column.name()) +} + +public fun DataFrame.add(body: AddDsl.() -> Unit): DataFrame { + val dsl = AddDsl(this) + body(dsl) + return dataFrameOf(this@add.columns() + dsl.columns).cast() +} + +public inline fun GroupBy.add( + name: String, + infer: Infer = Infer.Nulls, + noinline expression: RowExpression +): GroupBy = + updateGroups { add(name, infer, expression) } + +public inline fun GroupBy.add( + column: ColumnAccessor, + infer: Infer = Infer.Nulls, + noinline expression: RowExpression +): GroupBy = + add(column.name(), infer, expression) + +public class AddGroup(internal val body: AddDsl.() -> Unit) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/addId.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/addId.kt new file mode 100644 index 000000000..51251c8ad --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/addId.kt @@ -0,0 +1,22 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor + +// region DataColumn + +public fun AnyCol.addId(columnName: String = "id"): AnyFrame = + toDataFrame().addId(columnName) + +// endregion + +// region DataFrame + +public fun DataFrame.addId(column: ColumnAccessor): DataFrame = insert(column) { index() }.at(0) + +public fun DataFrame.addId(columnName: String = "id"): DataFrame = + insert(columnName) { index() }.at(0) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/aggregate.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/aggregate.kt new file mode 100644 index 000000000..ad893e9be --- /dev/null +++ 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/aggregate.kt @@ -0,0 +1,11 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.Selector +import org.jetbrains.kotlinx.dataframe.aggregation.AggregateDsl + +// region Pivot + +public fun Pivot.aggregate(separate: Boolean = false, body: Selector, R>): DataRow = delegate { aggregate(separate, body) } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/all.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/all.kt new file mode 100644 index 000000000..f12fed700 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/all.kt @@ -0,0 +1,34 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.Predicate +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.columns.size +import org.jetbrains.kotlinx.dataframe.columns.values +import org.jetbrains.kotlinx.dataframe.impl.owner +import org.jetbrains.kotlinx.dataframe.index + +// region DataColumn + +/** Returns `true` if all [values] match the given [predicate] or [values] is empty. */ +public fun DataColumn.all(predicate: Predicate): Boolean = values.all(predicate) + +/** Returns `true` if all [values] are `null` or [values] is empty. */ +public fun DataColumn.allNulls(): Boolean = size == 0 || all { it == null } + +// endregion + +// region DataRow + +public fun AnyRow.allNA(): Boolean = owner.columns().all { it[index].isNA } + +// endregion + +// region DataFrame + +/** Returns `true` if all [rows] match the given [predicate] or [rows] is empty. 
*/ +public fun DataFrame.all(predicate: RowFilter): Boolean = rows().all { predicate(it, it) } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt new file mode 100644 index 000000000..d8af3cf83 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt @@ -0,0 +1,19 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.Predicate +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.columns.values + +// region DataColumn + +public fun DataColumn.any(predicate: Predicate): Boolean = values.any(predicate) + +// endregion + +// region DataFrame + +public fun DataFrame.any(predicate: RowFilter): Boolean = rows().any { predicate(it, it) } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/append.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/append.kt new file mode 100644 index 000000000..aa2e1e286 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/append.kt @@ -0,0 +1,30 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.values +import org.jetbrains.kotlinx.dataframe.impl.api.updateWith +import org.jetbrains.kotlinx.dataframe.ncol +import org.jetbrains.kotlinx.dataframe.nrow + +// region DataFrame + +public fun DataFrame.append(vararg values: Any?): DataFrame { + val ncol = ncol + check(values.size % ncol == 0) { "Invalid number of arguments. 
Multiple of $ncol is expected, but actual was: ${values.size}" } + val newRows = values.size / ncol + return columns().mapIndexed { colIndex, col -> + val newValues = (0 until newRows).map { values[colIndex + it * ncol] } + col.updateWith(col.values + newValues) + }.toDataFrame().cast() +} + +public fun DataFrame.appendNulls(numberOfRows: Int = 1): DataFrame { + require(numberOfRows >= 0) + if (numberOfRows == 0) return this + if (ncol == 0) return DataFrame.empty(nrow + numberOfRows).cast() + return columns().map { col -> + col.updateWith(col.values + arrayOfNulls(numberOfRows)) + }.toDataFrame().cast() +} + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asIterable.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asIterable.kt new file mode 100644 index 000000000..2b5c379f4 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asIterable.kt @@ -0,0 +1,9 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn + +// region DataColumn + +public fun DataColumn.asIterable(): Iterable = values() + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asSequence.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asSequence.kt new file mode 100644 index 000000000..73af1ef41 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asSequence.kt @@ -0,0 +1,17 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow + +// region DataColumn + +public fun DataColumn.asSequence(): Sequence = asIterable().asSequence() + +// endregion + +// region DataFrame + +public fun DataFrame.asSequence(): Sequence> = rows().asSequence() + +// endregion diff --git 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt new file mode 100644 index 000000000..3e6d9fb44 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt @@ -0,0 +1,15 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression + +// region DataFrame + +public fun DataFrame.associateBy(transform: RowExpression): Map> = + rows().associateBy { transform(it, it) } + +public fun DataFrame.associate(transform: RowExpression>): Map = + rows().associate { transform(it, it) } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt new file mode 100644 index 000000000..d25611827 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt @@ -0,0 +1,11 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.impl.between + +// region DataColumn + +public fun > DataColumn.between(left: T, right: T, includeBoundaries: Boolean = true): DataColumn = + map { it.between(left, right, includeBoundaries) } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt new file mode 100644 index 000000000..921fdd412 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt @@ -0,0 +1,49 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import 
org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.SingleColumn +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl +import kotlin.reflect.typeOf + +public fun AnyFrame.cast(): DataFrame = this as DataFrame + +public inline fun AnyFrame.cast(verify: Boolean = true): DataFrame = if (verify) convertToImpl( + typeOf(), + allowConversion = false, + ExcessiveColumns.Keep +).cast() +else cast() + +public fun AnyRow.cast(): DataRow = this as DataRow + +public inline fun AnyRow.cast(verify: Boolean = true): DataRow = df().cast(verify)[0] + +public fun AnyCol.cast(): DataColumn = this as DataColumn + +public fun ValueColumn<*>.cast(): ValueColumn = this as ValueColumn + +public fun FrameColumn<*>.castFrameColumn(): FrameColumn = this as FrameColumn + +public fun ColumnGroup<*>.cast(): ColumnGroup = this as ColumnGroup + +public fun ColumnWithPath<*>.cast(): ColumnWithPath = this as ColumnWithPath + +public fun ColumnAccessor<*>.cast(): ColumnAccessor = this as ColumnAccessor + +public fun ColumnSet<*>.cast(): ColumnSet = this as ColumnSet + +public fun ColumnReference<*>.cast(): ColumnReference = this as ColumnReference + +public fun SingleColumn<*>.cast(): SingleColumn = this as SingleColumn diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt new file mode 100644 index 000000000..67f3dec6d --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt @@ -0,0 +1,25 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.impl.getListType +import org.jetbrains.kotlinx.dataframe.nrow +import org.jetbrains.kotlinx.dataframe.type + +public fun DataFrame.chunked(size: Int, name: String = "groups"): FrameColumn { + val startIndices = (0 until nrow step size) + return DataColumn.createFrameColumn(name, this, startIndices) +} + +public fun DataColumn.chunked(size: Int): ValueColumn> { + val values = toList().chunked(size) + return DataColumn.createValueColumn(name(), values, getListType(type)) +} + +public fun ColumnGroup.chunked(size: Int): FrameColumn = chunked(size, name()) + +public fun DataColumn>.chunked(size: Int): FrameColumn = asColumnGroup().chunked(size) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/concat.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/concat.kt new file mode 100644 index 000000000..c74cfc4a0 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/concat.kt @@ -0,0 +1,58 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.columns.values +import org.jetbrains.kotlinx.dataframe.impl.api.concatImpl +import 
org.jetbrains.kotlinx.dataframe.impl.asList + +// region DataColumn + +public fun DataColumn.concat(vararg other: DataColumn): DataColumn = concatImpl(name, listOf(this) + other) + +public fun DataColumn>.concat(): DataFrame = values.concat() + +public fun DataColumn>.concat(): List = values.flatten() + +// endregion + +// region DataRow + +public fun DataRow.concat(vararg rows: DataRow): DataFrame = (listOf(this) + rows).concat() + +// endregion + +// region DataFrame + +public fun DataFrame.concat(vararg frames: DataFrame): DataFrame = concatImpl(listOf(this) + frames) + +@JvmName("concatT") +public fun DataFrame.concat(rows: Iterable>): DataFrame = (rows() + rows).concat() + +public fun DataFrame.concat(frames: Iterable>): DataFrame = (listOf(this) + frames).concat() + +// endregion + +// region GroupBy + +public fun GroupBy.concat(): DataFrame = groups.concat() + +// endregion + +// region Iterable + +public fun Iterable>.concat(): DataFrame { + return concatImpl(asList()) +} + +public fun Iterable>.concat(): DataColumn { + val list = asList() + if (list.isEmpty()) return DataColumn.empty().cast() + return concatImpl(list[0].name(), list) +} + +@JvmName("concatRows") +public fun Iterable?>.concat(): DataFrame = concatImpl(map { it?.toDataFrame() ?: DataFrame.empty(1).cast() }) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt new file mode 100644 index 000000000..5431d66d7 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -0,0 +1,332 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyBaseCol +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnGroupReference 
+import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException +import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException +import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl +import org.jetbrains.kotlinx.dataframe.impl.asList +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnAccessorImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumn +import org.jetbrains.kotlinx.dataframe.impl.columns.createComputedColumnReference +import org.jetbrains.kotlinx.dataframe.impl.columns.forceResolve +import org.jetbrains.kotlinx.dataframe.impl.columns.unbox +import org.jetbrains.kotlinx.dataframe.size +import kotlin.random.Random +import kotlin.random.nextInt +import kotlin.reflect.KProperty +import kotlin.reflect.full.withNullability +import kotlin.reflect.typeOf + +// region create ColumnAccessor + +// region column + +public fun column(): ColumnDelegate = ColumnDelegate() +public fun column(name: String): ColumnAccessor = ColumnAccessorImpl(name) +public fun column(path: ColumnPath): ColumnAccessor = ColumnAccessorImpl(path) +public fun ColumnGroupReference.column(): ColumnDelegate = ColumnDelegate(this) +public fun ColumnGroupReference.column(name: String): ColumnAccessor = ColumnAccessorImpl(path() + name) +public fun ColumnGroupReference.column(path: ColumnPath): ColumnAccessor = ColumnAccessorImpl(this.path() + path) + +public inline fun column( + name: String = "", + infer: Infer = Infer.Nulls, + noinline 
expression: RowExpression, +): ColumnReference = createComputedColumnReference(name, typeOf(), infer, expression) + +public inline fun column( + df: DataFrame, + name: String = "", + infer: Infer = Infer.Nulls, + noinline expression: RowExpression, +): ColumnReference = createComputedColumnReference(name, typeOf(), infer, expression as RowExpression) + +// endregion + +// region columnGroup + +public fun columnGroup(): ColumnDelegate = column() + +@JvmName("columnGroupTyped") +public fun columnGroup(): ColumnDelegate> = column() + +public fun columnGroup(name: String): ColumnAccessor = column(name) + +@JvmName("columnGroupTyped") +public fun columnGroup(name: String): ColumnAccessor> = column(name) + +public fun columnGroup(path: ColumnPath): ColumnAccessor = column(path) + +@JvmName("columnGroupTyped") +public fun columnGroup(path: ColumnPath): ColumnAccessor> = column(path) + +public fun ColumnGroupReference.columnGroup(): ColumnDelegate = ColumnDelegate(this) + +@JvmName("columnGroupTyped") +public fun ColumnGroupReference.columnGroup(): ColumnDelegate> = ColumnDelegate(this) + +public fun ColumnGroupReference.columnGroup(name: String): ColumnAccessor = ColumnAccessorImpl(path() + name) + +@JvmName("columnGroupTyped") +public fun ColumnGroupReference.columnGroup(name: String): ColumnAccessor> = + ColumnAccessorImpl(path() + name) + +public fun ColumnGroupReference.columnGroup(path: ColumnPath): ColumnAccessor = + ColumnAccessorImpl(this.path() + path) + +@JvmName("columnGroupTyped") +public fun ColumnGroupReference.columnGroup(path: ColumnPath): ColumnAccessor> = + ColumnAccessorImpl(this.path() + path) + +// endregion + +// region frameColumn + +public fun frameColumn(): ColumnDelegate = column() + +@JvmName("frameColumnTyped") +public fun frameColumn(): ColumnDelegate> = column() + +public fun frameColumn(name: String): ColumnAccessor = column(name) + +@JvmName("frameColumnTyped") +public fun frameColumn(name: String): ColumnAccessor> = column(name) + +public 
fun frameColumn(path: ColumnPath): ColumnAccessor = column(path) + +@JvmName("frameColumnTyped") +public fun frameColumn(path: ColumnPath): ColumnAccessor> = column(path) + +public fun ColumnGroupReference.frameColumn(): ColumnDelegate = ColumnDelegate(this) + +@JvmName("frameColumnTyped") +public fun ColumnGroupReference.frameColumn(): ColumnDelegate> = ColumnDelegate(this) + +public fun ColumnGroupReference.frameColumn(name: String): ColumnAccessor = ColumnAccessorImpl(path() + name) + +@JvmName("frameColumnTyped") +public fun ColumnGroupReference.frameColumn(name: String): ColumnAccessor> = + ColumnAccessorImpl(path() + name) + +public fun ColumnGroupReference.frameColumn(path: ColumnPath): ColumnAccessor = + ColumnAccessorImpl(this.path() + path) + +@JvmName("frameColumnTyped") +public fun ColumnGroupReference.frameColumn(path: ColumnPath): ColumnAccessor> = + ColumnAccessorImpl(this.path() + path) + +// endregion + +public class ColumnDelegate(private val parent: ColumnGroupReference? 
= null) { + public operator fun getValue(thisRef: Any?, property: KProperty<*>): ColumnAccessor = named(property.columnName) + + public infix fun named(name: String): ColumnAccessor = + parent?.let { ColumnAccessorImpl(it.path() + name) } ?: ColumnAccessorImpl(name) +} + +// endregion + +// region create DataColumn + +public inline fun columnOf(vararg values: T): DataColumn = + createColumn(values.asIterable(), typeOf(), true).forceResolve() + +public fun columnOf(vararg values: AnyBaseCol): DataColumn = columnOf(values.asIterable()).forceResolve() + +public fun columnOf(vararg frames: DataFrame): FrameColumn = columnOf(frames.asIterable()).forceResolve() + +public fun columnOf(columns: Iterable): DataColumn = + DataColumn.createColumnGroup( + name = "", + df = dataFrameOf(columns) + ) + .asDataColumn() + .forceResolve() + +public fun columnOf(frames: Iterable>): FrameColumn = DataColumn.createFrameColumn( + "", + frames.toList() +).forceResolve() + +public inline fun column(values: Iterable): DataColumn = + createColumn(values, typeOf(), false).forceResolve() + +// endregion + +// region create DataFrame + +/** + * Creates new [DataFrame] with given [columns] + * + * All named columns must have unique names. For columns with empty names unique column names are generated: "untitled", "untitled1", "untitled2" etc. + * + * All columns must have equal sizes. 
+ * + * @throws [DuplicateColumnNamesException] if column names are not unique + * @throws [UnequalColumnSizesException] if column sizes are not equal + * @param columns columns for [DataFrame] + */ +public fun dataFrameOf(columns: Iterable): AnyFrame { + val cols = columns.map { it.unbox() } + val nrow = if (cols.isEmpty()) 0 else cols[0].size + return DataFrameImpl(cols, nrow) +} + +public fun dataFrameOf(vararg header: ColumnReference<*>): DataFrameBuilder = DataFrameBuilder(header.map { it.name() }) + +public fun dataFrameOf(vararg columns: AnyBaseCol): AnyFrame = dataFrameOf(columns.asIterable()) + +public fun dataFrameOf(vararg header: String): DataFrameBuilder = dataFrameOf(header.toList()) + +public inline fun dataFrameOf(vararg header: String, fill: (String) -> Iterable): AnyFrame = + dataFrameOf(header.asIterable(), fill) + +public fun dataFrameOf(header: Iterable): DataFrameBuilder = DataFrameBuilder(header.asList()) + +public fun dataFrameOf(vararg columns: Pair>): AnyFrame = + columns.map { it.second.toColumn(it.first, Infer.Type) }.toDataFrame() + +public fun dataFrameOf(header: Iterable, values: Iterable): AnyFrame = + dataFrameOf(header).withValues(values) + +public inline fun dataFrameOf(header: Iterable, fill: (T) -> Iterable): AnyFrame = + header.map { value -> + fill(value).asList().let { + DataColumn.create( + value.toString(), + it + ) + } + }.toDataFrame() + +public fun dataFrameOf(header: CharProgression): DataFrameBuilder = dataFrameOf(header.map { it.toString() }) + +public class DataFrameBuilder(private val header: List) { + + public operator fun invoke(vararg columns: AnyCol): AnyFrame = invoke(columns.asIterable()) + + public operator fun invoke(columns: Iterable): AnyFrame { + val cols = columns.asList() + require(cols.size == header.size) { "Number of columns differs from number of column names" } + return cols.mapIndexed { i, col -> + col.rename(header[i]) + }.toDataFrame() + } + + public operator fun invoke(vararg values: Any?): 
AnyFrame = withValues(values.asIterable()) + + @JvmName("invoke1") + internal fun withValues(values: Iterable): AnyFrame { + val list = values.asList() + + val ncol = header.size + + require(header.size > 0 && list.size.rem(ncol) == 0) { + "Number of values ${list.size} is not divisible by number of columns $ncol" + } + + val nrow = list.size / ncol + + return (0 until ncol).map { col -> + val colValues = (0 until nrow).map { row -> + list[row * ncol + col] + } + DataColumn.createWithTypeInference(header[col], colValues) + }.toDataFrame() + } + + public operator fun invoke(args: Sequence): AnyFrame = invoke(*args.toList().toTypedArray()) + + public fun withColumns(columnBuilder: (String) -> AnyCol): AnyFrame = header.map(columnBuilder).toDataFrame() + + public inline operator fun invoke(crossinline valuesBuilder: (String) -> Iterable): AnyFrame = + withColumns { name -> + valuesBuilder(name).let { + DataColumn.create( + name, + it.asList() + ) + } + } + + public inline fun fill(nrow: Int, value: C): AnyFrame = withColumns { name -> + DataColumn.createValueColumn( + name, + List(nrow) { value }, + typeOf().withNullability(value == null) + ) + } + + public inline fun nulls(nrow: Int): AnyFrame = fill(nrow, null) + + public inline fun fillIndexed(nrow: Int, crossinline init: (Int, String) -> C): AnyFrame = + withColumns { name -> + DataColumn.create( + name, + List(nrow) { init(it, name) } + ) + } + + public inline fun fill(nrow: Int, crossinline init: (Int) -> C): AnyFrame = withColumns { name -> + DataColumn.create( + name, + List(nrow, init) + ) + } + + private inline fun fillNotNull(nrow: Int, crossinline init: (Int) -> C) = withColumns { name -> + DataColumn.createValueColumn( + name, + List(nrow, init), + typeOf() + ) + } + + public fun randomInt(nrow: Int): AnyFrame = fillNotNull(nrow) { Random.nextInt() } + + public fun randomInt(nrow: Int, range: IntRange): AnyFrame = fillNotNull(nrow) { Random.nextInt(range) } + + public fun randomDouble(nrow: Int): AnyFrame 
= fillNotNull(nrow) { Random.nextDouble() } + + public fun randomDouble(nrow: Int, range: ClosedRange): AnyFrame = + fillNotNull(nrow) { Random.nextDouble(range.start, range.endInclusive) } + + public fun randomFloat(nrow: Int): AnyFrame = fillNotNull(nrow) { Random.nextFloat() } + + public fun randomLong(nrow: Int): AnyFrame = fillNotNull(nrow) { Random.nextLong() } + + public fun randomLong(nrow: Int, range: ClosedRange): AnyFrame = + fillNotNull(nrow) { Random.nextLong(range.start, range.endInclusive) } + + public fun randomBoolean(nrow: Int): AnyFrame = fillNotNull(nrow) { Random.nextBoolean() } +} + +/** + * Returns [DataFrame] with no rows and no columns. + * + * To create [DataFrame] with empty columns or empty rows see [DataFrame.empty] + * + * @param T schema marker for [DataFrame] + */ +public fun emptyDataFrame(): DataFrame = DataFrame.empty().cast() + +// endregion + +// region create ColumnPath + +public fun pathOf(vararg columnNames: String): ColumnPath = ColumnPath(columnNames.asList()) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt new file mode 100644 index 000000000..a8eb4bdba --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt @@ -0,0 +1,387 @@ +package org.jetbrains.kotlinx.dataframe.api + +import kotlinx.datetime.Instant +import kotlinx.datetime.LocalDate +import kotlinx.datetime.LocalDateTime +import kotlinx.datetime.TimeZone +import kotlinx.datetime.toLocalDateTime +import org.jetbrains.kotlinx.dataframe.AnyBaseCol +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.RowColumnExpression +import 
org.jetbrains.kotlinx.dataframe.RowValueExpression +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME +import org.jetbrains.kotlinx.dataframe.dataTypes.IMG +import org.jetbrains.kotlinx.dataframe.exceptions.CellConversionException +import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException +import org.jetbrains.kotlinx.dataframe.impl.api.Parsers +import org.jetbrains.kotlinx.dataframe.impl.api.convertRowColumnImpl +import org.jetbrains.kotlinx.dataframe.impl.api.convertToTypeImpl +import org.jetbrains.kotlinx.dataframe.impl.api.defaultTimeZone +import org.jetbrains.kotlinx.dataframe.impl.api.toLocalDate +import org.jetbrains.kotlinx.dataframe.impl.api.toLocalDateTime +import org.jetbrains.kotlinx.dataframe.impl.api.toLocalTime +import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.headPlusArray +import org.jetbrains.kotlinx.dataframe.io.toDataFrame +import org.jetbrains.kotlinx.dataframe.path +import java.math.BigDecimal +import java.net.URL +import java.time.LocalTime +import java.util.Locale +import kotlin.reflect.KProperty +import kotlin.reflect.KType +import kotlin.reflect.typeOf + +public fun DataFrame.convert(columns: ColumnsSelector): Convert = + Convert(this, columns) + +public fun DataFrame.convert(vararg columns: KProperty): Convert = + convert { columns.toColumns() } + +public fun DataFrame.convert(vararg columns: String): Convert = convert { columns.toColumns() } +public fun DataFrame.convert(vararg columns: ColumnReference): Convert = + convert { columns.toColumns() } + +public inline fun DataFrame.convert( + firstCol: ColumnReference, + vararg cols: ColumnReference, + infer: Infer = Infer.Nulls, + noinline expression: RowValueExpression +): DataFrame = + convert(*headPlusArray(firstCol, cols)).with(infer, expression) + +public inline fun 
DataFrame.convert( + firstCol: KProperty, + vararg cols: KProperty, + infer: Infer = Infer.Nulls, + noinline expression: RowValueExpression +): DataFrame = + convert(*headPlusArray(firstCol, cols)).with(infer, expression) + +public inline fun DataFrame.convert( + firstCol: String, + vararg cols: String, + infer: Infer = Infer.Nulls, + noinline expression: RowValueExpression +): DataFrame = + convert(*headPlusArray(firstCol, cols)).with(infer, expression) + +public inline fun Convert.notNull(crossinline expression: RowValueExpression): DataFrame = + with { + if (it == null) null + else expression(this, it) + } + +public data class Convert(val df: DataFrame, val columns: ColumnsSelector) { + public fun cast(): Convert = Convert(df, columns as ColumnsSelector) + + public inline fun to(): DataFrame = to(typeOf()) +} + +public fun Convert.to(type: KType): DataFrame = to { it.convertTo(type) } + +public inline fun Convert.with( + infer: Infer = Infer.Nulls, + noinline rowConverter: RowValueExpression +): DataFrame = + withRowCellImpl(typeOf(), infer, rowConverter) + +public inline fun Convert.perRowCol( + infer: Infer = Infer.Nulls, + noinline expression: RowColumnExpression +): DataFrame = + convertRowColumnImpl(typeOf(), infer, expression) + +public fun Convert.to(columnConverter: DataFrame.(DataColumn) -> AnyBaseCol): DataFrame = + df.replace(columns).with { columnConverter(df, it) } + +public inline fun AnyCol.convertTo(): DataColumn = convertTo(typeOf()) as DataColumn +public fun AnyCol.convertTo(newType: KType): AnyCol { + if (this.type() == typeOf() && newType == typeOf()) return (this as DataColumn).convertToDouble() + if (this.type() == typeOf() && newType == typeOf()) return (this as DataColumn).convertToDouble() + return convertToTypeImpl(newType) +} + +@JvmName("convertToLocalDateTimeFromT") +public fun DataColumn.convertToLocalDateTime(): DataColumn = convertTo() +public fun DataColumn.convertToLocalDateTime(): DataColumn = convertTo() + 
+@JvmName("convertToLocalDateFromT") +public fun DataColumn.convertToLocalDate(): DataColumn = convertTo() +public fun DataColumn.convertToLocalDate(): DataColumn = convertTo() + +@JvmName("convertToLocalTimeFromT") +public fun DataColumn.convertToLocalTime(): DataColumn = convertTo() +public fun DataColumn.convertToLocalTime(): DataColumn = convertTo() + +@JvmName("convertToByteFromT") +public fun DataColumn.convertToByte(): DataColumn = convertTo() +public fun DataColumn.convertToByte(): DataColumn = convertTo() + +@JvmName("convertToShortFromT") +public fun DataColumn.convertToShort(): DataColumn = convertTo() +public fun DataColumn.convertToShort(): DataColumn = convertTo() + +@JvmName("convertToIntFromT") +public fun DataColumn.convertToInt(): DataColumn = convertTo() +public fun DataColumn.convertToInt(): DataColumn = convertTo() + +@JvmName("convertToLongFromT") +public fun DataColumn.convertToLong(): DataColumn = convertTo() +public fun DataColumn.convertToLong(): DataColumn = convertTo() + +@JvmName("convertToStringFromT") +public fun DataColumn.convertToString(): DataColumn = convertTo() +public fun DataColumn.convertToString(): DataColumn = convertTo() + +@JvmName("convertToDoubleFromT") +public fun DataColumn.convertToDouble(): DataColumn = convertTo() +public fun DataColumn.convertToDouble(): DataColumn = convertTo() + +/** + * Parse String column to Double considering locale (number format). + * If [locale] parameter is defined, its number format is used for parsing. + * If [locale] parameter is null, the current system locale is used. If column can not be parsed, then POSIX format is used. + */ +@JvmName("convertToDoubleFromString") +public fun DataColumn.convertToDouble(locale: Locale? = null): DataColumn { + return this.castToNullable().convertToDouble(locale).castToNotNullable() +} + +/** + * Parse String column to Double considering locale (number format). + * If [locale] parameter is defined, its number format is used for parsing. 
+ * If [locale] parameter is null, the current system locale is used. If column can not be parsed, then POSIX format is used. + */ +@JvmName("convertToDoubleFromStringNullable") +public fun DataColumn.convertToDouble(locale: Locale? = null): DataColumn { + fun applyParser(parser: (String) -> Double?): DataColumn { + var currentRow = 0 + try { + return mapIndexed { row, value -> + currentRow = row + value?.let { parser(value.trim()) ?: throw TypeConversionException(value, typeOf(), typeOf(), path) } + } + } catch (e: TypeConversionException) { + throw CellConversionException(e.value, e.from, e.to, path, currentRow, e) + } + } + + return if (locale != null) { + val explicitParser = Parsers.getDoubleParser(locale) + applyParser(explicitParser) + } else { + try { + val defaultParser = Parsers.getDoubleParser() + applyParser(defaultParser) + } catch (e: TypeConversionException) { + val posixParser = Parsers.getDoubleParser(Locale.forLanguageTag("C.UTF-8")) + applyParser(posixParser) + } + } +} + +@JvmName("convertToFloatFromT") +public fun DataColumn.convertToFloat(): DataColumn = convertTo() +public fun DataColumn.convertToFloat(): DataColumn = convertTo() + +@JvmName("convertToBigDecimalFromT") +public fun DataColumn.convertToBigDecimal(): DataColumn = convertTo() +public fun DataColumn.convertToBigDecimal(): DataColumn = convertTo() + +@JvmName("convertToBooleanFromT") +public fun DataColumn.convertToBoolean(): DataColumn = convertTo() +public fun DataColumn.convertToBoolean(): DataColumn = convertTo() + +// region convert URL + +public fun Convert.toIFrame(border: Boolean = false, width: Int? = null, height: Int? = null): DataFrame = to { it.map { IFRAME(it.toString(), border, width, height) } } +public fun Convert.toImg(width: Int? = null, height: Int? 
= null): DataFrame = to { it.map { IMG(it.toString(), width, height) } } + +// endregion + +// region toURL + +public fun DataColumn.convertToURL(): DataColumn { + return map { URL(it) } +} + +@JvmName("convertToURLFromStringNullable") +public fun DataColumn.convertToURL(): DataColumn { + return map { it?.let { URL(it) } } +} + +public fun Convert.toURL(): DataFrame = to { it.convertToURL() } + +// endregion + +// region toInstant + +public fun DataColumn.convertToInstant(): DataColumn { + return map { Instant.parse(it) } +} + +@JvmName("convertToInstantFromStringNullable") +public fun DataColumn.convertToInstant(): DataColumn { + return map { it?.let { Instant.parse(it) } } +} + +public fun Convert.toInstant(): DataFrame = to { it.convertToInstant() } + +// endregion + +// region toLocalDate + +@JvmName("convertToLocalDateFromLong") +public fun DataColumn.convertToLocalDate(zone: TimeZone = defaultTimeZone): DataColumn = map { it.toLocalDate(zone) } +public fun DataColumn.convertToLocalDate(zone: TimeZone = defaultTimeZone): DataColumn = map { it?.toLocalDate(zone) } + +@JvmName("convertToLocalDateFromInt") +public fun DataColumn.convertToLocalDate(zone: TimeZone = defaultTimeZone): DataColumn = + map { it.toLong().toLocalDate(zone) } +@JvmName("convertToLocalDateFromIntNullable") +public fun DataColumn.convertToLocalDate(zone: TimeZone = defaultTimeZone): DataColumn = + map { it?.toLong()?.toLocalDate(zone) } + +@JvmName("convertToLocalDateFromString") +public fun DataColumn.convertToLocalDate(pattern: String? = null, locale: Locale? = null): DataColumn { + val converter = Parsers.getDateTimeConverter(LocalDate::class, pattern, locale) + return map { converter(it.trim()) ?: error("Can't convert `$it` to LocalDate") } +} +@JvmName("convertToLocalDateFromStringNullable") +public fun DataColumn.convertToLocalDate(pattern: String? = null, locale: Locale? 
= null): DataColumn { + val converter = Parsers.getDateTimeConverter(LocalDate::class, pattern, locale) + return map { it?.let { converter(it.trim()) ?: error("Can't convert `$it` to LocalDate") } } +} + +@JvmName("toLocalDateFromTLong") +public fun Convert.toLocalDate(zone: TimeZone = defaultTimeZone): DataFrame = to { it.convertToLocalDate(zone) } +@JvmName("toLocalDateFromTInt") +public fun Convert.toLocalDate(zone: TimeZone = defaultTimeZone): DataFrame = to { it.convertToLocalDate(zone) } + +public fun Convert.toLocalDate(pattern: String? = null, locale: Locale? = null): DataFrame = to { it.convertToLocalDate(pattern, locale) } + +public fun Convert.toLocalDate(): DataFrame = to { it.convertTo() } + +// endregion + +// region toLocalTime + +@JvmName("convertToLocalTimeFromLong") +public fun DataColumn.convertToLocalTime(zone: TimeZone = defaultTimeZone): DataColumn = map { it.toLocalTime(zone) } +public fun DataColumn.convertToLocalTime(zone: TimeZone = defaultTimeZone): DataColumn = map { it?.toLocalTime(zone) } + +@JvmName("convertToLocalTimeFromInt") +public fun DataColumn.convertToLocalTime(zone: TimeZone = defaultTimeZone): DataColumn = + map { it.toLong().toLocalTime(zone) } +@JvmName("convertToLocalTimeIntNullable") +public fun DataColumn.convertToLocalTime(zone: TimeZone = defaultTimeZone): DataColumn = + map { it?.toLong()?.toLocalTime(zone) } + +@JvmName("convertToLocalTimeFromString") +public fun DataColumn.convertToLocalTime(pattern: String? = null, locale: Locale? = null): DataColumn { + val converter = Parsers.getDateTimeConverter(LocalTime::class, pattern, locale) + return map { converter(it.trim()) ?: error("Can't convert `$it` to LocalTime") } +} +@JvmName("convertToLocalTimeFromStringNullable") +public fun DataColumn.convertToLocalTime(pattern: String? = null, locale: Locale? 
= null): DataColumn { + val converter = Parsers.getDateTimeConverter(LocalTime::class, pattern, locale) + return map { it?.let { converter(it.trim()) ?: error("Can't convert `$it` to LocalTime") } } +} + +@JvmName("toLocalTimeFromTLong") +public fun Convert.toLocalTime(zone: TimeZone = defaultTimeZone): DataFrame = to { it.convertToLocalTime(zone) } +@JvmName("toLocalTimeFromTInt") +public fun Convert.toLocalTime(zone: TimeZone = defaultTimeZone): DataFrame = to { it.convertToLocalTime(zone) } + +public fun Convert.toLocalTime(pattern: String? = null, locale: Locale? = null): DataFrame = to { it.convertToLocalTime(pattern, locale) } + +public fun Convert.toLocalTime(): DataFrame = to { it.convertTo() } + +// endregion + +// region toLocalDateTime + +@JvmName("convertToLocalDateTimeFromLong") +public fun DataColumn.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn = map { it.toLocalDateTime(zone) } +public fun DataColumn.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn = map { it?.toLocalDateTime(zone) } + +@JvmName("convertToLocalDateTimeFromInstant") +public fun DataColumn.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn = + map { it.toLocalDateTime(zone) } +@JvmName("convertToLocalDateTimeFromInstantNullable") +public fun DataColumn.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn = + map { it?.toLocalDateTime(zone) } + +@JvmName("convertToLocalDateTimeFromInt") +public fun DataColumn.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn = + map { it.toLong().toLocalDateTime(zone) } +@JvmName("convertToLocalDateTimeFromIntNullable") +public fun DataColumn.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn = + map { it?.toLong()?.toLocalDateTime(zone) } + +@JvmName("convertToLocalDateTimeFromString") +public fun DataColumn.convertToLocalDateTime(pattern: String? = null, locale: Locale? 
= null): DataColumn { + val converter = Parsers.getDateTimeConverter(LocalDateTime::class, pattern, locale) + return map { converter(it.trim()) ?: error("Can't convert `$it` to LocalDateTime") } +} +@JvmName("convertToLocalDateTimeFromStringNullable") +public fun DataColumn.convertToLocalDateTime(pattern: String? = null, locale: Locale? = null): DataColumn { + val converter = Parsers.getDateTimeConverter(LocalDateTime::class, pattern, locale) + return map { it?.let { converter(it.trim()) ?: error("Can't convert `$it` to LocalDateTime") } } +} + +@JvmName("toLocalDateTimeFromTLong") +public fun Convert.toLocalDateTime(zone: TimeZone = defaultTimeZone): DataFrame = to { it.convertToLocalDateTime(zone) } + +@JvmName("toLocalDateTimeFromTInstant") +public fun Convert.toLocalDateTime(zone: TimeZone = defaultTimeZone): DataFrame = to { it.convertToLocalDateTime(zone) } + +@JvmName("toLocalDateTimeFromTInt") +public fun Convert.toLocalDateTime(zone: TimeZone = defaultTimeZone): DataFrame = to { it.convertToLocalDateTime(zone) } + +public fun Convert.toLocalDateTime(pattern: String? = null, locale: Locale? 
= null): DataFrame = to { it.convertToLocalDateTime(pattern, locale) } + +public fun Convert.toLocalDateTime(): DataFrame = to { it.convertTo() } + +// endregion + +@JvmName("toIntTAny") +public fun Convert.toInt(): DataFrame = to() +public fun Convert.toInt(): DataFrame = to() + +@JvmName("toLongTAny") +public fun Convert.toLong(): DataFrame = to() +public fun Convert.toLong(): DataFrame = to() + +@JvmName("toStrTAny") +public fun Convert.toStr(): DataFrame = to() +public fun Convert.toStr(): DataFrame = to() + +@JvmName("toDoubleTAny") +public fun Convert.toDouble(): DataFrame = to() +public fun Convert.toDouble(): DataFrame = to() + +@JvmName("toFloatTAny") +public fun Convert.toFloat(): DataFrame = to() +public fun Convert.toFloat(): DataFrame = to() + +@JvmName("toBigDecimalTAny") +public fun Convert.toBigDecimal(): DataFrame = to() +public fun Convert.toBigDecimal(): DataFrame = to() + +@JvmName("toBooleanTAny") +public fun Convert.toBoolean(): DataFrame = to() +public fun Convert.toBoolean(): DataFrame = to() + +public fun Convert>>.toDataFrames(containsColumns: Boolean = false): DataFrame = + to { it.toDataFrames(containsColumns) } + +public fun DataColumn>>.toDataFrames(containsColumns: Boolean = false): DataColumn = + map { it.toDataFrame(containsColumns) } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convertTo.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convertTo.kt new file mode 100644 index 000000000..99f64b4d7 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convertTo.kt @@ -0,0 +1,200 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.RowExpression +import 
org.jetbrains.kotlinx.dataframe.exceptions.ColumnNotFoundException +import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException +import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException +import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException +import org.jetbrains.kotlinx.dataframe.impl.api.ConvertSchemaDslInternal +import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl +import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema +import kotlin.reflect.KProperty +import kotlin.reflect.KType +import kotlin.reflect.typeOf + +/** + * Specifies how to handle columns in original dataframe that were not matched to any column in destination dataframe schema. + */ +public enum class ExcessiveColumns { + /** + * Remove excessive columns from resulting dataframe + */ + Remove, + + /** + * Keep excessive columns in resulting dataframe + */ + Keep, + + /** + * Throw [ExcessiveColumnsException] if any excessive columns were found in the original dataframe + */ + Fail +} + +/** + * Holds data context for [fill] operation + */ +public data class ConvertToFill( + internal val dsl: ConvertSchemaDsl, + val columns: ColumnsSelector +) + +/** Provides access to [fromType] and [toSchema] in the flexible [ConvertSchemaDsl.convertIf] method. */ +public class ConverterScope(public val fromType: KType, public val toSchema: ColumnSchema) + +/** + * Dsl to customize column conversion + * + * Example: + * ```kotlin + * df.convertTo { + * // defines how to convert Int? -> String + * convert().with { it?.toString() ?: "No input given" } + * // defines how to convert String -> SomeType + * parser { SomeType(it) } + * // fill missing column `sum` with expression `a+b` + * fill { sum }.with { a + b } + * } + * ``` + */ +public interface ConvertSchemaDsl { + + /** + * Defines how to convert [from]: [A] to [to]: [B].
+ * + * Note: In most cases using `convert().with { }` is more convenient, however + * if you only have [KType], this method can be used. + */ + public fun convert(from: KType, to: KType, converter: (A) -> B) + + /** + * Advanced version of [convert]. + * If you want to define a common conversion for multiple types (or any type), or + * you need extra information about the target, such as its schema, use this method. + * + * The exact type conversion does have higher priority. After that, these flexible conversions will be checked + * in order. + * + * @param condition a function that should return `true` if the conversion should be applied from the given `fromType` + * to the given `toSchema`. + * @param converter a function that performs the conversion with access to a [ConverterScope]. + */ + public fun convertIf( + condition: (fromType: KType, toSchema: ColumnSchema) -> Boolean, + converter: ConverterScope.(Any?) -> Any?, + ) +} + +/** + * Defines how to fill specified columns in destination schema that were not found in original dataframe. + * All [fill] operations for missing columns are executed after successful conversion of matched columns, so converted values of matched columns can be safely used in [with] expression. + * @param columns target columns in destination dataframe schema to be filled + */ +public inline fun ConvertSchemaDsl.fill(noinline columns: ColumnsSelector): ConvertToFill = ConvertToFill(this, columns) + +public fun ConvertToFill.with(expr: RowExpression) { + (dsl as ConvertSchemaDslInternal).fill(columns as ColumnsSelector<*, C>, expr as RowExpression<*, C>) +} + +/** + * Defines how to convert `String` values into given type [C].
+ */ +public inline fun ConvertSchemaDsl<*>.parser(noinline parser: (String) -> C): Unit = + convert().with(parser) + +/** + * Defines how to convert values of given type [C] + */ +public inline fun ConvertSchemaDsl<*>.convert(): ConvertType = ConvertType(this, typeOf()) + +/** + * Defines how to convert values of type [C] into type [R] + */ +public inline fun ConvertType.with(noinline converter: (C) -> R): Unit = + dsl.convert(from, typeOf(), converter) + +public class ConvertType( + @PublishedApi internal val dsl: ConvertSchemaDsl<*>, + @PublishedApi internal val from: KType, + internal val property: KProperty? = null, +) + +// region DataFrame + +/** + * Converts values in [DataFrame] to match given column schema [T]. + * + * Original columns are mapped to destination columns by column [path][DataColumn.path]. + * + * Type converters for every column are selected automatically. See [convert] operation for details. + * + * To specify custom type converters for the particular types use [ConvertSchemaDsl]. + * + * Example of Dsl: + * ```kotlin + * df.convertTo { + * // defines how to convert Int? -> String + * convert().with { it?.toString() ?: "No input given" } + * // defines how to convert String -> SomeType + * parser { SomeType(it) } + * // fill missing column `sum` with expression `a + b` + * fill { sum }.with { a + b } + * } + * ``` + * + * @param [T] class that defines target schema for conversion. + * @param [excessiveColumnsBehavior] how to handle excessive columns in the original [DataFrame]. + * @param [body] optional dsl to define custom type converters. + * @throws [ColumnNotFoundException] if [DataFrame] doesn't contain columns that are required by destination schema. + * @throws [ExcessiveColumnsException] if [DataFrame] contains columns that are not required by destination schema and [excessiveColumnsBehavior] is set to [ExcessiveColumns.Fail]. + * @throws [TypeConverterNotFoundException] if suitable type converter for some column was not found. 
+ * @throws [TypeConversionException] if type converter failed to convert column values. + * @return converted [DataFrame]. + */ +public inline fun AnyFrame.convertTo( + excessiveColumnsBehavior: ExcessiveColumns = ExcessiveColumns.Keep, + noinline body: ConvertSchemaDsl.() -> Unit = {} +): DataFrame = convertToImpl(typeOf(), true, excessiveColumnsBehavior, body).cast() + +/** + * Converts values in [DataFrame] to match given column schema [schemaType]. + * + * Original columns are mapped to destination columns by column [path][DataColumn.path]. + * + * Type converters for every column are selected automatically. See [convert] operation for details. + * + * To specify custom type converters for the particular types use [ConvertSchemaDsl]. + * + * Example of Dsl: + * ```kotlin + * df.convertTo { + * // defines how to convert Int? -> String + * convert().with { it?.toString() ?: "No input given" } + * // defines how to convert String -> SomeType + * parser { SomeType(it) } + * // fill missing column `sum` with expression `a+b` + * fill { sum }.with { a + b } + * } + * ``` + * + * @param [schemaType] defines target schema for conversion. + * @param [excessiveColumnsBehavior] how to handle excessive columns in the original [DataFrame]. + * @param [body] optional dsl to define custom type converters. + * @throws [ColumnNotFoundException] if [DataFrame] doesn't contain columns that are required by destination schema. + * @throws [ExcessiveColumnsException] if [DataFrame] contains columns that are not required by destination schema and [excessiveColumnsBehavior] is set to [ExcessiveColumns.Fail]. + * @throws [TypeConverterNotFoundException] if suitable type converter for some column was not found. + * @throws [TypeConversionException] if type converter failed to convert column values. + * @return converted [DataFrame]. 
+ */ +public fun AnyFrame.convertTo( + schemaType: KType, + excessiveColumnsBehavior: ExcessiveColumns = ExcessiveColumns.Keep, + body: ConvertSchemaDsl.() -> Unit = {}, +): AnyFrame = convertToImpl(schemaType, true, excessiveColumnsBehavior, body) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/copy.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/copy.kt new file mode 100644 index 000000000..c0c930a03 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/copy.kt @@ -0,0 +1,9 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataFrame + +// region DataFrame + +public fun DataFrame.copy(): DataFrame = columns().toDataFrame().cast() + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt new file mode 100644 index 000000000..85126780c --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt @@ -0,0 +1,35 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.api.corrImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty +import kotlin.reflect.typeOf + +internal fun AnyCol.isSuitableForCorr() = isSubtypeOf() || type() == typeOf() + +// region DataFrame + +public data class Corr( + internal val df: DataFrame, + internal val columns: ColumnsSelector +) + +public fun DataFrame.corr(): DataFrame = corr { dfs { it.isSuitableForCorr() } }.withItself() + +public fun DataFrame.corr(columns: ColumnsSelector): Corr = Corr(this, 
columns) +public fun DataFrame.corr(vararg columns: String): Corr = corr { columns.toColumns() } +public fun DataFrame.corr(vararg columns: KProperty): Corr = corr { columns.toColumns() } +public fun DataFrame.corr(vararg columns: ColumnReference): Corr = corr { columns.toColumns() } + +public fun Corr.with(otherColumns: ColumnsSelector): DataFrame = corrImpl(otherColumns) +public fun Corr.with(vararg otherColumns: String): DataFrame = with { otherColumns.toColumns() } +public fun Corr.with(vararg otherColumns: KProperty): DataFrame = with { otherColumns.toColumns() } +public fun Corr.with(vararg otherColumns: ColumnReference): DataFrame = with { otherColumns.toColumns() } + +public fun Corr.withItself(): DataFrame = with(columns) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt new file mode 100644 index 000000000..55a5fb0a1 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt @@ -0,0 +1,54 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.Predicate +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateValue + +// region DataColumn + +public fun DataColumn.count(predicate: Predicate? 
= null): Int = if (predicate == null) size() else values().count(predicate) + +// endregion + +// region DataRow + +public fun AnyRow.count(): Int = columnsCount() +public fun AnyRow.count(predicate: Predicate): Int = values().count(predicate) + +// endregion + +// region DataFrame + +public fun DataFrame.count(): Int = rowsCount() + +public fun DataFrame.count(predicate: RowFilter): Int = rows().count { predicate(it, it) } + +// endregion + +// region GroupBy + +public fun Grouped.count(resultName: String = "count"): DataFrame = + aggregateValue(resultName) { count() default 0 } + +public fun Grouped.count(resultName: String = "count", predicate: RowFilter): DataFrame = + aggregateValue(resultName) { count(predicate) default 0 } + +// endregion + +// region Pivot + +public fun Pivot.count(): DataRow = delegate { count() } +public fun Pivot.count(predicate: RowFilter): DataRow = delegate { count(predicate) } + +// endregion + +// region PivotGroupBy + +public fun PivotGroupBy.count(): DataFrame = aggregate { count() default 0 } +public fun PivotGroupBy.count(predicate: RowFilter): DataFrame = aggregate { count(predicate) default 0 } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt new file mode 100644 index 000000000..37cfdb6c9 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt @@ -0,0 +1,24 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.indices +import kotlin.reflect.KProperty + +// region DataFrame + +public fun AnyFrame.countDistinct(): 
Int = countDistinct { all() } + +public fun DataFrame.countDistinct(columns: ColumnsSelector): Int { + val cols = get(columns) + return indices.distinctBy { i -> cols.map { it[i] } }.size +} + +public fun DataFrame.countDistinct(vararg columns: String): Int = countDistinct { columns.toColumns() } +public fun DataFrame.countDistinct(vararg columns: KProperty): Int = countDistinct { columns.toColumns() } +public fun DataFrame.countDistinct(vararg columns: Column): Int = countDistinct { columns.toColumns() } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt new file mode 100644 index 000000000..be60508cd --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt @@ -0,0 +1,57 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.math.cumSum +import org.jetbrains.kotlinx.dataframe.math.defaultCumSumSkipNA +import org.jetbrains.kotlinx.dataframe.typeClass +import java.math.BigDecimal +import kotlin.reflect.KProperty +import kotlin.reflect.typeOf + +// region DataColumn + +public fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn = when (type()) { + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum(skipNA).cast() + 
typeOf(), typeOf() -> convertToDouble().cumSum(skipNA).cast() + else -> error("Cumsum for type ${type()} is not supported") +} + +private val supportedClasses = setOf(Double::class, Float::class, Int::class, Long::class, BigDecimal::class) + +// endregion + +// region DataFrame + +public fun DataFrame.cumSum(skipNA: Boolean = defaultCumSumSkipNA, columns: ColumnsSelector): DataFrame = + convert(columns).to { if (it.typeClass in supportedClasses) it.cast().cumSum(skipNA) else it } +public fun DataFrame.cumSum(vararg columns: String, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumns() } +public fun DataFrame.cumSum(vararg columns: Column, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumns() } +public fun DataFrame.cumSum(vararg columns: KProperty<*>, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumns() } + +public fun DataFrame.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { allDfs() } + +// endregion + +// region GroupBy + +public fun GroupBy.cumSum(skipNA: Boolean = defaultCumSumSkipNA, columns: ColumnsSelector): GroupBy = + updateGroups { cumSum(skipNA, columns) } +public fun GroupBy.cumSum(vararg columns: String, skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { columns.toColumns() } +public fun GroupBy.cumSum(vararg columns: Column, skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { columns.toColumns() } +public fun GroupBy.cumSum(vararg columns: KProperty<*>, skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { columns.toColumns() } +public fun GroupBy.cumSum(skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { allDfs() } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt new file mode 100644 index 
000000000..9c236c02d --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt @@ -0,0 +1,48 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.api.describeImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty +import kotlin.reflect.KType + +// region DataSchema +@DataSchema +public interface ColumnDescription { + public val name: String + public val path: ColumnPath + public val type: KType + public val count: Int + public val unique: Int + public val nulls: Int + public val top: Any + public val freq: Int + public val mean: Double + public val std: Double + public val min: Any + public val median: Any + public val max: Any +} + +// endregion + +// region DataColumn + +public fun DataColumn.describe(): DataFrame = describeImpl(listOf(this)) + +// endregion + +// region DataFrame + +public fun DataFrame.describe(): DataFrame = describe { allDfs() } +public fun DataFrame.describe(columns: ColumnsSelector): DataFrame = describeImpl(getColumnsWithPaths(columns)) +public fun DataFrame.describe(vararg columns: String): DataFrame = describe { columns.toColumns() } +public fun DataFrame.describe(vararg columns: ColumnReference): DataFrame = describe { columns.toColumns() } +public fun DataFrame.describe(vararg columns: KProperty): DataFrame = describe { columns.toColumns() } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/digitize.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/digitize.kt new file mode 100644 
index 000000000..4d7834af6 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/digitize.kt @@ -0,0 +1,29 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import kotlin.reflect.KClass + +// region DataColumn + +public fun DataColumn.digitize(vararg bins: Int, right: Boolean = false): DataColumn = digitize(bins.toList(), Double::class, right) + +public fun > DataColumn.digitize(vararg bins: T, right: Boolean = false): DataColumn = digitize(bins.toList(), right) + +public fun > DataColumn.digitize(bins: List, kclass: KClass, right: Boolean = false): DataColumn = digitize( + bins.toList().map { org.jetbrains.kotlinx.dataframe.impl.convert(it, kclass) }, + right +) + +public fun > DataColumn.digitize(bins: List, right: Boolean = false): DataColumn { + // TODO: use binary search + // TODO: support descending order of bins + val predicate: (T, T) -> Boolean = if (right) { value, bin -> value <= bin } else { value, bin -> value < bin } + + return map { value -> + val index = bins.indexOfFirst { predicate(value, it) } + if (index == -1) bins.size + else index + } +} + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt new file mode 100644 index 000000000..c0c072c33 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt @@ -0,0 +1,41 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.indices +import kotlin.reflect.KProperty + +// region DataFrame + +public fun 
DataFrame.distinct(): DataFrame = distinctBy { all() } + +public fun DataFrame.distinct(columns: ColumnsSelector): DataFrame = select(columns).distinct() +public fun DataFrame.distinct(vararg columns: KProperty<*>): DataFrame = distinct { + val set = columns.toColumns() + set +} +public fun DataFrame.distinct(vararg columns: String): DataFrame = distinct { columns.toColumns() } +public fun DataFrame.distinct(vararg columns: Column): DataFrame = distinct { columns.toColumns() } + +@JvmName("distinctT") +public fun DataFrame.distinct(columns: Iterable): DataFrame = distinct { columns.toColumns() } +public fun DataFrame.distinct(columns: Iterable): DataFrame = distinct { columns.toColumnSet() } + +public fun DataFrame.distinctBy(vararg columns: KProperty<*>): DataFrame = distinctBy { columns.toColumns() } +public fun DataFrame.distinctBy(vararg columns: String): DataFrame = distinctBy { columns.toColumns() } +public fun DataFrame.distinctBy(vararg columns: Column): DataFrame = distinctBy { columns.toColumns() } + +@JvmName("distinctByT") +public fun DataFrame.distinctBy(columns: Iterable): DataFrame = distinctBy { columns.toColumns() } +public fun DataFrame.distinctBy(columns: Iterable): DataFrame = distinctBy { columns.toColumnSet() } + +public fun DataFrame.distinctBy(columns: ColumnsSelector): DataFrame { + val cols = get(columns) + val distinctIndices = indices.distinctBy { i -> cols.map { it[i] } } + return this[distinctIndices] +} + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/drop.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/drop.kt new file mode 100644 index 000000000..24791cf2b --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/drop.kt @@ -0,0 +1,57 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import 
org.jetbrains.kotlinx.dataframe.Predicate +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.columns.size +import org.jetbrains.kotlinx.dataframe.index +import org.jetbrains.kotlinx.dataframe.nrow + +// region DataColumn + +public fun DataColumn.drop(predicate: Predicate): DataColumn = filter { !predicate(it) } + +public fun DataColumn.drop(n: Int): DataColumn = when { + n == 0 -> this + n >= size -> get(emptyList()) + else -> get(n until size) +} + +public fun DataColumn.dropLast(n: Int = 1): DataColumn = take(size - n) + +// endregion + +// region DataFrame + +/** + * Returns a DataFrame containing all rows except first [n] rows. + * + * @throws IllegalArgumentException if [n] is negative. + */ +public fun DataFrame.drop(n: Int): DataFrame { + require(n >= 0) { "Requested rows count $n is less than zero." } + return getRows(n.coerceAtMost(nrow) until nrow) +} + +/** + * Returns a DataFrame containing all rows except last [n] rows. + * + * @throws IllegalArgumentException if [n] is negative. + */ +public fun DataFrame.dropLast(n: Int = 1): DataFrame { + require(n >= 0) { "Requested rows count $n is less than zero." } + return take((nrow - n).coerceAtLeast(0)) +} + +/** + * Returns a DataFrame containing all rows except rows that satisfy the given [predicate]. + */ +public fun DataFrame.drop(predicate: RowFilter): DataFrame = filter { !predicate(it, it) } + +/** + * Returns a DataFrame containing all rows except first rows that satisfy the given [predicate]. 
+ */ +public fun DataFrame.dropWhile(predicate: RowFilter): DataFrame = firstOrNull { !predicate(it, it) }?.let { drop(it.index) } ?: this + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/duplicate.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/duplicate.kt new file mode 100644 index 000000000..ab7bbda51 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/duplicate.kt @@ -0,0 +1,18 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.impl.api.duplicateImpl +import org.jetbrains.kotlinx.dataframe.impl.api.duplicateRowsImpl + +public fun DataFrame.duplicate(n: Int): FrameColumn = List(n) { this }.toFrameColumn() + +public fun DataFrame.duplicateRows(n: Int): DataFrame = + duplicateRowsImpl(n) + +public fun DataFrame.duplicateRows(n: Int, filter: RowFilter): DataFrame = + duplicateRowsImpl(n, rows().filter { filter(it, it) }.map { it.index() }) + +public fun DataRow.duplicate(n: Int): DataFrame = duplicateImpl(n) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/enum.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/enum.kt new file mode 100644 index 000000000..76fbe6ca7 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/enum.kt @@ -0,0 +1,11 @@ +package org.jetbrains.kotlinx.dataframe.api + +/** + * Make your enum class inherit [DataSchemaEnum] to + * make String -> Enum and Enum -> String conversions work + * using [value] instead of the enum name. 
+ * (Fallback to enum name if the value cannot be found is implemented) + */ +public interface DataSchemaEnum { + public val value: String +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt new file mode 100644 index 000000000..b7f9514d5 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt @@ -0,0 +1,59 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.api.explodeImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty + +private val defaultExplodeColumns: ColumnsSelector<*, *> = { dfs { it.isList() || it.isFrameColumn() } } + +// region explode DataFrame + +public fun DataFrame.explode( + dropEmpty: Boolean = true, + selector: ColumnsSelector = defaultExplodeColumns +): DataFrame = explodeImpl(dropEmpty, selector) + +public fun DataFrame.explode(vararg columns: String, dropEmpty: Boolean = true): DataFrame = + explode(dropEmpty) { columns.toColumns() } + +public fun DataFrame.explode(vararg columns: ColumnReference, dropEmpty: Boolean = true): DataFrame = + explode(dropEmpty) { columns.toColumns() } + +public fun DataFrame.explode(vararg columns: KProperty, dropEmpty: Boolean = true): DataFrame = + explode(dropEmpty) { columns.toColumns() } + +// endregion + +// region explode DataRow + +public fun DataRow.explode( + dropEmpty: Boolean = true, + selector: ColumnsSelector = defaultExplodeColumns +): DataFrame = toDataFrame().explode(dropEmpty, selector) + +public fun 
DataRow.explode(vararg columns: String, dropEmpty: Boolean = true): DataFrame = + explode(dropEmpty) { columns.toColumns() } + +public fun DataRow.explode(vararg columns: ColumnReference, dropEmpty: Boolean = true): DataFrame = + explode(dropEmpty) { columns.toColumns() } + +public fun DataRow.explode(vararg columns: KProperty, dropEmpty: Boolean = true): DataFrame = + explode(dropEmpty) { columns.toColumns() } + +// endregion + +// region explode DataColumn + +@JvmName("explodeList") +public fun DataColumn>.explode(): DataColumn = explodeImpl() as DataColumn + +@JvmName("explodeFrames") +public fun DataColumn>.explode(): ColumnGroup = concat().asColumnGroup(name()) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt new file mode 100644 index 000000000..ccf2e653e --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt @@ -0,0 +1,34 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.ColumnSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.Predicate +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.toIndices +import org.jetbrains.kotlinx.dataframe.indices +import kotlin.reflect.KProperty + +// region DataColumn + +public fun DataColumn.filter(predicate: Predicate): DataColumn = indices.filter { + predicate(get(it)) +}.let { get(it) } + +// endregion + +// region DataFrame + +public fun DataFrame.filter(predicate: RowFilter): DataFrame = + indices.filter { + val row = get(it) + predicate(row, row) + }.let { get(it) } + +public fun DataFrame.filterBy(column: ColumnSelector): DataFrame = 
getRows(getColumn(column).toList().toIndices()) +public fun DataFrame.filterBy(column: ColumnReference): DataFrame = filterBy { column } +public fun DataFrame.filterBy(column: String): DataFrame = filterBy { column.toColumnOf() } +public fun DataFrame.filterBy(column: KProperty): DataFrame = filterBy { column.toColumnAccessor() } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/first.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/first.kt new file mode 100644 index 000000000..7c5154ff5 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/first.kt @@ -0,0 +1,57 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.columns.size +import org.jetbrains.kotlinx.dataframe.columns.values +import org.jetbrains.kotlinx.dataframe.nrow + +// region DataColumn + +public fun DataColumn.first(): T = get(0) +public fun DataColumn.firstOrNull(): T? = if (size > 0) first() else null +public fun DataColumn.first(predicate: (T) -> Boolean): T = values.first(predicate) +public fun DataColumn.firstOrNull(predicate: (T) -> Boolean): T? = values.firstOrNull(predicate) + +// endregion + +// region DataFrame + +public fun DataFrame.first(): DataRow { + if (nrow == 0) { + throw NoSuchElementException("DataFrame has no rows. Use `firstOrNull`.") + } + return get(0) +} +public fun DataFrame.firstOrNull(): DataRow? = if (nrow > 0) first() else null + +public fun DataFrame.first(predicate: RowFilter): DataRow = rows().first { predicate(it, it) } +public fun DataFrame.firstOrNull(predicate: RowFilter): DataRow? 
= rows().firstOrNull { predicate(it, it) } + +// endregion + +// region GroupBy + +public fun GroupBy.first(): ReducedGroupBy = reduce { firstOrNull() } + +public fun GroupBy.first(predicate: RowFilter): ReducedGroupBy = reduce { firstOrNull(predicate) } + +// endregion + +// region Pivot + +public fun Pivot.first(): ReducedPivot = reduce { firstOrNull() } + +public fun Pivot.first(predicate: RowFilter): ReducedPivot = reduce { firstOrNull(predicate) } + +// endregion + +// region PivotGroupBy + +public fun PivotGroupBy.first(): ReducedPivotGroupBy = reduce { firstOrNull() } + +public fun PivotGroupBy.first(predicate: RowFilter): ReducedPivotGroupBy = reduce { firstOrNull(predicate) } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt new file mode 100644 index 000000000..7d83957bb --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt @@ -0,0 +1,15 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.api.flattenImpl + +// region DataFrame + +public fun DataFrame.flatten(): DataFrame = flatten { all() } + +public fun DataFrame.flatten( + columns: ColumnsSelector +): DataFrame = flattenImpl(columns) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/forEach.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/forEach.kt new file mode 100644 index 000000000..9e5ec62a7 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/forEach.kt @@ -0,0 +1,29 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import 
org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.columns.values + +// region DataColumn + +public fun DataColumn.forEach(action: (T) -> Unit): Unit = values.forEach(action) + +public fun DataColumn.forEachIndexed(action: (Int, T) -> Unit): Unit = values.forEachIndexed(action) + +// endregion + +// region DataFrame + +public fun DataFrame.forEach(action: RowExpression): Unit = rows().forEach { action(it, it) } + +// endregion + +// region GroupBy + +public fun GroupBy.forEach(body: (GroupBy.Entry) -> Unit): Unit = keys.forEach { key -> + val group = groups[key.index()] + body(GroupBy.Entry(key, group)) +} + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/format.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/format.kt new file mode 100644 index 000000000..316fc7872 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/format.kt @@ -0,0 +1,118 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowValueFilter +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.api.MergedAttributes +import org.jetbrains.kotlinx.dataframe.impl.api.SingleAttribute +import org.jetbrains.kotlinx.dataframe.impl.api.encode +import org.jetbrains.kotlinx.dataframe.impl.api.formatImpl +import org.jetbrains.kotlinx.dataframe.impl.api.linearGradient +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration +import org.jetbrains.kotlinx.dataframe.io.toHTML +import org.jetbrains.kotlinx.jupyter.api.HtmlData +import kotlin.reflect.KProperty + +// region DataFrame + +public 
fun DataFrame.format(columns: ColumnsSelector): FormatClause = FormatClause(this, columns) +public fun DataFrame.format(vararg columns: String): FormatClause = format { columns.toColumns() } +public fun DataFrame.format(vararg columns: ColumnReference): FormatClause = format { columns.toColumns() } +public fun DataFrame.format(vararg columns: KProperty): FormatClause = format { columns.toColumns() } + +public fun FormatClause.perRowCol(formatter: RowColFormatter): FormattedFrame = formatImpl(formatter) + +public fun FormatClause.with(formatter: CellFormatter): FormattedFrame = formatImpl { row, col -> formatter(row[col]) } +public fun FormatClause.where(filter: RowValueFilter): FormatClause = copy(filter = filter) + +public fun DataFrame.format(): FormatClause = FormatClause(this) +public fun FormattedFrame.format(): FormatClause = FormatClause(df, null, formatter) + +// endregion + +public data class RGBColor(val r: Short, val g: Short, val b: Short) + +public interface CellAttributes { + + public fun attributes(): List> +} + +public infix fun CellAttributes?.and(other: CellAttributes?): CellAttributes? 
= when { + other == null -> this + this == null -> other + else -> MergedAttributes(listOf(this, other)) +} + +public object FormattingDSL { + + public fun rgb(r: Short, g: Short, b: Short): RGBColor = RGBColor(r, g, b) + + public val black: RGBColor = rgb(0, 0, 0) + public val white: RGBColor = rgb(255, 255, 255) + public val green: RGBColor = rgb(0, 255, 0) + public val red: RGBColor = rgb(255, 0, 0) + public val blue: RGBColor = rgb(0, 0, 255) + public val gray: RGBColor = rgb(128, 128, 128) + public val darkGray: RGBColor = rgb(169, 169, 169) + public val lightGray: RGBColor = rgb(211, 211, 211) + + public fun attr(name: String, value: String): CellAttributes = SingleAttribute(name, value) + + public fun background(color: RGBColor): CellAttributes = attr("background-color", color.encode()) + public fun background(r: Short, g: Short, b: Short): CellAttributes = background(RGBColor(r, g, b)) + + public fun textColor(color: RGBColor): CellAttributes = attr("color", color.encode()) + public fun textColor(r: Short, g: Short, b: Short): CellAttributes = textColor(RGBColor(r, g, b)) + + public val italic: CellAttributes = attr("font-style", "italic") + + public val bold: CellAttributes = attr("font-weight", "bold") + + public val underline: CellAttributes = attr("text-decoration", "underline") + + public fun linearBg(value: Number, from: Pair, to: Pair): CellAttributes = background( + linear(value, from, to) + ) + + public fun linear(value: Number, from: Pair, to: Pair): RGBColor { + val a = from.first.toDouble() + val b = to.first.toDouble() + if (a < b) return linearGradient(value.toDouble(), a, from.second, b, to.second) + return linearGradient(value.toDouble(), b, to.second, a, from.second) + } +} + +public typealias RowColFormatter = FormattingDSL.(DataRow, DataColumn) -> CellAttributes? + +public class FormattedFrame( + internal val df: DataFrame, + internal val formatter: RowColFormatter? 
= null, +) { + public fun toHTML(configuration: DisplayConfiguration): HtmlData = df.toHTML(getDisplayConfiguration(configuration)) + + public fun getDisplayConfiguration(configuration: DisplayConfiguration): DisplayConfiguration { + return configuration.copy(cellFormatter = formatter as RowColFormatter<*, *>?) + } +} + +public data class FormatClause( + val df: DataFrame, + val columns: ColumnsSelector? = null, + val oldFormatter: RowColFormatter? = null, + val filter: RowValueFilter = { true }, +) + +public fun FormattedFrame.format(columns: ColumnsSelector): FormatClause = FormatClause(df, columns, formatter) + +public typealias CellFormatter = FormattingDSL.(V) -> CellAttributes? + +public fun FormatClause.linearBg(from: Pair, to: Pair): FormattedFrame = + with { + if (it != null) { + background(linear(it, from, to)) + } else null + } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/frames.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/frames.kt new file mode 100644 index 000000000..7f82f8fc5 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/frames.kt @@ -0,0 +1,16 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow + +// region Pivot + +public fun Pivot.frames(): DataRow = aggregate { this } + +// endregion + +// region PivotGroupBy + +public fun PivotGroupBy.frames(): DataFrame = aggregate { this } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt new file mode 100644 index 000000000..119ab658a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt @@ -0,0 +1,102 @@ +package org.jetbrains.kotlinx.dataframe.api + +import 
org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.RowValueFilter +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.api.gatherImpl +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty +import kotlin.reflect.KType +import kotlin.reflect.typeOf + +public fun DataFrame.gather(selector: ColumnsSelector): Gather = + Gather( + df = this, + columns = selector, + filter = null, + keyType = typeOf(), + keyTransform = { it }, + valueTransform = null, + ) + +public fun DataFrame.gather(vararg columns: String): Gather = + gather { columns.toColumns() } + +public fun DataFrame.gather(vararg columns: ColumnReference): Gather = + gather { columns.toColumns() } + +public fun DataFrame.gather(vararg columns: KProperty): Gather = + gather { columns.toColumns() } + +public fun Gather.where(filter: RowValueFilter): Gather = + copy(filter = this.filter and filter) + +public fun Gather.notNull(): Gather = + where { it != null } as Gather + +public fun Gather.explodeLists(): Gather = + copy(explode = true) + +public inline fun Gather.mapKeys(noinline transform: (String) -> K): Gather = + copy(keyTransform = transform as ((String) -> Nothing), keyType = typeOf()) as Gather + +public fun Gather.mapValues(transform: (C) -> R): Gather = + copy(valueTransform = transform as ((C) -> Nothing)) as Gather + +public data class Gather( + internal val df: DataFrame, + internal val columns: ColumnsSelector, + internal val filter: RowValueFilter? = null, + internal val keyType: KType? = null, + internal val keyTransform: ((String) -> K), + internal val valueTransform: ((C) -> R)? = null, + internal val explode: Boolean = false +) { + public fun

cast(): Split = this as Split +} + +public data class SplitWithTransform( + internal val df: DataFrame, + internal val columns: ColumnsSelector, + internal val inward: Boolean, + internal val tartypeOf: KType, + internal val default: R? = null, + internal val transform: DataRow.(C) -> Iterable +) + +public typealias ColumnNamesGenerator = ColumnWithPath.(extraColumnIndex: Int) -> String + +// region default + +public inline fun , reified R> Split.default(value: R?): SplitWithTransform = + by { it }.default(value) + +public fun Split.default(value: String?): SplitWithTransform = + by { it.splitDefault() }.default(value) + +public fun SplitWithTransform.default(value: R?): SplitWithTransform = copy(default = value) + +// endregion + +// region by + +public inline fun Split.by(noinline splitter: DataRow.(C) -> Iterable): SplitWithTransform = + by(typeOf(), splitter) + +public fun Split.by( + vararg delimiters: Char, + trim: Boolean = true, + ignoreCase: Boolean = false, + limit: Int = 0 +): SplitWithTransform = by { + it.toString().split(*delimiters, ignoreCase = ignoreCase, limit = limit).let { + if (trim) it.map { it.trim() } + else it + } +} + +public fun Split.by( + regex: Regex, + trim: Boolean = true, + limit: Int = 0 +): SplitWithTransform = by { + it.toString().split(regex, limit = limit).let { + if (trim) it.map { it.trim() } + else it + } +} + +public fun Split.by( + vararg delimiters: String, + trim: Boolean = true, + ignoreCase: Boolean = false, + limit: Int = 0 +): SplitWithTransform = by { + it.toString().split(*delimiters, ignoreCase = ignoreCase, limit = limit).let { + if (trim) it.map { it.trim() } + else it + } +} + +@PublishedApi +internal fun Split.by( + type: KType, + splitter: DataRow.(C) -> Iterable +): SplitWithTransform { + return SplitWithTransform(df, columns, false, type) { + if (it == null) emptyList() else splitter(it).asList() + } +} + +// endregion + +// region match + +public fun Split.match(regex: String): SplitWithTransform = 
match(regex.toRegex()) + +public fun Split.match(regex: Regex): SplitWithTransform = by { + it?.let { regex.matchEntire(it)?.groups?.drop(1)?.map { it?.value } } ?: emptyList() +} + +// endregion + +internal fun Split.toDataFrame(): DataFrame = by { + when (it) { + is List<*> -> it + is AnyFrame -> it.rows() + else -> listOf(it) + } +}.into() + +// region into + +public fun SplitWithTransform.into( + firstName: ColumnAccessor<*>, + vararg otherNames: ColumnAccessor<*> +): DataFrame = + into(listOf(firstName.name()) + otherNames.map { it.name() }) + +public fun SplitWithTransform.into( + firstName: KProperty<*>, + vararg otherNames: KProperty<*> +): DataFrame = + into(listOf(firstName.columnName) + otherNames.map { it.columnName }) + +public fun SplitWithTransform.into( + vararg names: String, + extraNamesGenerator: (ColumnWithPath.(extraColumnIndex: Int) -> String)? = null +): DataFrame = into(names.toList(), extraNamesGenerator) + +public fun SplitWithTransform.into( + names: List, + extraNamesGenerator: (ColumnWithPath.(extraColumnIndex: Int) -> String)? = null +): DataFrame = splitImpl(this) { numberOfNewCols -> + if (extraNamesGenerator != null && names.size < numberOfNewCols) { + names + (1..(numberOfNewCols - names.size)).map { extraNamesGenerator(this, it) } + } else names +} + +public fun > Split.into( + vararg names: String, + extraNamesGenerator: ColumnNamesGenerator? = null +): DataFrame = + by { it }.into(names.toList(), extraNamesGenerator) + +@JvmName("splitDataFrameInto") +public fun Split>.into( + vararg names: String, + extraNamesGenerator: ColumnNamesGenerator>? 
= null +): DataFrame = + by { it.rows() }.into(names.toList(), extraNamesGenerator) + +public fun Split>.into( + firstCol: String, + secondCol: String +): DataFrame = + by { listOf(it.first, it.second) }.into(firstCol, secondCol) + +public inline fun Split>.into( + firstCol: ColumnAccessor, + secondCol: ColumnAccessor +): DataFrame = + by { listOf(it.first, it.second) }.into(firstCol, secondCol) + +@JvmName("intoTC") +public fun Split.into( + vararg names: String, + extraNamesGenerator: (ColumnWithPath.(extraColumnIndex: Int) -> String)? = null +): DataFrame = + by { it.splitDefault() }.into(names.toList(), extraNamesGenerator) + +// endregion + +// region inward + +public fun SplitWithTransform.inward( + names: Iterable, + extraNamesGenerator: ColumnNamesGenerator? = null +): DataFrame = + copy(inward = true).into(names.toList(), extraNamesGenerator) + +public fun SplitWithTransform.inward( + vararg names: String, + extraNamesGenerator: ColumnNamesGenerator? = null +): DataFrame = inward(names.toList(), extraNamesGenerator) + +public fun SplitWithTransform.inward( + firstName: ColumnAccessor<*>, + vararg otherNames: ColumnAccessor<*> +): DataFrame = + inward(listOf(firstName.name()) + otherNames.map { it.name() }) + +public fun SplitWithTransform.inward( + firstName: KProperty<*>, + vararg otherNames: KProperty<*> +): DataFrame = + inward(listOf(firstName.columnName) + otherNames.map { it.columnName }) + +public inline fun , reified R> Split.inward( + vararg names: String, + noinline extraNamesGenerator: ColumnNamesGenerator? = null +): DataFrame = + by { it }.inward(names.toList(), extraNamesGenerator) + +@JvmName("splitDataFrameInward") +public fun , R> Split.inward( + vararg names: String, + extraNamesGenerator: ColumnNamesGenerator? 
= null +): DataFrame = + by { it.rows() }.inward(names.toList(), extraNamesGenerator) + +public fun Split>.inward( + firstCol: String, + secondCol: String +): DataFrame = + by { listOf(it.first, it.second) }.inward(firstCol, secondCol) + +public inline fun Split>.inward( + firstCol: ColumnAccessor, + secondCol: ColumnAccessor +): DataFrame = + by { listOf(it.first, it.second) }.inward(firstCol, secondCol) + +@JvmName("inwardTC") +public fun Split.inward( + vararg names: String, + extraNamesGenerator: (ColumnWithPath.(extraColumnIndex: Int) -> String)? = null +): DataFrame = + by { it.splitDefault() }.inward(names.toList(), extraNamesGenerator) + +// endregion + +// region intoColumns + +public fun Split.intoColumns(): DataFrame { + return df.convert(columns).with { + when { + it == null -> null + it.isEmpty() -> DataRow.empty + else -> it.implode { all() }.single() + } + } +} + +// endregion + +// region intoRows + +@JvmName("intoRowsTC") +public inline fun , reified R> Split.intoRows(dropEmpty: Boolean = true): DataFrame = + by { it } + .intoRows(dropEmpty) + +@JvmName("intoRowsFrame") +public fun Split.intoRows(dropEmpty: Boolean = true): DataFrame = + by { it.rows() }.intoRows(dropEmpty) + +internal fun Convert.splitInplace(type: KType, transform: DataRow.(C) -> Iterable) = + withRowCellImpl(getListType(type), Infer.None) { if (it == null) emptyList() else transform(it).asList() } + +public fun SplitWithTransform.intoRows(dropEmpty: Boolean = true): DataFrame { + val paths = df.getColumnPaths(columns).toColumnSet() + return df.convert { paths as ColumnSet }.splitInplace(tartypeOf, transform).explode(dropEmpty) { paths } +} + +// endregion + +// region inplace + +@JvmName("inplaceTC") +public inline fun , reified R> Split.inplace(): DataFrame = by { it }.inplace() + +public fun SplitWithTransform.inplace(): DataFrame = df.convert(columns).splitInplace(tartypeOf, transform) + +// endregion + +// region DataColumn + +public fun DataColumn>.splitInto(vararg names: 
String): AnyFrame = toDataFrame().split { this@splitInto }.into(*names) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/std.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/std.kt new file mode 100644 index 000000000..092120000 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/std.kt @@ -0,0 +1,252 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.aggregation.ColumnsForAggregateSelector +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.cast2 +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.of +import org.jetbrains.kotlinx.dataframe.impl.aggregation.numberColumns +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnsOf +import org.jetbrains.kotlinx.dataframe.math.std +import kotlin.reflect.KProperty +import kotlin.reflect.typeOf + +// region DataColumn + +public fun DataColumn.std(skipNA: Boolean = skipNA_default, ddof: Int = ddof_default): Double = Aggregators.std(skipNA, ddof).aggregate(this) ?: .0 + +public inline fun DataColumn.stdOf( + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + noinline expression: (T) -> 
R? +): Double = Aggregators.std(skipNA, ddof).cast2().aggregateOf(this, expression) ?: .0 + +// endregion + +// region DataRow + +public fun AnyRow.rowStd( + skipNA: Boolean = org.jetbrains.kotlinx.dataframe.api.skipNA_default, + ddof: Int = org.jetbrains.kotlinx.dataframe.api.ddof_default +): Double = values().filterIsInstance().map { it.toDouble() }.std(skipNA, ddof) +public inline fun AnyRow.rowStdOf(ddof: Int = org.jetbrains.kotlinx.dataframe.api.ddof_default): Double = values().filterIsInstance().std( + typeOf(), ddof = ddof +) + +// endregion + +// region DataFrame + +public fun DataFrame.std(skipNA: Boolean = skipNA_default, ddof: Int = ddof_default): DataRow = stdFor(skipNA, ddof, numberColumns()) + +public fun DataFrame.stdFor( + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + columns: ColumnsForAggregateSelector +): DataRow = Aggregators.std(skipNA, ddof).aggregateFor(this, columns) +public fun DataFrame.stdFor(vararg columns: String, skipNA: Boolean = skipNA_default, ddof: Int = ddof_default): DataRow = stdFor(skipNA, ddof) { columns.toColumnsOf() } +public fun DataFrame.stdFor( + vararg columns: ColumnReference, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataRow = stdFor(skipNA, ddof) { columns.toColumns() } +public fun DataFrame.stdFor( + vararg columns: KProperty, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataRow = stdFor(skipNA, ddof) { columns.toColumns() } + +public fun DataFrame.std( + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + columns: ColumnsSelector +): Double = Aggregators.std(skipNA, ddof).aggregateAll(this, columns) ?: .0 +public fun DataFrame.std(vararg columns: ColumnReference): Double = std { columns.toColumns() } +public fun DataFrame.std(vararg columns: String): Double = std { columns.toColumnsOf() } +public fun DataFrame.std(vararg columns: KProperty): Double = std { columns.toColumns() } + +public inline fun DataFrame.stdOf( + skipNA: Boolean = 
skipNA_default, + ddof: Int = ddof_default, + crossinline expression: RowExpression +): Double = Aggregators.std(skipNA, ddof).of(this, expression) ?: .0 + +// endregion + +// region GroupBy + +public fun Grouped.std(skipNA: Boolean = skipNA_default, ddof: Int = ddof_default): DataFrame = stdFor(skipNA, ddof, numberColumns()) + +public fun Grouped.stdFor( + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + columns: ColumnsForAggregateSelector +): DataFrame = Aggregators.std(skipNA, ddof).aggregateFor(this, columns) +public fun Grouped.stdFor(vararg columns: String, skipNA: Boolean = skipNA_default, ddof: Int = ddof_default): DataFrame = stdFor(skipNA, ddof) { columns.toColumnsOf() } +public fun Grouped.stdFor( + vararg columns: ColumnReference, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataFrame = stdFor(skipNA, ddof) { columns.toColumns() } +public fun Grouped.stdFor( + vararg columns: KProperty, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataFrame = stdFor(skipNA, ddof) { columns.toColumns() } + +public fun Grouped.std( + name: String? = null, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + columns: ColumnsSelector +): DataFrame = Aggregators.std(skipNA, ddof).aggregateAll(this, name, columns) +public fun Grouped.std( + vararg columns: ColumnReference, + name: String? = null, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataFrame = std(name, skipNA, ddof) { columns.toColumns() } +public fun Grouped.std( + vararg columns: String, + name: String? = null, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataFrame = std(name, skipNA, ddof) { columns.toColumnsOf() } +public fun Grouped.std( + vararg columns: KProperty, + name: String? = null, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataFrame = std(name, skipNA, ddof) { columns.toColumns() } + +public inline fun Grouped.stdOf( + name: String? 
= null, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + crossinline expression: RowExpression +): DataFrame = Aggregators.std(skipNA, ddof).aggregateOf(this, name, expression) + +// endregion + +// region Pivot + +public fun Pivot.std(separate: Boolean = false, skipNA: Boolean = skipNA_default, ddof: Int = ddof_default): DataRow = stdFor(separate, skipNA, ddof, numberColumns()) + +public fun Pivot.stdFor( + separate: Boolean = false, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + columns: ColumnsForAggregateSelector +): DataRow = delegate { stdFor(separate, skipNA, ddof, columns) } +public fun Pivot.stdFor( + vararg columns: String, + separate: Boolean = false, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataRow = stdFor(separate, skipNA, ddof) { columns.toColumnsOf() } +public fun Pivot.stdFor( + vararg columns: ColumnReference, + separate: Boolean = false, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataRow = stdFor(separate, skipNA, ddof) { columns.toColumns() } +public fun Pivot.stdFor( + vararg columns: KProperty, + separate: Boolean = false, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataRow = stdFor(separate, skipNA, ddof) { columns.toColumns() } + +public fun Pivot.std( + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + columns: ColumnsSelector +): DataRow = delegate { std(skipNA, ddof, columns) } +public fun Pivot.std( + vararg columns: ColumnReference, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataRow = std(skipNA, ddof) { columns.toColumns() } +public fun Pivot.std(vararg columns: String, skipNA: Boolean = skipNA_default, ddof: Int = ddof_default): DataRow = std(skipNA, ddof) { columns.toColumnsOf() } +public fun Pivot.std( + vararg columns: KProperty, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataRow = std(skipNA, ddof) { columns.toColumns() } + +public inline fun Pivot.stdOf( + 
skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + crossinline expression: RowExpression +): DataRow = delegate { stdOf(skipNA, ddof, expression) } + +// endregion + +// region PivotGroupBy + +public fun PivotGroupBy.std( + separate: Boolean = false, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataFrame = stdFor(separate, skipNA, ddof, numberColumns()) + +public fun PivotGroupBy.stdFor( + separate: Boolean = false, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + columns: ColumnsForAggregateSelector +): DataFrame = + Aggregators.std(skipNA, ddof).aggregateFor(this, separate, columns) +public fun PivotGroupBy.stdFor( + vararg columns: String, + separate: Boolean = false, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataFrame = stdFor(separate, skipNA, ddof) { columns.toColumnsOf() } +public fun PivotGroupBy.stdFor( + vararg columns: ColumnReference, + separate: Boolean = false, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataFrame = stdFor(separate, skipNA, ddof) { columns.toColumns() } +public fun PivotGroupBy.stdFor( + vararg columns: KProperty, + separate: Boolean = false, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataFrame = stdFor(separate, skipNA, ddof) { columns.toColumns() } + +public fun PivotGroupBy.std( + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + columns: ColumnsSelector +): DataFrame = Aggregators.std(skipNA, ddof).aggregateAll(this, columns) +public fun PivotGroupBy.std( + vararg columns: ColumnReference, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default +): DataFrame = std(skipNA, ddof) { columns.toColumns() } +public fun PivotGroupBy.std(vararg columns: String, skipNA: Boolean = skipNA_default, ddof: Int = ddof_default): DataFrame = std(skipNA, ddof) { columns.toColumnsOf() } +public fun PivotGroupBy.std( + vararg columns: KProperty, + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default 
+): DataFrame = std(skipNA, ddof) { columns.toColumns() } + +public inline fun PivotGroupBy.stdOf( + skipNA: Boolean = skipNA_default, + ddof: Int = ddof_default, + crossinline expression: RowExpression +): DataFrame = Aggregators.std(skipNA, ddof).aggregateOf(this, expression) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sum.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sum.kt new file mode 100644 index 000000000..de13d32e3 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sum.kt @@ -0,0 +1,143 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.aggregation.ColumnsForAggregateSelector +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.values +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregator +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.cast +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.of +import org.jetbrains.kotlinx.dataframe.impl.aggregation.numberColumns +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnsOf +import org.jetbrains.kotlinx.dataframe.impl.columns.toNumberColumns +import 
org.jetbrains.kotlinx.dataframe.impl.zero +import org.jetbrains.kotlinx.dataframe.math.sum +import org.jetbrains.kotlinx.dataframe.math.sumOf +import kotlin.reflect.KProperty +import kotlin.reflect.typeOf + +// region DataColumn + +@JvmName("sumT") +public fun DataColumn.sum(): T = values.sum(type()) + +@JvmName("sumTNullable") +public fun DataColumn.sum(): T = values.sum(type()) + +public inline fun DataColumn.sumOf(crossinline expression: (T) -> R): R? = + (Aggregators.sum as Aggregator<*, *>).cast().of(this, expression) + +// endregion + +// region DataRow + +public fun AnyRow.rowSum(): Number = org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators.sum.aggregateMixed(values().filterIsInstance()) ?: 0 +public inline fun AnyRow.rowSumOf(): T = values().filterIsInstance().sum(typeOf()) + +// endregion + +// region DataFrame + +public fun DataFrame.sum(): DataRow = sumFor(numberColumns()) + +public fun DataFrame.sumFor(columns: ColumnsForAggregateSelector): DataRow = Aggregators.sum.aggregateFor(this, columns) +public fun DataFrame.sumFor(vararg columns: String): DataRow = sumFor { columns.toColumnsOf() } +public fun DataFrame.sumFor(vararg columns: ColumnReference): DataRow = sumFor { columns.toColumns() } +public fun DataFrame.sumFor(vararg columns: KProperty): DataRow = sumFor { columns.toColumns() } + +public inline fun DataFrame.sum(noinline columns: ColumnsSelector): C = (Aggregators.sum.aggregateAll(this, columns) as C?) 
?: C::class.zero() +public inline fun DataFrame.sum(vararg columns: ColumnReference): C = sum { columns.toColumns() } +public fun DataFrame.sum(vararg columns: String): Number = sum { columns.toColumnsOf() } +public inline fun DataFrame.sum(vararg columns: KProperty): C = sum { columns.toColumns() } + +public inline fun DataFrame.sumOf(crossinline expression: RowExpression): C = rows().sumOf( + typeOf() +) { expression(it, it) } + +// endregion + +// region GroupBy + +public fun Grouped.sum(): DataFrame = sumFor(numberColumns()) + +public fun Grouped.sumFor(columns: ColumnsForAggregateSelector): DataFrame = Aggregators.sum.aggregateFor(this, columns) +public fun Grouped.sumFor(vararg columns: String): DataFrame = sumFor { columns.toNumberColumns() } +public fun Grouped.sumFor(vararg columns: ColumnReference): DataFrame = sumFor { columns.toColumns() } +public fun Grouped.sumFor(vararg columns: KProperty): DataFrame = sumFor { columns.toColumns() } + +public fun Grouped.sum(name: String? = null, columns: ColumnsSelector): DataFrame = + Aggregators.sum.aggregateAll(this, name, columns) +public fun Grouped.sum(vararg columns: String, name: String? = null): DataFrame = sum(name) { columns.toNumberColumns() } +public fun Grouped.sum(vararg columns: ColumnReference, name: String? = null): DataFrame = sum(name) { columns.toColumns() } +public fun Grouped.sum(vararg columns: KProperty, name: String? = null): DataFrame = sum(name) { columns.toColumns() } + +public inline fun Grouped.sumOf( + resultName: String? 
= null, + crossinline expression: RowExpression +): DataFrame = Aggregators.sum.aggregateOf(this, resultName, expression) + +// endregion + +// region Pivot + +public fun Pivot.sum(separate: Boolean = false): DataRow = sumFor(separate, numberColumns()) + +public fun Pivot.sumFor( + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataRow = + delegate { sumFor(separate, columns) } +public fun Pivot.sumFor(vararg columns: String, separate: Boolean = false): DataRow = sumFor(separate) { columns.toNumberColumns() } +public fun Pivot.sumFor( + vararg columns: ColumnReference, + separate: Boolean = false +): DataRow = sumFor(separate) { columns.toColumns() } +public fun Pivot.sumFor(vararg columns: KProperty, separate: Boolean = false): DataRow = sumFor(separate) { columns.toColumns() } + +public fun Pivot.sum(columns: ColumnsSelector): DataRow = + delegate { sum(columns) } +public fun Pivot.sum(vararg columns: String): DataRow = sum { columns.toNumberColumns() } +public fun Pivot.sum(vararg columns: ColumnReference): DataRow = sum { columns.toColumns() } +public fun Pivot.sum(vararg columns: KProperty): DataRow = sum { columns.toColumns() } + +public inline fun Pivot.sumOf(crossinline expression: RowExpression): DataRow = + delegate { sumOf(expression) } + +// endregion + +// region PivotGroupBy + +public fun PivotGroupBy.sum(separate: Boolean = false): DataFrame = sumFor(separate, numberColumns()) + +public fun PivotGroupBy.sumFor( + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataFrame = + Aggregators.sum.aggregateFor(this, separate, columns) +public fun PivotGroupBy.sumFor(vararg columns: String, separate: Boolean = false): DataFrame = sumFor(separate) { columns.toNumberColumns() } +public fun PivotGroupBy.sumFor( + vararg columns: ColumnReference, + separate: Boolean = false +): DataFrame = sumFor(separate) { columns.toColumns() } +public fun PivotGroupBy.sumFor(vararg columns: KProperty, separate: Boolean = false): 
DataFrame = sumFor(separate) { columns.toColumns() } + +public fun PivotGroupBy.sum(columns: ColumnsSelector): DataFrame = + Aggregators.sum.aggregateAll(this, columns) +public fun PivotGroupBy.sum(vararg columns: String): DataFrame = sum { columns.toNumberColumns() } +public fun PivotGroupBy.sum(vararg columns: ColumnReference): DataFrame = sum { columns.toColumns() } +public fun PivotGroupBy.sum(vararg columns: KProperty): DataFrame = sum { columns.toColumns() } + +public inline fun PivotGroupBy.sumOf(crossinline expression: RowExpression): DataFrame = + Aggregators.sum.aggregateOf(this, expression) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt new file mode 100644 index 000000000..1228924f4 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt @@ -0,0 +1,9 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataFrame + +// region DataFrame + +public fun DataFrame.tail(numRows: Int = 5): DataFrame = takeLast(numRows) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/take.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/take.kt new file mode 100644 index 000000000..1cd68ca17 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/take.kt @@ -0,0 +1,49 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.columns.size +import org.jetbrains.kotlinx.dataframe.index +import org.jetbrains.kotlinx.dataframe.nrow + +// region DataColumn + +public fun DataColumn.take(n: Int): DataColumn = when { + n == 0 -> get(emptyList()) + n >= 
size -> this + else -> get(0 until n) +} + +public fun DataColumn.takeLast(n: Int): DataColumn = drop(size - n) + +// endregion + +// region DataFrame + +/** + * Returns a DataFrame containing first [n] rows. + * + * @throws IllegalArgumentException if [n] is negative. + */ +public fun DataFrame.take(n: Int): DataFrame { + require(n >= 0) { "Requested rows count $n is less than zero." } + return getRows(0 until n.coerceAtMost(nrow)) +} + +/** + * Returns a DataFrame containing last [n] rows. + * + * @throws IllegalArgumentException if [n] is negative. + */ +public fun DataFrame.takeLast(n: Int): DataFrame { + require(n >= 0) { "Requested rows count $n is less than zero." } + return drop((nrow - n).coerceAtLeast(0)) +} + +/** + * Returns a DataFrame containing first rows that satisfy the given [predicate]. + */ +public fun DataFrame.takeWhile(predicate: RowFilter): DataFrame = firstOrNull { !predicate(it, it) }?.let { take(it.index) } ?: this + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt new file mode 100644 index 000000000..3bf5ec6d9 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -0,0 +1,209 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyBaseCol +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator +import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl +import org.jetbrains.kotlinx.dataframe.impl.asList +import 
org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType +import org.jetbrains.kotlinx.dataframe.index +import kotlin.reflect.KClass +import kotlin.reflect.KProperty + +// region read DataFrame from objects + +public inline fun Iterable.toDataFrame(): DataFrame = toDataFrame { + properties() +} + +public inline fun Iterable.toDataFrame(noinline body: CreateDataFrameDsl.() -> Unit): DataFrame = + createDataFrameImpl(T::class, body) + +public inline fun Iterable.toDataFrame(vararg props: KProperty<*>, maxDepth: Int = 0): DataFrame = + toDataFrame { + properties(roots = props, maxDepth = maxDepth) + } + +@Deprecated("Replaced with `unfold` operation.", ReplaceWith("this.unfold(columns)"), DeprecationLevel.ERROR) +public fun DataFrame.read(columns: ColumnsSelector): DataFrame = unfold(columns) + +@Deprecated("Replaced with `unfold` operation.", ReplaceWith("this.unfold(*columns)"), DeprecationLevel.ERROR) +public fun DataFrame.read(vararg columns: String): DataFrame = unfold(*columns) + +@Deprecated("Replaced with `unfold` operation.", ReplaceWith("this.unfold(*columns)"), DeprecationLevel.ERROR) +public fun DataFrame.read(vararg columns: KProperty<*>): DataFrame = unfold(*columns) + +@Deprecated("Replaced with `unfold` operation.", ReplaceWith("this.unfold(*columns)"), DeprecationLevel.ERROR) +public fun DataFrame.read(vararg columns: Column): DataFrame = unfold(*columns) + +@JvmName("toDataFrameT") +public fun Iterable>.toDataFrame(): DataFrame { + var uniqueDf: DataFrame? 
= null + for (row in this) { + if (uniqueDf == null) uniqueDf = row.df() + else { + if (uniqueDf !== row.df()) { + uniqueDf = null + break + } + } + } + return if (uniqueDf != null) { + val permutation = map { it.index } + uniqueDf[permutation] + } else map { it.toDataFrame() }.concat() +} + +@JvmName("toDataFrameAnyColumn") +public fun Iterable.toDataFrame(): AnyFrame = dataFrameOf(this) + +@JvmName("toDataFramePairColumnPathAnyCol") +public fun Iterable>.toDataFrameFromPairs(): DataFrame { + val nameGenerator = ColumnNameGenerator() + val columnNames = mutableListOf() + val columnGroups = mutableListOf>?>() + val columns = mutableListOf() + val columnIndices = mutableMapOf() + val columnGroupName = mutableMapOf() + + forEach { (path, col) -> + when (path.size) { + 0 -> { + } + + 1 -> { + val name = path[0] + val uniqueName = nameGenerator.addUnique(name) + val index = columns.size + columnNames.add(uniqueName) + columnGroups.add(null) + columns.add(col.rename(uniqueName)) + columnIndices[uniqueName] = index + } + + else -> { + val name = path[0] + val uniqueName = columnGroupName.getOrPut(name) { + nameGenerator.addUnique(name) + } + val index = columnIndices.getOrPut(uniqueName) { + columnNames.add(uniqueName) + columnGroups.add(mutableListOf()) + columns.add(null) + columns.size - 1 + } + val list = columnGroups[index]!! + list.add(path.drop(1) to col) + } + } + } + columns.indices.forEach { index -> + val group = columnGroups[index] + if (group != null) { + val nestedDf = group.toDataFrameFromPairs() + val col = DataColumn.createColumnGroup(columnNames[index], nestedDf) + assert(columns[index] == null) + columns[index] = col + } else assert(columns[index] != null) + } + return columns.map { it!! 
}.toDataFrame().cast() +} + +@JvmName("toDataFrameColumnPathAnyNullable") +public fun Iterable>>.toDataFrameFromPairs(): AnyFrame { + return map { it.first to guessColumnType(it.first.last(), it.second.asList()) }.toDataFrameFromPairs() +} + +public fun Iterable>>.toDataFrameFromPairs(): AnyFrame { + return map { ColumnPath(it.first) to guessColumnType(it.first, it.second.asList()) }.toDataFrameFromPairs() +} + +public interface TraversePropertiesDsl { + + /** + * Skip given [classes] during dfs traversal + */ + public fun exclude(vararg classes: KClass<*>) + + /** + * Skip given [properties] during dfs traversal + */ + public fun exclude(vararg properties: KProperty<*>) + + /** + * Store given [classes] in ValueColumns without transformation into ColumnGroups or FrameColumns + */ + public fun preserve(vararg classes: KClass<*>) + + /** + * Store given [properties] in ValueColumns without transformation into ColumnGroups or FrameColumns + */ + public fun preserve(vararg properties: KProperty<*>) +} + +public inline fun TraversePropertiesDsl.preserve(): Unit = preserve(T::class) + +public abstract class CreateDataFrameDsl : TraversePropertiesDsl { + + public abstract val source: Iterable + + public abstract fun add(column: AnyBaseCol, path: ColumnPath? = null) + + public infix fun AnyBaseCol.into(name: String): Unit = add(this, pathOf(name)) + + public infix fun AnyBaseCol.into(path: ColumnPath): Unit = add(this, path) + + public abstract fun properties( + vararg roots: KProperty<*>, + maxDepth: Int = 0, + body: (TraversePropertiesDsl.() -> Unit)? 
= null + ) + + public inline fun expr(noinline expression: (T) -> R): DataColumn = + source.map { expression(it) }.toColumn() + + public inline fun add(name: String, noinline expression: (T) -> R): Unit = + add(source.map { expression(it) }.toColumn(name, Infer.Nulls)) + + public inline infix fun String.from(noinline expression: (T) -> R): Unit = + add(this, expression) + + public inline infix fun KProperty.from(noinline expression: (T) -> R): Unit = + add(columnName, expression) + + public inline infix fun KProperty.from(inferType: InferType): Unit = + add(DataColumn.createWithTypeInference(columnName, source.map { inferType.expression(it) })) + + public data class InferType(val expression: (T) -> R) + + public inline fun inferType(noinline expression: (T) -> R): InferType = InferType(expression) + + public abstract operator fun String.invoke(builder: CreateDataFrameDsl.() -> Unit) +} + +// endregion + +// region Create DataFrame from Map + +public fun Map>.toDataFrame(): AnyFrame { + return map { DataColumn.createWithTypeInference(it.key, it.value.asList()) }.toDataFrame() +} + +@JvmName("toDataFrameColumnPathAnyNullable") +public fun Map>.toDataFrame(): AnyFrame { + return map { + it.key to DataColumn.createWithTypeInference( + it.key.last(), + it.value.asList() + ) + }.toDataFrameFromPairs() +} + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt new file mode 100644 index 000000000..b9eb5a873 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt @@ -0,0 +1,14 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.api.toListImpl +import kotlin.reflect.typeOf + +// region DataFrame + +public inline fun DataFrame.toList(): List = 
toListImpl(typeOf()) as List + +public inline fun AnyFrame.toListOf(): List = toListImpl(typeOf()) as List + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/transpose.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/transpose.kt new file mode 100644 index 000000000..60147825f --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/transpose.kt @@ -0,0 +1,29 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.impl.api.convertTo +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.owner +import org.jetbrains.kotlinx.dataframe.values +import kotlin.reflect.KType +import kotlin.reflect.typeOf + +// region DataRow + +public fun DataRow.transpose(): DataFrame> { + val valueColumn = DataColumn.createWithTypeInference(NameValuePair<*>::value.columnName, values) + val nameColumn = owner.columnNames().toValueColumn(NameValuePair<*>::name) + return dataFrameOf(nameColumn, valueColumn).cast() +} + +public inline fun AnyRow.transposeTo(): DataFrame> = transposeTo(typeOf()) + +@PublishedApi +internal fun AnyRow.transposeTo(type: KType): DataFrame> { + val convertedValues = values.map { it?.convertTo(type) as T? 
} + val valueColumn = DataColumn.createWithTypeInference(NameValuePair::value.columnName, convertedValues) + val nameColumn = owner.columnNames().toValueColumn(NameValuePair::name) + return dataFrameOf(nameColumn, valueColumn).cast() +} + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt new file mode 100644 index 000000000..54f59bf95 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt @@ -0,0 +1,28 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.typeClass +import kotlin.reflect.KProperty + +public inline fun DataColumn.unfold(): AnyCol = + when (kind()) { + ColumnKind.Group, ColumnKind.Frame -> this + else -> when { + isPrimitive() -> this + else -> values().createDataFrameImpl(typeClass) { + (this as CreateDataFrameDsl).properties() + }.asColumnGroup(name()).asDataColumn() + } + } + +public fun DataFrame.unfold(columns: ColumnsSelector): DataFrame = replace(columns).with { it.unfold() } +public fun DataFrame.unfold(vararg columns: String): DataFrame = unfold { columns.toColumns() } +public fun DataFrame.unfold(vararg columns: KProperty<*>): DataFrame = unfold { columns.toColumns() } +public fun DataFrame.unfold(vararg columns: Column): DataFrame = unfold { columns.toColumns() } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt new file mode 100644 index 000000000..65e0f820c --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ungroup.kt @@ -0,0 +1,21 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.removeAt +import kotlin.reflect.KProperty + +// region DataFrame + +public fun DataFrame.ungroup(columns: ColumnsSelector): DataFrame { + return move { columns.toColumns().children() } + .into { it.path.removeAt(it.path.size - 2).toPath() } +} + +public fun DataFrame.ungroup(vararg columns: String): DataFrame = ungroup { columns.toColumns() } +public fun DataFrame.ungroup(vararg columns: Column): DataFrame = ungroup { columns.toColumns() } +public fun DataFrame.ungroup(vararg columns: KProperty<*>): DataFrame = ungroup { columns.toColumns() } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt new file mode 100644 index 000000000..12fe97a00 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -0,0 +1,778 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.Update.Usage +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.documentation.* +import org.jetbrains.kotlinx.dataframe.impl.api.updateImpl +import org.jetbrains.kotlinx.dataframe.impl.api.updateWithValuePerColumnImpl +import 
org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.headPlusArray +import kotlin.reflect.KProperty + +/** + * ## The Update Operation + * + * Returns the [DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + */ +public data class Update( + val df: DataFrame, + val filter: RowValueFilter?, + val columns: ColumnsSelector, +) { + public fun cast(): Update = + Update(df, filter as RowValueFilter?, columns as ColumnsSelector) + + /** This argument providing the (clickable) name of the update-like function. + * Note: If clickable, make sure to [alias][your type]. + */ + internal interface UpdateOperationArg + + /** + * ## [update][update] Operation Usage + * + * [update][update] `{ `[columns][SelectingColumns]` }` + * + * - `[.`[where][Update.where]` { `[rowValueCondition][SelectingRows.RowValueCondition.WithExample]` } ]` + * + * - `[.`[at][Update.at]` (`[rowIndices][CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` + * + * - `.`[with][Update.with]` { `[rowExpression][ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[notNull][Update.notNull]` { `[rowExpression][ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[perCol][Update.perCol]` { `[colExpression][ExpressionsGivenColumn.ColumnExpression.WithExample]` } + * | .`[perRowCol][Update.perRowCol]` { `[rowColExpression][ExpressionsGivenColumn.RowColumnExpression.WithExample]` } + * | .`[withValue][Update.withValue]`(value) + * | .`[withNull][Update.withNull]`() + * | .`[withZero][Update.withZero]`() + * | .`[asFrame][Update.asFrame]` { `[dataFrameExpression][ExpressionsGivenDataFrame.DataFrameExpression.WithExample]` }` + */ + public interface Usage + + /** The columns to 
update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. */ + public interface Columns + + /** @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame] to update. */ + internal interface DslParam + + /** @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame] to update. */ + internal interface ColumnAccessorsParam + + /** @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame] to update. */ + internal interface KPropertiesParam + + /** @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame] to update. */ + internal interface ColumnNamesParam +} + +// region update + + +private interface SetSelectingColumnsOperationArg + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + */ +private interface CommonUpdateFunctionDoc + +/** + * ## Optional + * Combine `df.`[update][update]`(...).`[with][Update.with]` { ... }` + * into `df.`[update][update]`(...) { ... 
}` + */ +private interface UpdateWithNote + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * Select or express columns using the Column(s) Selection DSL. + * (Any [Access Api][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. 
+ * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`() }` + * + * @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.update(columns: ColumnsSelector): Update = + Update(this, null, columns) + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`("length", "age")` + * + * + * ## Optional + * Combine `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...).`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { ... }` + * into `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...) { ... 
}` + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.update(vararg columns: String): Update = update { columns.toColumns() } + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(Person::length, Person::age)` + * + * + * ## Optional + * Combine `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...).`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { ... }` + * into `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...) { ... }` + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. 
+ */ +public fun DataFrame.update(vararg columns: KProperty): Update = update { columns.toColumns() } + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + * + * For example: + * + * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`()` + * + * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`()` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(length, age)` + * + * + * ## Optional + * Combine `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...).`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { ... }` + * into `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...) { ... }` + * @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. 
+ */ +public fun DataFrame.update(vararg columns: ColumnReference): Update = + update { columns.toColumns() } + +/** + * TODO this will be deprecated + */ +public fun DataFrame.update(columns: Iterable>): Update = + update { columns.toColumnSet() } + +// endregion + +/** + * ## Where + * + * Filter or find rows to operate on after [selecting columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] using a + * [row value filter][org.jetbrains.kotlinx.dataframe.RowValueFilter]. + * + * For example: + * + * `df.`[update][update]` { length }.`[where][where]` { it > 10.0 }` + * + * `df.`[update][update]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }.`[where][where]` { `[index][org.jetbrains.kotlinx.dataframe.index]`() > 4 && city != "Paris" }` + * + * + * + * + * @param predicate The [row value filter][RowValueFilter] to select the rows to update. + */ +public fun Update.where(predicate: RowValueFilter): Update = + copy(filter = filter and predicate) + +/** ## At + * Only update the columns at certain given [row indices][CommonUpdateAtFunctionDoc.RowIndicesParam]: + * + * Either a [Collection]<[Int]>, an [IntRange], or just `vararg` indices. + * + * For example: + * + * `df.`[update][update]` { city }.`[at][at]`(5..10).`[with][with]` { "Paris" }` + * + * `df.`[update][update]` { name }.`[at][at]`(1, 2, 3, 4).`[with][with]` { "Empty" }` + * + * ## This At Overload + */ +private interface CommonUpdateAtFunctionDoc { + + /** The indices of the rows to update. Either a [Collection]<[Int]>, an [IntRange], or just `vararg` indices. */ + interface RowIndicesParam +} + +/** + * ## At + * Only update the columns at certain given [row indices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]: + * + * Either a [Collection][Collection]< [Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. 
+ * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { city }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(5..10).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Paris" }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(1, 2, 3, 4).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Empty" }` + * + * ## This At Overload + * + * Provide a [Collection]<[Int]> of row indices to update. + * + * @param rowIndices The indices of the rows to update. Either a [Collection][Collection]< [Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. + */ +public fun Update.at(rowIndices: Collection): Update = where { index in rowIndices } + +/** + * ## At + * Only update the columns at certain given [row indices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]: + * + * Either a [Collection][Collection]< [Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { city }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(5..10).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Paris" }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(1, 2, 3, 4).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Empty" }` + * + * ## This At Overload + * + * Provide a `vararg` of [Ints][Int] of row indices to update. + * + * @param rowIndices The indices of the rows to update. Either a [Collection][Collection]< [Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. 
+ */ +public fun Update.at(vararg rowIndices: Int): Update = at(rowIndices.toSet()) + +/** + * ## At + * Only update the columns at certain given [row indices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]: + * + * Either a [Collection][Collection]< [Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { city }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(5..10).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Paris" }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(1, 2, 3, 4).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Empty" }` + * + * ## This At Overload + * + * Provide an [IntRange] of row indices to update. + * + * @param rowRange The indices of the rows to update. Either a [Collection][Collection]< [Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. + */ +public fun Update.at(rowRange: IntRange): Update = where { index in rowRange } + +/** + * ## Per Row Col + * + * Provide a new value for every selected cell given both its row and column using a [row-column expression][org.jetbrains.kotlinx.dataframe.RowColumnExpression]. + * + * For example: + * + * `df.`[update][update]` { age }.`[perRowCol][perRowCol]` { row, col ->` + * + * `row.age / col.`[mean][org.jetbrains.kotlinx.dataframe.DataColumn.mean]`(skipNA = true)` + * + * `}` + * + * + * + * + * ## See Also + * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row. + * - [Update per col][org.jetbrains.kotlinx.dataframe.api.Update.perCol] to provide a new value for every selected cell giving its column. 
+ * @param expression The [Row Column Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.RowColumnExpression] to provide a new value for every selected cell giving its row and column. + */ +public infix fun Update.perRowCol(expression: RowColumnExpression): DataFrame = + updateImpl { row, column, _ -> expression(row, column) } + +/** [Update per row col][Update.perRowCol] to provide a new value for every selected cell giving its row and column. */ +private interface SeeAlsoPerRowCol + +/** ## Update Expression + * @see ExpressionsGivenRow.RowValueExpression.WithExample + * @see ExpressionsGivenRow.AddDataRowNote + */ // doc processor plugin does not work with type aliases yet +public typealias UpdateExpression = AddDataRow.(C) -> R + +/** + * ## With + * + * Provide a new value for every selected cell given its row and its previous value using a + * [row value expression][org.jetbrains.kotlinx.dataframe.RowValueExpression]. + * + * For example: + * + * `df.`[update][update]` { city }.`[with][with]` { name.firstName + " from " + it }` + * + * `df.`[update][update]` { city }.`[with][with]` { it.uppercase() }` + * + * + * + * ## Note + * + * [update with][org.jetbrains.kotlinx.dataframe.api.Update.with]- and [add][org.jetbrains.kotlinx.dataframe.api.add]-like expressions use [AddDataRow][org.jetbrains.kotlinx.dataframe.api.AddDataRow] instead of [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as the DSL's receiver type. + * This is an extension to [RowValueExpression][org.jetbrains.kotlinx.dataframe.RowValueExpression] and + * [RowExpression][org.jetbrains.kotlinx.dataframe.RowExpression] that provides access to + * the modified/generated value of the preceding row ([AddDataRow.newValue][org.jetbrains.kotlinx.dataframe.api.AddDataRow.newValue]). + * ## See Also + * - [Update per col][org.jetbrains.kotlinx.dataframe.api.Update.perCol] to provide a new value for every selected cell giving its column. 
+ * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column. + * @param expression The [Row Value Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample] to update the rows with. + */ +public infix fun Update.with(expression: UpdateExpression): DataFrame = + updateImpl { row, _, value -> + expression(row, value) + } + +/** [Update with][Update.with] to provide a new value for every selected cell giving its row. */ +private interface SeeAlsoWith + +/** + * ## As Frame + * + * Updates selected [column group][ColumnGroup] as a [DataFrame] with the given [expression]. + * + * + * Provide a new value for every selected data frame using a [dataframe expression][org.jetbrains.kotlinx.dataframe.DataFrameExpression]. + * + * For example: + * + * `df.`[update][update]` { name }.`[asFrame][asFrame]` { `[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { lastName } }` + * + * @param expression The [Data Frame Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpression] to replace the selected column group with. + */ +public infix fun Update>.asFrame(expression: DataFrameExpression>): DataFrame = + df.replace(columns).with { it.asColumnGroup().let { expression(it, it) }.asColumnGroup(it.name()) } + +@Deprecated( + "Useless unless in combination with `withValue(null)`, but then users can just use `with { null }`...", + ReplaceWith("this as Update") +) +public fun Update.asNullable(): Update = this as Update + +/** + * ## Per Col + * + * Per Col can be used for two different types of operations: + * - Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression]. 
+ * - Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * ## See Also + * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row. + * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column. + * ## This Per Col Overload + */ +private interface CommonUpdatePerColDoc + +/** Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][DataRow] as Map. */ +private interface UpdatePerColMap + +/** + * ## Per Col + * + * Per Col can be used for two different types of operations: + * - Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression]. + * - Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * ## See Also + * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row. + * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column. + * ## This Per Col Overload + * Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * For example: + * + * `val defaults = {@includeArg [CommonUpdatePerColMapDoc]}` + * + * `df.`[update][update]` { name and age }.`[where][Update.where]` { ... 
}.`[perCol][perCol]`(defaults)` + * + * @throws IllegalArgumentException if a value for a selected cell's column is not defined in [values\]. + */ +private interface CommonUpdatePerColMapDoc + +/** + * ## Per Col + * + * Per Col can be used for two different types of operations: + * - Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression]. + * - Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * ## See Also + * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row. + * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column. + * ## This Per Col Overload + * Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * For example: + * + * `val defaults = `[mapOf][mapOf]`("name" to "Empty", "age" to 0)` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name and age }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { ... }.`[perCol][org.jetbrains.kotlinx.dataframe.api.perCol]`(defaults)` + * + * @throws IllegalArgumentException if a value for a selected cell's column is not defined in [values][values]. + * + * + * @param values The [Map]<[String], Value> to provide a new value for every selected cell. + * For each selected column, there must be a value in the map with the same name. 
+ */ +public fun Update.perCol(values: Map): DataFrame = updateWithValuePerColumnImpl { + values[it.name()] ?: throw IllegalArgumentException("Update value for column ${it.name()} is not defined") +} + +/** + * ## Per Col + * + * Per Col can be used for two different types of operations: + * - Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression]. + * - Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * ## See Also + * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row. + * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column. + * ## This Per Col Overload + * Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * For example: + * + * `val defaults = df.`[getRows][DataFrame.getRows]`(`[listOf][listOf]`(0))` + * + * `.`[update][update]` { name }.`[with][Update.with]` { "Empty" }` + * + * `.`[update][update]` { age }.`[with][Update.with]` { 0 }` + * + * `.first()` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name and age }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { ... }.`[perCol][org.jetbrains.kotlinx.dataframe.api.perCol]`(defaults)` + * + * @throws IllegalArgumentException if a value for a selected cell's column is not defined in [values][values]. + * + * + * @param values The [DataRow] to provide a new value for every selected cell. 
+ */ +public fun Update.perCol(values: AnyRow): DataFrame = perCol(values.toMap() as Map) + +/** + * ## Per Col + * + * Per Col can be used for two different types of operations: + * - Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression]. + * - Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * ## See Also + * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row. + * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column. + * ## This Per Col Overload + * + * Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression]. + * + * For example: + * + * `df.`[update][update]` { age }.`[perCol][perCol]` { `[mean][org.jetbrains.kotlinx.dataframe.DataColumn.mean]`(skipNA = true) }` + * + * `df.`[update][update]` { age }.`[perCol][perCol]` { `[count][org.jetbrains.kotlinx.dataframe.DataColumn.count]` { it > 10 } }` + * + * + * + * @param valueSelector The [Column Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.ColumnExpression] to provide a new value for every selected cell giving its column. + */ +public fun Update.perCol(valueSelector: ColumnExpression): DataFrame = + updateWithValuePerColumnImpl(valueSelector) + +/** [Update per col][Update.perCol] to provide a new value for every selected cell giving its column. */ +private interface SeeAlsoPerCol + +/** Chains up two row value filters together. 
*/ +internal infix fun RowValueFilter?.and(other: RowValueFilter): RowValueFilter { + if (this == null) return other + val thisExp = this + return { thisExp(this, it) && other(this, it) } +} + +/** + * ## Not Null + * + * Selects only the rows where the values in the selected columns are not null. + * + * Shorthand for: [update][org.jetbrains.kotlinx.dataframe.api.update]` { ... }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { it != null }` + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Number][Number]`?>() }.`[notNull][org.jetbrains.kotlinx.dataframe.api.notNull]`()`.[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol] `{ `[mean][org.jetbrains.kotlinx.dataframe.api.mean]`() }` + * + * ### Optional + * Provide an [expression][expression] to update the rows with. + * This combines [with][org.jetbrains.kotlinx.dataframe.api.Update.with] with [notNull][org.jetbrains.kotlinx.dataframe.api.notNull]. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { city }.`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { it.`[toUpperCase][String.toUpperCase]`() }` + * + * @param expression Optional [Row Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowExpression.WithExample] to update the rows with. + */ +public fun Update.notNull(): Update = + where { it != null } as Update + +/** + * ## Not Null + * + * Selects only the rows where the values in the selected columns are not null. + * + * Shorthand for: [update][update]` { ... }.`[where][Update.where]` { it != null }` + * + * For example: + * + * `df.`[update][update]` { `[colsOf][colsOf]`<`[Number][Number]`?>() }.`[notNull][notNull]`()`.[perCol][Update.perCol] `{ `[mean][mean]`() }` + * + * ### Optional + * Provide an [expression] to update the rows with. + * This combines [with][Update.with] with [notNull]. 
+ * + * For example: + * + * `df.`[update][update]` { city }.`[notNull][Update.notNull]` { it.`[toUpperCase][String.toUpperCase]`() }` + * + * @param expression Optional [Row Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowExpression.WithExample] to update the rows with. + */ +public fun Update.notNull(expression: UpdateExpression): DataFrame = + notNull().with(expression) + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * ### This overload is a combination of [update] and [with][Update.with]. + * + * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + * + * + * Provide a new value for every selected cell given its row and its previous value using a + * [row value expression][org.jetbrains.kotlinx.dataframe.RowValueExpression]. + * + * For example: + * + * `df.`[update][update]`("city")` ` { name.firstName + " from " + it }` + * + * `df.`[update][update]`("city")` ` { it.uppercase() }` + * + * + * + * @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. 
+ * @param expression The [Row Value Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample] to update the rows with. + */ +public fun DataFrame.update( + firstCol: ColumnReference, + vararg cols: ColumnReference, + expression: UpdateExpression +): DataFrame = + update(*headPlusArray(firstCol, cols)).with(expression) + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * ### This overload is a combination of [update] and [with][Update.with]. + * + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * + * Provide a new value for every selected cell given its row and its previous value using a + * [row value expression][org.jetbrains.kotlinx.dataframe.RowValueExpression]. + * + * For example: + * + * `df.`[update][update]`("city")` ` { name.firstName + " from " + it }` + * + * `df.`[update][update]`("city")` ` { it.uppercase() }` + * + * + * + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + * @param expression The [Row Value Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample] to update the rows with. 
+ */ +public fun DataFrame.update( + firstCol: KProperty, + vararg cols: KProperty, + expression: UpdateExpression +): DataFrame = + update(*headPlusArray(firstCol, cols)).with(expression) + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * ### This overload is a combination of [update] and [with][Update.with]. + * + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * + * Provide a new value for every selected cell given its row and its previous value using a + * [row value expression][org.jetbrains.kotlinx.dataframe.RowValueExpression]. + * + * For example: + * + * `df.`[update][update]`("city")` ` { name.firstName + " from " + it }` + * + * `df.`[update][update]`("city")` ` { it.uppercase() }` + * + * + * + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + * @param expression The [Row Value Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample] to update the rows with. 
+ */ +public fun DataFrame.update( + firstCol: String, + vararg cols: String, + expression: UpdateExpression +): DataFrame = + update(*headPlusArray(firstCol, cols)).with(expression) + +/** + * Specific version of [with] that simply sets the value of each selected row to {@includeArg [CommonSpecificWithDocFirstArg]}. + * + * For example: + * + * `df.`[update][update]` { id }.`[where][Update.where]` { it < 0 }.`{@includeArg [CommonSpecificWithDocSecondArg]}` + */ +private interface CommonSpecificWithDoc + +/** Arg for the resulting value */ +private interface CommonSpecificWithDocFirstArg + +/** Arg for the function call */ +private interface CommonSpecificWithDocSecondArg + +/** + * ## With Null + * + * Specific version of [with][org.jetbrains.kotlinx.dataframe.api.with] that simply sets the value of each selected row to `null`. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { id }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { it < 0 }.`[withNull][withNull]`()` + */ +public fun Update.withNull(): DataFrame = with { null } + +/** + * ## With Zero + * + * Specific version of [with][org.jetbrains.kotlinx.dataframe.api.with] that simply sets the value of each selected row to `0`. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { id }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { it < 0 }.`[withZero][withZero]`()` + */ +public fun Update.withZero(): DataFrame = updateWithValuePerColumnImpl { 0 as C } + +/** + * ## With Value + * + * Specific version of [with][org.jetbrains.kotlinx.dataframe.api.with] that simply sets the value of each selected row to [value]. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { id }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { it < 0 }.`[withValue][withValue]`(-1)` + * + * + * + * @param value The value to set the selected rows to. 
In contrast to [with][Update.with], this must be the same exact type. + */ +public infix fun Update.withValue(value: C): DataFrame = with { value } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/uppercase.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/uppercase.kt new file mode 100644 index 000000000..e2595534a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/uppercase.kt @@ -0,0 +1,9 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.StringCol + +// region StringCol + +public fun StringCol.uppercase(): StringCol = map { it?.uppercase() } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt new file mode 100644 index 000000000..7de0675a8 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt @@ -0,0 +1,87 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.nameGenerator +import kotlin.reflect.KProperty +import kotlin.reflect.full.withNullability +import kotlin.reflect.typeOf + +// region DataSchema + +@DataSchema +public interface ValueCount { + public val count: Int +} + +// endregion + +// region DataColumn + +internal val defaultCountColumnName: String = ValueCount::count.name + +public fun DataColumn.valueCounts( + sort: Boolean = true, + ascending: Boolean = false, + dropNA: Boolean = true, + resultColumn: String = 
defaultCountColumnName +): DataFrame { + var grouped = toList().groupBy { it }.map { it.key to it.value.size } + if (sort) { + grouped = if (ascending) grouped.sortedBy { it.second } + else grouped.sortedByDescending { it.second } + } + if (dropNA) grouped = grouped.filter { !it.first.isNA } + val nulls = if (dropNA) false else hasNulls() + val values = DataColumn.create(name(), grouped.map { it.first }, type().withNullability(nulls)) + val countName = if (resultColumn == name()) resultColumn + "1" else resultColumn + val counts = DataColumn.create(countName, grouped.map { it.second }, typeOf()) + return dataFrameOf(values, counts).cast() +} + +// endregion + +// region DataFrame + +public fun DataFrame.valueCounts( + sort: Boolean = true, + ascending: Boolean = false, + dropNA: Boolean = true, + resultColumn: String = defaultCountColumnName, + columns: ColumnsSelector? = null +): DataFrame { + var df = if (columns != null) select(columns) else this + if (dropNA) df = df.dropNA() + + val rows by columnGroup() + val countName = nameGenerator().addUnique(resultColumn) + return df.asColumnGroup(rows).asDataColumn().valueCounts(sort, ascending, dropNA, countName).ungroup(rows).cast() +} + +public fun DataFrame.valueCounts( + vararg columns: String, + sort: Boolean = true, + ascending: Boolean = false, + dropNA: Boolean = true, + resultColumn: String = defaultCountColumnName +): DataFrame = valueCounts(sort, ascending, dropNA, resultColumn) { columns.toColumns() } +public fun DataFrame.valueCounts( + vararg columns: Column, + sort: Boolean = true, + ascending: Boolean = false, + dropNA: Boolean = true, + resultColumn: String = defaultCountColumnName +): DataFrame = valueCounts(sort, ascending, dropNA, resultColumn) { columns.toColumns() } +public fun DataFrame.valueCounts( + vararg columns: KProperty<*>, + sort: Boolean = true, + ascending: Boolean = false, + dropNA: Boolean = true, + resultColumn: String = defaultCountColumnName +): DataFrame = valueCounts(sort, 
ascending, dropNA, resultColumn) { columns.toColumns() } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt new file mode 100644 index 000000000..417487f73 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/values.kt @@ -0,0 +1,177 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.aggregation.ColumnsForAggregateSelector +import org.jetbrains.kotlinx.dataframe.impl.aggregation.columnValues +import org.jetbrains.kotlinx.dataframe.impl.aggregation.internal +import org.jetbrains.kotlinx.dataframe.impl.aggregation.remainingColumnsSelector +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.valuesImpl +import kotlin.reflect.KProperty + +// region DataFrame + +public fun DataFrame.values(byRow: Boolean = false, columns: ColumnsSelector): Sequence = valuesImpl(byRow, columns) +public fun DataFrame.values(byRows: Boolean = false): Sequence = values(byRows) { all() } + +public fun DataFrame.valuesNotNull(byRow: Boolean = false, columns: ColumnsSelector): Sequence = values(byRow, columns).filterNotNull() +public fun DataFrame.valuesNotNull(byRow: Boolean = false): Sequence = valuesNotNull(byRow) { all() } + +// endregion + +// region GroupBy + +public fun Grouped.values(vararg columns: Column, dropNA: Boolean = false, distinct: Boolean = false): DataFrame = values(dropNA, distinct) { columns.toColumns() } +public fun Grouped.values(vararg columns: String, dropNA: Boolean = false, distinct: Boolean = false): DataFrame = values(dropNA, distinct) { columns.toColumns() } +public fun Grouped.values( 
+ dropNA: Boolean = false, + distinct: Boolean = false, + columns: ColumnsForAggregateSelector +): DataFrame = aggregate { internal().columnValues(columns, true, dropNA, distinct) } +public fun Grouped.values(dropNA: Boolean = false, distinct: Boolean = false): DataFrame = values(dropNA, distinct, remainingColumnsSelector()) + +// endregion + +// region ReducedGroupBy + +public fun ReducedGroupBy.values(): DataFrame = values(groupBy.remainingColumnsSelector()) + +public fun ReducedGroupBy.values( + vararg columns: Column +): DataFrame = values { columns.toColumns() } + +public fun ReducedGroupBy.values( + vararg columns: String +): DataFrame = values { columns.toColumns() } + +public fun ReducedGroupBy.values( + vararg columns: KProperty<*> +): DataFrame = values { columns.toColumns() } + +public fun ReducedGroupBy.values( + columns: ColumnsForAggregateSelector +): DataFrame = groupBy.aggregate { internal().columnValues(columns, reducer) } + +// endregion + +// region Pivot + +public fun Pivot.values( + dropNA: Boolean = false, + distinct: Boolean = false, + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataRow = delegate { values(dropNA, distinct, separate, columns) } +public fun Pivot.values( + vararg columns: Column, + dropNA: Boolean = false, + distinct: Boolean = false, + separate: Boolean = false +): DataRow = values(dropNA, distinct, separate) { columns.toColumns() } +public fun Pivot.values( + vararg columns: String, + dropNA: Boolean = false, + distinct: Boolean = false, + separate: Boolean = false +): DataRow = values(dropNA, distinct, separate) { columns.toColumns() } +public fun Pivot.values( + vararg columns: KProperty<*>, + dropNA: Boolean = false, + distinct: Boolean = false, + separate: Boolean = false +): DataRow = values(dropNA, distinct, separate) { columns.toColumns() } + +public fun Pivot.values(dropNA: Boolean = false, distinct: Boolean = false, separate: Boolean = false): DataRow = delegate { values(dropNA, distinct, 
separate) } + +// endregion + +// region ReducedPivot + +public fun ReducedPivot.values( + separate: Boolean = false +): DataRow = pivot.delegate { reduce(reducer).values(separate = separate) } + +public fun ReducedPivot.values( + vararg columns: Column, + separate: Boolean = false +): DataRow = values(separate) { columns.toColumns() } + +public fun ReducedPivot.values( + vararg columns: String, + separate: Boolean = false +): DataRow = values(separate) { columns.toColumns() } + +public fun ReducedPivot.values( + vararg columns: KProperty<*>, + separate: Boolean = false +): DataRow = values(separate) { columns.toColumns() } + +public fun ReducedPivot.values( + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataRow = pivot.delegate { reduce(reducer).values(separate = separate, columns = columns) } + +// endregion + +// region PivotGroupBy + +public fun PivotGroupBy.values(dropNA: Boolean = false, distinct: Boolean = false, separate: Boolean = false): DataFrame = values(dropNA, distinct, separate, remainingColumnsSelector()) + +public fun PivotGroupBy.values( + vararg columns: Column, + dropNA: Boolean = false, + distinct: Boolean = false, + separate: Boolean = false +): DataFrame = values(dropNA, distinct, separate) { columns.toColumns() } +public fun PivotGroupBy.values( + vararg columns: String, + dropNA: Boolean = false, + distinct: Boolean = false, + separate: Boolean = false +): DataFrame = values(dropNA, distinct, separate) { columns.toColumns() } +public fun PivotGroupBy.values( + vararg columns: KProperty<*>, + dropNA: Boolean = false, + distinct: Boolean = false, + separate: Boolean = false +): DataFrame = values(dropNA, distinct, separate) { columns.toColumns() } +public fun PivotGroupBy.values( + dropNA: Boolean = false, + distinct: Boolean = false, + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataFrame = + aggregate(separate = separate) { internal().columnValues(columns, false, dropNA, distinct) } + +// 
endregion + +// region ReducedPivotGroupBy + +public fun ReducedPivotGroupBy.values( + separate: Boolean = false +): DataFrame = values(separate, pivot.remainingColumnsSelector()) + +public fun ReducedPivotGroupBy.values( + vararg columns: Column, + separate: Boolean = false +): DataFrame = values(separate) { columns.toColumns() } + +public fun ReducedPivotGroupBy.values( + vararg columns: String, + separate: Boolean = false +): DataFrame = values(separate) { columns.toColumns() } + +public fun ReducedPivotGroupBy.values( + vararg columns: KProperty<*>, + separate: Boolean = false +): DataFrame = values(separate) { columns.toColumns() } + +public fun ReducedPivotGroupBy.values( + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataFrame = pivot.aggregate(separate = separate) { internal().columnValues(columns, reducer) } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt new file mode 100644 index 000000000..7b5942b83 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/with.kt @@ -0,0 +1,47 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.impl.aggregation.internal +import org.jetbrains.kotlinx.dataframe.impl.aggregation.withExpr +import org.jetbrains.kotlinx.dataframe.impl.emptyPath +import kotlin.reflect.typeOf + +// region Pivot + +public inline fun Pivot.with(noinline expression: RowExpression): DataRow = delegate { with(expression) } + +// endregion + +// region ReducedPivot + +public inline fun ReducedPivot.with(noinline expression: RowExpression): DataRow = pivot.delegate { reduce(reducer).with(expression) } + +// 
endregion + +// region PivotGroupBy + +public inline fun PivotGroupBy.with(noinline expression: RowExpression): DataFrame { + val type = typeOf() + return aggregate { internal().withExpr(type, emptyPath(), expression) } +} + +// endregion + +// region ReducedPivotGroupBy + +public inline fun ReducedPivotGroupBy.with(noinline expression: RowExpression): DataFrame { + val type = typeOf() + return pivot.aggregate { + val value = reducer(this)?.let { + val value = expression(it, it) + if (value is Column) it[value] + else value + } + internal().yield(emptyPath(), value, type) + } +} + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt new file mode 100644 index 000000000..3706590e7 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt @@ -0,0 +1,21 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.api.xsImpl + +// region DataFrame + +public fun DataFrame.xs(vararg keyValues: Any?): DataFrame = xs(*keyValues) { allDfs().take(keyValues.size) } + +public fun DataFrame.xs(vararg keyValues: C, keyColumns: ColumnsSelector): DataFrame = xsImpl(keyColumns, false, *keyValues) + +// endregion + +// region GroupBy + +public fun GroupBy.xs(vararg keyValues: Any?): GroupBy = xs(*keyValues) { allDfs().take(keyValues.size) } + +public fun GroupBy.xs(vararg keyValues: C, keyColumns: ColumnsSelector): GroupBy = xsImpl(*keyValues, keyColumns = keyColumns) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeGenerator.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeGenerator.kt new file mode 100644 index 000000000..4ec420258 --- /dev/null +++ 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeGenerator.kt @@ -0,0 +1,73 @@ +package org.jetbrains.dataframe.impl.codeGen + +import org.jetbrains.kotlinx.dataframe.codeGen.CodeWithConverter +import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod +import org.jetbrains.kotlinx.dataframe.codeGen.ExtensionsCodeGenerator +import org.jetbrains.kotlinx.dataframe.codeGen.Marker +import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility +import org.jetbrains.kotlinx.dataframe.codeGen.MarkersExtractor +import org.jetbrains.kotlinx.dataframe.codeGen.NameNormalizer +import org.jetbrains.kotlinx.dataframe.impl.codeGen.CodeGeneratorImpl +import org.jetbrains.kotlinx.dataframe.impl.codeGen.FullyQualifiedNames +import org.jetbrains.kotlinx.dataframe.impl.codeGen.ShortNames +import org.jetbrains.kotlinx.dataframe.impl.codeGen.id +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema +import kotlin.reflect.KClass + +public enum class InterfaceGenerationMode { + NoFields, + WithFields, + Enum, + TypeAlias, + None; +} + +public data class CodeGenResult(val code: CodeWithConverter, val newMarkers: List) + +public interface CodeGenerator : ExtensionsCodeGenerator { + + public fun generate( + schema: DataFrameSchema, + name: String, + fields: Boolean, + extensionProperties: Boolean, + isOpen: Boolean, + visibility: MarkerVisibility = MarkerVisibility.IMPLICIT_PUBLIC, + knownMarkers: Iterable = emptyList(), + readDfMethod: DefaultReadDfMethod? = null, + fieldNameNormalizer: NameNormalizer = NameNormalizer.id(), + ): CodeGenResult + + public fun generate( + marker: Marker, + interfaceMode: InterfaceGenerationMode, + extensionProperties: Boolean, + readDfMethod: DefaultReadDfMethod? 
= null, + ): CodeWithConverter + + public companion object { + public fun create(useFqNames: Boolean = true): CodeGenerator { + return if (useFqNames) { + CodeGeneratorImpl(FullyQualifiedNames) + } else { + CodeGeneratorImpl(ShortNames) + } + } + } +} + +@PublishedApi +internal fun CodeGenerator.generate( + markerClass: KClass<*>, + interfaceMode: InterfaceGenerationMode, + extensionProperties: Boolean, +): CodeWithConverter = generate( + MarkersExtractor.get(markerClass), + interfaceMode, + extensionProperties +) + +public inline fun CodeGenerator.generate( + interfaceMode: InterfaceGenerationMode, + extensionProperties: Boolean, +): CodeWithConverter = generate(T::class, interfaceMode, extensionProperties) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeWithConverter.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeWithConverter.kt new file mode 100644 index 000000000..dd48b739f --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeWithConverter.kt @@ -0,0 +1,29 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import org.jetbrains.kotlinx.jupyter.api.Code +import org.jetbrains.kotlinx.jupyter.api.VariableName + +/** + * Class representing generated code declarations for a [Marker]. + * + * @param declarations The generated code. + * @param converter Optional converter for the [Marker], such as a [org.jetbrains.kotlinx.dataframe.api.cast], often used for Jupyter. 
+ */ +public data class CodeWithConverter(val declarations: Code, val converter: (VariableName) -> Code = EmptyConverter) { + + public companion object { + public const val EmptyDeclarations: Code = "" + public val EmptyConverter: (VariableName) -> Code = { it } + public val Empty: CodeWithConverter = CodeWithConverter(EmptyDeclarations, EmptyConverter) + } + + val hasDeclarations: Boolean get() = declarations.isNotBlank() + + val hasConverter: Boolean get() = converter("it").trim() != "it" + + public fun with(name: VariableName): Code = when { + !hasConverter -> declarations + !hasDeclarations -> converter(name) + else -> declarations + "\n" + converter(name) + } +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt new file mode 100644 index 000000000..5f93bc31a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt @@ -0,0 +1,103 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import com.squareup.kotlinpoet.ClassName +import com.squareup.kotlinpoet.FunSpec +import com.squareup.kotlinpoet.KModifier +import com.squareup.kotlinpoet.ParameterSpec +import com.squareup.kotlinpoet.ParameterizedTypeName.Companion.parameterizedBy +import com.squareup.kotlinpoet.PropertySpec +import com.squareup.kotlinpoet.TypeSpec +import com.squareup.kotlinpoet.asClassName +import com.squareup.kotlinpoet.typeNameOf +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.io.MethodArguments + +public interface DefaultReadDfMethod { + public fun toDeclaration(marker: Marker, visibility: String): String + + public val additionalImports: List +} + +// Used APIs +private const val cast = "cast" +private const val verify = "verify" // cast(true) is obscure; I think it's better to use a named argument here +private const 
val readCSV = "readCSV" +private const val readTSV = "readTSV" +private const val readJson = "readJson" + +public abstract class AbstractDefaultReadMethod( + private val path: String?, + private val arguments: MethodArguments, + private val methodName: String, +) : DefaultReadDfMethod { + override fun toDeclaration(marker: Marker, visibility: String): String { + val parameters = arguments.defaultValues.map { + ParameterSpec.builder(it.name, it.property.type) + .defaultValue("%N", it.property) + .build() + } + + val defaultPath = path?.let { + PropertySpec.builder("defaultPath", typeNameOf(), KModifier.CONST) + .initializer("%S", path) + .build() + } + + val type = DataFrame::class.asClassName().parameterizedBy(ClassName("", listOf(marker.shortName))) + + val arguments = parameters.joinToString(", ") { "${it.name} = ${it.name}" } + + val typeSpec = TypeSpec.companionObjectBuilder() + .apply { + if (defaultPath != null) { + addProperty(defaultPath) + } + } + .addProperties(this.arguments.defaultValues.map { it.property }) + .addFunction( + FunSpec.builder(methodName) + .returns(type) + .addParameter( + ParameterSpec.builder("path", typeNameOf()) + .apply { + if (defaultPath != null) { + defaultValue("%N", defaultPath) + } + } + .build() + + ) + .addParameters(parameters) + .addParameter( + ParameterSpec.builder("verify", typeNameOf()) + .defaultValue("null") + .build() + ) + .addCode( + """ + val df = DataFrame.$methodName(path, $arguments) + return if ($verify != null) df.$cast($verify = $verify) else df.$cast() + """.trimIndent() + ) + .build() + ) + .build() + + return typeSpec.toString() + } + + override val additionalImports: List = listOf("import org.jetbrains.kotlinx.dataframe.io.$methodName") +} + +internal class DefaultReadJsonMethod(path: String?, arguments: MethodArguments) : AbstractDefaultReadMethod( + path = path, + arguments = arguments, + methodName = readJson, +) + +internal class DefaultReadCsvMethod( + path: String?, + arguments: MethodArguments, 
+) : AbstractDefaultReadMethod(path, arguments, readCSV) + +internal class DefaultReadTsvMethod(path: String?) : AbstractDefaultReadMethod(path, MethodArguments.EMPTY, readTSV) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ExtensionsCodeGenerator.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ExtensionsCodeGenerator.kt new file mode 100644 index 000000000..de874282f --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ExtensionsCodeGenerator.kt @@ -0,0 +1,12 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import org.jetbrains.kotlinx.dataframe.impl.codeGen.ExtensionsCodeGeneratorImpl +import org.jetbrains.kotlinx.dataframe.impl.codeGen.ShortNames + +public interface ExtensionsCodeGenerator { + public fun generate(marker: IsolatedMarker): CodeWithConverter + + public companion object { + public fun create(): ExtensionsCodeGenerator = ExtensionsCodeGeneratorImpl(ShortNames) + } +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/GeneratedField.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/GeneratedField.kt new file mode 100644 index 000000000..d82ca0d78 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/GeneratedField.kt @@ -0,0 +1,145 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.impl.codeGen.needsQuoting +import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema + +public sealed interface FieldType { + public class ValueFieldType(public val typeFqName: String) : FieldType + public class FrameFieldType(public val markerName: String, public val nullable: Boolean) : FieldType + public class GroupFieldType(public val markerName: String) : FieldType +} + +/** + * Returns whether the column type ends with 
`?` or not. + * NOTE: for [FieldType.FrameFieldType], the `nullable` property indicates the nullability of the frame itself, not the type of the column. + */ +public fun FieldType.isNullable(): Boolean = + when (this) { + is FieldType.FrameFieldType -> markerName.endsWith("?") || markerName == "*" + is FieldType.GroupFieldType -> markerName.endsWith("?") || markerName == "*" + is FieldType.ValueFieldType -> typeFqName.endsWith("?") || typeFqName == "*" + } + +/** + * Returns whether the column type is not nullable, i.e. the inverse of [isNullable]. + * NOTE: for [FieldType.FrameFieldType], the `nullable` property indicates the nullability of the frame itself, not the type of the column. + */ +public fun FieldType.isNotNullable(): Boolean = !isNullable() + +private fun String.toNullable() = if (this.last() == '?' || this == "*") this else "$this?" + +/** + * Returns a new fieldType with the same type but with nullability enabled in the column type. + * NOTE: for [FieldType.FrameFieldType], the `nullable` property indicates the nullability of the frame itself, not the type of the column. + */ +public fun FieldType.toNullable(): FieldType = + if (isNotNullable()) { + when (this) { + is FieldType.FrameFieldType -> FieldType.FrameFieldType(markerName.toNullable(), nullable) + is FieldType.GroupFieldType -> FieldType.GroupFieldType(markerName.toNullable()) + is FieldType.ValueFieldType -> FieldType.ValueFieldType(typeFqName.toNullable()) + } + } else this + +/** + * Returns a new fieldType with the same type but with nullability disabled in the column type. + * NOTE: for [FieldType.FrameFieldType], the `nullable` property indicates the nullability of the frame itself, not the type of the column. 
+ */ +public fun FieldType.toNotNullable(): FieldType = + if (isNullable()) { + when (this) { + is FieldType.FrameFieldType -> FieldType.FrameFieldType( + markerName = markerName.let { + if (it == "*") "Any" + else it.removeSuffix("?") + }, + nullable = nullable, + ) + + is FieldType.GroupFieldType -> FieldType.GroupFieldType( + markerName = markerName.let { + if (it == "*") "Any" + else it.removeSuffix("?") + }, + ) + + is FieldType.ValueFieldType -> FieldType.ValueFieldType( + typeFqName = typeFqName.let { + if (it == "*") "Any" + else it.removeSuffix("?") + }, + ) + } + } else this + +public val FieldType.name: String + get() = when (this) { + is FieldType.FrameFieldType -> markerName + is FieldType.GroupFieldType -> markerName + is FieldType.ValueFieldType -> typeFqName + } + +public class ValidFieldName private constructor(private val identifier: String, public val needsQuote: Boolean) { + public val unquoted: String get() = identifier + public val quotedIfNeeded: String get() = if (needsQuote) "`$identifier`" else identifier + + public operator fun plus(other: ValidFieldName): ValidFieldName { + return ValidFieldName(identifier = identifier + other.identifier, needsQuote = needsQuote || other.needsQuote) + } + + public companion object { + public fun of(name: String): ValidFieldName { + val needsQuote = name.needsQuoting() + var result = name + if (needsQuote) { + result = name.replace("<", "{") + .replace(">", "}") + .replace("::", " - ") + .replace(": ", " - ") + .replace(":", " - ") + .replace(".", " ") + .replace("/", "-") + .replace("[", "{") + .replace("]", "}") + .replace("`", "'") + .replace(";", " ") + .replace("\\", " ") + } + + return ValidFieldName(result, needsQuote) + } + } +} + +public interface BaseField { + public val fieldName: ValidFieldName + public val columnName: String + public val fieldType: FieldType +} + +public fun BaseField.toNullable(): BaseField = + if (fieldType.isNullable()) this + else object : BaseField { + override val 
fieldName: ValidFieldName = this@toNullable.fieldName + override val columnName: String = this@toNullable.columnName + override val fieldType: FieldType = this@toNullable.fieldType.toNullable() + } + +public fun BaseField.toNotNullable(): BaseField = + if (fieldType.isNotNullable()) this + else object : BaseField { + override val fieldName: ValidFieldName = this@toNotNullable.fieldName + override val columnName: String = this@toNotNullable.columnName + override val fieldType: FieldType = this@toNotNullable.fieldType.toNotNullable() + } + +public data class GeneratedField( + override val fieldName: ValidFieldName, + override val columnName: String, + val overrides: Boolean, + val columnSchema: ColumnSchema, + override val fieldType: FieldType, +) : BaseField { + val columnKind: ColumnKind get() = columnSchema.kind +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/Marker.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/Marker.kt new file mode 100644 index 000000000..7d11defeb --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/Marker.kt @@ -0,0 +1,119 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema +import kotlin.reflect.KClass + +public enum class MarkerVisibility { + INTERNAL, IMPLICIT_PUBLIC, EXPLICIT_PUBLIC +} + +public interface IsolatedMarker { + public val name: String + public val fields: List + public val visibility: MarkerVisibility + + /** + * Type parameters to be placed in front of the function or getter/setter. + * Like `` in `val MyType.something get() = ...`. + */ + public val typeParameters: String + + /** + * Type arguments belonging to this marker. + * Like `` in `MyMarker`. 
+ */ + public val typeArguments: String +} + +public abstract class AbstractMarker( + typeParameters: List, + typeArguments: List, +) : IsolatedMarker { + override val typeParameters: String = typeParameters.join() + override val typeArguments: String = typeArguments.join() + + private fun List.join() = if (isEmpty()) { + "" + } else { + joinToString(", ", "<", ">") + } +} + +public open class Marker( + override val name: String, + public val isOpen: Boolean, + override val fields: List, + superMarkers: List, + override val visibility: MarkerVisibility, + typeParameters: List, + typeArguments: List, +) : AbstractMarker(typeParameters, typeArguments) { + + public val shortName: String + get() = name.substringAfterLast(".") + + public val superMarkers: Map = superMarkers.associateBy { it.name } + + public val allSuperMarkers: Map by lazy { + val result = this.superMarkers.toMutableMap() + this.superMarkers.forEach { + result.putAll(it.value.allSuperMarkers) + } + result + } + + public val allFields: List by lazy { + + val fieldsMap = mutableMapOf() + this.superMarkers.values.forEach { + it.allFields.forEach { + fieldsMap[it.fieldName.quotedIfNeeded] = it + } + } + fields.forEach { + fieldsMap[it.fieldName.quotedIfNeeded] = it + } + fieldsMap.values.toList() + } + + public val allFieldsByColumn: Map by lazy { + allFields.associateBy { it.columnName } + } + + public fun getField(columnName: String): GeneratedField? 
= allFieldsByColumn[columnName] + + public fun containsColumn(columnName: String): Boolean = allFieldsByColumn.containsKey(columnName) + + public val columnNames: List get() = allFields.map { it.columnName } + + public val schema: DataFrameSchema by lazy { DataFrameSchemaImpl(allFields.associate { it.columnName to it.columnSchema }) } + + public fun implements(schema: Marker): Boolean = + if (schema.name == name) true else allSuperMarkers[schema.name]?.let { it === schema } ?: false + + public fun implementsAll(schemas: Iterable): Boolean = schemas.all { implements(it) } + + internal companion object { + operator fun invoke( + name: String, + isOpen: Boolean, + fields: List, + superMarkers: List, + visibility: MarkerVisibility, + klass: KClass<*>, + ): Marker { + val typeParameters = klass.typeParameters.map { + buildString { + append(it.name) + if (it.upperBounds.isNotEmpty()) { + append(" : ") + append(it.upperBounds.joinToString(",") { it.toString() }) + } + } + } + val typeArguments = klass.typeParameters.map { it.name } + return Marker(name, isOpen, fields, superMarkers, visibility, typeParameters, typeArguments) + } + } +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/MarkersExtractor.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/MarkersExtractor.kt new file mode 100644 index 000000000..ef883a33e --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/MarkersExtractor.kt @@ -0,0 +1,91 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.ColumnName +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.impl.schema.getPropertiesOrder +import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema +import kotlin.reflect.KClass +import 
kotlin.reflect.KType +import kotlin.reflect.full.findAnnotation +import kotlin.reflect.full.hasAnnotation +import kotlin.reflect.full.memberProperties +import kotlin.reflect.full.superclasses +import kotlin.reflect.full.withNullability +import kotlin.reflect.jvm.jvmErasure +import kotlin.reflect.typeOf + +internal fun KType.shouldBeConvertedToFrameColumn(): Boolean = when (jvmErasure) { + DataFrame::class -> true + List::class -> arguments[0].type?.jvmErasure?.hasAnnotation() == true + else -> false +} + +internal fun KType.shouldBeConvertedToColumnGroup(): Boolean = jvmErasure.let { + it == DataRow::class || it.hasAnnotation() +} + +private fun String.toNullable(): String = if (endsWith("?")) this else "$this?" + +internal object MarkersExtractor { + + private val cache = mutableMapOf, Boolean>, Marker>() + + inline fun get() = get(T::class) + + fun get(markerClass: KClass<*>, nullableProperties: Boolean = false): Marker = + cache.getOrPut(Pair(markerClass, nullableProperties)) { + val fields = getFields(markerClass, nullableProperties) + val isOpen = !markerClass.isSealed && + markerClass.java.isInterface && + markerClass.findAnnotation()?.isOpen == true + + val baseSchemas = markerClass.superclasses.filter { it != Any::class }.map { get(it, nullableProperties) } + Marker( + name = markerClass.qualifiedName ?: markerClass.simpleName!!, + isOpen = isOpen, + fields = fields, + superMarkers = baseSchemas, + visibility = MarkerVisibility.IMPLICIT_PUBLIC, + klass = markerClass, + ) + } + + private fun getFields(markerClass: KClass<*>, nullableProperties: Boolean): List { + val order = getPropertiesOrder(markerClass) + return markerClass.memberProperties.sortedBy { order[it.name] ?: Int.MAX_VALUE }.mapIndexed { _, it -> + val fieldName = ValidFieldName.of(it.name) + val columnName = it.findAnnotation()?.name ?: fieldName.unquoted + val type = it.returnType + val fieldType: FieldType + val clazz = type.jvmErasure + val columnSchema = when { + 
type.shouldBeConvertedToColumnGroup() -> { + val nestedType = if (clazz == DataRow::class) type.arguments[0].type ?: typeOf() else type + val marker = get(nestedType.jvmErasure, nullableProperties || type.isMarkedNullable) + fieldType = FieldType.GroupFieldType(marker.name) + ColumnSchema.Group(marker.schema, nestedType) + } + + type.shouldBeConvertedToFrameColumn() -> { + val frameType = type.arguments[0].type ?: typeOf() + val marker = get(frameType.jvmErasure, nullableProperties || type.isMarkedNullable) + fieldType = FieldType.FrameFieldType(marker.name, type.isMarkedNullable || nullableProperties) + ColumnSchema.Frame(marker.schema, type.isMarkedNullable, frameType) + } + + else -> { + fieldType = FieldType.ValueFieldType( + if (nullableProperties) type.toString().toNullable() else type.toString() + ) + ColumnSchema.Value( + if (nullableProperties) type.withNullability(true) else type + ) + } + } + + GeneratedField(fieldName, columnName, false, columnSchema, fieldType) + } + } +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/NameNormalizer.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/NameNormalizer.kt new file mode 100644 index 000000000..78ba98a3c --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/NameNormalizer.kt @@ -0,0 +1,5 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +public class NameNormalizer(private val f: (String) -> String) : (String) -> String by f { + public companion object +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ReplCodeGenerator.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ReplCodeGenerator.kt new file mode 100644 index 000000000..63ceb4ffd --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ReplCodeGenerator.kt @@ -0,0 +1,30 @@ +package 
org.jetbrains.dataframe.impl.codeGen + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.codeGen.CodeWithConverter +import org.jetbrains.kotlinx.dataframe.impl.codeGen.ReplCodeGeneratorImpl +import org.jetbrains.kotlinx.jupyter.api.Code +import kotlin.reflect.KClass +import kotlin.reflect.KProperty + +internal interface ReplCodeGenerator { + + fun process( + df: AnyFrame, + property: KProperty<*>? = null, + ): CodeWithConverter + + fun process( + row: AnyRow, + property: KProperty<*>? = null, + ): CodeWithConverter + + fun process(markerClass: KClass<*>): Code + + companion object { + fun create(): ReplCodeGenerator = ReplCodeGeneratorImpl() + } +} + +internal inline fun ReplCodeGenerator.process(): Code = process(T::class) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/SchemaProcessor.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/SchemaProcessor.kt new file mode 100644 index 000000000..23d57898e --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/SchemaProcessor.kt @@ -0,0 +1,27 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import org.jetbrains.kotlinx.dataframe.impl.codeGen.SchemaProcessorImpl +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema + +internal interface SchemaProcessor { + + val generatedMarkers: List + + val namePrefix: String + + fun process( + schema: DataFrameSchema, + isOpen: Boolean, + visibility: MarkerVisibility = MarkerVisibility.IMPLICIT_PUBLIC + ): Marker + + companion object { + fun create( + namePrefix: String, + existingMarkers: Iterable = emptyList(), + fieldNameNormalizer: (String) -> String = { it } + ): SchemaProcessorImpl { + return SchemaProcessorImpl(existingMarkers, namePrefix, fieldNameNormalizer) + } + } +} diff --git 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/generateCode.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/generateCode.kt new file mode 100644 index 000000000..578be837a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/generateCode.kt @@ -0,0 +1,43 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import org.jetbrains.dataframe.impl.codeGen.CodeGenerator +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.schema + +public inline fun DataFrame.generateCode( + fields: Boolean = true, + extensionProperties: Boolean = true, +): String { + val name = if (T::class.isAbstract) { + T::class.simpleName!! + } else "DataEntry" + return generateCode(name, fields, extensionProperties) +} + +public fun DataFrame.generateCode( + markerName: String, + fields: Boolean = true, + extensionProperties: Boolean = true, + visibility: MarkerVisibility = MarkerVisibility.IMPLICIT_PUBLIC, +): String { + val codeGen = CodeGenerator.create() + return codeGen.generate( + schema = schema(), + name = markerName, + fields = fields, + extensionProperties = extensionProperties, + isOpen = true, + visibility = visibility, + ).code.declarations +} + +public inline fun DataFrame.generateInterfaces(): String = generateCode( + fields = true, + extensionProperties = false +) + +public fun DataFrame.generateInterfaces(markerName: String): String = generateCode( + markerName = markerName, + fields = true, + extensionProperties = false +) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/BaseColumn.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/BaseColumn.kt new file mode 100644 index 000000000..2c5d1838f --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/BaseColumn.kt @@ -0,0 +1,72 @@ +package 
org.jetbrains.kotlinx.dataframe.columns + +import org.jetbrains.kotlinx.dataframe.AnyBaseCol +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.impl.asList +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.DataColumnInternal +import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable +import kotlin.reflect.KProperty +import kotlin.reflect.KType + +/** + * Column with [type], [name]/[path] and [values] + * Base interface for all three kinds of columns: [ValueColumn], [ColumnGroup] and [FrameColumn]. + * Column operations that doesn't clash by signature with [DataFrame] operations can be defined for [BaseColumn] + * + * @param T type of values contained in column. + */ +public interface BaseColumn : ColumnReference { + + // region info + + public fun size(): Int + public fun kind(): ColumnKind + public fun type(): KType + + // TODO: remove + public fun defaultValue(): T? 
+ + // endregion + + // region get + + public operator fun get(index: Int): T + public operator fun get(firstIndex: Int, vararg otherIndices: Int): BaseColumn = get( + headPlusIterable( + firstIndex, + otherIndices.asIterable() + ) + ) + public operator fun get(row: AnyRow): T = get(row.index()) + + public operator fun get(range: IntRange): BaseColumn + + public operator fun get(indices: Iterable): BaseColumn + + public operator fun get(columnName: String): AnyCol + + // endregion + + // region values + + public fun values(): Iterable + + public fun toList(): List = values().asList() + public fun toSet(): Set + + public fun distinct(): BaseColumn + public fun countDistinct(): Int + + public operator fun contains(value: @UnsafeVariance T): Boolean + + // endregion + + override fun rename(newName: String): BaseColumn + + public override operator fun getValue(thisRef: Any?, property: KProperty<*>): BaseColumn = (this as DataColumnInternal<*>).rename(property.columnName).forceResolve() as BaseColumn +} + +internal val BaseColumn.values: Iterable get() = values() +internal val AnyBaseCol.size: Int get() = size() diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnAccessor.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnAccessor.kt new file mode 100644 index 000000000..4536755c6 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnAccessor.kt @@ -0,0 +1,27 @@ +package org.jetbrains.kotlinx.dataframe.columns + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnGroup +import org.jetbrains.kotlinx.dataframe.api.frameColumn +import kotlin.reflect.KProperty + +/** + * Combination of [column path][path] and [column type][T]. 
+ * + * Used to retrieve [DataColumn] from [DataFrame] or value from [DataRow]. + * + * Can be created by [column], [columnGroup] or [frameColumn] delegates. + * + * @param T Expected [type][DataColumn.type] of values in the column + */ +public interface ColumnAccessor : ColumnReference { + + public override operator fun getValue(thisRef: Any?, property: KProperty<*>): ColumnAccessor = this + + public operator fun get(column: ColumnReference): ColumnAccessor + + override fun rename(newName: String): ColumnAccessor +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt new file mode 100644 index 000000000..bce661df7 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt @@ -0,0 +1,42 @@ +package org.jetbrains.kotlinx.dataframe.columns + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.asColumnGroup +import org.jetbrains.kotlinx.dataframe.api.columnGroup +import kotlin.reflect.KProperty + +/** + * Group of columns. Used to create column hierarchy in [DataFrame]. + * + * ColumnGroup is a mix of [DataFrame] and [DataColumn] that supports all [DataFrame] operations but also has [column name][name] and [column type][type]. + * It derives not from [DataColumn], but from [BaseColumn] to avoid API clashes between [DataFrame] and [DataColumn]. + * + * ColumnGroup interface can be returned by: + * - extension property generated for [DataSchema] + * - [ColumnAccessor] created by [columnGroup] delegate + * - explicit cast using [asColumnGroup] + * + * @param T Schema marker. See [DataFrame] for details. 
+ */ +public interface ColumnGroup : BaseColumn>, DataFrame { + + override fun get(indices: Iterable): ColumnGroup + + override fun get(columnName: String): AnyCol + + override fun kind(): ColumnKind = ColumnKind.Group + + override fun distinct(): ColumnGroup + + override fun get(firstIndex: Int, vararg otherIndices: Int): ColumnGroup + + override fun get(range: IntRange): ColumnGroup + + override fun rename(newName: String): ColumnGroup + + override operator fun getValue(thisRef: Any?, property: KProperty<*>): ColumnGroup = super.getValue(thisRef, property) as ColumnGroup +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnKind.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnKind.kt new file mode 100644 index 000000000..a4dff8b68 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnKind.kt @@ -0,0 +1,13 @@ +package org.jetbrains.kotlinx.dataframe.columns + +public enum class ColumnKind { + Value { + override fun toString(): String = "ValueColumn" + }, + Group { + override fun toString(): String = "ColumnGroup" + }, + Frame { + override fun toString(): String = "FrameColumn" + } +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnPath.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnPath.kt new file mode 100644 index 000000000..78269d6bf --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnPath.kt @@ -0,0 +1,57 @@ +package org.jetbrains.kotlinx.dataframe.columns + +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnAccessorImpl +import org.jetbrains.kotlinx.dataframe.impl.owner + +/** + * Path to a [column][DataColumn] in 
[DataFrame]. + * + * Stores a list of [column names][DataColumn.name] that are used to retrieve columns through a chain of [column groups][ColumnGroup]. + */ +public data class ColumnPath(val path: List) : List by path, ColumnAccessor { + + public constructor(name: String) : this(listOf(name)) + + public fun drop(size: Int): ColumnPath = ColumnPath(path.drop(size)) + + public fun parent(): ColumnPath? = if (path.isEmpty()) null else dropLast(1) + + public fun dropLast(size: Int = 1): ColumnPath = ColumnPath(path.dropLast(size)) + + public fun dropFirst(size: Int = 1): ColumnPath = ColumnPath(path.drop(size)) + + public operator fun plus(name: String): ColumnPath = ColumnPath(path + name) + + public operator fun plus(otherPath: ColumnPath): ColumnPath = ColumnPath(path + otherPath) + + public operator fun plus(otherPath: Iterable): ColumnPath = ColumnPath(path + otherPath) + + public fun take(first: Int): ColumnPath = ColumnPath(path.take(first)) + + public fun replaceLast(name: String): ColumnPath = ColumnPath(if (size < 2) listOf(name) else dropLast(1) + name) + + public fun takeLast(first: Int): ColumnPath = ColumnPath(path.takeLast(first)) + + override fun path(): ColumnPath = this + + override fun name(): String = path.last() + + val columnName: String get() = name() + + val parentName: String? get() = if (path.size > 1) path[path.size - 2] else null + + override fun rename(newName: String): ColumnPath = ColumnPath(path.dropLast(1) + newName) + + override fun getValue(row: AnyRow): Any? = row.owner[this][row.index()] + + override fun getValueOrNull(row: AnyRow): Any? 
= row.owner.getColumnOrNull(this)?.get(row.index()) + + override fun toString(): String = path.toString() + + public fun joinToString(separator: String = "/"): String = path.joinToString(separator) + + override fun get(column: ColumnReference): ColumnAccessor = ColumnAccessorImpl(this + column.path()) +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt new file mode 100644 index 000000000..e65d38476 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt @@ -0,0 +1,44 @@ +package org.jetbrains.kotlinx.dataframe.columns + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.api.name +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.RenamedColumnReference +import org.jetbrains.kotlinx.dataframe.impl.columns.addPath +import org.jetbrains.kotlinx.dataframe.impl.columns.getColumn +import kotlin.reflect.KProperty + +/** + * Entity that can retrieve [DataColumn] from [DataFrame] or value from [DataRow]. + * + * Base interface for [DataColumn] and [ColumnAccessor]. 
+ * @param C Expected [type][DataColumn.type] of values in the column + */ +public interface ColumnReference : SingleColumn { + + public operator fun getValue(thisRef: Any?, property: KProperty<*>): ColumnReference = renamedReference(property.columnName) + + public fun name(): String + + public fun rename(newName: String): ColumnReference + + public fun path(): ColumnPath = ColumnPath(name) + + public fun getValue(row: AnyRow): C = resolveFor(row.df())!![row.index()] + + public fun getValueOrNull(row: AnyRow): C? = resolveFor(row.df())?.get(row.index()) + + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? { + return context.df.getColumn(path(), context.unresolvedColumnsPolicy)?.addPath(path()) + } +} + +internal fun ColumnReference.renamedReference(newName: String): ColumnReference = RenamedColumnReference(this, newName) + +internal fun ColumnReference<*>.shortPath() = ColumnPath(name) + +internal fun ColumnReference.resolveFor(df: AnyFrame): ColumnWithPath? = resolveSingle(ColumnResolutionContext(df, UnresolvedColumnsPolicy.Skip)) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt new file mode 100644 index 000000000..794f94aab --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt @@ -0,0 +1,26 @@ +package org.jetbrains.kotlinx.dataframe.columns + +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame + +/** + * Entity that can be resolved into a list of [columns][DataColumn]. + * + * Used as a return type of [ColumnsSelector]. 
+ * @param C common type of resolved columns + */ +public interface ColumnSet { + + public fun resolve(context: ColumnResolutionContext): List> +} + +public class ColumnResolutionContext internal constructor ( + internal val df: DataFrame<*>, + internal val unresolvedColumnsPolicy: UnresolvedColumnsPolicy +) { + + public val allowMissingColumns: Boolean = unresolvedColumnsPolicy != UnresolvedColumnsPolicy.Fail +} + +internal enum class UnresolvedColumnsPolicy { Fail, Skip, Create } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt new file mode 100644 index 000000000..d14595add --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt @@ -0,0 +1,27 @@ +package org.jetbrains.kotlinx.dataframe.columns + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.api.asColumnGroup +import org.jetbrains.kotlinx.dataframe.api.isColumnGroup +import org.jetbrains.kotlinx.dataframe.impl.columns.addParentPath +import org.jetbrains.kotlinx.dataframe.impl.columns.addPath +import org.jetbrains.kotlinx.dataframe.impl.columns.depth + +public interface ColumnWithPath : DataColumn { + + public val data: DataColumn + public val path: ColumnPath + public val name: String get() = name() + public val parentName: String? get() = path.parentName + public fun depth(): Int = path.depth() + public fun getChild(accessor: ColumnReference): ColumnWithPath? = asColumnGroup().getColumnOrNull(accessor)?.addPath(path + accessor.path()) + public fun getChild(name: String): ColumnWithPath? = asColumnGroup().getColumnOrNull(name)?.addParentPath(path) + public fun getChild(index: Int): ColumnWithPath? 
= asColumnGroup().getColumnOrNull(index)?.addParentPath(path) + public fun children(): List> = if (isColumnGroup()) data.asColumnGroup().columns().map { it.addParentPath(path) } else emptyList() + + override fun path(): ColumnPath = path + + override fun rename(newName: String): ColumnWithPath +} + +public val ColumnWithPath.depth: Int get() = path.depth() diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/FrameColumn.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/FrameColumn.kt new file mode 100644 index 000000000..70b988740 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/FrameColumn.kt @@ -0,0 +1,23 @@ +package org.jetbrains.kotlinx.dataframe.columns + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema + +/** + * Column that stores values of type [DataFrame] + * + * @param T schema marker of contained dataframes. See [DataFrame] for details. + */ +public interface FrameColumn : DataColumn> { + + public val schema: Lazy + + override fun distinct(): FrameColumn + + override fun kind(): ColumnKind = ColumnKind.Frame + + override fun rename(newName: String): FrameColumn + + override fun get(indices: Iterable): FrameColumn +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt new file mode 100644 index 000000000..f822f9774 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt @@ -0,0 +1,15 @@ +package org.jetbrains.kotlinx.dataframe.columns + +import org.jetbrains.kotlinx.dataframe.DataColumn + +/** + * Entity that can be [resolved][resolveSingle] into [DataColumn]. 
+ * + * @param C Column [type][BaseColumn.type] of resolved column. + */ +public interface SingleColumn : ColumnSet { + + override fun resolve(context: ColumnResolutionContext): List> = resolveSingle(context)?.let { listOf(it) } ?: emptyList() + + public fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ValueColumn.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ValueColumn.kt new file mode 100644 index 000000000..12470fefc --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ValueColumn.kt @@ -0,0 +1,24 @@ +package org.jetbrains.kotlinx.dataframe.columns + +import org.jetbrains.kotlinx.dataframe.DataColumn +import kotlin.reflect.KProperty + +/** + * Column that stores values. + * + * @param T - type of values + */ +public interface ValueColumn : DataColumn { + + override fun kind(): ColumnKind = ColumnKind.Value + + override fun distinct(): ValueColumn + + override fun get(indices: Iterable): ValueColumn + + override fun rename(newName: String): ValueColumn + + override operator fun getValue(thisRef: Any?, property: KProperty<*>): ValueColumn = super.getValue(thisRef, property) as ValueColumn + + public override operator fun get(range: IntRange): ValueColumn +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/dataTypes/IFRAME.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/dataTypes/IFRAME.kt new file mode 100644 index 000000000..65c7b5a1f --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/dataTypes/IFRAME.kt @@ -0,0 +1,16 @@ +package org.jetbrains.kotlinx.dataframe.dataTypes + +import java.net.URL + +public data class IFRAME( + val src: String, + val border: Boolean = false, + val width: Int? = null, + val height: Int? 
= null +) { + public constructor(src: URL, border: Boolean = false, width: Int? = null, height: Int? = null) : this(src.toString(), border, width, height) + + override fun toString(): String { + return """

cast(): Gather { + // TODO: introduce GatherWithTransform to avoid this error + require(valueTransform == null) { "Cast is not allowed to be called after `mapValues`" } + return this as Gather + } +} + +public fun Gather.into( + keyColumn: String, + valueColumn: String +): DataFrame = gatherImpl(keyColumn, valueColumn) + +public fun Gather.into( + keyColumn: ColumnAccessor, + valueColumn: ColumnAccessor +): DataFrame = into(keyColumn.name(), valueColumn.name) + +public fun Gather.into( + keyColumn: KProperty, + valueColumn: KProperty +): DataFrame = into(keyColumn.columnName, valueColumn.columnName) + +public fun Gather.keysInto( + keyColumn: String +): DataFrame = gatherImpl(keyColumn, null) + +public fun Gather.keysInto( + keyColumn: ColumnAccessor +): DataFrame = keysInto(keyColumn.name()) + +public fun Gather.keysInto( + keyColumn: KProperty +): DataFrame = keysInto(keyColumn.columnName) + +public fun Gather.valuesInto( + valueColumn: String +): DataFrame = gatherImpl(null, valueColumn) + +public fun Gather.valuesInto( + valueColumn: ColumnAccessor +): DataFrame = valuesInto(valueColumn.name()) + +public fun Gather.valuesInto( + valueColumn: KProperty +): DataFrame = valuesInto(valueColumn.columnName) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt new file mode 100644 index 000000000..2c75681d3 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt @@ -0,0 +1,33 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyColumnGroupAccessor +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.impl.columnName +import 
org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.experimental.ExperimentalTypeInference +import kotlin.reflect.KProperty + +// region DataFrame + +public data class GroupClause(val df: DataFrame, val columns: ColumnsSelector) + +public fun DataFrame.group(columns: ColumnsSelector): GroupClause = GroupClause(this, columns) +public fun DataFrame.group(vararg columns: String): GroupClause = group { columns.toColumns() } +public fun DataFrame.group(vararg columns: Column): GroupClause = group { columns.toColumns() } +public fun DataFrame.group(vararg columns: KProperty<*>): GroupClause = group { columns.toColumns() } + +@JvmName("intoString") +@OverloadResolutionByLambdaReturnType +@OptIn(ExperimentalTypeInference::class) +public infix fun GroupClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> String): DataFrame = df.move(columns).under { column(it).toColumnAccessor() } + +@JvmName("intoColumn") +public infix fun GroupClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> Column): DataFrame = df.move(columns).under(column) +public infix fun GroupClause.into(column: String): DataFrame = into(columnGroup().named(column)) +public infix fun GroupClause.into(column: AnyColumnGroupAccessor): DataFrame = df.move(columns).under(column) +public infix fun GroupClause.into(column: KProperty<*>): DataFrame = into(column.columnName) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt new file mode 100644 index 000000000..b02c37e7b --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt @@ -0,0 +1,91 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame 
+import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.Selector +import org.jetbrains.kotlinx.dataframe.aggregation.Aggregatable +import org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedBody +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.impl.aggregation.PivotImpl +import org.jetbrains.kotlinx.dataframe.impl.api.getPivotColumnPaths +import org.jetbrains.kotlinx.dataframe.impl.api.groupByImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty + +// region DataFrame + +public fun DataFrame.groupBy(moveToTop: Boolean = true, cols: ColumnsSelector): GroupBy = + groupByImpl(moveToTop, cols) + +public fun DataFrame.groupBy(cols: Iterable): GroupBy = groupBy { cols.toColumnSet() } +public fun DataFrame.groupBy(vararg cols: KProperty<*>): GroupBy = groupBy { cols.toColumns() } +public fun DataFrame.groupBy(vararg cols: String): GroupBy = groupBy { cols.toColumns() } +public fun DataFrame.groupBy(vararg cols: Column, moveToTop: Boolean = true): GroupBy = + groupBy(moveToTop) { cols.toColumns() } + +// endregion + +// region Pivot + +public fun Pivot.groupBy(moveToTop: Boolean = true, columns: ColumnsSelector): PivotGroupBy = + (this as PivotImpl).toGroupedPivot(moveToTop, columns) + +public fun Pivot.groupBy(vararg columns: Column): PivotGroupBy = groupBy { columns.toColumns() } +public fun Pivot.groupBy(vararg columns: String): PivotGroupBy = groupBy { columns.toColumns() } +public fun Pivot.groupBy(vararg columns: KProperty<*>): PivotGroupBy = groupBy { columns.toColumns() } + +public fun Pivot.groupByOther(): PivotGroupBy { + val impl = this as PivotImpl + val pivotColumns = df.getPivotColumnPaths(columns).toColumnSet() + return impl.toGroupedPivot(moveToTop = false) { except(pivotColumns) } +} + +// endregion + +public typealias GroupedRowSelector = 
GroupedDataRow.(GroupedDataRow) -> R + +public typealias GroupedRowFilter = GroupedRowSelector + +public interface GroupedDataRow : DataRow { + + public fun group(): DataFrame +} + +public val GroupedDataRow.group: DataFrame get() = group() + +public data class GroupWithKey(val key: DataRow, val group: DataFrame) + +public interface GroupBy : Grouped { + + public val groups: FrameColumn + + public val keys: DataFrame + + public fun toDataFrame(groupedColumnName: String? = null): DataFrame + + public fun updateGroups(transform: Selector, DataFrame>): GroupBy + + public fun filter(predicate: GroupedRowFilter): GroupBy + + public data class Entry(val key: DataRow, val group: DataFrame) + + public companion object { + internal val groupedColumnAccessor = column("group") + } +} + +public interface Grouped : Aggregatable { + + public fun aggregate(body: AggregateGroupedBody): DataFrame +} + +public data class ReducedGroupBy( + @PublishedApi internal val groupBy: GroupBy, + @PublishedApi internal val reducer: Selector, DataRow?> +) + +internal fun GroupBy.reduce(reducer: Selector, DataRow?>) = ReducedGroupBy(this, reducer) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/head.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/head.kt new file mode 100644 index 000000000..3c23f050a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/head.kt @@ -0,0 +1,9 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataFrame + +// region DataFrame + +public fun DataFrame.head(numRows: Int = 5): DataFrame = take(numRows) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/implode.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/implode.kt new file mode 100644 index 000000000..50bbd9ad5 --- /dev/null +++ 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/implode.kt @@ -0,0 +1,24 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.api.implodeImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty + +public fun DataFrame.implode(dropNA: Boolean = false): DataRow = + implode(dropNA) { all() }[0] + +public fun DataFrame.implode(dropNA: Boolean = false, columns: ColumnsSelector): DataFrame = + implodeImpl(dropNA, columns) + +public fun DataFrame.implode(vararg columns: String, dropNA: Boolean = false): DataFrame = + implode(dropNA) { columns.toColumns() } + +public fun DataFrame.implode(vararg columns: ColumnReference, dropNA: Boolean = false): DataFrame = + implode(dropNA) { columns.toColumns() } + +public fun DataFrame.implode(vararg columns: KProperty, dropNA: Boolean = false): DataFrame = + implode(dropNA) { columns.toColumns() } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/indices.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/indices.kt new file mode 100644 index 000000000..5ffaf0f3a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/indices.kt @@ -0,0 +1,17 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.indices + +// region DataFrame + +public fun AnyFrame.indices(): IntRange = 0 until rowsCount() + +public fun DataFrame.indices(filter: RowFilter): List = indices.filter { + val row = get(it) + filter(row, row) +} + +// endregion 
diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt new file mode 100644 index 000000000..cb1fe0463 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt @@ -0,0 +1,19 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.type +import kotlin.reflect.KProperty + +public fun AnyCol.inferType(): DataColumn<*> = guessColumnType(name, toList(), type, true) + +public fun DataFrame.inferType(): DataFrame = inferType { allDfs() } +public fun DataFrame.inferType(columns: ColumnsSelector): DataFrame = replace(columns).with { it.inferType() } +public fun DataFrame.inferType(vararg columns: String): DataFrame = inferType { columns.toColumns() } +public fun DataFrame.inferType(vararg columns: ColumnReference<*>): DataFrame = inferType { columns.toColumns() } +public fun DataFrame.inferType(vararg columns: KProperty<*>): DataFrame = inferType { columns.toColumns() } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/insert.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/insert.kt new file mode 100644 index 000000000..3cad76444 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/insert.kt @@ -0,0 +1,51 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.ColumnSelector 
+import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.impl.api.insertImpl +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.removeAt +import kotlin.reflect.KProperty + +public fun DataFrame.insert(column: AnyCol): InsertClause = InsertClause(this, column) + +public inline fun DataFrame.insert( + name: String, + infer: Infer = Infer.Nulls, + noinline expression: RowExpression +): InsertClause = insert(mapToColumn(name, infer, expression)) + +public inline fun DataFrame.insert( + column: ColumnAccessor, + infer: Infer = Infer.Nulls, + noinline expression: RowExpression +): InsertClause = insert(column.name(), infer, expression) + +public inline fun DataFrame.insert( + column: KProperty, + infer: Infer = Infer.Nulls, + noinline expression: RowExpression +): InsertClause = insert(column.columnName, infer, expression) + +public data class InsertClause(val df: DataFrame, val column: AnyCol) + +public fun InsertClause.under(column: ColumnSelector): DataFrame = under(df.getColumnPath(column)) +public fun InsertClause.under(columnPath: ColumnPath): DataFrame = df.insertImpl(columnPath + column.name, column) +public fun InsertClause.under(column: ColumnAccessor<*>): DataFrame = under(column.path()) +public fun InsertClause.under(column: KProperty<*>): DataFrame = under(column.columnName) +public fun InsertClause.under(column: String): DataFrame = under(pathOf(column)) + +public fun InsertClause.after(column: ColumnSelector): DataFrame = after(df.getColumnPath(column)) +public fun InsertClause.after(column: String): DataFrame = df.add(this.column).move(this.column).after(column) +public fun InsertClause.after(column: ColumnAccessor<*>): DataFrame = after(column.path()) +public fun InsertClause.after(column: 
KProperty<*>): DataFrame = after(column.columnName) +public fun InsertClause.after(columnPath: ColumnPath): DataFrame { + val dstPath = ColumnPath(columnPath.removeAt(columnPath.size - 1) + column.name()) + return df.insertImpl(dstPath, column).move { dstPath }.after { columnPath } +} + +public fun InsertClause.at(position: Int): DataFrame = df.add(column).move(column).to(position) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/into.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/into.kt new file mode 100644 index 000000000..9b9e2b177 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/into.kt @@ -0,0 +1,68 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.impl.aggregation.internal +import org.jetbrains.kotlinx.dataframe.impl.aggregation.withExpr +import org.jetbrains.kotlinx.dataframe.impl.columnName +import kotlin.reflect.KProperty +import kotlin.reflect.typeOf + +// region GroupBy + +public fun GroupBy.into(column: String): DataFrame = toDataFrame(column) +public fun GroupBy.into(column: ColumnAccessor): DataFrame = toDataFrame(column.name()) +public fun GroupBy.into(column: KProperty): DataFrame = toDataFrame(column.columnName) + +public inline fun GroupBy.into( + columnName: String? 
= null, + noinline expression: RowExpression +): DataFrame = into(pathOf(columnName ?: groups.name()).cast(), expression) +public inline fun GroupBy.into( + column: ColumnAccessor, + noinline expression: RowExpression +): DataFrame { + val type = typeOf() + val path = column.path() + return aggregate { + internal().withExpr(type, path, expression) + } +} +public inline fun GroupBy.into(column: KProperty, noinline expression: RowExpression): DataFrame = into(column.columnName, expression) + +// endregion + +// region ReducedGroupBy + +public inline fun ReducedGroupBy.into( + columnName: String? = null, + noinline expression: RowExpression +): DataFrame { + val type = typeOf() + val name = columnName ?: groupBy.groups.name() + return groupBy.aggregate { + val row = reducer(it, it) + if (row != null) { + internal().yield(pathOf(name), expression(row, row), type) + } + } +} +public inline fun ReducedGroupBy.into( + column: ColumnAccessor, + noinline expression: RowExpression +): DataFrame = into(column.name(), expression) +public inline fun ReducedGroupBy.into( + column: KProperty, + noinline expression: RowExpression +): DataFrame = into(column.columnName, expression) + +public fun ReducedGroupBy.into(columnName: String): DataFrame = into(columnName) { this } +public fun ReducedGroupBy.into(column: ColumnAccessor): DataFrame = into(column) { this } +public fun ReducedGroupBy.into(column: KProperty): DataFrame = into(column) { this } + +public fun ReducedGroupBy.concat(): DataFrame = groupBy.groups.values().map { reducer(it, it) }.concat() + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/isEmpty.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/isEmpty.kt new file mode 100644 index 000000000..e55186ee1 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/isEmpty.kt @@ -0,0 +1,12 @@ +package org.jetbrains.kotlinx.dataframe.api + +import 
org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.ncol +import org.jetbrains.kotlinx.dataframe.nrow + +// region DataFrame + +public fun AnyFrame.isEmpty(): Boolean = ncol == 0 || nrow == 0 +public fun AnyFrame.isNotEmpty(): Boolean = !isEmpty() + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt new file mode 100644 index 000000000..f1d6ff56a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt @@ -0,0 +1,128 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.ColumnsContainer +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.impl.api.joinImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty + +public fun DataFrame.join( + other: DataFrame, + type: JoinType = JoinType.Inner, + selector: JoinColumnsSelector? = null +): DataFrame = joinImpl(other, type, true, selector) + +public fun DataFrame.join( + other: DataFrame, + vararg columns: String, + type: JoinType = JoinType.Inner +): DataFrame = join(other, type) { columns.toColumns() } + +public fun DataFrame.innerJoin( + other: DataFrame, + selector: JoinColumnsSelector? = null +): DataFrame = join(other, JoinType.Inner, selector = selector) + +public fun DataFrame.innerJoin( + other: DataFrame, + vararg columns: String +): DataFrame = innerJoin(other) { columns.toColumns() } + +public fun DataFrame.leftJoin( + other: DataFrame, + selector: JoinColumnsSelector? 
= null +): DataFrame = join(other, JoinType.Left, selector = selector) + +public fun DataFrame.leftJoin( + other: DataFrame, + vararg columns: String +): DataFrame = leftJoin(other) { columns.toColumns() } + +public fun DataFrame.rightJoin( + other: DataFrame, + selector: JoinColumnsSelector? = null +): DataFrame = join(other, JoinType.Right, selector = selector) + +public fun DataFrame.rightJoin( + other: DataFrame, + vararg columns: String +): DataFrame = rightJoin(other) { columns.toColumns() } + +public fun DataFrame.fullJoin( + other: DataFrame, + selector: JoinColumnsSelector? = null +): DataFrame = join(other, JoinType.Full, selector = selector) + +public fun DataFrame.fullJoin( + other: DataFrame, + vararg columns: String +): DataFrame = fullJoin(other) { columns.toColumns() } + +public fun DataFrame.filterJoin( + other: DataFrame, + selector: JoinColumnsSelector? = null +): DataFrame = joinImpl(other, JoinType.Inner, addNewColumns = false, selector = selector) + +public fun DataFrame.filterJoin( + other: DataFrame, + vararg columns: String +): DataFrame = filterJoin(other) { columns.toColumns() } + +public fun DataFrame.excludeJoin( + other: DataFrame, + selector: JoinColumnsSelector? = null +): DataFrame = joinImpl(other, JoinType.Exclude, addNewColumns = false, selector = selector) + +public fun DataFrame.excludeJoin( + other: DataFrame, + vararg columns: String +): DataFrame = excludeJoin(other) { columns.toColumns() } + +public fun Iterable>.joinOrNull( + joinType: JoinType = JoinType.Inner, + selector: JoinColumnsSelector? = null +): DataFrame? 
= + fold, DataFrame?>(null) { joined, new -> joined?.join(new, joinType, selector = selector) ?: new } + +public interface JoinDsl : ColumnsSelectionDsl { + + public val right: DataFrame + + public infix fun ColumnReference.match(other: ColumnReference): ColumnMatch = ColumnMatch(this, other) + + public infix fun String.match(other: ColumnReference): ColumnMatch = ColumnMatch(toColumnOf(), other) + + public infix fun ColumnReference.match(other: String): ColumnMatch = ColumnMatch(this, other.toColumnOf()) + + public infix fun String.match(other: String): ColumnMatch = ColumnMatch(toColumnAccessor(), other.toColumnAccessor()) + + public infix fun KProperty.match(other: KProperty): ColumnMatch = ColumnMatch(toColumnAccessor(), other.toColumnAccessor()) + + public infix fun ColumnReference.match(other: KProperty): ColumnMatch = ColumnMatch(this, other.toColumnAccessor()) + + public infix fun KProperty.match(other: ColumnReference): ColumnMatch = ColumnMatch(toColumnAccessor(), other) +} + +public class ColumnMatch(public val left: ColumnReference, public val right: ColumnReference) : ColumnSet { + + override fun resolve(context: ColumnResolutionContext): List> { + throw UnsupportedOperationException() + } +} + +public typealias JoinColumnsSelector = JoinDsl.(ColumnsContainer) -> ColumnSet<*> + +public enum class JoinType { + Left, // all data from left data frame, nulls for mismatches in right data frame + Right, // all data from right data frame, nulls for mismatches in left data frame + Inner, // only matched data from right and left data frame + Full, // all data from left and from right data frame, nulls for any mismatches + Exclude // mismatched rows from left data frame +} + +public val JoinType.allowLeftNulls: Boolean get() = this == JoinType.Right || this == JoinType.Full +public val JoinType.allowRightNulls: Boolean get() = this == JoinType.Left || this == JoinType.Full || this == JoinType.Exclude diff --git 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/last.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/last.kt new file mode 100644 index 000000000..c1ebe1d27 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/last.kt @@ -0,0 +1,58 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.columns.size +import org.jetbrains.kotlinx.dataframe.columns.values +import org.jetbrains.kotlinx.dataframe.nrow + +// region DataColumn + +public fun DataColumn.last(): T = get(size - 1) +public fun DataColumn.lastOrNull(): T? = if (size > 0) last() else null +public fun DataColumn.last(predicate: (T) -> Boolean): T = values.last(predicate) +public fun DataColumn.lastOrNull(predicate: (T) -> Boolean): T? = values.lastOrNull(predicate) + +// endregion + +// region DataFrame + +public fun DataFrame.lastOrNull(predicate: RowFilter): DataRow? = + rowsReversed().firstOrNull { predicate(it, it) } + +public fun DataFrame.last(predicate: RowFilter): DataRow = rowsReversed().first { predicate(it, it) } +public fun DataFrame.lastOrNull(): DataRow? = if (nrow > 0) get(nrow - 1) else null +public fun DataFrame.last(): DataRow { + if (nrow == 0) { + throw NoSuchElementException("DataFrame has no rows. 
Use `lastOrNull`.") + } + return get(nrow - 1) +} + +// endregion + +// region GroupBy + +public fun GroupBy.last(): ReducedGroupBy = reduce { lastOrNull() } + +public fun GroupBy.last(predicate: RowFilter): ReducedGroupBy = reduce { lastOrNull(predicate) } + +// endregion + +// region Pivot + +public fun Pivot.last(): ReducedPivot = reduce { lastOrNull() } + +public fun Pivot.last(predicate: RowFilter): ReducedPivot = reduce { lastOrNull(predicate) } + +// endregion + +// region PivotGroupBy + +public fun PivotGroupBy.last(): ReducedPivotGroupBy = reduce { lastOrNull() } + +public fun PivotGroupBy.last(predicate: RowFilter): ReducedPivotGroupBy = reduce { lastOrNull(predicate) } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/length.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/length.kt new file mode 100644 index 000000000..3c9eb65af --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/length.kt @@ -0,0 +1,10 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.StringCol + +// region StringCol + +public fun StringCol.length(): DataColumn = map { it?.length ?: 0 } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/lowercase.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/lowercase.kt new file mode 100644 index 000000000..1f10f5991 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/lowercase.kt @@ -0,0 +1,9 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.StringCol + +// region StringCol + +public fun StringCol.lowercase(): StringCol = map { it?.lowercase() } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt new file mode 100644 index 000000000..becac2ab0 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt @@ -0,0 +1,130 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.ColumnsContainer +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.Selector +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.createComputedColumnReference +import org.jetbrains.kotlinx.dataframe.impl.columns.newColumn +import kotlin.reflect.KProperty +import kotlin.reflect.KType +import kotlin.reflect.typeOf + +// region ColumnReference + +public inline fun ColumnReference.map(infer: Infer = Infer.Nulls, noinline transform: (C) -> R): ColumnReference = + createComputedColumnReference(name(), typeOf(), infer) { transform(this@map()) } + +// endregion + +// region DataColumn + +public inline fun DataColumn.map( + infer: Infer = Infer.Nulls, + crossinline transform: (T) -> R +): DataColumn { + val newValues = Array(size()) { transform(get(it)) }.asList() + return DataColumn.create(name(), newValues, typeOf(), infer) +} + +public fun DataColumn.map( + type: KType, + infer: Infer = Infer.Nulls, + transform: (T) -> R +): DataColumn { + val values = Array(size()) { transform(get(it)) }.asList() + return DataColumn.create(name(), values, type, infer).cast() +} + +public inline fun DataColumn.mapIndexed( + infer: Infer = Infer.Nulls, + crossinline transform: 
(Int, T) -> R +): DataColumn { + val newValues = Array(size()) { transform(it, get(it)) }.asList() + return DataColumn.create(name(), newValues, typeOf(), infer) +} + +public fun DataColumn.mapIndexed( + type: KType, + infer: Infer = Infer.Nulls, + transform: (Int, T) -> R +): DataColumn { + val values = Array(size()) { transform(it, get(it)) }.asList() + return DataColumn.create(name(), values, type, infer).cast() +} + +// endregion + +// region DataFrame + +public fun DataFrame.map(transform: RowExpression): List = rows().map { transform(it, it) } + +public inline fun ColumnsContainer.mapToColumn( + name: String, + infer: Infer = Infer.Nulls, + noinline body: AddExpression +): DataColumn = mapToColumn(name, typeOf(), infer, body) + +public inline fun ColumnsContainer.mapToColumn( + column: ColumnAccessor, + infer: Infer = Infer.Nulls, + noinline body: AddExpression +): DataColumn = mapToColumn(column, typeOf(), infer, body) + +public inline fun ColumnsContainer.mapToColumn( + column: KProperty, + infer: Infer = Infer.Nulls, + noinline body: AddExpression +): DataColumn = mapToColumn(column, typeOf(), infer, body) + +public fun ColumnsContainer.mapToColumn( + name: String, + type: KType, + infer: Infer = Infer.Nulls, + body: AddExpression +): DataColumn = newColumn(type, name, infer, body) + +public fun ColumnsContainer.mapToColumn( + column: ColumnAccessor, + type: KType, + infer: Infer = Infer.Nulls, + body: AddExpression +): DataColumn = mapToColumn(column.name(), type, infer, body) + +public fun ColumnsContainer.mapToColumn( + column: KProperty, + type: KType, + infer: Infer = Infer.Nulls, + body: AddExpression +): DataColumn = mapToColumn(column.columnName, type, infer, body) + +public fun DataFrame.mapToFrame(body: AddDsl.() -> Unit): AnyFrame { + val dsl = AddDsl(this) + body(dsl) + return dataFrameOf(dsl.columns) +} + +// endregion + +// region GroupBy + +public fun GroupBy.map(body: Selector, R>): List = keys.rows().mapIndexedNotNull { index, row -> + val 
group = groups[index] + val g = GroupWithKey(row, group) + body(g, g) +} + +public fun GroupBy.mapToRows(body: Selector, DataRow?>): DataFrame = + map(body).concat() + +public fun GroupBy.mapToFrames(body: Selector, DataFrame>): FrameColumn = + DataColumn.createFrameColumn(groups.name, map(body)) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/matches.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/matches.kt new file mode 100644 index 000000000..0409cda9a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/matches.kt @@ -0,0 +1,10 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataFrame + +// region PivotGroupBy + +public fun PivotGroupBy.matches(): DataFrame = matches(yes = true, no = false) +public fun PivotGroupBy.matches(yes: R, no: R): DataFrame = aggregate { yes default no } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt new file mode 100644 index 000000000..ccbb97ca1 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt @@ -0,0 +1,170 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.aggregation.ColumnsForAggregateSelector +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.values +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators +import 
org.jetbrains.kotlinx.dataframe.impl.aggregation.comparableColumns +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfDelegated +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.columns.toComparableColumns +import org.jetbrains.kotlinx.dataframe.impl.indexOfMax +import org.jetbrains.kotlinx.dataframe.impl.suggestIfNull +import kotlin.reflect.KProperty + +// region DataColumn + +public fun > DataColumn.max(): T = maxOrNull().suggestIfNull("max") +public fun > DataColumn.maxOrNull(): T? = asSequence().filterNotNull().maxOrNull() + +public fun > DataColumn.maxBy(selector: (T) -> R): T = maxByOrNull(selector).suggestIfNull("maxBy") +public fun > DataColumn.maxByOrNull(selector: (T) -> R): T? = values.maxByOrNull(selector) + +public fun > DataColumn.maxOf(selector: (T) -> R): R = maxOfOrNull(selector).suggestIfNull("maxOf") +public fun > DataColumn.maxOfOrNull(selector: (T) -> R): R? = values.maxOfOrNull(selector) + +// endregion + +// region DataRow + +public fun AnyRow.rowMaxOrNull(): Any? = values().filterIsInstance>().maxWithOrNull(compareBy { it }) +public fun AnyRow.rowMax(): Any = rowMaxOrNull().suggestIfNull("rowMax") +public inline fun > AnyRow.rowMaxOfOrNull(): T? 
= values().filterIsInstance().maxOrNull() +public inline fun > AnyRow.rowMaxOf(): T = rowMaxOfOrNull().suggestIfNull("rowMaxOf") + +// endregion + +// region DataFrame + +public fun DataFrame.max(): DataRow = maxFor(comparableColumns()) + +public fun > DataFrame.maxFor(columns: ColumnsForAggregateSelector): DataRow = Aggregators.max.aggregateFor(this, columns) +public fun DataFrame.maxFor(vararg columns: String): DataRow = maxFor { columns.toComparableColumns() } +public fun > DataFrame.maxFor(vararg columns: ColumnReference): DataRow = maxFor { columns.toColumns() } +public fun > DataFrame.maxFor(vararg columns: KProperty): DataRow = maxFor { columns.toColumns() } + +public fun > DataFrame.max(columns: ColumnsSelector): C = maxOrNull(columns).suggestIfNull("max") +public fun DataFrame.max(vararg columns: String): Comparable = maxOrNull(*columns).suggestIfNull("max") +public fun > DataFrame.max(vararg columns: ColumnReference): C = maxOrNull(*columns).suggestIfNull("max") +public fun > DataFrame.max(vararg columns: KProperty): C = maxOrNull(*columns).suggestIfNull("max") + +public fun > DataFrame.maxOrNull(columns: ColumnsSelector): C? = Aggregators.max.aggregateAll(this, columns) as C? +public fun DataFrame.maxOrNull(vararg columns: String): Comparable? = maxOrNull { columns.toComparableColumns() } +public fun > DataFrame.maxOrNull(vararg columns: ColumnReference): C? = maxOrNull { columns.toColumns() } +public fun > DataFrame.maxOrNull(vararg columns: KProperty): C? = maxOrNull { columns.toColumns() } + +public fun > DataFrame.maxOf(expression: RowExpression): C = maxOfOrNull(expression).suggestIfNull("maxOf") +public fun > DataFrame.maxOfOrNull(expression: RowExpression): C? 
= rows().maxOfOrNull { expression(it, it) } + +public fun > DataFrame.maxBy(expression: RowExpression): DataRow = maxByOrNull(expression).suggestIfNull("maxBy") +public fun DataFrame.maxBy(column: String): DataRow = maxByOrNull(column).suggestIfNull("maxBy") +public fun > DataFrame.maxBy(column: ColumnReference): DataRow = maxByOrNull(column).suggestIfNull("maxBy") +public fun > DataFrame.maxBy(column: KProperty): DataRow = maxByOrNull(column).suggestIfNull("maxBy") + +public fun > DataFrame.maxByOrNull(expression: RowExpression): DataRow? = getOrNull(rows().asSequence().map { expression(it, it) }.indexOfMax()) +public fun DataFrame.maxByOrNull(column: String): DataRow? = maxByOrNull(column.toColumnOf?>()) +public fun > DataFrame.maxByOrNull(column: ColumnReference): DataRow? = getOrNull(get(column).asSequence().indexOfMax()) +public fun > DataFrame.maxByOrNull(column: KProperty): DataRow? = maxByOrNull(column.toColumnAccessor()) + +// endregion + +// region GroupBy + +public fun Grouped.max(): DataFrame = maxFor(comparableColumns()) + +public fun > Grouped.maxFor(columns: ColumnsForAggregateSelector): DataFrame = Aggregators.max.aggregateFor(this, columns) +public fun Grouped.maxFor(vararg columns: String): DataFrame = maxFor { columns.toComparableColumns() } +public fun > Grouped.maxFor(vararg columns: ColumnReference): DataFrame = maxFor { columns.toColumns() } +public fun > Grouped.maxFor(vararg columns: KProperty): DataFrame = maxFor { columns.toColumns() } + +public fun > Grouped.max( + name: String? = null, + columns: ColumnsSelector +): DataFrame = + Aggregators.max.aggregateAll(this, name, columns) +public fun Grouped.max(vararg columns: String, name: String? = null): DataFrame = max(name) { columns.toComparableColumns() } +public fun > Grouped.max(vararg columns: ColumnReference, name: String? = null): DataFrame = max(name) { columns.toColumns() } +public fun > Grouped.max(vararg columns: KProperty, name: String? 
= null): DataFrame = max(name) { columns.toColumns() } + +public fun > Grouped.maxOf(name: String? = null, expression: RowExpression): DataFrame = + Aggregators.max.aggregateOfDelegated(this, name) { maxOfOrNull(expression) } + +public fun > GroupBy.maxBy(rowExpression: RowExpression): ReducedGroupBy = reduce { maxByOrNull(rowExpression) } +public fun > GroupBy.maxBy(column: ColumnReference): ReducedGroupBy = reduce { maxByOrNull(column) } +public fun GroupBy.maxBy(column: String): ReducedGroupBy = maxBy(column.toColumnAccessor().cast>()) +public fun > GroupBy.maxBy(column: KProperty): ReducedGroupBy = maxBy(column.toColumnAccessor()) + +// endregion + +// region Pivot + +public fun Pivot.max(separate: Boolean = false): DataRow = delegate { max(separate) } + +public fun > Pivot.maxFor( + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataRow = delegate { maxFor(separate, columns) } +public fun Pivot.maxFor(vararg columns: String, separate: Boolean = false): DataRow = maxFor(separate) { columns.toComparableColumns() } +public fun > Pivot.maxFor( + vararg columns: ColumnReference, + separate: Boolean = false +): DataRow = maxFor(separate) { columns.toColumns() } +public fun > Pivot.maxFor( + vararg columns: KProperty, + separate: Boolean = false +): DataRow = maxFor(separate) { columns.toColumns() } + +public fun > Pivot.max(columns: ColumnsSelector): DataRow = delegate { max(columns) } +public fun Pivot.max(vararg columns: String): DataRow = max { columns.toComparableColumns() } +public fun > Pivot.max(vararg columns: ColumnReference): DataRow = max { columns.toColumns() } +public fun > Pivot.max(vararg columns: KProperty): DataRow = max { columns.toColumns() } + +public fun > Pivot.maxOf(rowExpression: RowExpression): DataRow = delegate { maxOf(rowExpression) } + +public fun > Pivot.maxBy(rowExpression: RowExpression): ReducedPivot = reduce { maxByOrNull(rowExpression) } +public fun > Pivot.maxBy(column: ColumnReference): ReducedPivot = 
reduce { maxByOrNull(column) } +public fun Pivot.maxBy(column: String): ReducedPivot = maxBy(column.toColumnAccessor().cast>()) +public fun > Pivot.maxBy(column: KProperty): ReducedPivot = maxBy(column.toColumnAccessor()) + +// endregion + +// region PivotGroupBy + +public fun PivotGroupBy.max(separate: Boolean = false): DataFrame = maxFor(separate, comparableColumns()) + +public fun > PivotGroupBy.maxFor( + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataFrame = + Aggregators.max.aggregateFor(this, separate, columns) +public fun PivotGroupBy.maxFor(vararg columns: String, separate: Boolean = false): DataFrame = maxFor(separate) { columns.toComparableColumns() } +public fun > PivotGroupBy.maxFor( + vararg columns: ColumnReference, + separate: Boolean = false +): DataFrame = maxFor(separate) { columns.toColumns() } +public fun > PivotGroupBy.maxFor( + vararg columns: KProperty, + separate: Boolean = false +): DataFrame = maxFor(separate) { columns.toColumns() } + +public fun > PivotGroupBy.max(columns: ColumnsSelector): DataFrame = Aggregators.max.aggregateAll(this, columns) +public fun PivotGroupBy.max(vararg columns: String): DataFrame = max { columns.toComparableColumns() } +public fun > PivotGroupBy.max(vararg columns: ColumnReference): DataFrame = max { columns.toColumns() } +public fun > PivotGroupBy.max(vararg columns: KProperty): DataFrame = max { columns.toColumns() } + +public fun > PivotGroupBy.maxOf(rowExpression: RowExpression): DataFrame = aggregate { maxOf(rowExpression) } + +public fun > PivotGroupBy.maxBy(rowExpression: RowExpression): ReducedPivotGroupBy = reduce { maxByOrNull(rowExpression) } +public fun > PivotGroupBy.maxBy(column: ColumnReference): ReducedPivotGroupBy = reduce { maxByOrNull(column) } +public fun PivotGroupBy.maxBy(column: String): ReducedPivotGroupBy = maxBy(column.toColumnAccessor().cast>()) +public fun > PivotGroupBy.maxBy(column: KProperty): ReducedPivotGroupBy = maxBy(column.toColumnAccessor()) + 
+// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/mean.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/mean.kt new file mode 100644 index 000000000..7bcb4ceff --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/mean.kt @@ -0,0 +1,183 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.aggregation.ColumnsForAggregateSelector +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.cast2 +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.of +import org.jetbrains.kotlinx.dataframe.impl.aggregation.numberColumns +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnsOf +import org.jetbrains.kotlinx.dataframe.impl.columns.toNumberColumns +import org.jetbrains.kotlinx.dataframe.impl.suggestIfNull +import org.jetbrains.kotlinx.dataframe.math.mean +import kotlin.reflect.KProperty +import kotlin.reflect.typeOf + +// region DataColumn + +public fun DataColumn.mean(skipNA: Boolean = skipNA_default): Double = meanOrNull(skipNA).suggestIfNull("mean") +public fun DataColumn.meanOrNull(skipNA: Boolean = skipNA_default): Double? 
= Aggregators.mean(skipNA).aggregate(this) + +public inline fun DataColumn.meanOf( + skipNA: Boolean = skipNA_default, + noinline expression: (T) -> R? +): Double = Aggregators.mean(skipNA).cast2().aggregateOf(this, expression) ?: Double.NaN + +// endregion + +// region DataRow + +public fun AnyRow.rowMean(skipNA: Boolean = org.jetbrains.kotlinx.dataframe.api.skipNA_default): Double = values().filterIsInstance().map { it.toDouble() }.mean(skipNA) +public inline fun AnyRow.rowMeanOf(): Double = values().filterIsInstance().mean(typeOf()) + +// endregion + +// region DataFrame + +public fun DataFrame.mean(skipNA: Boolean = skipNA_default): DataRow = meanFor(skipNA, numberColumns()) + +public fun DataFrame.meanFor( + skipNA: Boolean = skipNA_default, + columns: ColumnsForAggregateSelector +): DataRow = Aggregators.mean(skipNA).aggregateFor(this, columns) +public fun DataFrame.meanFor(vararg columns: String, skipNA: Boolean = skipNA_default): DataRow = meanFor(skipNA) { columns.toNumberColumns() } +public fun DataFrame.meanFor(vararg columns: ColumnReference, skipNA: Boolean = skipNA_default): DataRow = meanFor(skipNA) { columns.toColumns() } +public fun DataFrame.meanFor(vararg columns: KProperty, skipNA: Boolean = skipNA_default): DataRow = meanFor(skipNA) { columns.toColumns() } + +public fun DataFrame.mean(skipNA: Boolean = skipNA_default, columns: ColumnsSelector): Double = Aggregators.mean(skipNA).aggregateAll(this, columns) as Double? 
?: Double.NaN +public fun DataFrame.mean(vararg columns: String, skipNA: Boolean = skipNA_default): Double = mean(skipNA) { columns.toNumberColumns() } +public fun DataFrame.mean(vararg columns: ColumnReference, skipNA: Boolean = skipNA_default): Double = mean(skipNA) { columns.toColumns() } +public fun DataFrame.mean(vararg columns: KProperty, skipNA: Boolean = skipNA_default): Double = mean(skipNA) { columns.toColumns() } + +public inline fun DataFrame.meanOf( + skipNA: Boolean = skipNA_default, + noinline expression: RowExpression +): Double = Aggregators.mean(skipNA).of(this, expression) ?: Double.NaN + +// endregion + +// region GroupBy + +public fun Grouped.mean(skipNA: Boolean = skipNA_default): DataFrame = meanFor(skipNA, numberColumns()) + +public fun Grouped.meanFor( + skipNA: Boolean = skipNA_default, + columns: ColumnsForAggregateSelector +): DataFrame = Aggregators.mean(skipNA).aggregateFor(this, columns) +public fun Grouped.meanFor(vararg columns: String, skipNA: Boolean = skipNA_default): DataFrame = meanFor(skipNA) { columns.toNumberColumns() } +public fun Grouped.meanFor(vararg columns: ColumnReference, skipNA: Boolean = skipNA_default): DataFrame = meanFor(skipNA) { columns.toColumns() } +public fun Grouped.meanFor(vararg columns: KProperty, skipNA: Boolean = skipNA_default): DataFrame = meanFor(skipNA) { columns.toColumns() } + +public fun Grouped.mean( + name: String? = null, + skipNA: Boolean = skipNA_default, + columns: ColumnsSelector +): DataFrame = Aggregators.mean(skipNA).aggregateAll(this, name, columns) + +public fun Grouped.mean(vararg columns: String, name: String? = null, skipNA: Boolean = skipNA_default): DataFrame = mean(name, skipNA) { columns.toNumberColumns() } + +public fun Grouped.mean( + vararg columns: ColumnReference, + name: String? = null, + skipNA: Boolean = skipNA_default +): DataFrame = mean(name, skipNA) { columns.toColumns() } + +public fun Grouped.mean( + vararg columns: KProperty, + name: String? 
= null, + skipNA: Boolean = skipNA_default +): DataFrame = mean(name, skipNA) { columns.toColumns() } + +public inline fun Grouped.meanOf( + name: String? = null, + skipNA: Boolean = skipNA_default, + crossinline expression: RowExpression +): DataFrame = + Aggregators.mean(skipNA).aggregateOf(this, name, expression) + +// endregion + +// region Pivot + +public fun Pivot.mean(skipNA: Boolean = skipNA_default, separate: Boolean = false): DataRow = meanFor(skipNA, separate, numberColumns()) + +public fun Pivot.meanFor( + skipNA: Boolean = skipNA_default, + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataRow = delegate { meanFor(skipNA, separate, columns) } +public fun Pivot.meanFor( + vararg columns: String, + skipNA: Boolean = skipNA_default, + separate: Boolean = false +): DataRow = meanFor(skipNA, separate) { columns.toNumberColumns() } +public fun Pivot.meanFor( + vararg columns: ColumnReference, + skipNA: Boolean = skipNA_default, + separate: Boolean = false +): DataRow = meanFor(skipNA, separate) { columns.toColumns() } +public fun Pivot.meanFor( + vararg columns: KProperty, + skipNA: Boolean = skipNA_default, + separate: Boolean = false +): DataRow = meanFor(skipNA, separate) { columns.toColumns() } + +public fun Pivot.mean(skipNA: Boolean = skipNA_default, columns: ColumnsSelector): DataRow = + delegate { mean(skipNA, columns) } + +public inline fun Pivot.meanOf( + skipNA: Boolean = skipNA_default, + crossinline expression: RowExpression +): DataRow = + delegate { meanOf(skipNA, expression) } + +// endregion + +// region PivotGroupBy + +public fun PivotGroupBy.mean(separate: Boolean = false, skipNA: Boolean = skipNA_default): DataFrame = meanFor(skipNA, separate, numberColumns()) + +public fun PivotGroupBy.meanFor( + skipNA: Boolean = skipNA_default, + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataFrame = Aggregators.mean(skipNA).aggregateFor(this, separate, columns) +public fun PivotGroupBy.meanFor( + 
vararg columns: String, + separate: Boolean = false, + skipNA: Boolean = skipNA_default +): DataFrame = meanFor(skipNA, separate) { columns.toNumberColumns() } +public fun PivotGroupBy.meanFor( + vararg columns: ColumnReference, + separate: Boolean = false, + skipNA: Boolean = skipNA_default, +): DataFrame = meanFor(skipNA, separate) { columns.toColumns() } +public fun PivotGroupBy.meanFor( + vararg columns: KProperty, + separate: Boolean = false, + skipNA: Boolean = skipNA_default, +): DataFrame = meanFor(skipNA, separate) { columns.toColumns() } + +public fun PivotGroupBy.mean(skipNA: Boolean = skipNA_default, columns: ColumnsSelector): DataFrame = + Aggregators.mean(skipNA).aggregateAll(this, columns) +public fun PivotGroupBy.mean(vararg columns: String, skipNA: Boolean = skipNA_default): DataFrame = mean(skipNA) { columns.toColumnsOf() } +public fun PivotGroupBy.mean(vararg columns: ColumnReference, skipNA: Boolean = skipNA_default): DataFrame = mean(skipNA) { columns.toColumns() } +public fun PivotGroupBy.mean(vararg columns: KProperty, skipNA: Boolean = skipNA_default): DataFrame = mean(skipNA) { columns.toColumns() } + +public inline fun PivotGroupBy.meanOf( + skipNA: Boolean = skipNA_default, + crossinline expression: RowExpression +): DataFrame = + Aggregators.mean(skipNA).aggregateOf(this, expression) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt new file mode 100644 index 000000000..81fcd39db --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt @@ -0,0 +1,150 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import 
org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.aggregation.ColumnsForAggregateSelector +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.cast +import org.jetbrains.kotlinx.dataframe.impl.aggregation.comparableColumns +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.of +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.columns.toComparableColumns +import org.jetbrains.kotlinx.dataframe.impl.suggestIfNull +import org.jetbrains.kotlinx.dataframe.math.medianOrNull +import kotlin.reflect.KProperty + +// region DataColumn + +public fun > DataColumn.median(): T = medianOrNull().suggestIfNull("median") +public fun > DataColumn.medianOrNull(): T? = Aggregators.median.cast().aggregate(this) + +public inline fun > DataColumn.medianOfOrNull(noinline expression: (T) -> R?): R? = Aggregators.median.cast().aggregateOf(this, expression) +public inline fun > DataColumn.medianOf(noinline expression: (T) -> R?): R = medianOfOrNull(expression).suggestIfNull("medianOf") + +// endregion + +// region DataRow + +public fun AnyRow.rowMedianOrNull(): Any? = org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators.median.aggregateMixed(values().filterIsInstance>().asIterable()) +public fun AnyRow.rowMedian(): Any = rowMedianOrNull().suggestIfNull("rowMedian") +public inline fun > AnyRow.rowMedianOfOrNull(): T? 
= valuesOf().medianOrNull() +public inline fun > AnyRow.rowMedianOf(): T = rowMedianOfOrNull().suggestIfNull("rowMedianOf") + +// endregion + +// region DataFrame + +public fun DataFrame.median(): DataRow = medianFor(comparableColumns()) + +public fun > DataFrame.medianFor(columns: ColumnsForAggregateSelector): DataRow = Aggregators.median.aggregateFor(this, columns) +public fun DataFrame.medianFor(vararg columns: String): DataRow = medianFor { columns.toComparableColumns() } +public fun > DataFrame.medianFor(vararg columns: ColumnReference): DataRow = medianFor { columns.toColumns() } +public fun > DataFrame.medianFor(vararg columns: KProperty): DataRow = medianFor { columns.toColumns() } + +public fun > DataFrame.median(columns: ColumnsSelector): C = medianOrNull(columns).suggestIfNull("median") +public fun DataFrame.median(vararg columns: String): Any = median { columns.toComparableColumns() } +public fun > DataFrame.median(vararg columns: ColumnReference): C = median { columns.toColumns() } +public fun > DataFrame.median(vararg columns: KProperty): C = median { columns.toColumns() } + +public fun > DataFrame.medianOrNull(columns: ColumnsSelector): C? = Aggregators.median.aggregateAll(this, columns) as C? +public fun DataFrame.medianOrNull(vararg columns: String): Any? = medianOrNull { columns.toComparableColumns() } +public fun > DataFrame.medianOrNull(vararg columns: ColumnReference): C? = medianOrNull { columns.toColumns() } +public fun > DataFrame.medianOrNull(vararg columns: KProperty): C? = medianOrNull { columns.toColumns() } + +public inline fun > DataFrame.medianOf(crossinline expression: RowExpression): R? = Aggregators.median.of(this, expression) as R? 
+ +// endregion + +// region GroupBy + +public fun Grouped.median(): DataFrame = medianFor(comparableColumns()) + +public fun > Grouped.medianFor(columns: ColumnsForAggregateSelector): DataFrame = Aggregators.median.aggregateFor(this, columns) +public fun Grouped.medianFor(vararg columns: String): DataFrame = medianFor { columns.toComparableColumns() } +public fun > Grouped.medianFor(vararg columns: ColumnReference): DataFrame = medianFor { columns.toColumns() } +public fun > Grouped.medianFor(vararg columns: KProperty): DataFrame = medianFor { columns.toColumns() } + +public fun > Grouped.median(name: String? = null, columns: ColumnsSelector): DataFrame = Aggregators.median.aggregateAll(this, name, columns) +public fun Grouped.median(vararg columns: String, name: String? = null): DataFrame = median(name) { columns.toComparableColumns() } +public fun > Grouped.median( + vararg columns: ColumnReference, + name: String? = null +): DataFrame = median(name) { columns.toColumns() } +public fun > Grouped.median(vararg columns: KProperty, name: String? = null): DataFrame = median(name) { columns.toColumns() } + +public inline fun > Grouped.medianOf( + name: String? 
= null, + crossinline expression: RowExpression +): DataFrame = Aggregators.median.aggregateOf(this, name, expression) + +// endregion + +// region Pivot + +public fun Pivot.median(separate: Boolean = false): DataRow = medianFor(separate, comparableColumns()) + +public fun > Pivot.medianFor( + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataRow = delegate { medianFor(separate, columns) } +public fun Pivot.medianFor(vararg columns: String, separate: Boolean = false): DataRow = medianFor(separate) { columns.toComparableColumns() } +public fun > Pivot.medianFor( + vararg columns: ColumnReference, + separate: Boolean = false +): DataRow = medianFor(separate) { columns.toColumns() } +public fun > Pivot.medianFor( + vararg columns: KProperty, + separate: Boolean = false +): DataRow = medianFor(separate) { columns.toColumns() } + +public fun > Pivot.median(columns: ColumnsSelector): DataRow = delegate { median(columns) } +public fun Pivot.median(vararg columns: String): DataRow = median { columns.toComparableColumns() } +public fun > Pivot.median( + vararg columns: ColumnReference +): DataRow = median { columns.toColumns() } +public fun > Pivot.median(vararg columns: KProperty): DataRow = median { columns.toColumns() } + +public inline fun > Pivot.medianOf( + crossinline expression: RowExpression +): DataRow = delegate { medianOf(expression) } + +// endregion + +// region PivotGroupBy + +public fun PivotGroupBy.median(separate: Boolean = false): DataFrame = medianFor(separate, comparableColumns()) + +public fun > PivotGroupBy.medianFor( + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataFrame = Aggregators.median.aggregateFor(this, separate, columns) +public fun PivotGroupBy.medianFor(vararg columns: String, separate: Boolean = false): DataFrame = medianFor(separate) { columns.toComparableColumns() } +public fun > PivotGroupBy.medianFor( + vararg columns: ColumnReference, + separate: Boolean = false +): DataFrame = 
medianFor(separate) { columns.toColumns() } +public fun > PivotGroupBy.medianFor( + vararg columns: KProperty, + separate: Boolean = false +): DataFrame = medianFor(separate) { columns.toColumns() } + +public fun > PivotGroupBy.median(columns: ColumnsSelector): DataFrame = Aggregators.median.aggregateAll(this, columns) +public fun PivotGroupBy.median(vararg columns: String): DataFrame = median { columns.toComparableColumns() } +public fun > PivotGroupBy.median( + vararg columns: ColumnReference +): DataFrame = median { columns.toColumns() } +public fun > PivotGroupBy.median(vararg columns: KProperty): DataFrame = median { columns.toColumns() } + +public inline fun > PivotGroupBy.medianOf( + crossinline expression: RowExpression +): DataFrame = Aggregators.median.aggregateOf(this, expression) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/merge.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/merge.kt new file mode 100644 index 000000000..29cc85521 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/merge.kt @@ -0,0 +1,104 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.api.removeImpl +import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.nameGenerator +import kotlin.reflect.KProperty +import kotlin.reflect.KType +import kotlin.reflect.typeOf + +public fun DataFrame.merge(selector: 
ColumnsSelector): Merge> = + Merge(this, selector, false, { it }, typeOf(), Infer.Type) + +public fun DataFrame.merge(vararg columns: String): Merge> = + merge { columns.toColumns() } + +public inline fun DataFrame.merge(vararg columns: ColumnReference): Merge> = + merge { columns.toColumns() } + +public inline fun DataFrame.merge(vararg columns: KProperty): Merge> = + merge { columns.toColumns() } + +public data class Merge( + @PublishedApi + internal val df: DataFrame, + @PublishedApi + internal val selector: ColumnsSelector, + @PublishedApi + internal val notNull: Boolean, + @PublishedApi + internal val transform: DataRow.(List) -> R, + @PublishedApi + internal val resultType: KType, + @PublishedApi + internal val infer: Infer, +) + +public fun Merge.notNull(): Merge = copy(notNull = true) + +public fun Merge.into(columnName: String): DataFrame = into(pathOf(columnName)) +public fun Merge.into(column: ColumnAccessor<*>): DataFrame = into(column.path()) + +public fun Merge.intoList(): List = + df.select(selector).rows().map { transform(it, it.values() as List) } + +public fun Merge.into(path: ColumnPath): DataFrame { + // If target path exists, merge into temp path + val mergePath = if (df.getColumnOrNull(path) != null) pathOf(nameGenerator().addUnique("temp")) else path + + // move columns into group + val grouped = df.move(selector).under { mergePath } + + var res = grouped.convert { getColumnGroup(mergePath) }.withRowCellImpl(resultType, infer) { + val srcRow = df[index()] + var values = it.values() as List + if (notNull) { + values = values.filter { + it != null && (it !is AnyRow || !it.isEmpty()) + } + } + transform(srcRow, values) + } + if (mergePath != path) { + // target path existed before merge, but + // it may have already been removed + res = res.removeImpl(allowMissingColumns = true) { path }.df.move(mergePath).into { path } + } + return res +} + +public fun Merge.asStrings(): Merge = by(", ") +public fun Merge.by( + separator: CharSequence = ", ", + 
prefix: CharSequence = "", + postfix: CharSequence = "", + limit: Int = -1, + truncated: CharSequence = "..." +): Merge = + Merge( + df, selector, notNull, + transform = { + it.joinToString( + separator = separator, + prefix = prefix, + postfix = postfix, + limit = limit, + truncated = truncated + ) + }, + typeOf(), Infer.Nulls + ) + +public inline fun Merge.by( + infer: Infer = Infer.Nulls, + crossinline transform: DataRow.(R) -> V +): Merge = + Merge(df, selector, notNull, { transform(this@by.transform(this, it)) }, typeOf(), infer) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt new file mode 100644 index 000000000..7cc69e6c8 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt @@ -0,0 +1,170 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.aggregation.ColumnsForAggregateSelector +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.values +import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators +import org.jetbrains.kotlinx.dataframe.impl.aggregation.comparableColumns +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor +import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfDelegated +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.columns.toComparableColumns +import 
org.jetbrains.kotlinx.dataframe.impl.indexOfMin +import org.jetbrains.kotlinx.dataframe.impl.suggestIfNull +import kotlin.reflect.KProperty + +// region DataColumn + +public fun > DataColumn.min(): T = minOrNull().suggestIfNull("min") +public fun > DataColumn.minOrNull(): T? = asSequence().filterNotNull().minOrNull() + +public fun > DataColumn.minBy(selector: (T) -> R): T = minByOrNull(selector).suggestIfNull("minBy") +public fun > DataColumn.minByOrNull(selector: (T) -> R): T? = values.minByOrNull(selector) + +public fun > DataColumn.minOf(selector: (T) -> R): R = minOfOrNull(selector).suggestIfNull("minOf") +public fun > DataColumn.minOfOrNull(selector: (T) -> R): R? = values.minOfOrNull(selector) + +// endregion + +// region DataRow + +public fun AnyRow.rowMinOrNull(): Any? = values().filterIsInstance>().minWithOrNull(compareBy { it }) +public fun AnyRow.rowMin(): Any = rowMinOrNull().suggestIfNull("rowMin") +public inline fun > AnyRow.rowMinOfOrNull(): T? = values().filterIsInstance().minOrNull() +public inline fun > AnyRow.rowMinOf(): T = rowMinOfOrNull().suggestIfNull("rowMinOf") + +// endregion + +// region DataFrame + +public fun DataFrame.min(): DataRow = minFor(comparableColumns()) + +public fun > DataFrame.minFor(columns: ColumnsForAggregateSelector): DataRow = Aggregators.min.aggregateFor(this, columns) +public fun DataFrame.minFor(vararg columns: String): DataRow = minFor { columns.toComparableColumns() } +public fun > DataFrame.minFor(vararg columns: ColumnReference): DataRow = minFor { columns.toColumns() } +public fun > DataFrame.minFor(vararg columns: KProperty): DataRow = minFor { columns.toColumns() } + +public fun > DataFrame.min(columns: ColumnsSelector): C = minOrNull(columns).suggestIfNull("min") +public fun DataFrame.min(vararg columns: String): Comparable = minOrNull(*columns).suggestIfNull("min") +public fun > DataFrame.min(vararg columns: ColumnReference): C = minOrNull(*columns).suggestIfNull("min") +public fun > DataFrame.min(vararg 
columns: KProperty): C = minOrNull(*columns).suggestIfNull("min") + +public fun > DataFrame.minOrNull(columns: ColumnsSelector): C? = Aggregators.min.aggregateAll(this, columns) as C? +public fun DataFrame.minOrNull(vararg columns: String): Comparable? = minOrNull { columns.toComparableColumns() } +public fun > DataFrame.minOrNull(vararg columns: ColumnReference): C? = minOrNull { columns.toColumns() } +public fun > DataFrame.minOrNull(vararg columns: KProperty): C? = minOrNull { columns.toColumns() } + +public fun > DataFrame.minOf(expression: RowExpression): C = minOfOrNull(expression).suggestIfNull("minOf") +public fun > DataFrame.minOfOrNull(expression: RowExpression): C? = rows().minOfOrNull { expression(it, it) } + +public fun > DataFrame.minBy(expression: RowExpression): DataRow = minByOrNull(expression).suggestIfNull("minBy") +public fun DataFrame.minBy(column: String): DataRow = minByOrNull(column).suggestIfNull("minBy") +public fun > DataFrame.minBy(column: ColumnReference): DataRow = minByOrNull(column).suggestIfNull("minBy") +public fun > DataFrame.minBy(column: KProperty): DataRow = minByOrNull(column).suggestIfNull("minBy") + +public fun > DataFrame.minByOrNull(expression: RowExpression): DataRow? = getOrNull(rows().asSequence().map { expression(it, it) }.indexOfMin()) +public fun DataFrame.minByOrNull(column: String): DataRow? = minByOrNull(column.toColumnOf?>()) +public fun > DataFrame.minByOrNull(column: ColumnReference): DataRow? = getOrNull(get(column).asSequence().indexOfMin()) +public fun > DataFrame.minByOrNull(column: KProperty): DataRow? 
= minByOrNull(column.toColumnAccessor()) + +// endregion + +// region GroupBy + +public fun Grouped.min(): DataFrame = minFor(comparableColumns()) + +public fun > Grouped.minFor(columns: ColumnsForAggregateSelector): DataFrame = Aggregators.min.aggregateFor(this, columns) +public fun Grouped.minFor(vararg columns: String): DataFrame = minFor { columns.toComparableColumns() } +public fun > Grouped.minFor(vararg columns: ColumnReference): DataFrame = minFor { columns.toColumns() } +public fun > Grouped.minFor(vararg columns: KProperty): DataFrame = minFor { columns.toColumns() } + +public fun > Grouped.min( + name: String? = null, + columns: ColumnsSelector +): DataFrame = + Aggregators.min.aggregateAll(this, name, columns) +public fun Grouped.min(vararg columns: String, name: String? = null): DataFrame = min(name) { columns.toComparableColumns() } +public fun > Grouped.min(vararg columns: ColumnReference, name: String? = null): DataFrame = min(name) { columns.toColumns() } +public fun > Grouped.min(vararg columns: KProperty, name: String? = null): DataFrame = min(name) { columns.toColumns() } + +public fun > Grouped.minOf(name: String? 
= null, expression: RowExpression): DataFrame = + Aggregators.min.aggregateOfDelegated(this, name) { minOfOrNull(expression) } + +public fun > GroupBy.minBy(rowExpression: RowExpression): ReducedGroupBy = reduce { minByOrNull(rowExpression) } +public fun > GroupBy.minBy(column: ColumnReference): ReducedGroupBy = reduce { minByOrNull(column) } +public fun GroupBy.minBy(column: String): ReducedGroupBy = minBy(column.toColumnAccessor().cast>()) +public fun > GroupBy.minBy(column: KProperty): ReducedGroupBy = minBy(column.toColumnAccessor()) + +// endregion + +// region Pivot + +public fun Pivot.min(separate: Boolean = false): DataRow = delegate { min(separate) } + +public fun > Pivot.minFor( + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataRow = delegate { minFor(separate, columns) } +public fun Pivot.minFor(vararg columns: String, separate: Boolean = false): DataRow = minFor(separate) { columns.toComparableColumns() } +public fun > Pivot.minFor( + vararg columns: ColumnReference, + separate: Boolean = false +): DataRow = minFor(separate) { columns.toColumns() } +public fun > Pivot.minFor( + vararg columns: KProperty, + separate: Boolean = false +): DataRow = minFor(separate) { columns.toColumns() } + +public fun > Pivot.min(columns: ColumnsSelector): DataRow = delegate { min(columns) } +public fun > Pivot.min(vararg columns: String): DataRow = min { columns.toComparableColumns() } +public fun > Pivot.min(vararg columns: ColumnReference): DataRow = min { columns.toColumns() } +public fun > Pivot.min(vararg columns: KProperty): DataRow = min { columns.toColumns() } + +public fun > Pivot.minOf(rowExpression: RowExpression): DataRow = delegate { minOf(rowExpression) } + +public fun > Pivot.minBy(rowExpression: RowExpression): ReducedPivot = reduce { minByOrNull(rowExpression) } +public fun > Pivot.minBy(column: ColumnReference): ReducedPivot = reduce { minByOrNull(column) } +public fun Pivot.minBy(column: String): ReducedPivot = 
minBy(column.toColumnAccessor().cast>()) +public fun > Pivot.minBy(column: KProperty): ReducedPivot = minBy(column.toColumnAccessor()) + +// endregion + +// region PivotGroupBy + +public fun PivotGroupBy.min(separate: Boolean = false): DataFrame = minFor(separate, comparableColumns()) + +public fun > PivotGroupBy.minFor( + separate: Boolean = false, + columns: ColumnsForAggregateSelector +): DataFrame = + Aggregators.min.aggregateFor(this, separate, columns) +public fun PivotGroupBy.minFor(vararg columns: String, separate: Boolean = false): DataFrame = minFor(separate) { columns.toComparableColumns() } +public fun > PivotGroupBy.minFor( + vararg columns: ColumnReference, + separate: Boolean = false +): DataFrame = minFor(separate) { columns.toColumns() } +public fun > PivotGroupBy.minFor( + vararg columns: KProperty, + separate: Boolean = false +): DataFrame = minFor(separate) { columns.toColumns() } + +public fun > PivotGroupBy.min(columns: ColumnsSelector): DataFrame = Aggregators.min.aggregateAll(this, columns) +public fun PivotGroupBy.min(vararg columns: String): DataFrame = min { columns.toComparableColumns() } +public fun > PivotGroupBy.min(vararg columns: ColumnReference): DataFrame = min { columns.toColumns() } +public fun > PivotGroupBy.min(vararg columns: KProperty): DataFrame = min { columns.toColumns() } + +public fun > PivotGroupBy.minOf(rowExpression: RowExpression): DataFrame = aggregate { minOf(rowExpression) } + +public fun > PivotGroupBy.minBy(rowExpression: RowExpression): ReducedPivotGroupBy = reduce { minByOrNull(rowExpression) } +public fun > PivotGroupBy.minBy(column: ColumnReference): ReducedPivotGroupBy = reduce { minByOrNull(column) } +public fun PivotGroupBy.minBy(column: String): ReducedPivotGroupBy = minBy(column.toColumnAccessor().cast>()) +public fun > PivotGroupBy.minBy(column: KProperty): ReducedPivotGroupBy = minBy(column.toColumnAccessor()) + +// endregion diff --git 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt new file mode 100644 index 000000000..5a01944c5 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt @@ -0,0 +1,84 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyColumnGroupAccessor +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnSelector +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.impl.api.afterOrBefore +import org.jetbrains.kotlinx.dataframe.impl.api.moveImpl +import org.jetbrains.kotlinx.dataframe.impl.api.moveTo +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.ncol +import kotlin.reflect.KProperty + +public fun DataFrame.move(columns: ColumnsSelector): MoveClause = MoveClause(this, columns) +public fun DataFrame.move(vararg cols: String): MoveClause = move { cols.toColumns() } +public fun DataFrame.move(vararg cols: ColumnReference): MoveClause = move { cols.toColumns() } +public fun DataFrame.move(vararg cols: KProperty): MoveClause = move { cols.toColumns() } + +public fun DataFrame.moveTo(newColumnIndex: Int, columns: ColumnsSelector): DataFrame = move(columns).to(newColumnIndex) +public fun DataFrame.moveTo(newColumnIndex: Int, vararg columns: String): DataFrame = moveTo(newColumnIndex) { columns.toColumns() } +public fun DataFrame.moveTo(newColumnIndex: Int, vararg columns: Column): DataFrame = moveTo(newColumnIndex) { columns.toColumns() } +public fun DataFrame.moveTo(newColumnIndex: Int, vararg columns: KProperty<*>): DataFrame = moveTo(newColumnIndex) { 
columns.toColumns() } + +public fun DataFrame.moveToLeft(columns: ColumnsSelector): DataFrame = move(columns).toLeft() +public fun DataFrame.moveToLeft(vararg columns: String): DataFrame = moveToLeft { columns.toColumns() } +public fun DataFrame.moveToLeft(vararg columns: Column): DataFrame = moveToLeft { columns.toColumns() } +public fun DataFrame.moveToLeft(vararg columns: KProperty<*>): DataFrame = moveToLeft { columns.toColumns() } + +public fun DataFrame.moveToRight(columns: ColumnsSelector): DataFrame = move(columns).toRight() +public fun DataFrame.moveToRight(vararg columns: String): DataFrame = moveToRight { columns.toColumns() } +public fun DataFrame.moveToRight(vararg columns: Column): DataFrame = moveToRight { columns.toColumns() } +public fun DataFrame.moveToRight(vararg columns: KProperty<*>): DataFrame = moveToRight { columns.toColumns() } + +public fun MoveClause.into(column: ColumnsSelectionDsl.(ColumnWithPath) -> Column): DataFrame = moveImpl( + under = false, + column +) + +public fun MoveClause.into(column: String): DataFrame = pathOf(column).let { path -> into { path } } + +public fun MoveClause.intoIndexed( + newPathExpression: ColumnsSelectionDsl.(ColumnWithPath, Int) -> Column +): DataFrame { + var counter = 0 + return into { col -> + newPathExpression(this, col, counter++) + } +} + +public fun MoveClause.under(column: String): DataFrame = pathOf(column).let { path -> under { path } } +public fun MoveClause.under(column: AnyColumnGroupAccessor): DataFrame = column.path().let { path -> under { path } } +public fun MoveClause.under(column: ColumnsSelectionDsl.(ColumnWithPath) -> Column): DataFrame = moveImpl( + under = true, + column +) + +public fun MoveClause.to(columnIndex: Int): DataFrame = moveTo(columnIndex) + +public fun MoveClause.toTop( + newColumnName: ColumnsSelectionDsl.(ColumnWithPath) -> String = { it.name() } +): DataFrame = + into { newColumnName(it).toColumnAccessor() } + +public fun MoveClause.after(column: ColumnSelector): 
DataFrame = afterOrBefore(column, true) +public fun MoveClause.after(column: String): DataFrame = after { column.toColumnAccessor() } +public fun MoveClause.after(column: Column): DataFrame = after { column } +public fun MoveClause.after(column: KProperty<*>): DataFrame = after { column.toColumnAccessor() } + +// TODO: implement 'before' +/* +fun MoveColsClause.before(columnPath: ColumnPath) = before { columnPath.toColumnDef() } +fun MoveColsClause.before(column: Column) = before { column } +fun MoveColsClause.before(column: KProperty<*>) = before { column.toColumnDef() } +fun MoveColsClause.before(column: String) = before { column.toColumnDef() } +fun MoveColsClause.before(column: ColumnSelector) = afterOrBefore(column, false) +*/ + +public fun MoveClause.toLeft(): DataFrame = to(0) +public fun MoveClause.toRight(): DataFrame = to(df.ncol) + +public class MoveClause(internal val df: DataFrame, internal val columns: ColumnsSelector) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt new file mode 100644 index 000000000..309da276d --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt @@ -0,0 +1,65 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.api.Parsers +import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl +import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.typeClass +import java.time.format.DateTimeFormatter +import java.util.Locale +import 
kotlin.reflect.KProperty + +public val DataFrame.Companion.parser: GlobalParserOptions get() = Parsers + +public fun DataFrame.parse(options: ParserOptions? = null, columns: ColumnsSelector): DataFrame = + parseImpl(options, columns) + +public fun DataFrame.parse(vararg columns: String, options: ParserOptions? = null): DataFrame = + parse(options) { columns.toColumns() } + +public fun DataFrame.parse(vararg columns: ColumnReference, options: ParserOptions? = null): DataFrame = + parse(options) { columns.toColumns() } + +public fun DataFrame.parse(vararg columns: KProperty, options: ParserOptions? = null): DataFrame = + parse(options) { columns.toColumns() } + +public interface GlobalParserOptions { + + public fun addDateTimePattern(pattern: String) + + public fun addNullString(str: String) + + public fun resetToDefault() + + public var locale: Locale +} + +public data class ParserOptions( + val locale: Locale? = null, + val dateTimeFormatter: DateTimeFormatter? = null, + val dateTimePattern: String? = null, + val nullStrings: Set? = null +) { + internal fun getDateTimeFormatter(): DateTimeFormatter? = when { + dateTimeFormatter != null -> dateTimeFormatter + dateTimePattern != null && locale != null -> DateTimeFormatter.ofPattern(dateTimePattern, locale) + dateTimePattern != null -> DateTimeFormatter.ofPattern(dateTimePattern) + else -> null + } +} + +public fun DataColumn.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options) + +public fun DataFrame.parse(options: ParserOptions? = null): DataFrame = parse(options) { allDfs() } + +public fun DataColumn.parse(options: ParserOptions? = null): DataColumn<*> = + tryParse(options).also { if (it.typeClass == String::class) error("Can't guess column type") } + +@JvmName("parseAnyFrameNullable") +public fun DataColumn.parse(options: ParserOptions? 
= null): DataColumn = + map { it?.parse(options) } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt new file mode 100644 index 000000000..c4023430c --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -0,0 +1,125 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.Selector +import org.jetbrains.kotlinx.dataframe.aggregation.Aggregatable +import org.jetbrains.kotlinx.dataframe.aggregation.AggregateBody +import org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedDsl +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.impl.aggregation.PivotGroupByImpl +import org.jetbrains.kotlinx.dataframe.impl.aggregation.PivotImpl +import org.jetbrains.kotlinx.dataframe.impl.aggregation.PivotInAggregateImpl +import org.jetbrains.kotlinx.dataframe.impl.api.PivotChainColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty + +public interface PivotDsl : ColumnsSelectionDsl { + + public infix fun ColumnSet.then(other: ColumnSet): ColumnSet = PivotChainColumnSet(this, other) + + public infix fun String.then(other: ColumnSet): ColumnSet = toColumnOf() then other + + public infix fun ColumnSet.then(other: String): ColumnSet = this then other.toColumnOf() + + public infix fun String.then(other: String): ColumnSet = toColumnAccessor() then other.toColumnAccessor() + + public infix fun KProperty.then(other: ColumnSet): ColumnSet = toColumnAccessor() then other + + public infix fun ColumnSet.then(other: KProperty): ColumnSet = this then other.toColumnAccessor() + + public 
infix fun KProperty.then(other: KProperty): ColumnSet = toColumnAccessor() then other.toColumnAccessor() + + public infix fun KProperty.then(other: String): ColumnSet = toColumnAccessor() then other.toColumnOf() + + public infix fun String.then(other: KProperty): ColumnSet = toColumnOf() then other.toColumnAccessor() +} + +// region DataFrame + +public fun DataFrame.pivot(inward: Boolean? = null, columns: PivotColumnsSelector): Pivot = PivotImpl(this, columns, inward) +public fun DataFrame.pivot(vararg columns: String, inward: Boolean? = null): Pivot = pivot(inward) { columns.toColumns() } +public fun DataFrame.pivot(vararg columns: Column, inward: Boolean? = null): Pivot = pivot(inward) { columns.toColumns() } +public fun DataFrame.pivot(vararg columns: KProperty<*>, inward: Boolean? = null): Pivot = pivot(inward) { columns.toColumns() } + +public fun DataFrame.pivotMatches(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).groupByOther().matches() +public fun DataFrame.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun DataFrame.pivotMatches(vararg columns: Column, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun DataFrame.pivotMatches(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } + +public fun DataFrame.pivotCounts(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).groupByOther().count() +public fun DataFrame.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun DataFrame.pivotCounts(vararg columns: Column, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun DataFrame.pivotCounts(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } + 
+// endregion + +// region GroupBy + +public fun GroupBy<*, G>.pivot(inward: Boolean = true, columns: ColumnsSelector): PivotGroupBy = PivotGroupByImpl(this, columns, inward) +public fun GroupBy<*, G>.pivot(vararg columns: Column, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivot(vararg columns: String, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivot(vararg columns: KProperty<*>, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } + +public fun GroupBy<*, G>.pivotMatches(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).matches() +public fun GroupBy<*, G>.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivotMatches(vararg columns: Column, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivotMatches(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } + +public fun GroupBy<*, G>.pivotCounts(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).count() +public fun GroupBy<*, G>.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivotCounts(vararg columns: Column, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun GroupBy<*, G>.pivotCounts(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } + +// endregion + +// region GroupBy.aggregate + +public fun AggregateGroupedDsl.pivot(inward: Boolean = true, columns: ColumnsSelector): PivotGroupBy = + PivotInAggregateImpl(this, columns, inward) +public fun AggregateGroupedDsl.pivot(vararg columns: String, inward: 
Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } +public fun AggregateGroupedDsl.pivot(vararg columns: Column, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } +public fun AggregateGroupedDsl.pivot(vararg columns: KProperty<*>, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumns() } + +public fun AggregateGroupedDsl.pivotMatches(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).matches() +public fun AggregateGroupedDsl.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun AggregateGroupedDsl.pivotMatches(vararg columns: Column, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } +public fun AggregateGroupedDsl.pivotMatches(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumns() } + +/** Pivots by [columns] inside an aggregation and applies `count()` to the result, like the `DataFrame` and `GroupBy` `pivotCounts` overloads. Previously this called `matches()`, duplicating [pivotMatches]; the vararg overloads below delegate here. */ public fun AggregateGroupedDsl.pivotCounts(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).count() +public fun AggregateGroupedDsl.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun AggregateGroupedDsl.pivotCounts(vararg columns: Column, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } +public fun AggregateGroupedDsl.pivotCounts(vararg columns: KProperty<*>, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumns() } + +// endregion + +public interface Pivot : Aggregatable + +public typealias PivotColumnsSelector = Selector, ColumnSet> + +public data class ReducedPivot( + @PublishedApi internal val pivot: Pivot, + @PublishedApi internal val reducer: Selector, DataRow?> +) + +internal fun Pivot.reduce(reducer: Selector, DataRow?>) = ReducedPivot(this, reducer) + +@PublishedApi +internal inline fun Pivot.delegate(crossinline body: PivotGroupBy.() -> 
DataFrame): DataRow = body(groupBy { none() })[0] + +public interface PivotGroupBy : Aggregatable { + + public fun aggregate(separate: Boolean = false, body: AggregateBody): DataFrame + + public fun default(value: Any?): PivotGroupBy +} + +public data class ReducedPivotGroupBy( + @PublishedApi internal val pivot: PivotGroupBy, + @PublishedApi internal val reducer: Selector, DataRow?> +) + +@PublishedApi +internal fun PivotGroupBy.reduce(reducer: Selector, DataRow?>): ReducedPivotGroupBy = ReducedPivotGroupBy(this, reducer) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/print.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/print.kt new file mode 100644 index 000000000..08350d782 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/print.kt @@ -0,0 +1,44 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.io.renderToString +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema + +// region DataColumn + +public fun DataColumn.print(): Unit = println(this) + +// endregion + +// region DataRow + +public fun DataRow.print(): Unit = println(this) + +// endregion + +// region DataFrame + +public fun DataFrame.print( + rowsLimit: Int = 20, + valueLimit: Int = 40, + borders: Boolean = false, + alignLeft: Boolean = false, + columnTypes: Boolean = false, + title: Boolean = false +): Unit = println(renderToString(rowsLimit, valueLimit, borders, alignLeft, columnTypes, title)) + +// endregion + +// region GroupBy + +public fun GroupBy.print(): Unit = println(this) + +// endregion + +// region DataFrameSchema + +public fun DataFrameSchema.print(): Unit = println(this) + +// endregion diff --git 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt new file mode 100644 index 000000000..b252b7ed0 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt @@ -0,0 +1,24 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.api.removeImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty + +// region DataFrame + +/** Returns the `df` part of `removeImpl`'s result for the columns chosen by [columns]. Passes `allowMissingColumns = true`. NOTE(review): presumably this makes absent columns a no-op rather than an error; confirm in `removeImpl`. */ public fun DataFrame.remove(columns: ColumnsSelector): DataFrame = removeImpl(allowMissingColumns = true, columns = columns).df +/** Property-reference overload; forwards to the selector-based [remove]. */ public fun DataFrame.remove(vararg columns: KProperty<*>): DataFrame = remove { columns.toColumns() } +/** Column-name overload; forwards to the selector-based [remove]. */ public fun DataFrame.remove(vararg columns: String): DataFrame = remove { columns.toColumns() } +/** Column-reference overload; forwards to the selector-based [remove]. */ public fun DataFrame.remove(vararg columns: Column): DataFrame = remove { columns.toColumns() } +/** Iterable overload; converts [columns] with `toColumnSet()` and forwards to the selector-based [remove]. */ public fun DataFrame.remove(columns: Iterable): DataFrame = remove { columns.toColumnSet() } + +/** Operator form of [remove] (`df - { selector }`). */ public infix operator fun DataFrame.minus(columns: ColumnsSelector): DataFrame = remove(columns) +/** Operator form of [remove] for a single column name. */ public infix operator fun DataFrame.minus(column: String): DataFrame = remove(column) +/** Operator form of [remove] for a single column reference. */ public infix operator fun DataFrame.minus(column: Column): DataFrame = remove(column) +/** Operator form of [remove] for an iterable of columns. */ public infix operator fun DataFrame.minus(columns: Iterable): DataFrame = remove(columns) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt new file mode 100644 index 000000000..797df20d1 --- /dev/null +++ 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt @@ -0,0 +1,77 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.impl.DELIMITED_STRING_REGEX +import org.jetbrains.kotlinx.dataframe.impl.DELIMITERS_REGEX +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.toCamelCaseByDelimiters +import kotlin.reflect.KProperty + +// region DataFrame + +public fun DataFrame.rename(vararg mappings: Pair): DataFrame = + rename { mappings.map { it.first.toColumnAccessor() }.toColumnSet() } + .into(*mappings.map { it.second }.toTypedArray()) + +public fun DataFrame.rename(columns: ColumnsSelector): RenameClause = RenameClause(this, columns) +public fun DataFrame.rename(vararg cols: ColumnReference): RenameClause = rename { cols.toColumns() } +public fun DataFrame.rename(vararg cols: KProperty): RenameClause = rename { cols.toColumns() } +public fun DataFrame.rename(vararg cols: String): RenameClause = rename { cols.toColumns() } +public fun DataFrame.rename(cols: Iterable>): RenameClause = + rename { cols.toColumnSet() } + +public data class RenameClause(val df: DataFrame, val columns: ColumnsSelector) + +public fun DataFrame.renameToCamelCase(): DataFrame { + return rename { + dfs { it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX } + }.toCamelCase() + .rename { + dfs { !it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX } + }.toCamelCase() + .update { + dfsOf() + }.with 
{ it.renameToCamelCase() } +} + +public fun RenameClause.into(vararg newColumns: ColumnReference<*>): DataFrame = + into(*newColumns.map { it.name() }.toTypedArray()) + +public fun RenameClause.into(vararg newNames: String): DataFrame = + df.move(columns).intoIndexed { col, index -> + col.path.dropLast(1) + newNames[index] + } +public fun RenameClause.into(vararg newNames: KProperty<*>): DataFrame = + into(*newNames.map { it.name }.toTypedArray()) + +public fun RenameClause.into(transform: (ColumnWithPath) -> String): DataFrame = + df.move(columns).into { + it.path.dropLast(1) + transform(it) + } + +public fun RenameClause.toCamelCase(): DataFrame = + into { it.name().toCamelCaseByDelimiters(DELIMITERS_REGEX) } + +// endregion + +// region DataColumn + +public fun > C.rename(column: KProperty): C = rename(column.columnName) as C +public fun > C.rename(column: ColumnAccessor): C = rename(column.name()) as C + +// endregion + +// region named + +public infix fun > C.named(name: String): C = rename(name) as C +public infix fun > C.named(name: KProperty<*>): C = rename(name) +public infix fun > C.named(name: ColumnAccessor<*>): C = rename(name) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt new file mode 100644 index 000000000..f6eb6cdfd --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt @@ -0,0 +1,48 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.ColumnExpression +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.Selector +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.api.reorderImpl +import 
org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty + +// region DataFrame + +public data class Reorder( + internal val df: DataFrame, + internal val columns: ColumnsSelector, + internal val inFrameColumns: Boolean +) { + public fun cast(): Reorder = this as Reorder +} + +public fun DataFrame.reorder(selector: ColumnsSelector): Reorder = Reorder(this, selector, false) +public fun DataFrame.reorder(vararg columns: ColumnReference): Reorder = + reorder { columns.toColumns() } + +public fun DataFrame.reorder(vararg columns: KProperty): Reorder = reorder { columns.toColumns() } +public fun DataFrame.reorder(vararg columns: String): Reorder = reorder { columns.toColumns() } + +public fun > Reorder.by(expression: ColumnExpression): DataFrame = + reorderImpl(false, expression) + +public fun Reorder.byName(desc: Boolean = false): DataFrame = + if (desc) byDesc { it.name } else by { it.name } + +public fun > Reorder.byDesc(expression: ColumnExpression): DataFrame = + reorderImpl(true, expression) + +public fun > DataFrame.reorderColumnsBy( + dfs: Boolean = true, + desc: Boolean = false, + expression: Selector +): DataFrame = Reorder(this, { if (dfs) allDfs(true) else all() }, dfs).reorderImpl(desc, expression) + +public fun DataFrame.reorderColumnsByName(dfs: Boolean = true, desc: Boolean = false): DataFrame = + reorderColumnsBy(dfs, desc) { name() } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt new file mode 100644 index 000000000..1b529aead --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt @@ -0,0 +1,50 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyBaseCol +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.ColumnsContainer +import 
org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.impl.api.ColumnToInsert +import org.jetbrains.kotlinx.dataframe.impl.api.insertImpl +import org.jetbrains.kotlinx.dataframe.impl.api.removeImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty + +public fun DataFrame.replace(columns: ColumnsSelector): ReplaceClause = ReplaceClause(this, columns) +public fun DataFrame.replace(vararg columns: String): ReplaceClause = replace { columns.toColumns() } +public fun DataFrame.replace(vararg columns: ColumnReference): ReplaceClause = replace { columns.toColumns() } +public fun DataFrame.replace(vararg columns: KProperty): ReplaceClause = replace { columns.toColumns() } +public fun DataFrame.replace(columns: Iterable>): ReplaceClause = replace { columns.toColumnSet() } + +public fun DataFrame.replaceAll( + vararg valuePairs: Pair, + columns: ColumnsSelector = { allDfs() } +): DataFrame { + val map = valuePairs.toMap() + return update(columns).with { map[it] ?: it } +} + +public data class ReplaceClause(val df: DataFrame, val columns: ColumnsSelector) + +public fun ReplaceClause.with(vararg columns: AnyCol): DataFrame = with(columns.toList()) + +public fun ReplaceClause.with(newColumns: List): DataFrame { + var index = 0 + return with { + require(index < newColumns.size) { "Insufficient number of new columns in 'replace': ${newColumns.size} instead of ${df[columns].size}" } + newColumns[index++] + } +} + +public fun ReplaceClause.with(transform: ColumnsContainer.(DataColumn) -> AnyBaseCol): DataFrame { + val removeResult = df.removeImpl(columns = columns) + val toInsert = removeResult.removedColumns.map { + val newCol = transform(df, it.data.column as DataColumn) + 
ColumnToInsert(it.pathFromRoot().dropLast(1) + newCol.name, newCol, it) + } + return removeResult.df.insertImpl(toInsert) +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reverse.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reverse.kt new file mode 100644 index 000000000..3e3f66325 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reverse.kt @@ -0,0 +1,18 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.indices + +public fun DataFrame.reverse(): DataFrame = get(indices.reversed()) + +public fun DataColumn.reverse(): DataColumn = get(indices.reversed()) + +public fun ColumnGroup.reverse(): ColumnGroup = get(indices.reversed()) + +public fun FrameColumn.reverse(): FrameColumn = get(indices.reversed()) + +public fun ValueColumn.reverse(): ValueColumn = get(indices.reversed()) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt new file mode 100644 index 000000000..2c61d85b9 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt @@ -0,0 +1,25 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.impl.owner +import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema + +// region DataRow + +public fun AnyRow.schema(): DataFrameSchema = owner.schema() + +// 
endregion + +// region DataFrame + +public fun AnyFrame.schema(): DataFrameSchema = extractSchema() + +// endregion + +// region GroupBy + +public fun GroupBy<*, *>.schema(): DataFrameSchema = toDataFrame().schema() + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt new file mode 100644 index 000000000..3277ec595 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt @@ -0,0 +1,37 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.Column +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import kotlin.reflect.KProperty + +// region DataFrame + +public fun DataFrame.select(columns: ColumnsSelector): DataFrame = + get(columns).toDataFrame().cast() + +public fun DataFrame.select(vararg columns: KProperty<*>): DataFrame = + select(columns.asIterable()) + +@JvmName("selectKPropertyIterable") +public fun DataFrame.select(columns: Iterable>): DataFrame = + select(columns.map { it.columnName }) + +public fun DataFrame.select(vararg columns: String): DataFrame = + select(columns.asIterable()) + +@JvmName("selectStringIterable") +public fun DataFrame.select(columns: Iterable): DataFrame = + columns.map { get(it) }.toDataFrame().cast() + +public fun DataFrame.select(vararg columns: Column): DataFrame = + select { columns.toColumns() } + +@JvmName("selectAnyColumnReferenceIterable") +public fun DataFrame.select(columns: Iterable): DataFrame = + select { columns.toColumnSet() } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/shuffle.kt 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/shuffle.kt new file mode 100644 index 000000000..ed5c0bccb --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/shuffle.kt @@ -0,0 +1,17 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.indices + +// region DataColumn + +public fun DataColumn.shuffle(): DataColumn = get(indices.shuffled()) + +// endregion + +// region DataFrame + +public fun DataFrame.shuffle(): DataFrame = getRows(indices.shuffled()) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/single.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/single.kt new file mode 100644 index 000000000..8f8b95125 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/single.kt @@ -0,0 +1,31 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.columns.values +import org.jetbrains.kotlinx.dataframe.nrow + +// region DataColumn + +public fun DataColumn.single(): C = values.single() + +// endregion + +// region DataFrame + +public fun DataFrame.single(): DataRow = + when (nrow) { + 0 -> throw NoSuchElementException("DataFrame has no rows. Use `singleOrNull`.") + 1 -> get(0) + else -> throw IllegalArgumentException("DataFrame has more than one row.") + } + +public fun DataFrame.singleOrNull(): DataRow? = rows().singleOrNull() + +public fun DataFrame.single(predicate: RowExpression): DataRow = rows().single { predicate(it, it) } +public fun DataFrame.singleOrNull(predicate: RowExpression): DataRow? 
= + rows().singleOrNull { predicate(it, it) } + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt new file mode 100644 index 000000000..cdafcc1da --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt @@ -0,0 +1,141 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataFrameExpression +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.Selector +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.impl.api.SortFlag +import org.jetbrains.kotlinx.dataframe.impl.api.addFlag +import org.jetbrains.kotlinx.dataframe.impl.api.sortByImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.newColumnWithActualType +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.index +import org.jetbrains.kotlinx.dataframe.nrow +import org.jetbrains.kotlinx.dataframe.type +import kotlin.reflect.KProperty + +public interface SortDsl : ColumnsSelectionDsl { + + public fun ColumnSet.desc(): ColumnSet = addFlag(SortFlag.Reversed) + public fun String.desc(): ColumnSet?> = invoke>().desc() + public fun KProperty.desc(): ColumnSet = toColumnAccessor().desc() + + public fun ColumnSet.nullsLast(flag: Boolean = true): ColumnSet = + if (flag) addFlag(SortFlag.NullsLast) else this + + public fun String.nullsLast(flag: Boolean = true): ColumnSet?> = invoke>().nullsLast(flag) + public fun 
KProperty.nullsLast(flag: Boolean = true): ColumnSet = toColumnAccessor().nullsLast(flag) +} + +/** + * [SortColumnsSelector] is used to express or select multiple columns to sort by, represented by [ColumnSet]``, + * using the context of [SortDsl]`` as `this` and `it`. + * + * So: + * ```kotlin + * SortDsl.(it: SortDsl) -> ColumnSet + * ``` + */ +public typealias SortColumnsSelector = Selector, ColumnSet> + +// region DataColumn + +public fun > DataColumn.sort(): ValueColumn = DataColumn.createValueColumn(name, values().sorted(), type, defaultValue = defaultValue()) +public fun > DataColumn.sortDesc(): ValueColumn = DataColumn.createValueColumn(name, values().sortedDescending(), type, defaultValue = defaultValue()) + +// endregion + +// region DataFrame + +public fun DataFrame.sortBy(columns: SortColumnsSelector): DataFrame = sortByImpl( + UnresolvedColumnsPolicy.Fail, columns +) + +public fun DataFrame.sortBy(cols: Iterable?>>): DataFrame = + sortBy { cols.toColumnSet() } + +public fun DataFrame.sortBy(vararg cols: ColumnReference?>): DataFrame = + sortBy { cols.toColumns() } + +public fun DataFrame.sortBy(vararg cols: String): DataFrame = sortBy { cols.toColumns() } +public fun DataFrame.sortBy(vararg cols: KProperty?>): DataFrame = sortBy { cols.toColumns() } + +public fun DataFrame.sortWith(comparator: Comparator>): DataFrame { + val permutation = rows().sortedWith(comparator).map { it.index } + return this[permutation] +} + +public fun DataFrame.sortWith(comparator: (DataRow, DataRow) -> Int): DataFrame = + sortWith(Comparator(comparator)) + +public fun DataFrame.sortByDesc(columns: SortColumnsSelector): DataFrame { + val set = columns.toColumns() + return sortByImpl { set.desc() } +} + +public fun DataFrame.sortByDesc(vararg columns: KProperty?>): DataFrame = + sortByDesc { columns.toColumns() } + +public fun DataFrame.sortByDesc(vararg columns: String): DataFrame = sortByDesc { columns.toColumns() } +public fun DataFrame.sortByDesc(vararg columns: 
ColumnReference?>): DataFrame = + sortByDesc { columns.toColumns() } + +public fun DataFrame.sortByDesc(columns: Iterable?>>): DataFrame = + sortByDesc { columns.toColumnSet() } + +// endregion + +// region GroupBy + +public fun GroupBy.sortBy(vararg cols: String): GroupBy = sortBy { cols.toColumns() } +public fun GroupBy.sortBy(vararg cols: ColumnReference?>): GroupBy = sortBy { cols.toColumns() } +public fun GroupBy.sortBy(vararg cols: KProperty?>): GroupBy = sortBy { cols.toColumns() } +public fun GroupBy.sortBy(selector: SortColumnsSelector): GroupBy = sortByImpl(selector) + +public fun GroupBy.sortByDesc(vararg cols: String): GroupBy = sortByDesc { cols.toColumns() } +public fun GroupBy.sortByDesc(vararg cols: ColumnReference?>): GroupBy = sortByDesc { cols.toColumns() } +public fun GroupBy.sortByDesc(vararg cols: KProperty?>): GroupBy = sortByDesc { cols.toColumns() } +public fun GroupBy.sortByDesc(selector: SortColumnsSelector): GroupBy { + val set = selector.toColumns() + return sortByImpl { set.desc() } +} + +private fun GroupBy.createColumnFromGroupExpression( + receiver: ColumnsSelectionDsl, + expression: DataFrameExpression +): DataColumn { + return receiver.newColumnWithActualType("") { row -> + val group = row[groups] + expression(group, group) + } +} + +public fun GroupBy.sortByGroup( + nullsLast: Boolean = false, + expression: DataFrameExpression +): GroupBy = toDataFrame().sortBy { + createColumnFromGroupExpression(this, expression).nullsLast(nullsLast) +}.asGroupBy(groups) + +public fun GroupBy.sortByGroupDesc( + nullsLast: Boolean = false, + expression: DataFrameExpression +): GroupBy = toDataFrame().sortBy { + createColumnFromGroupExpression(this, expression).desc().nullsLast(nullsLast) +}.asGroupBy(groups) + +public fun GroupBy.sortByCountAsc(): GroupBy = sortByGroup { nrow } +public fun GroupBy.sortByCount(): GroupBy = sortByGroupDesc { nrow } + +public fun GroupBy.sortByKeyDesc(nullsLast: Boolean = false): GroupBy = toDataFrame() + .sortBy { 
keys.columns().toColumnSet().desc().nullsLast(nullsLast) }.asGroupBy(groups) +public fun GroupBy.sortByKey(nullsLast: Boolean = false): GroupBy = toDataFrame() + .sortBy { keys.columns().toColumnSet().nullsLast(nullsLast) }.asGroupBy(groups) + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/split.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/split.kt new file mode 100644 index 000000000..682750d65 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/split.kt @@ -0,0 +1,300 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.impl.api.splitDefault +import org.jetbrains.kotlinx.dataframe.impl.api.splitImpl +import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl +import org.jetbrains.kotlinx.dataframe.impl.asList +import org.jetbrains.kotlinx.dataframe.impl.columnName +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns +import org.jetbrains.kotlinx.dataframe.impl.getListType +import kotlin.reflect.KProperty +import kotlin.reflect.KType +import kotlin.reflect.typeOf + +public fun DataFrame.split(columns: ColumnsSelector): Split = + Split(this, columns) + +public fun DataFrame.split(vararg columns: String): Split = split { columns.toColumns() } +public fun DataFrame.split(vararg columns: ColumnReference): Split = split { 
columns.toColumns() } +public fun DataFrame.split(vararg columns: KProperty): Split = split { columns.toColumns() } + +public data class Split( + internal val df: DataFrame, + internal val columns: ColumnsSelector, +) { + public fun