Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update compiler plugin #832

Merged
merged 9 commits into from
Aug 22, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ public annotation class HasSchema(val schemaArg: Int)
* Needed because some function calls only serve as part of the overall compile-time DataSchema evaluation.
* There's no need to update the return type of such calls.
*/
internal annotation class Interpretable(val interpreter: String)
public annotation class Interpretable(val interpreter: String)

/**
* The compiler plugin will replace the return type of calls to the annotated function.
*/
internal annotation class Refine
public annotation class Refine

internal annotation class OptInRefine

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ private interface CommonFillNullsFunctionDoc
* @include [SelectingColumns.Dsl.WithExample] {@include [SetFillNullsOperationArg]}
* @include [Update.DslParam]
*/
@Interpretable("FillNulls0")
public fun <T, C> DataFrame<T>.fillNulls(columns: ColumnsSelector<T, C?>): Update<T, C?> {
    // Start an update on the selected columns, then keep only the cells whose value is null.
    val selectedUpdate = update(columns)
    return selectedUpdate.where { value -> value == null }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.RowExpression
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
Expand All @@ -18,6 +20,7 @@ import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl
import org.jetbrains.kotlinx.dataframe.impl.UNNAMED_COLUMN_PREFIX
import org.jetbrains.kotlinx.dataframe.impl.api.withValuesImpl
import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.impl.columnName
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnAccessorImpl
Expand Down Expand Up @@ -268,6 +271,7 @@ public fun dataFrameOf(vararg header: ColumnReference<*>): DataFrameBuilder = Da

/**
 * Vararg convenience overload: forwards [columns] as an [Iterable] to the matching `dataFrameOf` overload.
 */
public fun dataFrameOf(vararg columns: AnyBaseCol): AnyFrame {
    val columnsAsIterable = columns.asIterable()
    return dataFrameOf(columnsAsIterable)
}

/**
 * Vararg convenience overload: converts [header] to a list and delegates to the
 * `dataFrameOf` overload accepting it, returning the resulting [DataFrameBuilder].
 */
@Interpretable("DataFrameOf0")
public fun dataFrameOf(vararg header: String): DataFrameBuilder {
    val headerList = header.toList()
    return dataFrameOf(headerList)
}

public inline fun <reified C> dataFrameOf(vararg header: String, fill: (String) -> Iterable<C>): AnyFrame =
Expand Down Expand Up @@ -302,27 +306,15 @@ public class DataFrameBuilder(private val header: List<String>) {
}.toDataFrame()
}

/**
 * Passes [values] on to [withValues] as an [Iterable] and returns the frame it produces.
 */
@Refine
@Interpretable("DataFrameBuilderInvoke0")
public operator fun invoke(vararg values: Any?): AnyFrame {
    val flatValues = values.asIterable()
    return withValues(flatValues)
}

@JvmName("invoke1")
internal fun withValues(values: Iterable<Any?>): AnyFrame {
val list = values.asList()

val ncol = header.size

require(header.isNotEmpty() && list.size.rem(ncol) == 0) {
"Number of values ${list.size} is not divisible by number of columns $ncol"
}

val nrow = list.size / ncol

return (0 until ncol).map { col ->
val colValues = (0 until nrow).map { row ->
list[row * ncol + col]
}
DataColumn.createWithTypeInference(header[col], colValues)
internal fun withValues(values: Iterable<Any?>): AnyFrame =
withValuesImpl(header, values.asList()).map { (name, values) ->
DataColumn.createWithTypeInference(name, values)
}.toDataFrame()
}

/**
 * Materializes [args] into an array and spreads it into the vararg `invoke` overload.
 */
public operator fun invoke(args: Sequence<Any?>): AnyFrame {
    val materialized = args.toList().toTypedArray()
    return invoke(*materialized)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ public inline fun <T, C, reified R> Convert<T, C?>.notNull(
public class Convert<T, out C>(internal val df: DataFrame<T>, internal val columns: ColumnsSelector<T, C>) {
/**
 * Returns a new [Convert] over the same dataframe, with the column selector
 * unchecked-cast so that it is typed as selecting values of [R].
 */
public fun <R> cast(): Convert<T, R> {
    val retypedColumns = columns as ColumnsSelector<T, R>
    return Convert(df, retypedColumns)
}

/**
 * Reified shortcut: resolves the runtime type of [D] with `typeOf` and delegates to
 * the `to` overload that takes that type as an argument.
 */
@Refine
@Interpretable("To0")
public inline fun <reified D> to(): DataFrame<T> {
    val targetType = typeOf<D>()
    return to(targetType)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,22 @@ package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.impl.api.flattenImpl
import kotlin.reflect.KProperty

// region DataFrame

/**
 * Calls the selector-based `flatten` overload with the `all()` selector,
 * passing [keepParentNameForColumns] and [separator] through unchanged.
 */
@Refine
@Interpretable("FlattenDefault")
public fun <T> DataFrame<T>.flatten(
    keepParentNameForColumns: Boolean = false,
    separator: String = ".",
): DataFrame<T> {
    return flatten(keepParentNameForColumns, separator) { all() }
}

@Refine
@Interpretable("Flatten0")
public fun <T, C> DataFrame<T>.flatten(
keepParentNameForColumns: Boolean = false,
separator: String = ".",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,13 @@ public abstract class CreateDataFrameDsl<T> : TraversePropertiesDsl {
public abstract operator fun String.invoke(builder: CreateDataFrameDsl<T>.() -> Unit)
}

/**
 * Builds a dataframe through the `toDataFrame { }` DSL with a single column named [columnName],
 * whose expression returns each element of this iterable unchanged.
 */
@Refine
@Interpretable("ToDataFrameColumn")
public inline fun <reified T> Iterable<T>.toDataFrame(columnName: String): DataFrame<*> {
    return toDataFrame {
        columnName from { element -> element }
    }
}

// endregion

// region toDataFrame overloads for built-in types
Expand Down Expand Up @@ -304,6 +311,8 @@ public interface ValueProperty<T> {
public val value: T
}

// endregion

// region Create DataFrame from Map

public fun Map<String, Iterable<Any?>>.toDataFrame(): AnyFrame =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import org.jetbrains.kotlinx.dataframe.DataFrameExpression
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.RowColumnExpression
import org.jetbrains.kotlinx.dataframe.RowValueFilter
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.api.Update.Grammar
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
Expand Down Expand Up @@ -273,7 +275,9 @@ public typealias UpdateExpression<T, C, R> = AddDataRow<T>.(C) -> R
* - {@include [SeeAlsoPerRowCol]}
* @param [expression] The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with.
*/
public fun <T, C> Update<T, C>.with(expression: UpdateExpression<T, C, C?>): DataFrame<T> =
@Refine
@Interpretable("UpdateWith0")
public fun <T, C, R : C?> Update<T, C>.with(expression: UpdateExpression<T, C, R>): DataFrame<T> =
Copy link
Collaborator

@Jolanrensen Jolanrensen Aug 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my test run of the compiler plugin, I can no longer use update {}.with {}, only fillNulls {}.with {}. It gives

[NONE_APPLICABLE] None of the following candidates is applicable: val DataRow<Into_93I>.age: Int? val ColumnsContainer<Into_93I>.age: DataColumn<Int?>

when trying to access the updated column. Is this intended for now?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, yes. The issue occurs because the plugin fails to interpret update { }.with { } (update is not supported yet) and falls back to an empty schema. Will fix.

updateImpl { row, _, value ->
expression(row, value)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,8 @@ internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, l
collectionClasses.add(it.javaClass.kotlin)
}

is Function<*> -> classes.add(Function::class)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not :) Maybe we should change the rendering for functions in dataframes though. After a quick test I found it looks like:

⌌---------------------------------------------------------------------------------------------------------------⌍
|  |                                                                                        a:Function<*>| b:Int|
|--|-----------------------------------------------------------------------------------------------------|------|
| 0| org.jetbrains.kotlinx.dataframe.testSets.person.DataFrameTests$$Lambda$60/0x000000010013a040@64ee819|     2|
⌎---------------------------------------------------------------------------------------------------------------⌏

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sadly, toString is weird for such lambda objects. I tried to look at the object in the debugger, but there's literally nothing that hints at the signature or anything else useful.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm you'd think there was a way in kotlin to detect it's a () -> Int or something :/

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually...

image

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It already renders correctly often!

image

It might just be a fluke in the tests if the lambda is serialized as an interface.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what kernel version do you use?
image

Copy link
Collaborator

@Jolanrensen Jolanrensen Aug 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I run the dev version of this PR's branch in the notebook. (so publish to maven local and use v=0.14.0-dev)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, ok, so the fix is needed anyway


else -> classes.add(it.javaClass.kotlin)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package org.jetbrains.kotlinx.dataframe.impl.api

/**
 * Splits a flat, row-by-row list of [values] into per-column lists paired with their names from [header].
 *
 * The value at flat index `row * header.size + col` ends up at position `row` of the column named `header[col]`.
 *
 * @param header column names; must not be empty.
 * @param values cell values laid out row by row; its size must be a multiple of `header.size`.
 * @return one `name to columnValues` pair per header entry, in header order.
 * @throws IllegalArgumentException if [header] is empty, or if the number of values
 * is not divisible by the number of columns.
 */
internal fun <T> withValuesImpl(header: List<String>, values: List<T>): List<Pair<String, List<T>>> {
    val ncol = header.size

    // Checked on its own: with an empty header the divisibility message below would
    // misleadingly complain about "number of columns 0", and `%` must not see a zero divisor.
    require(ncol > 0) { "Header must contain at least one column name" }
    require(values.size % ncol == 0) {
        "Number of values ${values.size} is not divisible by number of columns $ncol"
    }

    val nrow = values.size / ncol

    return header.mapIndexed { col, name ->
        // Pick every ncol-th value starting at offset `col` to assemble this column.
        name to List(nrow) { row -> values[row * ncol + col] }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dataframe.kind
import org.jetbrains.kotlinx.dataframe.type
import org.junit.Ignore
import org.junit.Test
import java.io.File
import kotlin.reflect.KProperty
import kotlin.reflect.typeOf

Expand Down Expand Up @@ -452,4 +453,11 @@ class CreateDataFrameTests {
df.participants[0].city
}
}

// toDataFrame(columnName) should expose the iterable's elements under the given column name.
@Test
fun toDataFrameColumn() {
    val expectedFile = File("data.csv")
    val df = listOf(File("data.csv")).toDataFrame(columnName = "files")
    val firstCell = df["files"][0]
    firstCell shouldBe expectedFile
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.jetbrains.kotlinx.dataframe.explainer.TransformDataFrameExpressions
import org.jetbrains.kotlinx.dataframe.kind
import org.jetbrains.kotlinx.dataframe.type
import org.junit.Test
import java.io.File
import kotlin.reflect.typeOf

class Create : TestBase() {
Expand Down Expand Up @@ -456,4 +457,13 @@ class Create : TestBase() {
peek(dataFrameOf(col), dataFrameOf(col))
// SampleEnd
}

// Sample showing toDataFrame(columnName) on a list of files.
// NOTE(review): the statements between SampleStart and SampleEnd look like an extracted
// documentation snippet, so keep them short and self-explanatory — confirm with the docs tooling.
@Test
@TransformDataFrameExpressions
fun toDataFrameColumn() {
// SampleStart
val files = listOf(File("data.csv"), File("data1.csv"))
val df = files.toDataFrame(columnName = "data")
// SampleEnd
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,13 @@ class DataFrameTests : BaseTest() {
dataFrameOf("name", "age", "city", "weight")(c1, c2, c3, c4) shouldBe df
}

// A column holding a lambda should be typed as Function<*>, and the stored lambda must stay callable.
@Test
fun `guess column type for type without classifier`() {
    val df = dataFrameOf("a", "b")({ 1 }, 2)
    val functionColumn = df["a"]
    functionColumn.type() shouldBe typeOf<Function<*>>()
    val storedLambda = functionColumn[0] as () -> Int
    storedLambda.invoke() shouldBe 1
}

@Test
fun `create with columnOf`() {
val col = columnOf("Alice", "Bob")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.forEach
import org.jetbrains.kotlinx.dataframe.api.select
Expand Down Expand Up @@ -143,6 +145,8 @@ public fun DataFrame.Companion.readExcel(
* @param nameRepairStrategy handling of column names.
* The default behavior is [NameRepairStrategy.CHECK_UNIQUE]
*/
@Refine
@Interpretable("ReadExcel")
public fun DataFrame.Companion.readExcel(
fileOrUrl: String,
sheetName: String? = null,
Expand Down Expand Up @@ -209,7 +213,9 @@ public fun DataFrame.Companion.readExcel(
* @param range comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
*/
@JvmInline
public value class StringColumns(public val range: String)
public value class StringColumns
@Interpretable("StringColumns")
constructor(public val range: String)

/**
 * Creates [FormattingOptions] from this instance's `range` and the given [formatter].
 */
public fun StringColumns.toFormattingOptions(formatter: DataFormatter = DataFormatter()): FormattingOptions {
    val columnRange = this.range
    return FormattingOptions(columnRange, formatter)
}
Expand Down
Loading
Loading