Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed rename behavior #419

Merged
merged 4 commits into from
Jul 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.impl.DELIMITED_STRING_REGEX
import org.jetbrains.kotlinx.dataframe.impl.DELIMITERS_REGEX
import org.jetbrains.kotlinx.dataframe.impl.api.renameImpl
import org.jetbrains.kotlinx.dataframe.impl.columnName
import org.jetbrains.kotlinx.dataframe.impl.toCamelCaseByDelimiters
import org.jetbrains.kotlinx.dataframe.util.ITERABLE_COLUMNS_DEPRECATION_MESSAGE
Expand Down Expand Up @@ -42,47 +44,47 @@ public fun <T, C> DataFrame<T>.rename(cols: Iterable<ColumnReference<C>>): Renam

/**
 * Intermediate clause of a rename operation.
 *
 * Holds the [DataFrame] being renamed ([df]) together with the selector ([columns])
 * describing which of its columns should receive a new name.
 */
public data class RenameClause<T, C>(
    val df: DataFrame<T>,
    val columns: ColumnsSelector<T, C>,
)

/**
* ## Rename to camelCase
*
* This function renames all columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
* and converting the first char to lowercase.
* Even [DataFrames][DataFrame] inside [FrameColumns][FrameColumn] are traversed recursively.
*/
public fun <T> DataFrame<T>.renameToCamelCase(): DataFrame<T> = this
// recursively rename all column groups to camel case
// recursively rename all columns written with delimiters or starting with a capital to camel case
.rename {
groups { it.name() matches DELIMITED_STRING_REGEX }.recursively()
}.toCamelCase()

// recursively rename all other columns to camel case
.rename {
cols { !it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX }.recursively()
cols { it.name() matches DELIMITED_STRING_REGEX || it.name[0].isUpperCase() }.recursively()
}.toCamelCase()

// take all frame columns recursively and call renameToCamelCase() on all dataframes inside
.update {
colsOf<AnyFrame>().recursively()
}.with { it.renameToCamelCase() }

// convert all first chars of all columns to the lowercase
.rename {
cols { !it.isColumnGroup() }.recursively()
}.into {
it.name.replaceFirstChar { it.lowercaseChar() }
}

/**
 * Renames the selected columns using the names of the given column references.
 *
 * Only the name of each entry in [newColumns] is used; the names are forwarded
 * to the `vararg String` overload of `into`.
 */
public fun <T, C> RenameClause<T, C>.into(vararg newColumns: ColumnReference<*>): DataFrame<T> {
    val referenceNames = Array(newColumns.size) { index -> newColumns[index].name() }
    return into(*referenceNames)
}

public fun <T, C> RenameClause<T, C>.into(vararg newNames: String): DataFrame<T> =
df.move(columns).intoIndexed { col, index ->
col.path.dropLast(1) + newNames[index]
}
renameImpl(newNames)

/**
 * Renames the selected columns using the names of the given Kotlin properties.
 *
 * Only `KProperty.name` of each entry in [newNames] is used; the names are forwarded
 * to the `vararg String` overload of `into`.
 */
public fun <T, C> RenameClause<T, C>.into(vararg newNames: KProperty<*>): DataFrame<T> {
    val propertyNames = Array(newNames.size) { index -> newNames[index].name }
    return into(*propertyNames)
}

public fun <T, C> RenameClause<T, C>.into(transform: (ColumnWithPath<C>) -> String): DataFrame<T> =
df.move(columns).into {
it.path.dropLast(1) + transform(it)
}

public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> =
into { it.name().toCamelCaseByDelimiters(DELIMITERS_REGEX) }
renameImpl(transform)

/**
 * ## Rename to camelCase
 *
 * Renames the selected columns to `camelCase`: the column name is converted with
 * [toCamelCaseByDelimiters] using [DELIMITERS_REGEX], after which its first char is lowercased.
 */
public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> = into { column ->
    val camelCased = column.name().toCamelCaseByDelimiters(DELIMITERS_REGEX)
    camelCased.replaceFirstChar { firstChar -> firstChar.lowercaseChar() }
}

// endregion

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public fun <T, C> ReplaceClause<T, C>.with(newColumns: List<AnyCol>): DataFrame<
}
}

/* TODO: Issue #418: breaks if running on ColumnGroup and its child */
public fun <T, C> ReplaceClause<T, C>.with(transform: ColumnsContainer<T>.(DataColumn<C>) -> AnyBaseCol): DataFrame<T> {
val removeResult = df.removeImpl(columns = columns)
val toInsert = removeResult.removedColumns.map {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.jetbrains.kotlinx.dataframe.impl.api

import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.RenameClause
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.allChildrenNotNull
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.collectTree
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.map
import org.jetbrains.kotlinx.dataframe.kind

/**
 * Renames the selected columns to [newNames], pairing each name with a column by the
 * position of that column in the selection (first selected column gets `newNames[0]`, etc.).
 *
 * The transform-based overload invokes its callback while walking the column tree, which
 * need not visit columns in selection order. The names are therefore resolved up front by
 * column path instead of being handed out through a running counter.
 *
 * @param newNames the new names, in selection order; surplus names are ignored.
 * @return a [DataFrame] in which every selected column carries its new name.
 * @throws IllegalArgumentException if fewer names than selected columns are supplied.
 */
internal fun <T, C> RenameClause<T, C>.renameImpl(newNames: Array<out String>): DataFrame<T> {
    val selected = df.getColumnsWithPaths(columns)
    require(newNames.size >= selected.size) {
        "Cannot rename ${selected.size} selected column(s) with only ${newNames.size} new name(s)"
    }

    // paths identify a column uniquely inside the frame, so they are a safe lookup key
    val newNameByPath = selected
        .withIndex()
        .associate { (index, column) -> column.path to newNames[index] }

    return renameImpl { column -> newNameByPath.getValue(column.path) }
}

/**
 * Renames the selected columns by asking [transform] for the new name of each of them.
 *
 * The frame is first turned into a tree of all its columns, the names of the nodes that
 * correspond to selected columns are replaced, and the tree is then converted back into
 * a [DataFrame].
 *
 * @param transform receives the selected column (with its path) and returns its new name.
 * @return the rebuilt [DataFrame], cast back to `DataFrame<T>`.
 */
internal fun <T, C> RenameClause<T, C>.renameImpl(transform: (ColumnWithPath<C>) -> String): DataFrame<T> {
    // selected columns, looked up by their underlying column data
    val selectedByData = df.getColumnsWithPaths(columns).associateBy { it.data }

    // tree containing every column of the frame; its nodes are renamed in place below
    val root = df.getColumnsWithPaths { all().rec() }.collectTree()

    for (node in root.allChildrenNotNull()) {
        // nodes that are not part of the selection keep their current name
        val selected = selectedByData[node.data] ?: continue
        node.name = transform(selected)
    }

    // Fold the tree back into columns. The root node carries no data, so the overall
    // result is a column group wrapping all top-level columns.
    val rebuiltRoot = root.map { node, children ->
        val column = node.data
        when (column?.kind) {
            // value and frame columns only need to pick up the node's (possibly new) name
            ColumnKind.Value, ColumnKind.Frame ->
                column.rename(node.name)

            // groups (and the data-less root) are re-assembled from their mapped children
            ColumnKind.Group, null ->
                children
                    .toDataFrame()
                    .asColumnGroup(node.name)
        }
    } as ColumnGroup<*>

    // unwrap the root group into a DataFrame again
    return rebuiltRoot.columns().toDataFrame().cast()
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package org.jetbrains.kotlinx.dataframe.impl.columns.tree
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath

internal class TreeNode<T>(
override val name: String,
override var name: String,
override val depth: Int,
override var data: T,
override val parent: TreeNode<T>? = null,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.jetbrains.kotlinx.dataframe.impl.columns.tree

import org.jetbrains.kotlinx.dataframe.AnyCol
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
Expand Down Expand Up @@ -61,6 +62,16 @@ internal fun <T> TreeNode<T>.topmostChildrenExcluding(excludeRoot: TreeNode<*>):
return result
}

/**
 * Mapping function for [ReadonlyTreeNodes][ReadonlyTreeNode] (like [TreeNode]).
 *
 * Converts the tree into any other tree-like structure (e.g. a [DataFrame]) by first
 * mapping all children of a node recursively and then calling [operation] with the node
 * itself and the already-mapped children.
 */
@Suppress("UNCHECKED_CAST")
internal fun <T : ReadonlyTreeNode<*>, R> T.map(operation: (node: T, children: List<R>) -> R): R =
    operation(
        this,
        children.map { child -> (child as T).map(operation) },
    )

/**
 * Returns the nodes produced by [allChildren] for the filter `data != null`,
 * typed as nodes whose data is non-null.
 */
internal fun <T> TreeNode<T?>.allChildrenNotNull(): List<TreeNode<T>> {
    val nodesWithData = allChildren { node -> node.data != null }
    return nodesWithData as List<TreeNode<T>>
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,76 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn
import org.junit.Test

/**
 * Tests for `rename { ... }.into(...)` on flat frames and on frames containing column groups.
 */
class RenameTests {
    companion object {
        // flat frame with the columns "a", "b" and "c"
        val simpleDf = dataFrameOf("a", "b", "c")(
            1, 2, 3,
            4, 5, 6,
        )

        // "a" and "b" grouped into "group"
        val groupedDf = simpleDf.group { "a" and "b" }.into("group")

        // additionally groups "group"/"a" into "group"/"aGroup"
        val doubleGroupedDf = groupedDf.group { "group"["a"] }.into { "group"["aGroup"] }
    }

    @Test
    fun `simple rename`() {
        val expected = dataFrameOf("a_renamed", "b_renamed", "c_renamed")(
            1, 2, 3,
            4, 5, 6,
        )

        // rename through a transform lambda
        val renamedByLambda = simpleDf.rename { all() }.into { it.name + "_renamed" }
        renamedByLambda shouldBe expected

        // rename through explicit new names
        val renamedByNames = simpleDf.rename { all() }.into("a_renamed", "b_renamed", "c_renamed")
        renamedByNames shouldBe expected
    }

    @Test
    fun `partial grouped rename`() {
        val expected = dataFrameOf("a_renamed", "b", "c")(
            1, 2, 3,
            4, 5, 6,
        ).group { "a_renamed" and "b" }.into("group_renamed")

        // only the group itself and one of its children are selected
        val actual = groupedDf
            .rename { "group" and "group"["a"] }
            .into { it.name + "_renamed" }

        actual shouldBe expected
    }

    @Test
    fun `grouped rename`() {
        val expected = dataFrameOf("a_renamed", "b_renamed", "c_renamed")(
            1, 2, 3,
            4, 5, 6,
        ).group { "a_renamed" and "b_renamed" }.into("group_renamed")

        val actual = groupedDf
            .rename { all().recursively() }
            .into { it.name + "_renamed" }

        actual shouldBe expected
    }

    @Test
    fun `double grouped rename in 3 steps`() {
        val expected = dataFrameOf("a_renamed", "b_renamed", "c_renamed")(
            1, 2, 3,
            4, 5, 6,
        ).group { "a_renamed" and "b_renamed" }.into("group_renamed")
            .group { "group_renamed"["a_renamed"] }.into { "group_renamed"["aGroup_renamed"] }

        val actual = doubleGroupedDf
            .rename { all().recursively() }
            .into { it.name + "_renamed" }

        actual shouldBe expected
    }
}

class RenameToCamelCaseTests {
companion object {
val nestedDf = dataFrameOf("test_name")(dataFrameOf("another_name")(1))
val nestedColumnGroup = dataFrameOf("test_name")(
dataFrameOf("another_name")(1).first()
)
val doublyNestedColumnGroup = dataFrameOf("test_name")(
dataFrameOf("another_name")(
dataFrameOf("third_name")(1).first()
).first()
)
val deeplyNestedDf = kotlin.run {
val df = dataFrameOf("another_name")(1)
val rowWithDf = dataFrameOf("group_name")(df).first()
Expand All @@ -36,6 +101,20 @@ class RenameTests {
df.getColumnGroup("testName").columnNames() shouldBe listOf("anotherName")
}

@Test
fun `doubly nested row`() {
    // Uses the `doublyNestedColumnGroup` fixture declared in this class's companion object;
    // the former local copy was identical to it and merely shadowed it.
    val df = doublyNestedColumnGroup.renameToCamelCase()

    // every nesting level must have been converted to camelCase
    df.columnNames() shouldBe listOf("testName")
    df["testName"].asColumnGroup().columnNames() shouldBe listOf("anotherName")
    df["testName"]["anotherName"].asColumnGroup().columnNames() shouldBe listOf("thirdName")
}

@Test
fun `deeply nested df`() {
val df = deeplyNestedDf.renameToCamelCase()
Expand Down
48 changes: 25 additions & 23 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.impl.DELIMITED_STRING_REGEX
import org.jetbrains.kotlinx.dataframe.impl.DELIMITERS_REGEX
import org.jetbrains.kotlinx.dataframe.impl.api.renameImpl
import org.jetbrains.kotlinx.dataframe.impl.columnName
import org.jetbrains.kotlinx.dataframe.impl.toCamelCaseByDelimiters
import org.jetbrains.kotlinx.dataframe.util.ITERABLE_COLUMNS_DEPRECATION_MESSAGE
Expand Down Expand Up @@ -42,47 +44,47 @@ public fun <T, C> DataFrame<T>.rename(cols: Iterable<ColumnReference<C>>): Renam

/**
 * Intermediate clause of a rename operation.
 *
 * Holds the [DataFrame] being renamed ([df]) together with the selector ([columns])
 * describing which of its columns should receive a new name.
 */
public data class RenameClause<T, C>(
    val df: DataFrame<T>,
    val columns: ColumnsSelector<T, C>,
)

/**
* ## Rename to camelCase
*
* This function renames all columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
* and converting the first char to lowercase.
* Even [DataFrames][DataFrame] inside [FrameColumns][FrameColumn] are traversed recursively.
*/
public fun <T> DataFrame<T>.renameToCamelCase(): DataFrame<T> = this
// recursively rename all column groups to camel case
// recursively rename all columns written with delimiters or starting with a capital to camel case
.rename {
groups { it.name() matches DELIMITED_STRING_REGEX }.recursively()
}.toCamelCase()

// recursively rename all other columns to camel case
.rename {
cols { !it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX }.recursively()
cols { it.name() matches DELIMITED_STRING_REGEX || it.name[0].isUpperCase() }.recursively()
}.toCamelCase()

// take all frame columns recursively and call renameToCamelCase() on all dataframes inside
.update {
colsOf<AnyFrame>().recursively()
}.with { it.renameToCamelCase() }

// convert all first chars of all columns to the lowercase
.rename {
cols { !it.isColumnGroup() }.recursively()
}.into {
it.name.replaceFirstChar { it.lowercaseChar() }
}

/**
 * Renames the selected columns using the names of the given column references.
 *
 * Only the name of each entry in [newColumns] is used; the names are forwarded
 * to the `vararg String` overload of `into`.
 */
public fun <T, C> RenameClause<T, C>.into(vararg newColumns: ColumnReference<*>): DataFrame<T> {
    val referenceNames = Array(newColumns.size) { index -> newColumns[index].name() }
    return into(*referenceNames)
}

public fun <T, C> RenameClause<T, C>.into(vararg newNames: String): DataFrame<T> =
df.move(columns).intoIndexed { col, index ->
col.path.dropLast(1) + newNames[index]
}
renameImpl(newNames)

/**
 * Renames the selected columns using the names of the given Kotlin properties.
 *
 * Only `KProperty.name` of each entry in [newNames] is used; the names are forwarded
 * to the `vararg String` overload of `into`.
 */
public fun <T, C> RenameClause<T, C>.into(vararg newNames: KProperty<*>): DataFrame<T> {
    val propertyNames = Array(newNames.size) { index -> newNames[index].name }
    return into(*propertyNames)
}

public fun <T, C> RenameClause<T, C>.into(transform: (ColumnWithPath<C>) -> String): DataFrame<T> =
df.move(columns).into {
it.path.dropLast(1) + transform(it)
}

public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> =
into { it.name().toCamelCaseByDelimiters(DELIMITERS_REGEX) }
renameImpl(transform)

/**
 * ## Rename to camelCase
 *
 * Renames the selected columns to `camelCase`: the column name is converted with
 * [toCamelCaseByDelimiters] using [DELIMITERS_REGEX], after which its first char is lowercased.
 */
public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> = into { column ->
    val camelCased = column.name().toCamelCaseByDelimiters(DELIMITERS_REGEX)
    camelCased.replaceFirstChar { firstChar -> firstChar.lowercaseChar() }
}

// endregion

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public fun <T, C> ReplaceClause<T, C>.with(newColumns: List<AnyCol>): DataFrame<
}
}

/* TODO: Issue #418: breaks if running on ColumnGroup and its child */
public fun <T, C> ReplaceClause<T, C>.with(transform: ColumnsContainer<T>.(DataColumn<C>) -> AnyBaseCol): DataFrame<T> {
val removeResult = df.removeImpl(columns = columns)
val toInsert = removeResult.removedColumns.map {
Expand Down
Loading