Skip to content

Commit

Permalink
Merge pull request #173 from Kotlin/new-open-api
Browse files Browse the repository at this point in the history
OpenAPI/Swagger JSON type schema support + many small fixes I came across
  • Loading branch information
Jolanrensen authored Nov 25, 2022
2 parents 48a3594 + 4673b9c commit 0089ed3
Show file tree
Hide file tree
Showing 119 changed files with 14,197 additions and 899 deletions.
5 changes: 4 additions & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,7 @@ indent_size=4
max_line_length=120

[*.json]
indent_size=2
indent_size=2

[*.yaml]
indent_size=2
18 changes: 9 additions & 9 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ plugins {
kotlin("libs.publisher") version libs.versions.libsPublisher
kotlin("plugin.serialization") version libs.versions.kotlin
id("org.jetbrains.kotlinx.dataframe") version libs.versions.dataframe apply false
kotlin("jupyter.api") version libs.versions.kotlinJupyter apply false

id("org.jetbrains.dokka") version libs.versions.dokka
id("org.jetbrains.kotlinx.kover") version libs.versions.kover
Expand Down Expand Up @@ -73,14 +74,13 @@ group = "org.jetbrains.kotlinx"
fun detectVersion(): String {
val buildNumber = rootProject.findProperty("build.number") as String?
val versionProp = property("version") as String
return if(buildNumber != null) {
return if (buildNumber != null) {
if (rootProject.findProperty("build.number.detection") == "true") {
"$versionProp-dev-$buildNumber"
} else {
buildNumber
}
}
else if(hasProperty("release")) {
} else if (hasProperty("release")) {
versionProp
} else {
"$versionProp-dev"
Expand All @@ -104,15 +104,15 @@ kotlinPublications {
fairDokkaJars.set(false)

sonatypeSettings(
project.findProperty("kds.sonatype.user") as String?,
project.findProperty("kds.sonatype.password") as String?,
"dataframe project, v. ${project.version}"
project.findProperty("kds.sonatype.user") as String?,
project.findProperty("kds.sonatype.password") as String?,
"dataframe project, v. ${project.version}"
)

signingCredentials(
project.findProperty("kds.sign.key.id") as String?,
project.findProperty("kds.sign.key.private") as String?,
project.findProperty("kds.sign.key.passphrase") as String?
project.findProperty("kds.sign.key.id") as String?,
project.findProperty("kds.sign.key.private") as String?,
project.findProperty("kds.sign.key.passphrase") as String?
)

pom {
Expand Down
23 changes: 13 additions & 10 deletions core/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@

@Suppress("DSL_SCOPE_VIOLATION", "UnstableApiUsage")
plugins {
kotlin("jvm")
kotlin("libs.publisher")
kotlin("plugin.serialization")
kotlin("jupyter.api") version libs.versions.kotlinJupyter
kotlin("jupyter.api")

id("io.github.devcrocod.korro") version libs.versions.korro
id("org.jetbrains.dataframe.generator")
Expand All @@ -25,16 +24,16 @@ repositories {
}

dependencies {
api(libs.kotlin.reflect)
implementation(libs.kotlin.stdlib)
implementation(libs.kotlin.stdlib.jdk8)
implementation(libs.kotlin.reflect)

api(libs.commonsCsv)
implementation(libs.klaxon)
implementation(libs.fuel)

implementation(libs.kotlin.datetimeJvm)
implementation("com.squareup:kotlinpoet:1.11.0")
api(libs.kotlin.datetimeJvm)
implementation(libs.kotlinpoet)

testImplementation(libs.junit)
testImplementation(libs.kotestAssertions) {
Expand Down Expand Up @@ -114,7 +113,8 @@ kotlinter {
"experimental:annotation",
"max-line-length",
"filename",
"comment-spacing"
"comment-spacing",
"curly-spacing",
)
}

Expand All @@ -137,10 +137,12 @@ tasks.withType<org.jetbrains.kotlin.gradle.tasks.KotlinCompile> {
tasks.test {
maxHeapSize = "2048m"
extensions.configure(kotlinx.kover.api.KoverTaskExtension::class) {
excludes.set(listOf(
"org.jetbrains.kotlinx.dataframe.jupyter.*",
"org.jetbrains.kotlinx.dataframe.jupyter.SampleNotebooksTests"
))
excludes.set(
listOf(
"org.jetbrains.kotlinx.dataframe.jupyter.*",
"org.jetbrains.kotlinx.dataframe.jupyter.SampleNotebooksTests"
)
)
}
}

Expand Down Expand Up @@ -168,6 +170,7 @@ artifacts {
}
}

// Disable and enable if updating plugin breaks the build
dataframes {
schema {
sourceSet = "test"
Expand Down
15 changes: 11 additions & 4 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy
import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl
import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize
import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrameOf
import kotlin.reflect.KType
Expand Down Expand Up @@ -53,10 +54,13 @@ public interface DataFrame<out T> : Aggregatable<T>, ColumnsContainer<T> {

// region get columns

override operator fun <C> get(columns: ColumnsSelector<T, C>): List<DataColumn<C>> = getColumnsImpl(UnresolvedColumnsPolicy.Fail, columns)
override operator fun <C> get(columns: ColumnsSelector<T, C>): List<DataColumn<C>> =
getColumnsImpl(UnresolvedColumnsPolicy.Fail, columns)

public operator fun get(first: Column, vararg other: Column): DataFrame<T> = select(listOf(first) + other)
public operator fun get(first: String, vararg other: String): DataFrame<T> = select(listOf(first) + other)
public operator fun get(columnRange: ClosedRange<String>): DataFrame<T> = select { columnRange.start..columnRange.endInclusive }
public operator fun get(columnRange: ClosedRange<String>): DataFrame<T> =
select { columnRange.start..columnRange.endInclusive }

// endregion

Expand All @@ -65,8 +69,11 @@ public interface DataFrame<out T> : Aggregatable<T>, ColumnsContainer<T> {
public operator fun get(index: Int): DataRow<T>
public operator fun get(indices: Iterable<Int>): DataFrame<T> = getRows(indices)
public operator fun get(range: IntRange): DataFrame<T> = getRows(range)
public operator fun get(vararg ranges: IntRange): DataFrame<T> = getRows(ranges.asSequence().flatMap { it.asSequence() }.asIterable())
public operator fun get(firstIndex: Int, vararg otherIndices: Int): DataFrame<T> = get(headPlusIterable(firstIndex, otherIndices.asIterable()))
public operator fun get(first: IntRange, vararg ranges: IntRange): DataFrame<T> =
getRows(headPlusArray(first, ranges).asSequence().flatMap { it.asSequence() }.asIterable())

public operator fun get(firstIndex: Int, vararg otherIndices: Int): DataFrame<T> =
get(headPlusIterable(firstIndex, otherIndices.asIterable()))

// endregion

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
package org.jetbrains.kotlinx.dataframe.annotations

import org.jetbrains.kotlinx.dataframe.api.JsonPath
import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.io.JSON

/**
* Annotation preprocessing will generate a DataSchema interface from the data at `path`.
* Data must be of supported format: CSV, JSON, Apache Arrow, Excel.
* Data must be of supported format: CSV, JSON, Apache Arrow, Excel, OpenAPI (Swagger) in YAML/JSON.
* Generated data schema has properties inferred from data and a companion object with `read method`.
* `read method` is either `readCSV` or `readJson` that returns `DataFrame<name>`
*
Expand All @@ -15,7 +21,8 @@ package org.jetbrains.kotlinx.dataframe.annotations
* @param normalizationDelimiters if not empty, split property names by delimiters,
* lowercase parts and join to camel case. Set empty list to disable normalization
* @param withDefaultPath if `true`, generate `defaultPath` property to the data schema's companion object and make it default argument for a `read method`
* @param csvOptions options to parse CSV data. Not used when data is JSON
* @param csvOptions options to parse CSV data. Not used when data is not CSV
* @param jsonOptions options to parse JSON data. Not used when data is not JSON
*/
@Retention(AnnotationRetention.SOURCE)
@Target(AnnotationTarget.FILE)
Expand All @@ -26,13 +33,29 @@ public annotation class ImportDataSchema(
val visibility: DataSchemaVisibility = DataSchemaVisibility.IMPLICIT_PUBLIC,
val normalizationDelimiters: CharArray = ['\t', ' ', '_'],
val withDefaultPath: Boolean = true,
val csvOptions: CsvOptions = CsvOptions(',')
val csvOptions: CsvOptions = CsvOptions(','),
val jsonOptions: JsonOptions = JsonOptions(),
)

/**
 * Visibility choices for a generated data schema.
 *
 * Used as the type of the `visibility` parameter of [ImportDataSchema], whose default is [IMPLICIT_PUBLIC].
 * NOTE(review): presumably each constant selects the visibility modifier emitted on the generated
 * declarations (`internal`, none, or an explicit `public`) — confirm against the code generator.
 */
public enum class DataSchemaVisibility {
    INTERNAL,
    IMPLICIT_PUBLIC,
    EXPLICIT_PUBLIC,
}

public annotation class CsvOptions(
val delimiter: Char
public val delimiter: Char,
)

/**
 * Options for parsing JSON data, used as the type of the `jsonOptions` parameter of [ImportDataSchema].
 *
 * Every option has a default value, so `JsonOptions()` is a valid instance.
 */
public annotation class JsonOptions(

    /** Allows the choice of how to handle type clashes when reading a JSON file. */
    public val typeClashTactic: JSON.TypeClashTactic = JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS,

    /**
     * List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]>
     * will be created.
     *
     * The paths are given as plain strings; the default is an empty array.
     *
     * Example:
     * `["""$["store"]["book"][*]["author"]"""]`
     */
    public val keyValuePaths: Array<String> = [],
)
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,17 @@ public operator fun AnyRow.contains(column: KProperty<*>): Boolean = containsKey

@OptIn(ExperimentalTypeInference::class)
@OverloadResolutionByLambdaReturnType
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Double>): Double? = prev()?.let { p -> expression(this, this) - expression(p, p) }
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Double>): Double? =
prev()?.let { p -> expression(this, this) - expression(p, p) }

public fun <T> DataRow<T>.diff(expression: RowExpression<T, Int>): Int? = prev()?.let { p -> expression(this, this) - expression(p, p) }
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Int>): Int? =
prev()?.let { p -> expression(this, this) - expression(p, p) }

public fun <T> DataRow<T>.diff(expression: RowExpression<T, Long>): Long? = prev()?.let { p -> expression(this, this) - expression(p, p) }
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Long>): Long? =
prev()?.let { p -> expression(this, this) - expression(p, p) }

public fun <T> DataRow<T>.diff(expression: RowExpression<T, Float>): Float? = prev()?.let { p -> expression(this, this) - expression(p, p) }
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Float>): Float? =
prev()?.let { p -> expression(this, this) - expression(p, p) }

/**
 * Returns the `ncol` value of the data frame obtained through `df()`.
 * NOTE(review): presumably `df()` is the frame this row belongs to, making this the row's column count — confirm.
 */
public fun AnyRow.columnsCount(): Int = df().ncol
/**
 * Returns the column names reported by the data frame obtained through `df()`.
 * NOTE(review): presumably `df()` is the frame this row belongs to — confirm.
 */
public fun AnyRow.columnNames(): List<String> = df().columnNames()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package org.jetbrains.kotlinx.dataframe.api

import org.intellij.lang.annotations.Language
import java.io.Serializable

/**
 * Simplistic JSON path implementation.
 * Supports just keys (in bracket notation), double quotes, arrays and wildcards.
 *
 * Examples:
 * `$["store"]["book"][*]["author"]`
 *
 * `$[1]` will match `$[*]`
 *
 * All operations are purely textual and return a new [JsonPath]; the wrapped [path] is never mutated.
 * Key names are inserted verbatim (no escaping), so keys containing `[` or `]` are not
 * handled correctly by [matches] and [erasedIndices].
 *
 * @property path the textual path; defaults to the root `$`.
 */
@JvmInline
public value class JsonPath(@Language("jsonpath") public val path: String = "$") : Serializable {

    /** Returns this path followed by the key segment `["name"]`. */
    public fun append(name: String): JsonPath = JsonPath("$path[\"$name\"]")

    /** Returns this path followed by the wildcard segment `[*]`. */
    public fun appendWildcard(): JsonPath = JsonPath("$path[*]")

    /** Returns this path followed by the array-index segment `[index]`. */
    public fun appendArrayWithIndex(index: Int): JsonPath = JsonPath("$path[$index]")

    /** Returns this path followed by `[*]`; textually identical to [appendWildcard]. */
    public fun appendArrayWithWildcard(): JsonPath = appendWildcard()

    /**
     * Returns a copy of this path in which the last wildcard segment `[*]` is replaced by `[index]`.
     *
     * Only a complete `[*]` segment is considered a wildcard, so a `*` character that is merely part
     * of a key name (for example `["a*b"]`) is left untouched.
     * If the path contains no wildcard segment, it is returned unchanged.
     */
    public fun replaceLastWildcardWithIndex(index: Int): JsonPath {
        val wildcardStart = path.lastIndexOf("[*]")
        if (wildcardStart < 0) return this

        // Replace just the '*' between the brackets, keeping the surrounding "[" and "]".
        val starPosition = wildcardStart + 1
        return JsonPath(path.replaceRange(starPosition, starPosition + 1, index.toString()))
    }

    /** Returns this path with the key segment `["name"]` inserted directly after the root `$`. */
    public fun prepend(name: String): JsonPath = JsonPath(
        "\$[\"$name\"]" + path.removePrefix("$")
    )

    /** Returns this path with the wildcard segment `[*]` inserted directly after the root `$`. */
    public fun prependWildcard(): JsonPath = JsonPath(
        "\$[*]" + path.removePrefix("$")
    )

    /** Returns this path with the array-index segment `[index]` inserted directly after the root `$`. */
    public fun prependArrayWithIndex(index: Int): JsonPath = JsonPath(
        "\$[$index]" + path.removePrefix("$")
    )

    /** Returns this path with `[*]` inserted after the root; textually identical to [prependWildcard]. */
    public fun prependArrayWithWildcard(): JsonPath = prependWildcard()

    /** Returns this path with every numeric index segment (such as `[12]`) replaced by the wildcard `[*]`. */
    public fun erasedIndices(): JsonPath = JsonPath(
        path.replace("""\[[0-9]+]""".toRegex(), "[*]")
    )

    /**
     * Splits [path] on `[` and `]` and drops blank pieces.
     * The root `$` is the first element; key segments keep their surrounding double quotes.
     */
    private fun splitPath() = path.split("[", "]").filter { it.isNotBlank() }

    /**
     * Returns `true` if this path and [other] are textually equal, or if they have the same number of
     * segments and every pair of corresponding segments is either equal or has a wildcard `*` on
     * at least one side. The check is symmetric.
     */
    public fun matches(other: JsonPath): Boolean {
        // Fast path: identical text always matches.
        if (path == other.path) return true

        val ownSegments = splitPath()
        val otherSegments = other.splitPath()
        if (ownSegments.size != otherSegments.size) return false

        return ownSegments.zip(otherSegments).all { (own, theirs) ->
            own == theirs || own == "*" || theirs == "*"
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.annotations.ColumnName
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema

/**
 * Contract for map-like data schemas: a [DataSchema] interface / class can implement this
 * when it represents `key: value` entries.
 */
@DataSchema
public interface KeyValueProperty<T> {

    // TODO: has to be overridden explicitly in a @DataSchema interface,
    //  otherwise extension functions won't generate
    public val key: String

    // TODO: has to be overridden explicitly in a @DataSchema interface,
    //  otherwise the type will be read as `T` and extensions won't generate
    @ColumnName("value")
    public val value: T
}
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ public fun <T, G> DataFrame<T>.asGroupBy(selector: ColumnSelector<T, DataFrame<G

/**
 * Returns a [DataFrame] built from this row alone, by selecting the single-element
 * row range `index..index` from the row's `owner`.
 */
public fun <T> DataRow<T>.toDataFrame(): DataFrame<T> = owner[index..index]

public fun AnyRow.toMap(): Map<String, Any?> = df().columns().map { it.name() to it[index] }.toMap()
public fun AnyRow.toMap(): Map<String, Any?> = df().columns().associate { it.name() to it[index] }

// endregion

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ import org.jetbrains.kotlinx.dataframe.index

// region DataColumn

/** Checks [predicate] against every element of [values]; yields `true` when none fails, including when there are no values. */
public fun <T> DataColumn<T>.all(predicate: Predicate<T>): Boolean = values.all { element -> predicate(element) }

/** Tells whether this column holds nothing but `null`s; a column of size 0 also yields `true`. */
public fun <C> DataColumn<C>.allNulls(): Boolean = if (size == 0) true else all { value -> value == null }

// endregion
Expand All @@ -26,6 +28,7 @@ public fun AnyRow.allNA(): Boolean = owner.columns().all { it[index].isNA }

// region DataFrame

/** Checks [predicate] against every row of [rows]; yields `true` when none fails, including when there are no rows. */
public fun <T> DataFrame<T>.all(predicate: RowFilter<T>): Boolean = rows().all { row -> predicate(row, row) }

// endregion
Loading

0 comments on commit 0089ed3

Please sign in to comment.