From 52812cf7e7c90a28813b1e9b249fdf3365cb37ad Mon Sep 17 00:00:00 2001 From: zaleslaw Date: Tue, 9 Jul 2024 13:51:34 +0200 Subject: [PATCH 1/5] Add extension functions for the ResultSet --- .../kotlinx/dataframe/io/readJdbc.kt | 77 +++++++++++++++++++ .../kotlinx/dataframe/io/h2/h2Test.kt | 62 ++++++++++++--- 2 files changed, 130 insertions(+), 9 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt index 46061d0b3..160504258 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt @@ -254,6 +254,32 @@ public fun DataFrame.Companion.readResultSet( return fetchAndConvertDataFromResultSet(tableColumns, resultSet, dbType, limit, inferNullability) } +/** + * Reads the data from a [ResultSet][java.sql.ResultSet] and converts it into a DataFrame. + * + * A [ResultSet][java.sql.ResultSet] object maintains a cursor pointing to its current row of data. + * By default, a ResultSet object is not updatable and has a cursor that can only move forward. + * Therefore, you can iterate through it only once, from the first row to the last row. + * + * For more details, refer to the official Java documentation on [ResultSet][java.sql.ResultSet]. + * + * NOTE: Reading from the [ResultSet][java.sql.ResultSet] could potentially change its state. + * + * @param [dbType] the type of database that the [ResultSet] belongs to. + * @param [limit] the maximum number of rows to read from the [ResultSet][java.sql.ResultSet]. + * @param [inferNullability] indicates how the column nullability should be inferred. + * @return the DataFrame generated from the [ResultSet][java.sql.ResultSet] data. + * + * [java.sql.ResultSet]: https://docs.oracle.com/javase/8/docs/api/java/sql/ResultSet.html + */ +public fun ResultSet.toDF( + dbType: DbType, + limit: Int = DEFAULT_LIMIT, + inferNullability: Boolean = true, +): AnyFrame { + return DataFrame.Companion.readResultSet(this, dbType, limit, inferNullability) +} + /** * Reads the data from a [ResultSet][java.sql.ResultSet] and converts it into a DataFrame. * @@ -286,6 +312,33 @@ public fun DataFrame.Companion.readResultSet( return readResultSet(resultSet, dbType, limit, inferNullability) } +/** + * Reads the data from a [ResultSet][java.sql.ResultSet] and converts it into a DataFrame. + * + * A [ResultSet][java.sql.ResultSet] object maintains a cursor pointing to its current row of data. + * By default, a ResultSet object is not updatable and has a cursor that can only move forward. + * Therefore, you can iterate through it only once, from the first row to the last row. + * + * For more details, refer to the official Java documentation on [ResultSet][java.sql.ResultSet]. + * + * NOTE: Reading from the [ResultSet][java.sql.ResultSet] could potentially change its state. + * + * @param [connection] the connection to the database (it's required to extract the database type) + * that the [ResultSet] belongs to. + * @param [limit] the maximum number of rows to read from the [ResultSet][java.sql.ResultSet]. + * @param [inferNullability] indicates how the column nullability should be inferred. + * @return the DataFrame generated from the [ResultSet][java.sql.ResultSet] data. + * + * [java.sql.ResultSet]: https://docs.oracle.com/javase/8/docs/api/java/sql/ResultSet.html + */ +public fun ResultSet.toDF( + connection: Connection, + limit: Int = DEFAULT_LIMIT, + inferNullability: Boolean = true, +): AnyFrame { + return DataFrame.Companion.readResultSet(this, connection, limit, inferNullability) +} + /** * Reads all non-system tables from a database and returns them * as a map of SQL tables and corresponding dataframes using the provided database configuration and limit. @@ -451,6 +504,18 @@ public fun DataFrame.Companion.getSchemaForResultSet(resultSet: ResultSet, dbTyp return buildSchemaByTableColumns(tableColumns, dbType) } +/** + * Retrieves the schema from [ResultSet]. + * + * NOTE: This function will not close connection and result set and not retrieve data from the result set. + * + * @param [dbType] the type of database that the [ResultSet] belongs to. + * @return the schema of the [ResultSet] as a [DataFrameSchema] object. + */ +public fun ResultSet.getDataFrameSchema(dbType: DbType): DataFrameSchema { + return DataFrame.getSchemaForResultSet(this, dbType) +} + /** * Retrieves the schema from [ResultSet]. * @@ -468,6 +533,18 @@ public fun DataFrame.Companion.getSchemaForResultSet(resultSet: ResultSet, conne return buildSchemaByTableColumns(tableColumns, dbType) } +/** + * Retrieves the schema from [ResultSet]. + * + * NOTE: This function will not close connection and result set and not retrieve data from the result set. + * + * @param [connection] the connection to the database (it's required to extract the database type). + * @return the schema of the [ResultSet] as a [DataFrameSchema] object. + */ +public fun ResultSet.getDataFrameSchema(connection: Connection): DataFrameSchema { + return DataFrame.getSchemaForResultSet(this, connection) +} + /** * Retrieves the schemas of all non-system tables in the database using the provided database configuration. * diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt index def477d2d..d37b0809b 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt @@ -12,17 +12,9 @@ import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.filter import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.select -import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration +import org.jetbrains.kotlinx.dataframe.io.* import org.jetbrains.kotlinx.dataframe.io.db.H2 import org.jetbrains.kotlinx.dataframe.io.db.MySql -import org.jetbrains.kotlinx.dataframe.io.getSchemaForAllSqlTables -import org.jetbrains.kotlinx.dataframe.io.getSchemaForResultSet -import org.jetbrains.kotlinx.dataframe.io.getSchemaForSqlQuery -import org.jetbrains.kotlinx.dataframe.io.getSchemaForSqlTable -import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables -import org.jetbrains.kotlinx.dataframe.io.readResultSet -import org.jetbrains.kotlinx.dataframe.io.readSqlQuery -import org.jetbrains.kotlinx.dataframe.io.readSqlTable import org.junit.AfterClass import org.junit.BeforeClass import org.junit.Test @@ -410,6 +402,58 @@ class JdbcTest { } } + @Test + fun `read from extension function on ResultSet`() { + connection.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_UPDATABLE).use { st -> + @Language("SQL") + val selectStatement = "SELECT * FROM Customer" + + st.executeQuery(selectStatement).use { rs -> + val df = rs.toDF(H2(MySql)).cast() + + df.rowsCount() shouldBe 4 + df.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 2 + df[0][1] shouldBe "John" + + rs.beforeFirst() + + val df1 = rs.toDF(H2(MySql), 1).cast() + + df1.rowsCount() shouldBe 1 + df1.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 1 + df1[0][1] shouldBe "John" + + rs.beforeFirst() + + val dataSchema = rs.getDataFrameSchema(H2(MySql)) + dataSchema.columns.size shouldBe 3 + dataSchema.columns["name"]!!.type shouldBe typeOf() + + rs.beforeFirst() + + val df2 = rs.toDF(connection).cast() + + df2.rowsCount() shouldBe 4 + df2.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 2 + df2[0][1] shouldBe "John" + + rs.beforeFirst() + + val df3 = rs.toDF(connection, 1).cast() + + df3.rowsCount() shouldBe 1 + df3.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 1 + df3[0][1] shouldBe "John" + + rs.beforeFirst() + + val dataSchema1 = rs.getDataFrameSchema(connection) + dataSchema1.columns.size shouldBe 3 + dataSchema1.columns["name"]!!.type shouldBe typeOf() + } + } + } + // to cover a reported case from https://github.com/Kotlin/dataframe/issues/494 @Test fun `repeated read from ResultSet with limit`() { From 776880550446869565c3b1a81cde74a134d9201d Mon Sep 17 00:00:00 2001 From: zaleslaw Date: Wed, 3 Jan 2024 16:58:50 +0100 Subject: [PATCH 2/5] added extension functions for Connection, DatabaseConfiguration --- .../kotlinx/dataframe/io/readJdbc.kt | 93 +++++++++++++++++++ .../kotlinx/dataframe/io/h2/h2Test.kt | 82 ++++++++++++++++ 2 files changed, 175 insertions(+) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt index 160504258..e733ee1c7 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt @@ -216,6 +216,67 @@ public fun DataFrame.Companion.readSqlQuery( } } +/** + * Converts the result of an SQL query to the DataFrame. + * Also, it could be a just name of the SQL table. + * + * NOTE: It should be a name of one of the existing SQL tables, + * or the SQL query should start from SELECT and contain one query for reading data without any manipulation. + * It should not contain `;` symbol. + * + * @param [sqlQueryOrTableName] the SQL query to execute or name of the SQL table. + * @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution. + * @param [inferNullability] indicates how the column nullability should be inferred. + * @return the DataFrame containing the result of the SQL query. + */ +public fun DatabaseConfiguration.toDF( + sqlQueryOrTableName: String, + limit: Int = DEFAULT_LIMIT, + inferNullability: Boolean = true, +): AnyFrame { + return when { + isSqlQuery(sqlQueryOrTableName) -> DataFrame.readSqlQuery(this, sqlQueryOrTableName, limit, inferNullability) + isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable(this, sqlQueryOrTableName, limit, inferNullability) + else -> throw IllegalArgumentException("$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!") + } +} + +private fun isSqlQuery(sqlQueryOrTableName: String): Boolean { + val queryPattern = Regex("(?i)\\b(SELECT)\\b") + return queryPattern.containsMatchIn(sqlQueryOrTableName.trim()) +} + +private fun isSqlTableName(sqlQueryOrTableName: String): Boolean { + // Match table names with optional schema and catalog (e.g., catalog.schema.table) + val tableNamePattern = Regex("^[a-zA-Z_][a-zA-Z0-9_]*(\\.[a-zA-Z_][a-zA-Z0-9_]*){0,2}$") + return tableNamePattern.matches(sqlQueryOrTableName.trim()) +} + +/** + * Converts the result of an SQL query to the DataFrame. + * Also, it could be a just name of the SQL table. + * + * NOTE: It should be a name of one of the existing SQL tables, + * or the SQL query should start from SELECT and contain one query for reading data without any manipulation. + * It should not contain `;` symbol. + * + * @param [sqlQueryOrTableName] the SQL query to execute or name of the SQL table. + * @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution. + * @param [inferNullability] indicates how the column nullability should be inferred. + * @return the DataFrame containing the result of the SQL query. + */ +public fun Connection.toDF( + sqlQueryOrTableName: String, + limit: Int = DEFAULT_LIMIT, + inferNullability: Boolean = true, +): AnyFrame { + return when { + isSqlQuery(sqlQueryOrTableName) -> DataFrame.readSqlQuery(this, sqlQueryOrTableName, limit, inferNullability) + isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable(this, sqlQueryOrTableName, limit, inferNullability) + else -> throw IllegalArgumentException("$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!") + } +} + /** SQL query is accepted only if it starts from SELECT */ private fun isValid(sqlQuery: String): Boolean { val normalizedSqlQuery = sqlQuery.trim().uppercase() @@ -490,6 +551,38 @@ public fun DataFrame.Companion.getSchemaForSqlQuery(connection: Connection, sqlQ } } +/** + * Retrieves the schema of an SQL query result or the SQL table using the provided database configuration. + * + * @param [sqlQueryOrTableName] the SQL query to execute and retrieve the schema from. + * @return the schema of the SQL query as a [DataFrameSchema] object. + */ +public fun DatabaseConfiguration.getDataFrameSchema( + sqlQueryOrTableName: String +): DataFrameSchema { + return when { + isSqlQuery(sqlQueryOrTableName) -> DataFrame.getSchemaForSqlQuery(this, sqlQueryOrTableName) + isSqlTableName(sqlQueryOrTableName) -> DataFrame.getSchemaForSqlTable(this, sqlQueryOrTableName) + else -> throw IllegalArgumentException("$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!") + } +} + +/** + * Retrieves the schema of an SQL query result or the SQL table using the provided database configuration. + * + * @param [sqlQueryOrTableName] the SQL query to execute and retrieve the schema from. + * @return the schema of the SQL query as a [DataFrameSchema] object. + */ +public fun Connection.getDataFrameSchema( + sqlQueryOrTableName: String +): DataFrameSchema { + return when { + isSqlQuery(sqlQueryOrTableName) -> DataFrame.getSchemaForSqlQuery(this, sqlQueryOrTableName) + isSqlTableName(sqlQueryOrTableName) -> DataFrame.getSchemaForSqlTable(this, sqlQueryOrTableName) + else -> throw IllegalArgumentException("$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!") + } +} + /** * Retrieves the schema from [ResultSet]. * diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt index d37b0809b..e5ebc53f2 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt @@ -329,6 +329,43 @@ class JdbcTest { dataSchema1.columns["name"]!!.type shouldBe typeOf() } + @Test + fun `read from table with extension functions`() { + val tableName = "Customer" + val df = connection.toDF(tableName).cast() + + df.rowsCount() shouldBe 4 + df.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 2 + df[0][1] shouldBe "John" + + val df1 = connection.toDF(tableName, 1).cast() + + df1.rowsCount() shouldBe 1 + df1.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 1 + df1[0][1] shouldBe "John" + + val dataSchema = connection.getDataFrameSchema(tableName) + dataSchema.columns.size shouldBe 3 + dataSchema.columns["name"]!!.type shouldBe typeOf() + + val dbConfig = DatabaseConfiguration(url = URL) + val df2 = dbConfig.toDF(tableName).cast() + + df2.rowsCount() shouldBe 4 + df2.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 2 + df2[0][1] shouldBe "John" + + val df3 = dbConfig.toDF(tableName, 1).cast() + + df3.rowsCount() shouldBe 1 + df3.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 1 + df3[0][1] shouldBe "John" + + val dataSchema1 = dbConfig.getDataFrameSchema(tableName) + dataSchema1.columns.size shouldBe 3 + dataSchema1.columns["name"]!!.type shouldBe typeOf() + } + // to cover a reported case from https://github.com/Kotlin/dataframe/issues/494 @Test fun `repeated read from table with limit`() { @@ -623,6 +660,51 @@ class JdbcTest { dataSchema1.columns["name"]!!.type shouldBe typeOf() } + @Test + fun `read from sql query with extension functions`() { + @Language("SQL") + val sqlQuery = """ + SELECT c.name as customerName, SUM(s.amount) as totalSalesAmount + FROM Sale s + INNER JOIN Customer c ON s.customerId = c.id + WHERE c.age > 35 + GROUP BY s.customerId, c.name + """.trimIndent() + + val df = connection.toDF(sqlQuery).cast() + + df.rowsCount() shouldBe 2 + df.filter { it[CustomerSales::totalSalesAmount]!! > 100 }.rowsCount() shouldBe 1 + df[0][0] shouldBe "John" + + val df1 = connection.toDF(sqlQuery, 1).cast() + + df1.rowsCount() shouldBe 1 + df1.filter { it[CustomerSales::totalSalesAmount]!! > 100 }.rowsCount() shouldBe 1 + df1[0][0] shouldBe "John" + + val dataSchema = connection.getDataFrameSchema(sqlQuery) + dataSchema.columns.size shouldBe 2 + dataSchema.columns["name"]!!.type shouldBe typeOf() + + val dbConfig = DatabaseConfiguration(url = URL) + val df2 = dbConfig.toDF(sqlQuery).cast() + + df2.rowsCount() shouldBe 2 + df2.filter { it[CustomerSales::totalSalesAmount]!! > 100 }.rowsCount() shouldBe 1 + df2[0][0] shouldBe "John" + + val df3 = dbConfig.toDF( sqlQuery, 1).cast() + + df3.rowsCount() shouldBe 1 + df3.filter { it[CustomerSales::totalSalesAmount]!! > 100 }.rowsCount() shouldBe 1 + df3[0][0] shouldBe "John" + + val dataSchema1 = dbConfig.getDataFrameSchema(sqlQuery) + dataSchema1.columns.size shouldBe 2 + dataSchema1.columns["name"]!!.type shouldBe typeOf() + } + @Test fun `read from sql query with two repeated columns`() { @Language("SQL") From 4c918743dcddbad7476a65a24c013327cda88dbb Mon Sep 17 00:00:00 2001 From: zaleslaw Date: Mon, 29 Jul 2024 18:26:02 +0200 Subject: [PATCH 3/5] Refactor database configuration and dataframe methods. Renamed `DatabaseConfiguration` to `DbConnectionConfig` for clarity. Replaced `.toDF` with `.readDataFrame` methods to improve method naming consistency. These changes enhance code readability and maintainability. --- .../kotlinx/dataframe/io/readJdbc.kt | 40 +++++++++---------- .../kotlinx/dataframe/io/h2/h2Test.kt | 36 ++++++++--------- 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt index 0ddd0bdbc..675138132 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt @@ -98,7 +98,7 @@ public data class TableMetadata(val name: String, val schemaName: String?, val c * @property [user] the username used for authentication (optional, default is empty string). * @property [password] the password used for authentication (optional, default is empty string). */ -public data class DatabaseConfiguration(val url: String, val user: String = "", val password: String = "") +public data class DbConnectionConfig(val url: String, val user: String = "", val password: String = "") /** * Reads data from an SQL table and converts it into a DataFrame. @@ -110,7 +110,7 @@ public data class DatabaseConfiguration(val url: String, val user: String = "", * @return the DataFrame containing the data from the SQL table. */ public fun DataFrame.Companion.readSqlTable( - dbConfig: DatabaseConfiguration, + dbConfig: DbConnectionConfig, tableName: String, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, @@ -169,7 +169,7 @@ public fun DataFrame.Companion.readSqlTable( * @return the DataFrame containing the result of the SQL query. */ public fun DataFrame.Companion.readSqlQuery( - dbConfig: DatabaseConfiguration, + dbConfig: DbConnectionConfig, sqlQuery: String, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, @@ -219,19 +219,17 @@ public fun DataFrame.Companion.readSqlQuery( } /** - * Converts the result of an SQL query to the DataFrame. - * Also, it could be a just name of the SQL table. + * Converts the result of an SQL query or SQL table (by name) to the DataFrame. * - * NOTE: It should be a name of one of the existing SQL tables, + * @param [sqlQueryOrTableName] the SQL query to execute or name of the SQL table. + * It should be a name of one of the existing SQL tables, * or the SQL query should start from SELECT and contain one query for reading data without any manipulation. * It should not contain `;` symbol. - * - * @param [sqlQueryOrTableName] the SQL query to execute or name of the SQL table. * @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution. * @param [inferNullability] indicates how the column nullability should be inferred. * @return the DataFrame containing the result of the SQL query. */ -public fun DatabaseConfiguration.toDF( +public fun DbConnectionConfig.readDataFrame( sqlQueryOrTableName: String, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, @@ -255,19 +253,17 @@ private fun isSqlTableName(sqlQueryOrTableName: String): Boolean { } /** - * Converts the result of an SQL query to the DataFrame. - * Also, it could be a just name of the SQL table. + * Converts the result of an SQL query or SQL table (by name) to the DataFrame. * - * NOTE: It should be a name of one of the existing SQL tables, + * @param [sqlQueryOrTableName] the SQL query to execute or name of the SQL table. + * It should be a name of one of the existing SQL tables, * or the SQL query should start from SELECT and contain one query for reading data without any manipulation. * It should not contain `;` symbol. - * - * @param [sqlQueryOrTableName] the SQL query to execute or name of the SQL table. * @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution. * @param [inferNullability] indicates how the column nullability should be inferred. * @return the DataFrame containing the result of the SQL query. */ -public fun Connection.toDF( +public fun Connection.readDataFrame( sqlQueryOrTableName: String, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, @@ -335,7 +331,7 @@ public fun DataFrame.Companion.readResultSet( * * [java.sql.ResultSet]: https://docs.oracle.com/javase/8/docs/api/java/sql/ResultSet.html */ -public fun ResultSet.toDF( +public fun ResultSet.readDataFrame( dbType: DbType, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, @@ -394,7 +390,7 @@ public fun DataFrame.Companion.readResultSet( * * [java.sql.ResultSet]: https://docs.oracle.com/javase/8/docs/api/java/sql/ResultSet.html */ -public fun ResultSet.toDF( +public fun ResultSet.readDataFrame( connection: Connection, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, @@ -413,7 +409,7 @@ public fun ResultSet.toDF( * @return a map of [String] to [AnyFrame] objects representing the non-system tables from the database. */ public fun DataFrame.Companion.readAllSqlTables( - dbConfig: DatabaseConfiguration, + dbConfig: DbConnectionConfig, catalogue: String? = null, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, @@ -481,7 +477,7 @@ public fun DataFrame.Companion.readAllSqlTables( * @return the [DataFrameSchema] object representing the schema of the SQL table */ public fun DataFrame.Companion.getSchemaForSqlTable( - dbConfig: DatabaseConfiguration, + dbConfig: DbConnectionConfig, tableName: String, ): DataFrameSchema { DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> @@ -520,7 +516,7 @@ public fun DataFrame.Companion.getSchemaForSqlTable(connection: Connection, tabl * @return the schema of the SQL query as a [DataFrameSchema] object. */ public fun DataFrame.Companion.getSchemaForSqlQuery( - dbConfig: DatabaseConfiguration, + dbConfig: DbConnectionConfig, sqlQuery: String, ): DataFrameSchema { DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> @@ -554,7 +550,7 @@ public fun DataFrame.Companion.getSchemaForSqlQuery(connection: Connection, sqlQ * @param [sqlQueryOrTableName] the SQL query to execute and retrieve the schema from. * @return the schema of the SQL query as a [DataFrameSchema] object. */ -public fun DatabaseConfiguration.getDataFrameSchema( +public fun DbConnectionConfig.getDataFrameSchema( sqlQueryOrTableName: String ): DataFrameSchema { return when { @@ -641,7 +637,7 @@ public fun ResultSet.getDataFrameSchema(connection: Connection): DataFrameSchema * @param [dbConfig] the database configuration to connect to the database, including URL, user, and password. * @return a map of [String, DataFrameSchema] objects representing the table name and its schema for each non-system table. */ -public fun DataFrame.Companion.getSchemaForAllSqlTables(dbConfig: DatabaseConfiguration): Map { +public fun DataFrame.Companion.getSchemaForAllSqlTables(dbConfig: DbConnectionConfig): Map { DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> return getSchemaForAllSqlTables(connection) } diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt index 05ae228ca..3c5622ddb 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt @@ -309,7 +309,7 @@ class JdbcTest { dataSchema.columns.size shouldBe 3 dataSchema.columns["name"]!!.type shouldBe typeOf() - val dbConfig = DatabaseConfiguration(url = URL) + val dbConfig = DbConnectionConfig(url = URL) val df2 = DataFrame.readSqlTable(dbConfig, tableName).cast() df2.rowsCount() shouldBe 4 @@ -330,13 +330,13 @@ class JdbcTest { @Test fun `read from table with extension functions`() { val tableName = "Customer" - val df = connection.toDF(tableName).cast() + val df = connection.readDataFrame(tableName).cast() df.rowsCount() shouldBe 4 df.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 2 df[0][1] shouldBe "John" - val df1 = connection.toDF(tableName, 1).cast() + val df1 = connection.readDataFrame(tableName, 1).cast() df1.rowsCount() shouldBe 1 df1.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 1 @@ -346,14 +346,14 @@ class JdbcTest { dataSchema.columns.size shouldBe 3 dataSchema.columns["name"]!!.type shouldBe typeOf() - val dbConfig = DatabaseConfiguration(url = URL) - val df2 = dbConfig.toDF(tableName).cast() + val dbConfig = DbConnectionConfig(url = URL) + val df2 = dbConfig.readDataFrame(tableName).cast() df2.rowsCount() shouldBe 4 df2.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 2 df2[0][1] shouldBe "John" - val df3 = dbConfig.toDF(tableName, 1).cast() + val df3 = dbConfig.readDataFrame(tableName, 1).cast() df3.rowsCount() shouldBe 1 df3.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 1 @@ -376,7 +376,7 @@ class JdbcTest { df1.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 1 df1[0][1] shouldBe "John" - val dbConfig = DatabaseConfiguration(url = URL) + val dbConfig = DbConnectionConfig(url = URL) val df2 = DataFrame.readSqlTable(dbConfig, tableName, 2).cast() df2.rowsCount() shouldBe 2 @@ -444,7 +444,7 @@ class JdbcTest { val selectStatement = "SELECT * FROM Customer" st.executeQuery(selectStatement).use { rs -> - val df = rs.toDF(H2(MySql)).cast() + val df = rs.readDataFrame(H2(MySql)).cast() df.rowsCount() shouldBe 4 df.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 2 @@ -452,7 +452,7 @@ class JdbcTest { rs.beforeFirst() - val df1 = rs.toDF(H2(MySql), 1).cast() + val df1 = rs.readDataFrame(H2(MySql), 1).cast() df1.rowsCount() shouldBe 1 df1.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 1 @@ -466,7 +466,7 @@ class JdbcTest { rs.beforeFirst() - val df2 = rs.toDF(connection).cast() + val df2 = rs.readDataFrame(connection).cast() df2.rowsCount() shouldBe 4 df2.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 2 @@ -474,7 +474,7 @@ class JdbcTest { rs.beforeFirst() - val df3 = rs.toDF(connection, 1).cast() + val df3 = rs.readDataFrame(connection, 1).cast() df3.rowsCount() shouldBe 1 df3.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }.rowsCount() shouldBe 1 @@ -639,7 +639,7 @@ class JdbcTest { dataSchema.columns.size shouldBe 2 dataSchema.columns["name"]!!.type shouldBe typeOf() - val dbConfig = DatabaseConfiguration(url = URL) + val dbConfig = DbConnectionConfig(url = URL) val df2 = DataFrame.readSqlQuery(dbConfig, sqlQuery).cast() df2.rowsCount() shouldBe 2 @@ -668,13 +668,13 @@ class JdbcTest { GROUP BY s.customerId, c.name """.trimIndent() - val df = connection.toDF(sqlQuery).cast() + val df = connection.readDataFrame(sqlQuery).cast() df.rowsCount() shouldBe 2 df.filter { it[CustomerSales::totalSalesAmount]!! > 100 }.rowsCount() shouldBe 1 df[0][0] shouldBe "John" - val df1 = connection.toDF(sqlQuery, 1).cast() + val df1 = connection.readDataFrame(sqlQuery, 1).cast() df1.rowsCount() shouldBe 1 df1.filter { it[CustomerSales::totalSalesAmount]!! > 100 }.rowsCount() shouldBe 1 @@ -684,14 +684,14 @@ class JdbcTest { dataSchema.columns.size shouldBe 2 dataSchema.columns["name"]!!.type shouldBe typeOf() - val dbConfig = DatabaseConfiguration(url = URL) - val df2 = dbConfig.toDF(sqlQuery).cast() + val dbConfig = DbConnectionConfig(url = URL) + val df2 = dbConfig.readDataFrame(sqlQuery).cast() df2.rowsCount() shouldBe 2 df2.filter { it[CustomerSales::totalSalesAmount]!! > 100 }.rowsCount() shouldBe 1 df2[0][0] shouldBe "John" - val df3 = dbConfig.toDF( sqlQuery, 1).cast() + val df3 = dbConfig.readDataFrame( sqlQuery, 1).cast() df3.rowsCount() shouldBe 1 df3.filter { it[CustomerSales::totalSalesAmount]!! > 100 }.rowsCount() shouldBe 1 @@ -784,7 +784,7 @@ class JdbcTest { // TODO: fix nullability saleDataSchema.columns["amount"]!!.type shouldBe typeOf() - val dbConfig = DatabaseConfiguration(url = URL) + val dbConfig = DbConnectionConfig(url = URL) val dataframes2 = DataFrame.readAllSqlTables(dbConfig).values.toList() val customerDf2 = dataframes2[0].cast() From f0664244bf27546b45b654de42880f0f182e0a88 Mon Sep 17 00:00:00 2001 From: zaleslaw Date: Mon, 29 Jul 2024 18:49:54 +0200 Subject: [PATCH 4/5] Refactor SQL reading and schema functions in readJdbc.kt Simplify the logic to use single-expression functions for readability. Ensure consistent formatting and make error messages more explicit. This change also corrects minor indentation issues in SQL query strings within tests. --- .../kotlinx/dataframe/io/readJdbc.kt | 103 ++++++++++-------- .../kotlinx/dataframe/io/h2/h2Test.kt | 19 +++- 2 files changed, 74 insertions(+), 48 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt index 675138132..d515e8f46 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt @@ -233,13 +233,26 @@ public fun DbConnectionConfig.readDataFrame( sqlQueryOrTableName: String, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, -): AnyFrame { - return when { - isSqlQuery(sqlQueryOrTableName) -> DataFrame.readSqlQuery(this, sqlQueryOrTableName, limit, inferNullability) - isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable(this, sqlQueryOrTableName, limit, inferNullability) - else -> throw IllegalArgumentException("$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!") +): AnyFrame = + when { + isSqlQuery(sqlQueryOrTableName) -> DataFrame.readSqlQuery( + this, + sqlQueryOrTableName, + limit, + inferNullability, + ) + + isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable( + this, + sqlQueryOrTableName, + limit, + inferNullability, + ) + + else -> throw IllegalArgumentException( + "$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!", + ) } -} private fun isSqlQuery(sqlQueryOrTableName: String): Boolean { val queryPattern = Regex("(?i)\\b(SELECT)\\b") @@ -267,13 +280,26 @@ public fun Connection.readDataFrame( sqlQueryOrTableName: String, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, -): AnyFrame { - return when { - isSqlQuery(sqlQueryOrTableName) -> DataFrame.readSqlQuery(this, sqlQueryOrTableName, limit, inferNullability) - isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable(this, sqlQueryOrTableName, limit, inferNullability) - else -> throw IllegalArgumentException("$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!") +): AnyFrame = + when { + isSqlQuery(sqlQueryOrTableName) -> DataFrame.readSqlQuery( + this, + sqlQueryOrTableName, + limit, + inferNullability, + ) + + isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable( + this, + sqlQueryOrTableName, + limit, + inferNullability, + ) + + else -> throw IllegalArgumentException( + "$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!", + ) } -} /** SQL query is accepted only if it starts from SELECT */ private fun isValid(sqlQuery: String): Boolean { @@ -335,9 +361,7 @@ public fun ResultSet.readDataFrame( dbType: DbType, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, -): AnyFrame { - return DataFrame.Companion.readResultSet(this, dbType, limit, inferNullability) -} +): AnyFrame = DataFrame.Companion.readResultSet(this, dbType, limit, inferNullability) /** * Reads the data from a [ResultSet][java.sql.ResultSet] and converts it into a DataFrame. @@ -394,9 +418,7 @@ public fun ResultSet.readDataFrame( connection: Connection, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, -): AnyFrame { - return DataFrame.Companion.readResultSet(this, connection, limit, inferNullability) -} +): AnyFrame = DataFrame.Companion.readResultSet(this, connection, limit, inferNullability) /** * Reads all non-system tables from a database and returns them @@ -476,10 +498,7 @@ public fun DataFrame.Companion.readAllSqlTables( * @param [tableName] the name of the SQL table for which to retrieve the schema. * @return the [DataFrameSchema] object representing the schema of the SQL table */ -public fun DataFrame.Companion.getSchemaForSqlTable( - dbConfig: DbConnectionConfig, - tableName: String, -): DataFrameSchema { +public fun DataFrame.Companion.getSchemaForSqlTable(dbConfig: DbConnectionConfig, tableName: String): DataFrameSchema { DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> return getSchemaForSqlTable(connection, tableName) } @@ -515,10 +534,7 @@ public fun DataFrame.Companion.getSchemaForSqlTable(connection: Connection, tabl * @param [sqlQuery] the SQL query to execute and retrieve the schema from. * @return the schema of the SQL query as a [DataFrameSchema] object. */ -public fun DataFrame.Companion.getSchemaForSqlQuery( - dbConfig: DbConnectionConfig, - sqlQuery: String, -): DataFrameSchema { +public fun DataFrame.Companion.getSchemaForSqlQuery(dbConfig: DbConnectionConfig, sqlQuery: String): DataFrameSchema { DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> return getSchemaForSqlQuery(connection, sqlQuery) } @@ -550,15 +566,16 @@ public fun DataFrame.Companion.getSchemaForSqlQuery(connection: Connection, sqlQ * @param [sqlQueryOrTableName] the SQL query to execute and retrieve the schema from. * @return the schema of the SQL query as a [DataFrameSchema] object. */ -public fun DbConnectionConfig.getDataFrameSchema( - sqlQueryOrTableName: String -): DataFrameSchema { - return when { +public fun DbConnectionConfig.getDataFrameSchema(sqlQueryOrTableName: String): DataFrameSchema = + when { isSqlQuery(sqlQueryOrTableName) -> DataFrame.getSchemaForSqlQuery(this, sqlQueryOrTableName) + isSqlTableName(sqlQueryOrTableName) -> DataFrame.getSchemaForSqlTable(this, sqlQueryOrTableName) - else -> throw IllegalArgumentException("$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!") + + else -> throw IllegalArgumentException( + "$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!", + ) } -} /** * Retrieves the schema of an SQL query result or the SQL table using the provided database configuration. @@ -566,15 +583,16 @@ public fun DbConnectionConfig.getDataFrameSchema( * @param [sqlQueryOrTableName] the SQL query to execute and retrieve the schema from. * @return the schema of the SQL query as a [DataFrameSchema] object. */ -public fun Connection.getDataFrameSchema( - sqlQueryOrTableName: String -): DataFrameSchema { - return when { +public fun Connection.getDataFrameSchema(sqlQueryOrTableName: String): DataFrameSchema = + when { isSqlQuery(sqlQueryOrTableName) -> DataFrame.getSchemaForSqlQuery(this, sqlQueryOrTableName) + isSqlTableName(sqlQueryOrTableName) -> DataFrame.getSchemaForSqlTable(this, sqlQueryOrTableName) - else -> throw IllegalArgumentException("$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!") + + else -> throw IllegalArgumentException( + "$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!", + ) } -} /** * Retrieves the schema from [ResultSet]. @@ -598,9 +616,7 @@ public fun DataFrame.Companion.getSchemaForResultSet(resultSet: ResultSet, dbTyp * @param [dbType] the type of database that the [ResultSet] belongs to. * @return the schema of the [ResultSet] as a [DataFrameSchema] object. */ -public fun ResultSet.getDataFrameSchema(dbType: DbType): DataFrameSchema { - return DataFrame.getSchemaForResultSet(this, dbType) -} +public fun ResultSet.getDataFrameSchema(dbType: DbType): DataFrameSchema = DataFrame.getSchemaForResultSet(this, dbType) /** * Retrieves the schema from [ResultSet]. @@ -627,9 +643,8 @@ public fun DataFrame.Companion.getSchemaForResultSet(resultSet: ResultSet, conne * @param [connection] the connection to the database (it's required to extract the database type). * @return the schema of the [ResultSet] as a [DataFrameSchema] object. */ -public fun ResultSet.getDataFrameSchema(connection: Connection): DataFrameSchema { - return DataFrame.getSchemaForResultSet(this, connection) -} +public fun ResultSet.getDataFrameSchema(connection: Connection): DataFrameSchema = + DataFrame.getSchemaForResultSet(this, connection) /** * Retrieves the schemas of all non-system tables in the database using the provided database configuration. diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt index 3c5622ddb..d8c32074a 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt @@ -12,9 +12,19 @@ import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.filter import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.select -import org.jetbrains.kotlinx.dataframe.io.* +import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig import org.jetbrains.kotlinx.dataframe.io.db.H2 import org.jetbrains.kotlinx.dataframe.io.db.MySql +import org.jetbrains.kotlinx.dataframe.io.getDataFrameSchema +import org.jetbrains.kotlinx.dataframe.io.getSchemaForAllSqlTables +import org.jetbrains.kotlinx.dataframe.io.getSchemaForResultSet +import org.jetbrains.kotlinx.dataframe.io.getSchemaForSqlQuery +import org.jetbrains.kotlinx.dataframe.io.getSchemaForSqlTable +import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables +import org.jetbrains.kotlinx.dataframe.io.readDataFrame +import org.jetbrains.kotlinx.dataframe.io.readResultSet +import org.jetbrains.kotlinx.dataframe.io.readSqlQuery +import org.jetbrains.kotlinx.dataframe.io.readSqlTable import org.junit.AfterClass import org.junit.BeforeClass import org.junit.Test @@ -660,13 +670,14 @@ class JdbcTest { @Test fun `read from sql query with extension functions`() { @Language("SQL") - val sqlQuery = """ + val sqlQuery = + """ SELECT c.name as customerName, SUM(s.amount) as totalSalesAmount FROM Sale s INNER JOIN Customer c ON s.customerId = c.id WHERE c.age > 35 GROUP BY s.customerId, c.name - """.trimIndent() + """.trimIndent() val df = connection.readDataFrame(sqlQuery).cast() @@ -691,7 +702,7 @@ class JdbcTest { df2.filter { it[CustomerSales::totalSalesAmount]!! > 100 }.rowsCount() shouldBe 1 df2[0][0] shouldBe "John" - val df3 = dbConfig.readDataFrame( sqlQuery, 1).cast() + val df3 = dbConfig.readDataFrame(sqlQuery, 1).cast() df3.rowsCount() shouldBe 1 df3.filter { it[CustomerSales::totalSalesAmount]!! > 100 }.rowsCount() shouldBe 1 From a96a8078a67452cf9f50a2a1d52501c97fe3bea8 Mon Sep 17 00:00:00 2001 From: zaleslaw Date: Mon, 29 Jul 2024 19:44:11 +0200 Subject: [PATCH 5/5] Rename DatabaseConfiguration to DbConnectionConfig for consistency This commit updates various imports and references from DatabaseConfiguration to DbConnectionConfig across different files. This change ensures consistency in the naming convention used throughout the codebase and documentation, improving clarity and maintenance. --- docs/StardustDocs/topics/readSqlDatabases.md | 78 +++++++++---------- .../SchemaGeneratorPluginIntegrationTest.kt | 8 +- .../ksp/DataFrameJdbcSymbolProcessorTest.kt | 12 +-- 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/docs/StardustDocs/topics/readSqlDatabases.md b/docs/StardustDocs/topics/readSqlDatabases.md index 8dd330d46..709b7172a 100644 --- a/docs/StardustDocs/topics/readSqlDatabases.md +++ b/docs/StardustDocs/topics/readSqlDatabases.md @@ -65,14 +65,14 @@ For example, if you have a local PostgreSQL database named as `testDatabase` wit you could read first 100 rows and print the data just copying the code below: ```kotlin -import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration +import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig import org.jetbrains.kotlinx.dataframe.api.print val url = "jdbc:postgresql://localhost:5432/testDatabase" val username = "postgres" val password = "password" -val dbConfig = DatabaseConfiguration(url, username, password) +val dbConfig = DbConnectionConfig(url, username, password) val tableName = "Customer" @@ -113,37 +113,37 @@ Find a full example Notebook [here](https://github.com/zaleslaw/KotlinDataFrame- These functions read all data from a specific table in the database. Variants with a limit parameter restrict how many rows will be read from the table. -**readSqlTable(dbConfig: DatabaseConfiguration, tableName: String): AnyFrame** +**readSqlTable(dbConfig: DbConnectionConfig, tableName: String): AnyFrame** Read all data from a specific table in the SQL database and transform it into an AnyFrame object. -The `dbConfig: DatabaseConfiguration` parameter represents the configuration for a database connection, +The `dbConfig: DbConnectionConfig` parameter represents the configuration for a database connection, created under the hood and managed by the library. Typically, it requires a URL, username and password. ```kotlin -import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration +import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig -val dbConfig = DatabaseConfiguration("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") +val dbConfig = DbConnectionConfig("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") val users = DataFrame.readSqlTable(dbConfig, "Users") ``` -**readSqlTable(dbConfig: DatabaseConfiguration, tableName: String, limit: Int): AnyFrame** +**readSqlTable(dbConfig: DbConnectionConfig, tableName: String, limit: Int): AnyFrame** A variant of the previous function, but with an added `limit: Int` parameter that allows setting the maximum number of records to be read. ```kotlin -import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration +import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig -val dbConfig = DatabaseConfiguration("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") +val dbConfig = DbConnectionConfig("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") val users = DataFrame.readSqlTable(dbConfig, "Users", 100) ``` **readSqlTable(connection: Connection, tableName: String): AnyFrame** -Another variant, where instead of `dbConfig: DatabaseConfiguration` we use a JDBC connection: `Connection` object. +Another variant, where instead of `dbConfig: DbConnectionConfig` we use a JDBC connection: `Connection` object. ```kotlin import java.sql.Connection @@ -177,37 +177,37 @@ connection.close() These functions execute an SQL query on the database and convert the result into a DataFrame. If a limit is provided, only that many rows will be returned from the result. -**readSqlQuery(dbConfig: DatabaseConfiguration, sqlQuery: String): AnyFrame** +**readSqlQuery(dbConfig: DbConnectionConfig, sqlQuery: String): AnyFrame** Execute a specific SQL query on the SQL database and retrieve the resulting data as an AnyFrame. -The `dbConfig: DatabaseConfiguration` parameter represents the configuration for a database connection, +The `dbConfig: DbConnectionConfig` parameter represents the configuration for a database connection, created under the hood and managed by the library. Typically, it requires a URL, username and password. ```kotlin -import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration +import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig -val dbConfig = DatabaseConfiguration("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") +val dbConfig = DbConnectionConfig("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") val df = DataFrame.readSqlQuery(dbConfig, "SELECT * FROM Users WHERE age > 35") ``` -**readSqlQuery(dbConfig: DatabaseConfiguration, sqlQuery: String, limit: Int): AnyFrame** +**readSqlQuery(dbConfig: DbConnectionConfig, sqlQuery: String, limit: Int): AnyFrame** A variant of the previous function, but with an added `limit: Int` parameter that allows setting the maximum number of records to be read. ```kotlin -import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration +import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig -val dbConfig = DatabaseConfiguration("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") +val dbConfig = DbConnectionConfig("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") val df = DataFrame.readSqlQuery(dbConfig, "SELECT * FROM Users WHERE age > 35", 10) ``` **readSqlQuery(connection: Connection, sqlQuery: String): AnyFrame** -Another variant, where instead of `dbConfig: DatabaseConfiguration` we use a JDBC connection: `Connection` object. +Another variant, where instead of `dbConfig: DbConnectionConfig` we use a JDBC connection: `Connection` object. ```kotlin import java.sql.Connection @@ -319,13 +319,13 @@ Variants with a limit parameter restrict how many rows will be read from each ta Retrieves data from all the non-system tables in the SQL database and returns them as a map of table names to AnyFrame objects. -The `dbConfig: DatabaseConfiguration` parameter represents the configuration for a database connection, +The `dbConfig: DbConnectionConfig` parameter represents the configuration for a database connection, created under the hood and managed by the library. Typically, it requires a URL, username and password. ```kotlin -import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration +import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig -val dbConfig = DatabaseConfiguration("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") +val dbConfig = DbConnectionConfig("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") val dataframes = DataFrame.readAllSqlTables(dbConfig) ``` @@ -338,16 +338,16 @@ but with an added `limit: Int` parameter that allows setting the maximum number NOTE: the setting the different limits for different tables is not supported. ```kotlin -import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration +import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig -val dbConfig = DatabaseConfiguration("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") +val dbConfig = DbConnectionConfig("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") val dataframes = DataFrame.readAllSqlTables(dbConfig, 100) ``` **readAllSqlTables(connection: Connection): List\** -Another variant, where instead of `dbConfig: DatabaseConfiguration` we use a JDBC connection: `Connection` object. +Another variant, where instead of `dbConfig: DbConnectionConfig` we use a JDBC connection: `Connection` object. ```kotlin import java.sql.Connection @@ -384,24 +384,24 @@ The purpose of these functions is to facilitate the retrieval of table schema. By providing a table name and either a database configuration or connection, these functions return the [DataFrameSchema](schema.md) of the specified table. -**getSchemaForSqlTable(dbConfig: DatabaseConfiguration, tableName: String): DataFrameSchema** +**getSchemaForSqlTable(dbConfig: DbConnectionConfig, tableName: String): DataFrameSchema** This function captures the schema of a specific table from an SQL database. -The `dbConfig: DatabaseConfiguration` parameter represents the configuration for a database connection, +The `dbConfig: DbConnectionConfig` parameter represents the configuration for a database connection, created under the hood and managed by the library. Typically, it requires a URL, username and password. ```kotlin -import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration +import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig -val dbConfig = DatabaseConfiguration("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") +val dbConfig = DbConnectionConfig("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") val schema = DataFrame.getSchemaForSqlTable(dbConfig, "Users") ``` **getSchemaForSqlTable(connection: Connection, tableName: String): DataFrameSchema** -Another variant, where instead of `dbConfig: DatabaseConfiguration` we use a JDBC connection: `Connection` object. +Another variant, where instead of `dbConfig: DbConnectionConfig` we use a JDBC connection: `Connection` object. ```kotlin import java.sql.Connection @@ -421,24 +421,24 @@ These functions return the schema of an SQL query result. Once you provide a database configuration or connection and an SQL query, they return the [DataFrameSchema](schema.md) of the query result. -**getSchemaForSqlQuery(dbConfig: DatabaseConfiguration, sqlQuery: String): DataFrameSchema** +**getSchemaForSqlQuery(dbConfig: DbConnectionConfig, sqlQuery: String): DataFrameSchema** This function executes an SQL query on the database and then retrieves the resulting schema. -The `dbConfig: DatabaseConfiguration` parameter represents the configuration for a database connection, +The `dbConfig: DbConnectionConfig` parameter represents the configuration for a database connection, created under the hood and managed by the library. Typically, it requires a URL, username and password. ```kotlin -import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration +import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig -val dbConfig = DatabaseConfiguration("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") +val dbConfig = DbConnectionConfig("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") val schema = DataFrame.getSchemaForSqlQuery(dbConfig, "SELECT * FROM Users WHERE age > 35") ``` **getSchemaForSqlQuery(connection: Connection, sqlQuery: String): DataFrameSchema** -Another variant, where instead of `dbConfig: DatabaseConfiguration` we use a JDBC connection: `Connection` object. +Another variant, where instead of `dbConfig: DbConnectionConfig` we use a JDBC connection: `Connection` object. ```kotlin import java.sql.Connection @@ -475,7 +475,7 @@ val schema = DataFrame.getSchemaForResultSet(resultSet, PostgreSql) **getSchemaForSqlQuery(connection: Connection, sqlQuery: String): DataFrameSchema** -Another variant, where instead of `dbConfig: DatabaseConfiguration` we use a JDBC connection: `Connection` object. +Another variant, where instead of `dbConfig: DbConnectionConfig` we use a JDBC connection: `Connection` object. ```kotlin import java.sql.Connection @@ -493,18 +493,18 @@ connection.close() These functions return a list of all [`DataFrameSchema`](schema.md) from all the non-system tables in the SQL database. They can be called with either a database configuration or a connection. -**getSchemaForAllSqlTables(dbConfig: DatabaseConfiguration): Map\** +**getSchemaForAllSqlTables(dbConfig: DbConnectionConfig): Map\** This function retrieves the schema of all tables from an SQL database and returns them as a map of table names to [`DataFrameSchema`](schema.md) objects. -The `dbConfig: DatabaseConfiguration` parameter represents the configuration for a database connection, +The `dbConfig: DbConnectionConfig` parameter represents the configuration for a database connection, created under the hood and managed by the library. Typically, it requires a URL, username and password. ```kotlin -import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration +import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig -val dbConfig = DatabaseConfiguration("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") +val dbConfig = DbConnectionConfig("URL_TO_CONNECT_DATABASE", "USERNAME", "PASSWORD") val schemas = DataFrame.getSchemaForAllSqlTables(dbConfig) ``` diff --git a/plugins/dataframe-gradle-plugin/src/integrationTest/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorPluginIntegrationTest.kt b/plugins/dataframe-gradle-plugin/src/integrationTest/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorPluginIntegrationTest.kt index e3d818272..852e89071 100644 --- a/plugins/dataframe-gradle-plugin/src/integrationTest/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorPluginIntegrationTest.kt +++ b/plugins/dataframe-gradle-plugin/src/integrationTest/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorPluginIntegrationTest.kt @@ -389,12 +389,12 @@ class SchemaGeneratorPluginIntegrationTest : AbstractDataFramePluginIntegrationT /* TODO: test is broken e: file://test3901867314473689900/src/main/kotlin/Main.kt:12:43 Unresolved reference: readSqlTable - e: file://test3901867314473689900/src/main/kotlin/Main.kt:13:43 Unresolved reference: DatabaseConfiguration + e: file://test3901867314473689900/src/main/kotlin/Main.kt:13:43 Unresolved reference: DbConnectionConfig e: file://test3901867314473689900/src/main/kotlin/Main.kt:19:28 Unresolved reference: readSqlTable e: file://test3901867314473689900/src/main/kotlin/Main.kt:20:21 Unresolved reference: age e: file://test3901867314473689900/src/main/kotlin/Main.kt:22:29 Unresolved reference: readSqlTable e: file://test3901867314473689900/src/main/kotlin/Main.kt:23:22 Unresolved reference: age - e: file://test3901867314473689900/src/main/kotlin/Main.kt:25:24 Unresolved reference: DatabaseConfiguration + e: file://test3901867314473689900/src/main/kotlin/Main.kt:25:24 Unresolved reference: DbConnectionConfig e: file://test3901867314473689900/src/main/kotlin/Main.kt:26:29 Unresolved reference: readSqlTable e: file://test3901867314473689900/src/main/kotlin/Main.kt:27:22 Unresolved reference: age e: file://test3901867314473689900/src/main/kotlin/Main.kt:29:29 Unresolved reference: readSqlTable @@ -426,7 +426,7 @@ class SchemaGeneratorPluginIntegrationTest : AbstractDataFramePluginIntegrationT import java.sql.DriverManager import java.sql.SQLException import org.jetbrains.kotlinx.dataframe.io.readSqlTable - import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration + import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig fun main() { Class.forName("org.h2.Driver") @@ -438,7 +438,7 @@ class SchemaGeneratorPluginIntegrationTest : AbstractDataFramePluginIntegrationT val df1 = DataFrame.readSqlTable(connection, tableName, 1).cast() df1.filter { age != null && age > 30 } - val dbConfig = DatabaseConfiguration(url = "$connectionUrl") + val dbConfig = DbConnectionConfig(url = "$connectionUrl") val df2 = DataFrame.readSqlTable(dbConfig, tableName).cast() df2.filter { age != null && age > 30 } diff --git a/plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/DataFrameJdbcSymbolProcessorTest.kt b/plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/DataFrameJdbcSymbolProcessorTest.kt index d7a6316e3..ec48e5ef4 100644 --- a/plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/DataFrameJdbcSymbolProcessorTest.kt +++ b/plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/DataFrameJdbcSymbolProcessorTest.kt @@ -111,7 +111,7 @@ class DataFrameJdbcSymbolProcessorTest { import java.sql.DriverManager import java.sql.SQLException import org.jetbrains.kotlinx.dataframe.io.readSqlTable - import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration + import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig """.trimIndent(), ), ), @@ -145,7 +145,7 @@ class DataFrameJdbcSymbolProcessorTest { import java.sql.DriverManager import java.sql.SQLException import org.jetbrains.kotlinx.dataframe.io.readSqlTable - import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration + import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig """.trimIndent(), ), ), @@ -185,7 +185,7 @@ class DataFrameJdbcSymbolProcessorTest { import java.sql.DriverManager import java.sql.SQLException import org.jetbrains.kotlinx.dataframe.io.readSqlTable - import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration + import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig fun main() { val tableName = "Customer" @@ -196,7 +196,7 @@ class DataFrameJdbcSymbolProcessorTest { val df1 = DataFrame.readSqlTable(connection, tableName, 1).cast() df1.filter { it[Customer::age] != null && it[Customer::age]!! > 30 } - val dbConfig = DatabaseConfiguration(url = "$CONNECTION_URL") + val dbConfig = DbConnectionConfig(url = "$CONNECTION_URL") val df2 = DataFrame.readSqlTable(dbConfig, tableName).cast() df2.filter { it[Customer::age] != null && it[Customer::age]!! > 30 } @@ -241,7 +241,7 @@ class DataFrameJdbcSymbolProcessorTest { import java.sql.DriverManager import java.sql.SQLException import org.jetbrains.kotlinx.dataframe.io.readSqlTable - import org.jetbrains.kotlinx.dataframe.io.DatabaseConfiguration + import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig fun main() { val tableName = "Customer" @@ -252,7 +252,7 @@ class DataFrameJdbcSymbolProcessorTest { val df1 = DataFrame.readSqlTable(connection, tableName, 1).cast() df1.filter { it[Customer::age] != null && it[Customer::age]!! > 30 } - val dbConfig = DatabaseConfiguration(url = "$CONNECTION_URL") + val dbConfig = DbConnectionConfig(url = "$CONNECTION_URL") val df2 = DataFrame.readSqlTable(dbConfig, tableName).cast() df2.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }