diff --git a/build.sbt b/build.sbt index 0b44c5a4b870..41ff07dbaff9 100644 --- a/build.sbt +++ b/build.sbt @@ -708,15 +708,6 @@ lazy val `project-manager` = (project in file("lib/scala/project-manager")) case "reference.conf" => MergeStrategy.concat case _ => MergeStrategy.first }, - assemblyOption in assembly := (assemblyOption in assembly).value - .copy( - prependShellScript = Some( - defaultUniversalScript( - shebang = false, - javaOpts = Seq("-Dtruffle.class.path.append=runtime.jar") - ) - ) - ), (Test / test) := (Test / test).dependsOn(`engine-runner` / assembly).value, rebuildNativeImage := NativeImage .buildNativeImage( @@ -1316,7 +1307,9 @@ lazy val database = project .in(file("database")) .settings( autoScalaLibrary := false, - libraryDependencies ++= Seq(), + libraryDependencies ++= Seq( + "org.xerial" % "sqlite-jdbc" % "3.34.0" + ), Compile / packageBin := Def.task { val result = (Compile / packageBin).value val _ = StdBits diff --git a/distribution/std-lib/Base/src/Error/Common.enso b/distribution/std-lib/Base/src/Error/Common.enso new file mode 100644 index 000000000000..bfa9066b34dc --- /dev/null +++ b/distribution/std-lib/Base/src/Error/Common.enso @@ -0,0 +1,13 @@ +from Base import all + +type Illegal_State_Error + ## UNSTABLE + + A generic error that indicates that a given operation cannot be performed + under certain conditions. + + Arguments: + - message: the error message explaining why the operation cannot be + performed. + - cause: (optional) another error that is the cause of this one. + type Illegal_State_Error message cause=Nothing diff --git a/distribution/std-lib/Base/src/Main.enso b/distribution/std-lib/Base/src/Main.enso index d87c7c41865e..147b4ccf2325 100644 --- a/distribution/std-lib/Base/src/Main.enso +++ b/distribution/std-lib/Base/src/Main.enso @@ -12,6 +12,7 @@ import Base.Data.Pair import Base.Data.Range import Base.Data.Text.Extensions import Base.Data.Vector +import Base.Error.Common import Base.Error.Extensions import Base.Math import Base.Meta @@ -39,6 +40,7 @@ from Base.Data.Noise export all hiding Noise from Base.Data.Pair export Pair from Base.Data.Range export Range from Base.Data.Text.Extensions export Text +from Base.Error.Common export all from Base.Error.Extensions export all from Base.Meta.Enso_Project export all from Base.Polyglot.Java export all diff --git a/distribution/std-lib/Base/src/System/File.enso b/distribution/std-lib/Base/src/System/File.enso index 501d7cdf92bf..0c8134991377 100644 --- a/distribution/std-lib/Base/src/System/File.enso +++ b/distribution/std-lib/Base/src/System/File.enso @@ -231,6 +231,12 @@ type File is_directory : Boolean is_directory = this.prim_file.isDirectory + ## Creates the directory represented by this file if it does not exist. + + It also creates parent directories if they do not exist. + create_directory : Nothing + create_directory = this.prim_file.createDirectories + ## Checks whether the file exists and is a regular file. is_regular_file : Boolean is_regular_file = this.prim_file.isRegularFile diff --git a/distribution/std-lib/Database/THIRD-PARTY/NOTICE b/distribution/std-lib/Database/THIRD-PARTY/NOTICE index 50509f862546..9c3f3ea189e4 100644 --- a/distribution/std-lib/Database/THIRD-PARTY/NOTICE +++ b/distribution/std-lib/Database/THIRD-PARTY/NOTICE @@ -1,2 +1,7 @@ Enso Copyright 2020 - 2021 New Byte Order sp. z o. o. + +'sqlite-jdbc', licensed under The Apache Software License, Version 2.0, is distributed with the std-lib-Database.
+The license information can be found along with the copyright notices. +Copyright notices related to this dependency can be found in the directory `org.xerial.sqlite-jdbc-3.34.0`. + diff --git a/distribution/std-lib/Database/THIRD-PARTY/org.xerial.sqlite-jdbc-3.34.0/LICENSE b/distribution/std-lib/Database/THIRD-PARTY/org.xerial.sqlite-jdbc-3.34.0/LICENSE new file mode 100644 index 000000000000..75b52484ea47 --- /dev/null +++ b/distribution/std-lib/Database/THIRD-PARTY/org.xerial.sqlite-jdbc-3.34.0/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/distribution/std-lib/Database/THIRD-PARTY/org.xerial.sqlite-jdbc-3.34.0/LICENSE.zentus b/distribution/std-lib/Database/THIRD-PARTY/org.xerial.sqlite-jdbc-3.34.0/LICENSE.zentus new file mode 100644 index 000000000000..3e044abb4cf8 --- /dev/null +++ b/distribution/std-lib/Database/THIRD-PARTY/org.xerial.sqlite-jdbc-3.34.0/LICENSE.zentus @@ -0,0 +1,24 @@ +Copyright (c) 2006, David Crawshaw. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + diff --git a/distribution/std-lib/Database/THIRD-PARTY/org.xerial.sqlite-jdbc-3.34.0/NOTICES b/distribution/std-lib/Database/THIRD-PARTY/org.xerial.sqlite-jdbc-3.34.0/NOTICES new file mode 100644 index 000000000000..c8e59de430da --- /dev/null +++ b/distribution/std-lib/Database/THIRD-PARTY/org.xerial.sqlite-jdbc-3.34.0/NOTICES @@ -0,0 +1,99 @@ +/*-------------------------------------------------------------------------- + * Copyright 2016 Magnus Reftel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------*/ + +/*-------------------------------------------------------------------------- + * Copyright 2007 Taro L. Saito + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------*/ + +/*-------------------------------------------------------------------------- + * Copyright 2010 Taro L. Saito + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------*/ + +/*-------------------------------------------------------------------------- + * Copyright 2008 Taro L. Saito + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------*/ + +Copyright 2009 Taro L. Saito + +/* + * Copyright (c) 2007 David Crawshaw + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + diff --git a/distribution/std-lib/Database/src/Connection/Connection.enso b/distribution/std-lib/Database/src/Connection/Connection.enso index c46151615010..04eb7e1db59a 100644 --- a/distribution/std-lib/Database/src/Connection/Connection.enso +++ b/distribution/std-lib/Database/src/Connection/Connection.enso @@ -1,20 +1,33 @@ from Base import all -import Database.Data.Table + +import Database.Data.Table as Database_Table +import Table.Data.Table as Materialized_Table +import Database.Data.Dialect +import Database.Data.Sql +import Database.Data.Internal.IR +from Database.Data.Sql import Sql_Type +import Table.Internal.Java_Exports + +polyglot java import java.util.ArrayList +polyglot java import java.sql.DriverManager +polyglot java import java.sql.PreparedStatement +polyglot java import java.sql.Types +polyglot java import java.lang.UnsupportedOperationException +polyglot java import java.sql.SQLException +polyglot java import java.sql.SQLTimeoutException type Connection - ## UNSTABLE + ## PRIVATE - A Database connection. + A Database connection using a JDBC driver. Allows accessing tables from a database. - type Connection internal_connection - ## UNSTABLE - - SQL dialect that the database associated with that connection is using. - dialect : String - dialect = this.internal_connection.dialect + Arguments: + - connection_resource: the resource managing the underlying JDBC connection. + - dialect: the dialect associated with the database we are connected to. + type Connection connection_resource dialect @@ -23,7 +36,245 @@ type Connection Arguments: - name: name of the table to access - access_table : Text -> Table - access_table name = - column_names = this.internal_connection.fetch_column_names name - Table.make_table this name column_names + access_table : Text -> Database_Table + access_table name = here.wrap_sql_errors <| + columns = this.fetch_columns name + Database_Table.make_table this name columns + + ## Closes the connection, releasing the underlying database resources + immediately instead of waiting for them to be automatically released. + + The connection is not usable afterwards. + close : Nothing + close = + Managed_Resource.finalize this.connection_resource + + ## ADVANCED + + Executes a raw query and returns the result as an in-memory Table.
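+ + > Example + Running a raw query (the table name here is purely illustrative, assuming a `connection` value is in scope): + connection.execute_query "SELECT * FROM my_table"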
+ + Currently the SQL decimal type is mapped to a floating-point + representation which is not exact. While operations in the database + itself may (or may not, depending on the database engine) be done with + exact precision, the materialized values may lose precision, so extra + care is needed when working with decimal computations that need to be + exact. + + Arguments: + - query: either raw SQL code as Text or an instance of Sql.Statement + representing the query to execute. + - expected_types: an optional vector of expected types of each column; + meant only for internal use. + execute_query : Text | Sql.Statement -> Vector Sql.Sql_Type -> Materialized_Table + execute_query query expected_types=Nothing = here.wrap_sql_errors <| + Resource.bracket (this.prepare_statement query) .close stmt-> + rs = stmt.executeQuery + metadata = rs.getMetaData + ncols = metadata.getColumnCount + column_names = Vector.new ncols ix-> metadata.getColumnName ix+1 + column_types = if expected_types.is_nothing.not then expected_types else + Vector.new ncols (ix -> Sql_Type <| metadata.getColumnType ix+1) + column_builders = column_types.map typ-> + here.create_builder typ + go has_next = if has_next.not then Nothing else + column_builders.map_with_index ix-> builder-> + builder.fetch_and_append rs ix+1 + @Tail_Call go rs.next + go rs.next + columns = column_builders.zip column_names builder-> name-> + builder.make_column name + Materialized_Table.from_columns columns + + ## ADVANCED + + Executes a raw update query. If the query was inserting, updating or + deleting rows, the number of affected rows is returned; for other types + of queries (like creating or altering tables) 0 is returned. + + Arguments: + - query: either raw SQL code as Text or an instance of Sql.Statement + representing the query to execute. + execute_update : Text | Sql.Statement -> Integer + execute_update query = here.wrap_sql_errors <| + Resource.bracket (this.prepare_statement query) .close stmt-> + result = Panic.recover stmt.executeLargeUpdate + result.catch err-> case err of + Polyglot_Error exc -> + case Java.is_instance exc UnsupportedOperationException of + True -> + stmt.executeUpdate + False -> Error.throw err + _ -> Error.throw err + + ## PRIVATE + prepare_statement : Text | Sql.Statement -> PreparedStatement + prepare_statement query = + go template holes=[] = Managed_Resource.with this.connection_resource java_connection-> + stmt = java_connection.prepareStatement template + setup_error = Panic.recover <| + holes.map_with_index ix-> obj-> + position = ix + 1 + case obj.first of + Nothing -> stmt.setNull position obj.second.typeid + _ -> stmt.setObject position obj.first + setup_error.catch error-> + stmt.close + Panic.throw error + stmt + case query of + Text -> go query [] + Sql.Statement _ -> + compiled = query.prepare + go compiled.first compiled.second + + ## PRIVATE + + A helper function that fetches the column names of a table in the + database, together with their SQL types.
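+ + > Example + A hypothetical call; the numeric ids in the result come from `java.sql.Types` (4 is INTEGER, 12 is VARCHAR): + connection.fetch_columns "my_table" + # e.g. [["id", Sql_Type 4], ["name", Sql_Type 12]]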
# fetch_columns : Text -> Vector [Text, Sql_Type] + fetch_columns table_name = + query = IR.Select_All (IR.make_ctx_from table_name) + compiled = this.dialect.generate_sql query + Resource.bracket (this.prepare_statement compiled) .close stmt-> + rs = stmt.executeQuery + metadata = rs.getMetaData + ncols = metadata.getColumnCount + resolve_column ix = + name = metadata.getColumnName ix+1 + typ = metadata.getColumnType ix+1 + [name, Sql_Type typ] + Vector.new ncols resolve_column + +## PRIVATE + + Creates a builder for a column based on a provided SQL type, trying to infer + the best type for the builder. + + WARNING: Currently it coerces decimals into floating point numbers. +create_builder : Sql_Type -> Builder +create_builder sql_type = + initial_size = 10 + if sql_type.is_definitely_boolean then Builder_Boolean (Java_Exports.make_bool_builder) else + if sql_type.is_definitely_integer then Builder_Long (Java_Exports.make_long_builder initial_size) else + is_double = sql_type.is_definitely_double || sql_type==Sql_Type.decimal + if is_double then Builder_Double (Java_Exports.make_double_builder initial_size) else + Builder_Inferred (Java_Exports.make_inferred_builder initial_size) + +type Builder + ## PRIVATE + type Builder_Inferred java_builder + + ## PRIVATE + type Builder_Double java_builder + + ## PRIVATE + type Builder_Long java_builder + + ## PRIVATE + type Builder_Boolean java_builder + + ## PRIVATE + + Fetches the value of the ith column from the current row of the result + set and appends it to the builder. + + Arguments: + - rs: the Java ResultSet from which the value will be fetched. + - i: the index of the column to fetch from (starting from 1 as is the + ResultSet convention). + fetch_and_append rs i = case this of + Builder_Inferred _ -> + obj = rs.getObject i + this.java_builder.append obj + Builder_Boolean _ -> + bool = rs.getBoolean i + case rs.wasNull of + True -> this.java_builder.appendNulls 1 + False -> this.java_builder.appendBoolean bool + Builder_Long _ -> + long = rs.getLong i + case rs.wasNull of + True -> this.java_builder.appendNulls 1 + False -> this.java_builder.appendLong long + Builder_Double _ -> + double = rs.getDouble i + case rs.wasNull of + True -> this.java_builder.appendNulls 1 + False -> this.java_builder.appendDouble double + + ## PRIVATE + + Seals the builder and returns a built Java column. + make_column : Text -> Java_Exports.Column + make_column name = + storage = this.java_builder.seal + Java_Exports.make_column name storage + +## An error indicating that a supported dialect could not be deduced for the + provided URL. +type Unsupported_Dialect url + +## PRIVATE + + Creates a JDBC connection based on a URL and, optionally, a username and + password. +create_jdbc_connection : Text -> Nothing | Text -> Nothing | Text -> Connection +create_jdbc_connection url user=Nothing password=Nothing = here.wrap_sql_errors <| + dialect = Dialect.supported_dialects.find (d -> url.starts_with "jdbc:"+d.name) . map_error (_ -> Unsupported_Dialect url) + java_connection = case user.is_nothing && password.is_nothing of + True -> + DriverManager.getConnection url + False -> + DriverManager.getConnection url user password + resource = Managed_Resource.register java_connection here.close_connection + Connection resource dialect + +## PRIVATE + + This cannot be a closure due to limitations of Managed_Resource.
+close_connection connection = + connection.close + +type Sql_Error + ## UNSTABLE + + Indicates an error when executing a query or an update, or when + connecting to the database. + + Wraps an SQLException from the Java driver. + type Sql_Error java_exception + + ## UNSTABLE + to_text : Text + to_text = this.java_exception.getMessage + + ## UNSTABLE + to_display_text : Text + to_display_text = this.to_text + +type Sql_Timeout_Error + ## UNSTABLE + + Indicates that an operation has timed out. + type Sql_Timeout_Error java_exception + + ## UNSTABLE + to_text : Text + to_text = this.java_exception.getMessage + + ## UNSTABLE + to_display_text : Text + to_display_text = this.to_text + +## PRIVATE + + Executes `action` and returns its result, catching any panics; if they + come from JDBC, they are wrapped in our own error types. +wrap_sql_errors ~action = + result = Panic.recover action + result.catch err-> case err of + Polyglot_Error exc -> + transformed = if Java.is_instance exc SQLTimeoutException then Sql_Timeout_Error exc else + if Java.is_instance exc SQLException then Sql_Error exc else err + Error.throw transformed + _ -> Error.throw err diff --git a/distribution/std-lib/Database/src/Connection/Database.enso b/distribution/std-lib/Database/src/Connection/Database.enso new file mode 100644 index 000000000000..1c5517cfa095 --- /dev/null +++ b/distribution/std-lib/Database/src/Connection/Database.enso @@ -0,0 +1,34 @@ +from Base import all +from Database.Connection.Connection import all + +# TODO [RW] finalize Postgres support +## UNSTABLE + + Tries to connect to the database at the provided URL. + + Currently only SQLite databases are supported. + + The exact URL depends on the database engine. + For SQLite it has the format `sqlite:/path/to/database/file`. + + Arguments: + - url: the URL to connect to. + - user: (optional) a username for authentication. + - password: (optional) a password for authentication. +connect : Text -> Nothing | Text -> Nothing | Text -> Connection ! Sql_Error +connect url user=Nothing password=Nothing = + full_url = if url.starts_with "jdbc:" then url else "jdbc:"+url + Connection.create_jdbc_connection full_url user password + +## UNSTABLE + + Connects to an SQLite database in a file on the filesystem. + + It is an alternative to `connect` that resolves a path to the database file. + + Arguments: + - file: the path to the database. +open_sqlite_file : File -> Connection ! Sql_Error +open_sqlite_file file = + url = "sqlite:" + file.absolute.path + here.connect url diff --git a/distribution/std-lib/Database/src/Data/Column.enso b/distribution/std-lib/Database/src/Data/Column.enso index 63837031d02a..a8ab5dd9c577 100644 --- a/distribution/std-lib/Database/src/Data/Column.enso +++ b/distribution/std-lib/Database/src/Data/Column.enso @@ -1,11 +1,15 @@ from Base import all import Database.Data.Internal.Helpers import Database.Data.Internal.IR +from Database.Data.Sql import Sql_Type import Database.Data.Table +from Database.Data.Table import Integrity_Error +import Table.Data.Column as Materialized_Column type Column ## UNSTABLE + PRIVATE Represents a single column backed by a database. @@ -16,8 +20,30 @@ type Column they must both have the same filtering, grouping etc. rules applied to be able to be combined.
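+ + Arguments: + - name: the name of the column. + - connection: the connection with which the column is associated. + - sql_type: the SQL type of the column's expression. + - expression: the expression which this column is bound to. + - context: the query context in which the expression is valid.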
# type Column (name : Text) (connection : Connection) - # (expression : IR.Expression) (context : IR.Context) - type Column name connection expression context + # (sql_type : Sql_Type) (expression : IR.Expression) + # (context : IR.Context) + type Column name connection sql_type expression context + + ## UNSTABLE + + Returns a text containing an ASCII-art table displaying this data. + + Arguments: + - show_rows: the number of initial rows that should be displayed. + - format_terminal: whether ANSI-terminal formatting should be used. + display : Integer -> Boolean -> Text + display show_rows=10 format_terminal=False = + this.to_table.display show_rows format_terminal + + ## UNSTABLE + + Prints an ASCII-art table with this data to the standard output. + + Arguments: + - show_rows: the number of initial rows that should be displayed. + print show_rows=10 = + IO.println (this.display show_rows format_terminal=True) + IO.println '' ## UNSTABLE @@ -30,14 +56,45 @@ type Column Converts this column into a single-column table. to_table : Table.Table to_table = - cols = [[this.name, this.expression]] - Table.Table this.name this.connection cols this.context + Table.Table this.name this.connection [this.as_internal] this.context + + ## UNSTABLE + + Returns a materialized column containing the rows of this column. + + Currently the SQL decimal type is mapped to a floating-point + representation which is not exact. While operations in the database + itself may (or may not, depending on the database engine) be done with + exact precision, the materialized values may lose precision, so extra + care is needed when working with decimal computations that need to be + exact. + + Arguments: + - max_rows: specifies the maximum number of rows to fetch; if not set, + all available rows are fetched. + to_dataframe : (Nothing | Integer) -> Materialized_Column.Column + to_dataframe max_rows=Nothing = + df = this.to_table.to_dataframe max_rows + df.at this.name ## UNSTABLE Returns a vector containing all the elements in this column. + + Currently the SQL decimal type is mapped to a floating-point + representation which is not exact. While operations in the database + itself may (or may not, depending on the database engine) be done with + exact precision, the materialized values may lose precision, so extra + care is needed when working with decimal computations that need to be + exact. to_vector : Vector - to_vector = this.to_table.to_dataframe.at this.name . to_vector + to_vector = + ## We remove the index to avoid fetching index data that will not be + used anyway when constructing the raw Vector. + without_ix = this.to_table.set_index [] + df = without_ix . to_dataframe + raw_column = df.at this.name + raw_column.to_vector ## UNSTABLE @@ -45,6 +102,85 @@ type Column to_sql : Sql.Statement to_sql = this.to_table.to_sql + ## PRIVATE + + Creates a binary operation with the given kind and operand. + If not specified, `new_type` defaults to the type of the current column. + `operand_type` is only relevant if the operand is not a column; it + defaults to the current type if not provided.
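+ + > Example + The `==` operator defined below uses this helper: + this.make_binary_op "=" other new_type=Sql_Type.boolean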
+ make_binary_op : Text -> (Column | Any) -> (Sql_Type | Nothing) -> (Sql_Type | Nothing) -> Column + make_binary_op op_kind operand new_type=Nothing operand_type=Nothing = + actual_new_type = if new_type.is_nothing then this.sql_type else new_type + case operand of + Column _ _ _ other_expr _ -> + case Helpers.check_integrity this operand of + False -> + Error.throw <| Illegal_State_Error "Cannot compare columns coming from different contexts. Only columns of a single table can be compared." + True -> + new_expr = IR.Operation op_kind [this.expression, other_expr] + Column this.name this.connection actual_new_type new_expr this.context + _ -> + actual_operand_type = if operand_type.is_nothing then this.sql_type else operand_type + other = IR.make_constant actual_operand_type operand + new_expr = IR.Operation op_kind [this.expression, other] + Column this.name this.connection actual_new_type new_expr this.context + + ## PRIVATE + + Helper for implementing unary operators. + make_unary_op : Text -> (Sql_Type | Nothing) -> Column + make_unary_op op_kind new_type=Nothing = + actual_new_type = if new_type.is_nothing then this.sql_type else new_type + new_expr = IR.Operation op_kind [this.expression] + Column this.name this.connection actual_new_type new_expr this.context + + ## UNSTABLE + + Sums the values in this column. + sum : Any + sum = this.compute_aggregate "SUM" + + ## UNSTABLE + + Computes the maximum element of this column. + max : Any + max = this.compute_aggregate "MAX" + + ## UNSTABLE + + Computes the minimum element of this column. + min : Any + min = this.compute_aggregate "MIN" + + ## UNSTABLE + + Computes the mean of non-missing elements of this column. + mean : Any + mean = this.compute_aggregate "AVG" + + ## PRIVATE + compute_aggregate op_name = + agg = here.make_aggregate this op_name + agg.to_vector . at 0 + + ## UNSTABLE + + Returns the length of this column. + length : Integer + length = this.to_table.row_count + + ## UNSTABLE + + Returns the number of missing items in this column. + count_missing : Integer + count_missing = this.where this.is_missing . length + + ## UNSTABLE + + Returns the number of non-null items in this column. + count : Integer + count = this.where this.is_missing.not . length + ## UNSTABLE Element-wise equality comparison. Returns a column with results of @@ -52,7 +188,7 @@ type Column If `other` is a column, the comparison is performed pairwise between corresponding elements of `this` and `other`. == : Column | Any -> Column - == other = this.make_binary_op "=" other + == other = this.make_binary_op "=" other new_type=Sql_Type.boolean ## UNSTABLE @@ -61,7 +197,7 @@ type Column If `other` is a column, the comparison is performed pairwise between corresponding elements of `this` and `other`. != : Column | Any -> Column - != other = this.make_binary_op "!=" other + != other = this.make_binary_op "!=" other new_type=Sql_Type.boolean ## UNSTABLE @@ -70,7 +206,7 @@ type Column If `other` is a column, the comparison is performed pairwise between corresponding elements of `this` and `other`. >= : Column | Any -> Column - >= other = this.make_binary_op ">=" other + >= other = this.make_binary_op ">=" other new_type=Sql_Type.boolean ## UNSTABLE @@ -79,7 +215,7 @@ type Column If `other` is a column, the comparison is performed pairwise between corresponding elements of `this` and `other`.
<= : Column | Any -> Column - <= other = this.make_binary_op "<=" other + <= other = this.make_binary_op "<=" other new_type=Sql_Type.boolean ## UNSTABLE @@ -88,7 +224,7 @@ type Column If `other` is a column, the comparison is performed pairwise between corresponding elements of `this` and `other`. > : Column | Any -> Column - > other = this.make_binary_op ">" other + > other = this.make_binary_op ">" other new_type=Sql_Type.boolean ## UNSTABLE @@ -97,7 +233,7 @@ type Column If `other` is a column, the comparison is performed pairwise between corresponding elements of `this` and `other`. < : Column | Any -> Column - < other = this.make_binary_op "<" other + < other = this.make_binary_op "<" other new_type=Sql_Type.boolean ## UNSTABLE @@ -164,7 +300,7 @@ type Column Returns a column of booleans, with `True` items at the positions where this column contains a `Nothing`. is_missing : Column - is_missing = this.make_unary_op "ISNULL" + is_missing = this.make_unary_op "ISNULL" new_type=Sql_Type.boolean ## UNSTABLE @@ -186,12 +322,11 @@ type Column where filter = case Helpers.check_integrity this filter of False -> - # TODO [RW] more descriptive errors - Error.throw "Filtering over a column with differing context." + Error.throw (Integrity_Error "Column "+filter.name) True -> new_filters = this.context.where_filters + [filter.expression] new_ctx = this.context.set_where_filters new_filters - Column this.name this.connection this.expression new_ctx + Column this.name this.connection this.sql_type this.expression new_ctx ## UNSTABLE @@ -204,7 +339,35 @@ type Column Returns the same column with changed name. rename : Text -> Column rename new_name = case Helpers.ensure_name_is_sane new_name of - True -> Column new_name this.connection this.expression this.context + True -> + is_used_in_index = this.context.meta_index.exists i-> i.name == new_name + case is_used_in_index of + True -> Error.throw <| Illegal_State_Error "Cannot rename the column to "+new_name+", because it has an index with the same name." + False -> + Column new_name this.connection this.sql_type this.expression this.context + + ## UNSTABLE + + Sorts the column according to the specified rules. + + Arguments: + - order: specifies the default sort order for this operation. + - missing_last: specifies the default placement of missing values when + compared to non-missing ones. Note that this argument is independent + of `order`, i.e. missing values will always be sorted according to + this rule, ignoring the ascending / descending setting. + + > Example + Sorting `column` in ascending order. + column.sort + + > Example + Sorting `column` in descending order, placing missing values at the + top of the resulting column. + column.sort order=Sort_Order.Descending missing_last=False + sort : Sort_Order -> Boolean -> Column + sort order=Sort_Order.Ascending missing_last=True = + this.to_table.sort by=this order=order missing_last=missing_last . at this.name ## UNSTABLE @@ -212,7 +375,7 @@ type Column If `other` is a column, the operation is performed pairwise between corresponding elements of `this` and `other`. starts_with : Column | Text -> Column - starts_with other = this.make_binary_op "starts_with" other + starts_with other = this.make_binary_op "starts_with" other new_type=Sql_Type.boolean ## UNSTABLE @@ -220,7 +383,7 @@ type Column If `other` is a column, the operation is performed pairwise between corresponding elements of `this` and `other`.
ends_with : Column | Text -> Column - ends_with other = this.make_binary_op "ends_with" other + ends_with other = this.make_binary_op "ends_with" other new_type=Sql_Type.boolean ## UNSTABLE @@ -228,33 +391,11 @@ type Column If `other` is a column, the operation is performed pairwise between corresponding elements of `this` and `other`. contains : Column | Text -> Column - contains other = this.make_binary_op "contains" other - - ## PRIVATE - - Helper for implementing binary operators. - make_binary_op : Text -> Text -> (Column | Any) -> Column - make_binary_op op_kind operand = - case operand of - Column _ _ other_expr _ -> - case Helpers.check_integrity this operand of - False -> - Error.throw "Cannot compare columns coming from different contexts. Only columns of a single table can be compared." - True -> - new_expr = IR.Operation op_kind [this.expression, other_expr] - Column this.name this.connection new_expr this.context - _ -> - other = IR.make_constant operand - new_expr = IR.Operation op_kind [this.expression, other] - Column this.name this.connection new_expr this.context + contains other = this.make_binary_op "contains" other new_type=Sql_Type.boolean ## PRIVATE - - Helper for implementing unary operators. - make_unary_op : Text -> Text -> Column - make_unary_op op_kind = - new_expr = IR.Operation op_kind [this.expression] - Column this.name this.connection new_expr this.context + as_internal : IR.Internal_Column + as_internal = IR.Internal_Column this.name this.sql_type this.expression type Aggregate_Column ## UNSTABLE @@ -262,8 +403,9 @@ type Aggregate_Column Wraps a column grouped by its index. Allows performing aggregation operations on the contained values. # type Aggregate_Column (name : Text) (connection : Connection) - # (expression : IR.Expression) (context : IR.Context) - type Aggregate_Column name connection expression context + # (sql_type : Sql_Type) (expression : IR.Expression) + # (context : IR.Context) + type Aggregate_Column name connection sql_type expression context ## UNSTABLE @@ -274,7 +416,7 @@ type Aggregate_Column name to generate the resulting column name. sum : Text -> Column sum name_suffix='_sum' = - this.make_aggregate "SUM" name_suffix + here.make_aggregate this "SUM" name_suffix ## UNSTABLE @@ -285,7 +427,7 @@ type Aggregate_Column name to generate the resulting column name. max : Text -> Column max name_suffix='_max' = - this.make_aggregate "MAX" name_suffix + here.make_aggregate this "MAX" name_suffix ## UNSTABLE @@ -296,7 +438,7 @@ type Aggregate_Column name to generate the resulting column name. min : Text -> Column min name_suffix='_min' = - this.make_aggregate "MIN" name_suffix + here.make_aggregate this "MIN" name_suffix ## UNSTABLE @@ -307,7 +449,7 @@ type Aggregate_Column name to generate the resulting column name. count : Text -> Column count name_suffix='_count' = - this.make_aggregate "COUNT" name_suffix + here.make_aggregate this "COUNT" name_suffix new_type=Sql_Type.integer ## UNSTABLE @@ -318,14 +460,16 @@ type Aggregate_Column name to generate the resulting column name. mean : Text -> Column mean name_suffix='_mean' = - this.make_aggregate "AVG" name_suffix - - ## PRIVATE - - A helper method for creating an aggregated column by applying some - operation. 
- make_aggregate operation name_suffix = - new_name = this.name + name_suffix - expr = IR.Operation operation [this.expression] - case Helpers.ensure_name_is_sane new_name of - True -> Column new_name this.connection expr this.context + here.make_aggregate this "AVG" name_suffix + +## PRIVATE + + A helper function for creating an aggregated column by applying some + operation. +make_aggregate column operation name_suffix="_agg" new_type=Nothing = + actual_new_type = if new_type.is_nothing then column.sql_type else new_type + expr = IR.Operation operation [column.expression] + case Helpers.ensure_name_is_sane name_suffix of + True -> + new_name = column.name + name_suffix + Column new_name column.connection actual_new_type expr column.context diff --git a/distribution/std-lib/Database/src/Data/Dialect.enso b/distribution/std-lib/Database/src/Data/Dialect.enso index 359019ec171e..ba1b7d9e3170 100644 --- a/distribution/std-lib/Database/src/Data/Dialect.enso +++ b/distribution/std-lib/Database/src/Data/Dialect.enso @@ -95,3 +95,7 @@ sqlite = my_mappings = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]] dialect = Base_Generator.base_dialect . extend_with my_mappings Dialect "sqlite" (query -> Base_Generator.generate_query dialect query . build) + +## PRIVATE +supported_dialects : Vector Dialect +supported_dialects = [here.postgresql, here.sqlite] diff --git a/distribution/std-lib/Database/src/Data/Internal/Base_Generator.enso b/distribution/std-lib/Database/src/Data/Internal/Base_Generator.enso index 7abc0d7789a5..03feefd50c5d 100644 --- a/distribution/std-lib/Database/src/Data/Internal/Base_Generator.enso +++ b/distribution/std-lib/Database/src/Data/Internal/Base_Generator.enso @@ -1,6 +1,7 @@ from Base import all import Database.Data.Sql import Database.Data.Internal.IR +from Database.Data.Sql import Sql_Type type Internal_Dialect ## PRIVATE @@ -12,8 +13,13 @@ type Internal_Dialect implementations; each implementation is a function which takes SQL builders for the arguments and should return a builder yielding the whole operation. + - wrap_identifier: a function that converts an arbitrary supported + identifier name in such a way that it can be used in the query; that + usually consists of wrapping the name in quotes and escaping any quotes + within it. # type Internal_Dialect (operation_map : Map Text (Vector Sql.Builder -> Sql.Builder)) - type Internal_Dialect operation_map + # (wrap_identifier : Text -> Sql.Builder) + type Internal_Dialect operation_map wrap_identifier ## PRIVATE @@ -22,7 +28,7 @@ type Internal_Dialect # extend_with : Vector [Text, Vector Sql.Builder -> Sql.Builder] -> Internal_Dialect extend_with mappings = new_map = mappings.fold this.operation_map (m -> el -> m.insert (el.at 0) (el.at 1)) - Internal_Dialect new_map + Internal_Dialect new_map this.wrap_identifier ## PRIVATE @@ -76,9 +82,19 @@ make_function name = make_constant : Text -> Vector Sql.Builder -> Sql.Builder make_constant code = arguments -> - if arguments.not_empty then Error.throw "No arguments were expected" else + if arguments.not_empty then Error.throw <| Illegal_State_Error "No arguments were expected" else Sql.code code +## PRIVATE + + Wraps the identifier name in quotes and escapes any quotes within the name + by doubling them. This is the simplest way of escaping identifiers and + should work across most dialects.
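+ + > Example + Wrapping a name that contains a quote (illustrative only): + here.wrap_in_quotes 'my "name"' + # generates the SQL code: "my ""name"""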
+wrap_in_quotes : Text -> Sql.Builder +wrap_in_quotes identifier = + escaped = identifier.replace '"' '""' + Sql.code '"'+escaped+'"' + ## PRIVATE The base SQL dialect that is shared between most SQL implementations. @@ -95,27 +111,38 @@ base_dialect = agg = [["COUNT", fun "COUNT"], ["MAX", fun "MAX"], ["MIN", fun "MIN"], ["AVG", fun "AVG"], ["SUM", fun "SUM"], ["COUNT_ROWS", here.make_constant "COUNT(*)"]] nulls = [["ISNULL", here.make_right_unary_op "IS NULL"], ["FILLNULL", fun "COALESCE"]] base_map = Map.from_vector (arith + logic + compare + agg + nulls) - Internal_Dialect base_map + Internal_Dialect base_map here.wrap_in_quotes ## PRIVATE Builds code for an expression. generate_expression : Internal_Dialect -> IR.Expression -> Sql.Builder generate_expression dialect expr = case expr of - IR.Column origin name -> Sql.code origin+"."+name - IR.Constant value -> Sql.interpolation value + IR.Column origin name -> + dot = Sql.code '.' + dialect.wrap_identifier origin ++ dot ++ dialect.wrap_identifier name + IR.Constant sql_type value -> Sql.interpolation sql_type value IR.Operation kind arguments -> - op = dialect.operation_map.get_or_else kind (Error.throw "Operation "+op+" is not supported.") + op = dialect.operation_map.get_or_else kind (Error.throw <| Illegal_State_Error "Operation "+kind+" is not supported.") parsed_args = arguments.map (here.generate_expression dialect) op parsed_args +## PRIVATE + + Adds an alias for the expression, applicable to expressions that represent + columns or sub-queries. +alias : Internal_Dialect -> Text -> Sql.Builder +alias dialect name = + wrapped = dialect.wrap_identifier name + Sql.code " AS " ++ wrapped + ## PRIVATE Builds code for the FROM clause. generate_from_part : Internal_Dialect -> From_Spec -> Sql.Builder generate_from_part dialect from_spec = case from_spec of IR.From_Table name as_name -> - Sql.code name . alias as_name + dialect.wrap_identifier name ++ here.alias dialect as_name IR.Join kind left_spec right_spec on -> left = here.generate_from_part dialect left_spec right = here.generate_from_part dialect right_spec @@ -128,7 +155,7 @@ generate_from_part dialect from_spec = case from_spec of left ++ (Sql.code " "+prefix+" ") ++ right ++ ons IR.Sub_Query columns context as_name -> sub = here.generate_query dialect (IR.Select columns context) - sub.paren.alias as_name + sub.paren ++ here.alias dialect as_name ## PRIVATE @@ -143,23 +170,40 @@ generate_order dialect order_description = IR.Nulls_Last -> Sql.code " NULLS LAST" (here.generate_expression dialect (order_description.first)) ++ order_suffix ++ nulls_suffix +## PRIVATE +generate_select_context : Internal_Dialect -> IR.Context -> Sql.Builder +generate_select_context dialect ctx = + gen_exprs exprs = exprs.map (here.generate_expression dialect) + from_part = here.generate_from_part dialect ctx.from_spec + where_part = (Sql.join " AND " (gen_exprs ctx.where_filters)) . prefix_if_present " WHERE " + group_part = (Sql.join ", " (gen_exprs ctx.groups)) . prefix_if_present " GROUP BY " + limit_part = case ctx.limit of + Nothing -> Sql.empty + Integer -> Sql.code " LIMIT "+ctx.limit.to_text + + orders = ctx.orders.map (here.generate_order dialect) + order_part = (Sql.join ", " orders) . 
prefix_if_present " ORDER BY " + (Sql.code " FROM ") ++ from_part ++ where_part ++ group_part ++ order_part ++ limit_part + +## PRIVATE +generate_insert_query dialect table_name pairs = + names = Sql.join ", " <| pairs.map (.first >> dialect.wrap_identifier) + values = Sql.join ", " <| pairs.map (.second >> here.generate_expression dialect) + into = dialect.wrap_identifier table_name + + Sql.code "INSERT INTO " ++ into ++ Sql.code " (" ++ names ++ Sql.code ") VALUES (" ++ values ++ Sql.code ")" + ## PRIVATE Builds code for a whole query. -generate_query : Internal_Dialect -> Query -> Sql.Builder +generate_query : Internal_Dialect -> IR.Query -> Sql.Builder generate_query dialect query = case query of IR.Select columns ctx -> - gen_exprs exprs = exprs.map (here.generate_expression dialect) - gen_column pair = (here.generate_expression dialect pair.second).alias pair.first - + gen_column pair = (here.generate_expression dialect pair.second) ++ here.alias dialect pair.first cols = Sql.join ", " (columns.map gen_column) - - from_part = here.generate_from_part dialect ctx.from_spec - where_part = (Sql.join " AND " (gen_exprs ctx.where_filters)) . prefix_if_present " WHERE " - group_part = (Sql.join ", " (gen_exprs ctx.groups)) . prefix_if_present " GROUP BY " - - orders = ctx.orders.map (here.generate_order dialect) - order_part = (Sql.join ", " orders) . prefix_if_present " ORDER BY " - - (Sql.code "SELECT ") ++ cols ++ (Sql.code " FROM ") ++ from_part ++ where_part ++ group_part ++ order_part - _ -> Error.throw "Unsupported query type." + (Sql.code "SELECT ") ++ cols ++ here.generate_select_context dialect ctx + IR.Select_All ctx -> + (Sql.code "SELECT * ") ++ here.generate_select_context dialect ctx + IR.Insert table_name pairs -> + here.generate_insert_query dialect table_name pairs + _ -> Error.throw <| Illegal_State_Error "Unsupported query type." diff --git a/distribution/std-lib/Database/src/Data/Internal/Helpers.enso b/distribution/std-lib/Database/src/Data/Internal/Helpers.enso index a4b919a3d142..77fdc02872bf 100644 --- a/distribution/std-lib/Database/src/Data/Internal/Helpers.enso +++ b/distribution/std-lib/Database/src/Data/Internal/Helpers.enso @@ -42,7 +42,7 @@ type Unsupported_Name_Error text external names shown to the user, but as a temporary solution we only allow Sql-safe names for columns. - # TODO [RW] better name handling in Tables + # TODO [RW] better name handling in Tables (#1513) ensure_name_is_sane : Text -> Boolean ! Unsupported_Name_Error ensure_name_is_sane name = is_safe = diff --git a/distribution/std-lib/Database/src/Data/Internal/IR.enso b/distribution/std-lib/Database/src/Data/Internal/IR.enso index 6beb54a176c5..00bc30118f9d 100644 --- a/distribution/std-lib/Database/src/Data/Internal/IR.enso +++ b/distribution/std-lib/Database/src/Data/Internal/IR.enso @@ -25,10 +25,12 @@ type Expression be interpolated when building the query. Arguments: + - sql_type: The SQL type that this object is going to be serialized to. + It is usually inferred from the expression's context. - value: the value to be interpolated; it should be a simple Number, Text or other types that are serializable for JDBC. - # type Constant (value : Any) - type Constant value + # type Constant (sql_type : Sql.Sql_Type) (value : Any) + type Constant sql_type value ## PRIVATE @@ -44,6 +46,16 @@ type Expression # type Operation (kind : Text) (expressions : Vector Expression) type Operation kind expressions +type Internal_Column + ## PRIVATE + + An internal column structure. 
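+ + Arguments: + - name: the name of the column. + - sql_type: the SQL type of the column. + - expression: the expression which the column is bound to.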
+ type Internal_Column name sql_type expression + + ## PRIVATE + rename : Text -> Internal_Column + rename new_name = Internal_Column new_name this.sql_type this.expression + ## PRIVATE A context associated with an SQL query. @@ -68,40 +80,49 @@ - groups: a list of grouping expressions, for each entry a GROUP BY is added, the resulting query can then directly include only the grouped-by columns or aggregate expressions. - - meta_index: a piece of meta-data which specifies default columns for - joining or grouping. + - meta_index: a list of internal columns to use for joining or grouping. + - limit: an optional maximum number of elements that the query should + return. # type Context (from_spec : From_Spec) (where_filters : Vector Expression) # (orders : Vector [Expression, Order_Direction, Nulls_Order]) - # (groups : Vector Expression) (meta_index : Vector Expression) - type Context from_spec where_filters orders groups meta_index + # (groups : Vector Expression) (meta_index : Vector Internal_Column) + # (limit : Nothing | Integer) + type Context from_spec where_filters orders groups meta_index limit ## PRIVATE Returns a copy of the context with changed `meta_index`. - set_index : Vector Text -> Context + set_index : Vector Internal_Column -> Context set_index new_index = - Context this.from_spec this.where_filters this.orders this.groups new_index + Context this.from_spec this.where_filters this.orders this.groups new_index this.limit ## PRIVATE Returns a copy of the context with changed `where_filters`. set_where_filters : Vector Expression -> Context set_where_filters new_filters = - Context this.from_spec new_filters this.orders this.groups this.meta_index + Context this.from_spec new_filters this.orders this.groups this.meta_index this.limit ## PRIVATE Returns a copy of the context with changed `orders`. # set_orders : Vector [Expression, Order_Direction] -> Context set_orders new_orders = - Context this.from_spec this.where_filters new_orders this.groups this.meta_index + Context this.from_spec this.where_filters new_orders this.groups this.meta_index this.limit ## PRIVATE Returns a copy of the context with changed `groups`. set_groups : Vector Expression -> Context set_groups new_groups = - Context this.from_spec this.where_filters this.orders new_groups this.meta_index + Context this.from_spec this.where_filters this.orders new_groups this.meta_index this.limit + + ## PRIVATE + + Returns a copy of the context with changed `limit`. + set_limit : (Nothing | Integer) -> Context + set_limit new_limit = + Context this.from_spec this.where_filters this.orders this.groups this.meta_index new_limit ## PRIVATE @@ -241,21 +262,34 @@ type Query # type Select (expressions : [Text, Expression]) (context : Context) type Select columns context + ## PRIVATE + + A Select SQL query that gets all columns in a table. + type Select_All context + + ## PRIVATE + + An Insert SQL query that inserts a single row into the table. + + Arguments: + - table_name: the name of the table to insert into. + - pairs: a list of pairs consisting of a column name and an expression. + type Insert table_name pairs + ## PRIVATE + + Creates a query context that just fetches data from a table, without any + additional processing. + make_ctx_from : Text -> Context + make_ctx_from table_name = - Context (From_Table table_name table_name) [] [] [] [] + Context (From_Table table_name table_name) [] [] [] [] Nothing ## PRIVATE Creates an expression which is a simple constant to be interpolated.
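+ + > Example + Creating a constant to be interpolated as a boolean (illustrative only): + IR.make_constant Sql.Sql_Type.boolean True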
-make_constant : Any -> Expression -make_constant x = - # TODO [RW] may add some sanitization, like checking if the value type is supported - Constant x +make_constant : Sql.Sql_Type -> Any -> Expression +make_constant sql_type x = + Constant sql_type x ## PRIVATE @@ -265,6 +299,15 @@ substitute_origin : Text -> Text -> Expression substitute_origin old_origin new_origin expr = case expr of Column origin name -> if origin == old_origin then Column new_origin name else expr - Constant _ -> expr + Constant _ _ -> expr Operation kind exprs -> Operation kind (exprs.map (here.substitute_origin old_origin new_origin)) + +## PRIVATE + + Lifts a function mapping expressions into a function mapping internal columns + which applies the original function to their expressions, leaving other + fields as-is. +lift_expression_map : (Expression -> Expression) -> Internal_Column -> Internal_Column +lift_expression_map f col = + Internal_Column col.name col.sql_type (f col.expression) diff --git a/distribution/std-lib/Database/src/Data/Sql.enso b/distribution/std-lib/Database/src/Data/Sql.enso index ecbcfaa8a3c8..562194bff2b7 100644 --- a/distribution/std-lib/Database/src/Data/Sql.enso +++ b/distribution/std-lib/Database/src/Data/Sql.enso @@ -1,5 +1,51 @@ from Base import all import Database.Data.Internal.Vector_Builder +polyglot java import java.sql.Types + +## Represents an internal SQL data-type. +type Sql_Type + ## Represents an internal SQL data-type. + + Arguments: + - typeid: a numerical type id, as defined in `java.sql.Types`. + type Sql_Type typeid + + ## The SQL representation of `Boolean` type. + boolean : Sql_Type + boolean = Sql_Type Types.BOOLEAN + + ## The SQL representation of `Integer` type. + integer : Sql_Type + integer = Sql_Type Types.INTEGER + + ## The SQL type representing decimal numbers. + decimal : Sql_Type + decimal = Sql_Type Types.DECIMAL + + ## PRIVATE + + Returns True if this type represents an integer. It only handles the + standard types so it may return false negatives for non-standard ones. + is_definitely_integer : Boolean + is_definitely_integer = + [Types.INTEGER, Types.SMALLINT, Types.TINYINT].contains this.typeid + + ## PRIVATE + + Returns True if this type represents a boolean. It only handles the + standard types so it may return false negatives for non-standard ones. + is_definitely_boolean : Boolean + is_definitely_boolean = + this.typeid == Types.BOOLEAN + + ## PRIVATE + + Returns True if this type represents a floating point number. It only + handles the standard types so it may return false negatives for + non-standard ones. + is_definitely_double : Boolean + is_definitely_double = + [Types.FLOAT, Types.DOUBLE, Types.REAL].contains this.typeid ## UNSTABLE @@ -13,13 +59,12 @@ type Sql_Fragment A SQL fragment that represents raw SQL code. # type Sql_Code_Part (code : Text) type Sql_Code_Part code - ## UNSTABLE A SQL fragment that represents an object which will be interpolated into the query. - # type Sql_Interpolation (object : Any) - type Sql_Interpolation object + # type Sql_Interpolation (sql_type : Sql_Type) (object : Any) + type Sql_Interpolation sql_type object type Statement ## UNSTABLE @@ -58,23 +103,23 @@ type Statement strings = this.internal_fragments . 
map <| case _ of
Sql_Code_Part code -> code
# TODO at some point we may try more sophisticated serialization based on data type
- Sql_Interpolation obj -> case obj of
+ Sql_Interpolation _ obj -> case obj of
Number -> obj.to_text
_ -> "'" + obj.to_text.replace "'" "''" + "'"
strings.join ""

## UNSTABLE

- Returns a pair consisting of the Sql code with holes for values and
+ Returns a pair consisting of the SQL code with holes for values and
a list of values that should be substituted.
# prepare : [Text, Vector Any]
prepare =
to_code fragment = case fragment of
Sql_Code_Part code -> code
- Sql_Interpolation _ -> "?"
+ Sql_Interpolation _ _ -> "?"
to_subst fragment = case fragment of
Sql_Code_Part _ -> []
- Sql_Interpolation obj -> [obj]
+ Sql_Interpolation typ obj -> [[obj, typ]]
sql = this.fragments.map to_code . join ""
substitutions = this.fragments.flat_map to_subst
[sql, substitutions]
@@ -86,8 +131,8 @@ type Statement
to_json =
jsonify fragment = case fragment of
Sql_Code_Part code -> Json.from_pairs [["sql_code", code]]
- Sql_Interpolation obj ->
- inner = obj.to_json
+ Sql_Interpolation (Sql_Type typeid) obj ->
+ inner = Json.from_pairs [["value", obj.to_json], ["typeid", typeid]]
Json.from_pairs [["sql_interpolation", inner]]
fragments = Json.Array (this.internal_fragments.map jsonify)
Json.from_pairs [["query", fragments]]
@@ -122,13 +167,6 @@ type Builder
fragments = here.optimize_fragments this.fragments.build
Statement fragments

- ## UNSTABLE
-
- Appends `AS name` to the current fragment.
- alias : Text -> Builder
- alias name =
- this ++ here.code " AS "+name
-
## UNSTABLE

Wraps the code fragment in parentheses.
@@ -167,8 +205,8 @@ code text =
## UNSTABLE

Creates a Builder representing an interpolation of the given object.
-interpolation : Any -> Builder
-interpolation object = Builder (Vector_Builder.from_vector [Sql_Interpolation object])
+interpolation : Sql_Type -> Any -> Builder
+interpolation sql_type object = Builder (Vector_Builder.from_vector [Sql_Interpolation sql_type object])

## UNSTABLE
@@ -197,7 +235,7 @@ optimize_fragments fragments =
Nothing -> Sql_Code_Part code
Sql_Code_Part other -> Sql_Code_Part other+code
State.put Sql_Code_Part new_part
- Sql_Interpolation _ ->
+ Sql_Interpolation _ _ ->
case last_part of
Nothing -> Nothing
Sql_Code_Part _ ->
diff --git a/distribution/std-lib/Database/src/Data/Table.enso b/distribution/std-lib/Database/src/Data/Table.enso
index a686121b487b..b1733bdefa1b 100644
--- a/distribution/std-lib/Database/src/Data/Table.enso
+++ b/distribution/std-lib/Database/src/Data/Table.enso
@@ -1,20 +1,24 @@
from Base import all
-import Table.Data.Table
+import Table.Data.Table as Materialized_Table
+import Table.Data.Column as Materialized_Column
+import Table.Internal.Java_Exports
from Table.Data.Table import No_Such_Column_Error
from Database.Data.Column as Column_Module import all
from Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
import Database.Data.Internal.Helpers
import Database.Data.Internal.IR
+from Database.Data.Internal.IR import Internal_Column
+import Database.Data.Sql
+
+polyglot java import java.sql.JDBCType

## Represents a column-oriented table data structure backed by a database.
type Table
-
- ## UNSTABLE
+ ## PRIVATE

Represents a column-oriented table data structure backed by a database.
- # type Table (name : Text) (connection : Connection)
- # (internal_columns : Vector [Text, IR.Expression])
+ # (internal_columns : Vector Internal_Column)
# (context : IR.Context)
type Table name connection internal_columns context
@@ -27,7 +31,10 @@ type Table
- format_terminal: whether ANSI-terminal formatting should be used
display : Integer -> Boolean -> Text
display show_rows=10 format_terminal=False =
- this.to_dataframe.display show_rows format_terminal
+ df = this.reset_index.to_dataframe max_rows=show_rows
+ indices_count = this.context.meta_index.length
+ all_rows_count = this.row_count
+ here.display_dataframe df indices_count all_rows_count format_terminal

## UNSTABLE
@@ -43,14 +50,17 @@ type Table
Converts this table to a JSON structure.
to_json : Json
- to_json = this.to_sql.to_json
+ to_json = case this.internal_columns.is_empty of
+ True ->
+ Json.from_pairs [["query", Nothing], ["message", "The table has no columns so a query cannot be generated."]]
+ False -> this.to_sql.to_json

## UNSTABLE

Returns the column with the given name.
at : Text -> Column ! UnknownColumnError
at name =
- internal = this.internal_columns.find (p -> p.first == name)
+ internal = this.internal_columns.find (p -> p.name == name)
this.make_column internal . map_error (_ -> No_Such_Column_Error name)

## PRIVATE
@@ -62,7 +72,7 @@ type Table
Text -> Panic.rethrow (this.at column)
_ -> if Helpers.check_integrity this column then column else
- Panic.throw (IntegrityError "Column "+column.name)
+ Panic.throw (Integrity_Error "Column "+column.name)

## UNSTABLE
@@ -77,12 +87,44 @@ type Table
where filter = case Helpers.check_integrity this filter of
False ->
- Error.throw (IntegrityError "Column "+filter.name)
+ Error.throw (Integrity_Error "Column "+filter.name)
True ->
new_filters = this.context.where_filters + [filter.expression]
new_ctx = this.context.set_where_filters new_filters
this.updated_context new_ctx

+ ## UNSTABLE
+
+ Returns a new Table that will include at most `max_rows` rows from the
+ original Table.
+
+ Since this Table is backed by an SQL database, the Table returned by the
+ `limit` method is deterministic only if the Table has been ordered (using
+ the `sort` method).
+
+ Otherwise, no order is imposed, so the returned Table will include at most
+ `max_rows` rows, but there are no guarantees on which rows will be
+ selected. Moreover, even if the underlying table in the database did not
+ change, different sets of rows may be returned each time the returned
+ Table is materialized.
+
+ The limit is applied at the very end, so the new Table behaves exactly as
+ the old one, just limiting its results when materialized.
+ Specifically, further filters will still apply to the whole result set,
+ and the limit will be taken only after applying them.
+
+ > For example:
+ In the calls below, assuming that the table backing `t1` contains rows
+ with the numbers 1, 2, ..., 10, `t2.to_dataframe` will return rows
+ starting from 6, and not the empty result one could expect if the limit
+ were applied before the filters.
+ t1 = table.sort by='A' . limit 5
+ t2 = t1.where (t1.at 'A' > 5)
+ t2.to_dataframe
+ limit : Integer -> Table
+ limit max_rows =
+ new_ctx = this.context.set_limit max_rows
+ this.updated_context new_ctx
+
## UNSTABLE

Sets the column value at the given name.
If a column with the given name
@@ -90,14 +132,18 @@ type Table
set : Text -> Column -> Table
set name column = case Helpers.ensure_name_is_sane name of
True ->
- new_col = [name, column.expression]
- replace = this.internal_columns.exists (c -> c.first == name)
- case replace of
- True ->
- new_cols = this.internal_columns.map (c -> if c.first == name then new_col else c)
- this.updated_columns new_cols
+ is_used_in_index = this.context.meta_index.exists i-> i.name == name
+ case is_used_in_index of
+ True -> Error.throw <| Illegal_State_Error "Cannot override column "+name+", because it is used as an index. Remove the index or use a different name."
False ->
- this.updated_columns (this.internal_columns + [new_col])
+ new_col = Internal_Column name column.sql_type column.expression
+ replace = this.internal_columns.exists (c -> c.name == name)
+ case replace of
+ True ->
+ new_cols = this.internal_columns.map (c -> if c.name == name then new_col else c)
+ this.updated_columns new_cols
+ False ->
+ this.updated_columns (this.internal_columns + [new_col])

## UNSTABLE
@@ -110,9 +156,21 @@ type Table
Sets the index of this table, using the column with the provided name.
set_index : Text | Column | Vector (Text | Column) -> Table
set_index index = Panic.recover <|
- new_index = (Helpers.unify_vector_singleton index).map (this.resolve >> .expression)
+ new_index = (Helpers.unify_vector_singleton index).map (this.resolve >> .as_internal)
new_ctx = this.context.set_index new_index
- this.updated_context new_ctx
+ new_cols = this.internal_columns.filter col->
+ turned_into_index = new_index.exists i-> i.name == col.name
+ turned_into_index.not
+ this.updated_context new_ctx . updated_columns new_cols
+
+ ## Returns the index (or indexes) of this table, as a column (indexed by itself).
+ Returns `Nothing` if there is no index set.
+ index : Column | Vector Column | Nothing
+ index =
+ ixes = this.context.meta_index.map this.make_column
+ len = this.context.meta_index.length
+ if len == 0 then Nothing else
+ if len == 1 then ixes.at 0 else ixes

## UNSTABLE
@@ -139,6 +197,33 @@ type Table
particular rules of the `by` argument. Note that this argument is
independent of `order`, i.e. missing values will always be sorted
according to this rule, ignoring the ascending / descending setting.
+
+ > Example
+ Sorting `table` in ascending order by the value in column `'Quantity'`
+ table.sort by='Quantity'
+
+ > Example
+ Sorting `table` in descending order by the value in column `'Quantity'`,
+ placing missing values at the top of the table.
+ table.sort by='Quantity' order=Sort_Order.Descending missing_last=False
+
+ > Example
+ Sorting `table` in ascending order by the value in column `'Quantity'`,
+ using the value in column `'Rating'` for breaking ties.
+ table.sort by=['Quantity', 'Rating']
+
+ > Example
+ Sorting `table` in ascending order by the value in column `'Quantity'`,
+ using the value in column `'Rating'` in descending order for breaking
+ ties.
+ table.sort by=['Quantity', Order_Rule 'Rating' (order=Sort_Order.Descending)]
+
+ > Example
+ Sorting `table` in ascending order by the value in an externally
+ computed column, using the value in column `'Rating'` for breaking
+ ties.
+ quality_ratio = table.at 'Rating' / table.at 'Price' + table.sort by=[quality_ratio, 'Rating'] sort : Text | Column | Order_Rule | Vector.Vector (Text | Column | Order_Rule) -> Sort_Order -> Boolean -> Table sort by order=Sort_Order.Ascending missing_last=True = Panic.recover <| order_to_ir = case _ of @@ -151,13 +236,13 @@ type Table [this.resolve elem . expression, order_to_ir order, missing_to_ir missing_last] to_ir elem = case elem of Text -> wrap_elem elem - Column _ _ _ _ -> wrap_elem elem + Column _ _ _ _ _ -> wrap_elem elem Order_Rule elem Nothing my_order my_nulls -> chosen_order = if my_order.is_nothing then order else my_order chosen_nulls = if my_nulls.is_nothing then missing_last else my_nulls [this.resolve elem . expression, order_to_ir chosen_order, missing_to_ir chosen_nulls] Order_Rule _ _ _ _ -> - Error.throw "Custom comparators are not supported in Database" + Error.throw <| Illegal_State_Error "Custom comparators are not supported in Database" elems = Helpers.unify_vector_singleton by . map to_ir new_ctx = this.context.set_orders elems this.updated_context new_ctx @@ -167,7 +252,7 @@ type Table Selects a subset of columns from this table by name. select : Vector Text -> Table select columns = - find_col = (name -> this.internal_columns.find (p -> p.first == name)) + find_col = (name -> this.internal_columns.find (p -> p.name == name)) selected_cols = columns.map (find_col >> .catch) . filter (c -> c.is_nothing.not) this.updated_columns selected_cols @@ -193,19 +278,20 @@ type Table join : Table -> Nothing | Text | Column | Vector (Text | Column) -> Boolean -> Text -> Text -> Table join other on=Nothing drop_unmatched=False left_suffix='_left' right_suffix='_right' = Panic.recover <| Panic.rethrow (Helpers.ensure_name_is_sane left_suffix && Helpers.ensure_name_is_sane right_suffix) + if left_suffix == right_suffix then + Panic.throw <| Illegal_State_Error "left_suffix must be different from right_suffix" kind = if drop_unmatched then IR.Join_Inner else IR.Join_Left - my_index : Vector Expression + my_index : Vector Internal_Column my_index = case on of Nothing -> this.context.meta_index _ -> - (Helpers.unify_vector_singleton on).map (this.resolve >> .expression) + (Helpers.unify_vector_singleton on).map (this.resolve >> .as_internal) other_index = other.context.meta_index case my_index.length == other_index.length of - False -> Panic.throw "Cannot join with multi-indexes of different lengths." + False -> Panic.throw <| Illegal_State_Error "Cannot join with multi-indexes of different lengths." 
True -> - ## TODO [RW] we may be able to avoid creating subqueries if there are no groups, orders or wheres, - so it may be worth optimizing that here - + # TODO [RW] we may be able to avoid creating subqueries if there are no groups, orders or wheres, + # so it may be worth optimizing that here (#1515) new_table_name = this.name + "_" + other.name aliases = case this.name == other.name of True -> [this.name+left_suffix, other.name+right_suffix] @@ -213,28 +299,31 @@ type Table left_alias = aliases.first right_alias = aliases.second - left_query = IR.Sub_Query this.internal_columns this.context left_alias - right_query = IR.Sub_Query other.internal_columns other.context right_alias + left_subquery_cols = this.internal_columns_with_index.map c-> [c.name, c.expression] + right_subquery_cols = other.internal_columns_with_index.map c-> [c.name, c.expression] + left_query = IR.Sub_Query left_subquery_cols this.context left_alias + right_query = IR.Sub_Query right_subquery_cols other.context right_alias - left_renamed_index = my_index.map (IR.substitute_origin this.name left_alias) - right_renamed_index = other_index.map (IR.substitute_origin other.name right_alias) - on_exprs = left_renamed_index.zip right_renamed_index (l -> r -> IR.Operation "=" [l, r]) + left_renamed_index = my_index.map <| + IR.lift_expression_map (IR.substitute_origin this.name left_alias) + right_renamed_index = other_index.map <| + IR.lift_expression_map (IR.substitute_origin other.name right_alias) + on_exprs = left_renamed_index.zip right_renamed_index l-> r-> + IR.Operation "=" [l.expression, r.expression] new_index = left_renamed_index new_from = IR.Join kind left_query right_query on_exprs - new_ctx = IR.Context new_from [] [] [] new_index + new_limit = Nothing + new_ctx = IR.Context new_from [] [] [] new_index new_limit - left_names = Map.from_vector (this.internal_columns.map (p -> [p.first, True])) - right_names = Map.from_vector (other.internal_columns.map (p -> [p.first, True])) - rename suffix other_names this_name = - if other_names.get_or_else this_name False then this_name+suffix else this_name - rename_left = rename left_suffix right_names - rename_right = rename right_suffix left_names + new_names = here.combine_names (this.internal_columns.map .name) (other.internal_columns.map .name) left_suffix right_suffix + left_names = new_names.first + right_names = new_names.second - new_left_columns = this.internal_columns.map p-> - [rename_left p.first, IR.Column left_alias p.first] - new_right_columns = other.internal_columns.map p-> - [rename_right p.first, IR.Column right_alias p.first] + new_left_columns = this.internal_columns.zip left_names p-> new_name-> + Internal_Column new_name p.sql_type (IR.Column left_alias p.name) + new_right_columns = other.internal_columns.zip right_names p-> new_name-> + Internal_Column new_name p.sql_type (IR.Column right_alias p.name) new_columns = new_left_columns + new_right_columns @@ -248,14 +337,15 @@ type Table If the `by` argument is not set, the index is used for grouping instead. group : Vector Text | Text | Nothing -> Aggregate_Table group by=Nothing = Panic.recover <| - exprs = case by of + cols = case by of Nothing -> - if this.context.meta_index.is_empty then Panic.throw "Trying to group by an empty index." else + if this.context.meta_index.is_empty then Panic.throw <| Illegal_State_Error "Trying to group by an empty index." else this.context.meta_index _ -> - Helpers.unify_vector_singleton by . 
map (this.resolve >> .expression)
- new_ctx = this.context.set_groups exprs
- # TODO [RW] when doing materialization, make sure that the index is set properly so that `display` will include the group names
+
+ Helpers.unify_vector_singleton by . map (this.resolve >> .as_internal)
+ exprs = cols.map .expression
+ new_ctx = this.context.set_groups exprs . set_index cols
Aggregate_Table this.name this.connection this.internal_columns new_ctx

## UNSTABLE
@@ -268,21 +358,111 @@ type Table
new_ctx = this.context.set_where_filters (this.context.where_filters + filters)
this.updated_context new_ctx

+ ## Returns a new Table without columns that contained any missing values.
+
+ This operation needs to actually materialize the underlying query in
+ order to know which columns to drop.
+ drop_missing_columns : Table
+ drop_missing_columns =
+ rows_expr = IR.Operation "COUNT_ROWS" []
+ all_rows_column_name = "row_count"
+ make_count_expr expr = IR.Operation "COUNT" [expr]
+ cols = this.internal_columns.map (c -> [c.name, make_count_expr c.expression])
+ query = IR.Select [[all_rows_column_name, rows_expr]]+cols this.context
+ sql = this.connection.dialect.generate_sql query
+ table = this.connection.execute_query sql
+ all_rows = table.at all_rows_column_name . at 0
+ kept_columns = this.internal_columns . filter c->
+ all_rows == table.at c.name . at 0
+ this.updated_columns kept_columns
+
+ ## Returns the number of rows in this table.
+ row_count : Integer
+ row_count =
+ expr = IR.Operation "COUNT_ROWS" []
+ column_name = "row_count"
+ query = IR.Select [[column_name, expr]] this.context
+ sql = this.connection.dialect.generate_sql query
+ table = this.connection.execute_query sql
+ table.at column_name . at 0
+
+ ## UNSTABLE
+
+ Returns a materialized dataframe containing rows of this table.
+
+ Currently the SQL decimal type is mapped to a floating-point
+ representation which is not exact. While operations in the database
+ itself may (or may not, depending on the database engine) be done with
+ exact precision, the materialized values may lose precision, so extra
+ care is needed when working with decimal computations that need to be
+ exact.
+
+ Arguments:
+ - max_rows: specifies the maximum number of rows to fetch; if not set,
+ all available rows are fetched.
+ to_dataframe : (Integer | Nothing) -> Materialized_Table.Table
+ to_dataframe max_rows=Nothing =
+ case this.context.meta_index.length > 1 of
+ True -> Error.throw <| Illegal_State_Error "Multi-indexes are not implemented in the dataframes, if you want to materialize such a Table, remove the index first using `set_index`."
+ False ->
+ preprocessed = this.reset_index.limit max_rows
+ case preprocessed.internal_columns.is_empty of
+ True ->
+ internal_table = Java_Exports.make_table_without_columns this.row_count
+ Materialized_Table.Table internal_table
+ False ->
+ sql = preprocessed.to_sql
+ expected_types = preprocessed.internal_columns.map .sql_type
+ table = this.connection.execute_query sql expected_types
+ case this.context.meta_index.length == 1 of
+ False -> table
+ True ->
+ ix_col_name = table.columns.first.name
+ table.set_index ix_col_name
+
+ ## PRIVATE
+
+ Brings the index back as columns.
+ reset_index : Table
+ reset_index =
+ new_cols = this.internal_columns_with_index
+ new_ctx = this.context.set_index []
+ this.updated_context new_ctx . updated_columns new_cols
+
## UNSTABLE

- Returns an Sql statement that will be used for materializing this table.
+ Returns an SQL statement that will be used for materializing this table.
to_sql : Sql.Statement
to_sql =
+ cols = this.internal_columns.map (c -> [c.name, c.expression])
+ case cols.is_empty of
+ True -> Error.throw <| Illegal_State_Error "Cannot generate SQL for a table with no columns."
+ False ->
+ query = IR.Select cols this.context
+ this.connection.dialect.generate_sql query
+
+ ## Returns a Table describing this table's contents.
+
+ The table lists all columns, counts of non-null items and the SQL types
+ of each column.
+ info : Table
+ info =
cols = this.internal_columns
- query = IR.Select cols this.context
- this.connection.dialect.generate_sql query
+ count_columns = cols.map c-> IR.Internal_Column c.name Sql.Sql_Type.integer (IR.Operation "COUNT" [c.expression])
+ count_table = this.updated_columns count_columns . to_dataframe
+ counts = count_table.columns.map c-> c.at 0
+ column_type_as_text col =
+ id = col.sql_type.typeid
+ JDBCType.valueOf id . getName
+ types = cols.map column_type_as_text
+ Materialized_Table.new [["Column", cols.map .name], ["Items Count", counts], ["SQL Type", types]] . set_index "Column"

## PRIVATE

Helper to create columns from internal columns.
- # make_column : [Text, IR.Expression] -> Column
- make_column pair =
- Column pair.first this.connection pair.second this.context
+ make_column : Internal_Column -> Column
+ make_column internal =
+ Column internal.name this.connection internal.sql_type internal.expression this.context

## PRIVATE
@@ -294,6 +474,37 @@ type Table
Returns a copy of this table with updated context.
updated_context ctx = Table this.name this.connection this.internal_columns ctx

+ ## PRIVATE
+
+ Returns a vector that contains first the internal representations of all
+ indices and then all columns.
+ internal_columns_with_index : Vector Internal_Column
+ internal_columns_with_index =
+ this.context.meta_index + this.internal_columns
+
+ ## PRIVATE
+
+ Inserts a new row into the table. It actually modifies the underlying
+ table in the database.
+
+ It can only be called on a Table on which no modifying operations have
+ been performed, such as adding, removing or modifying columns, filtering
+ or grouping.
+ insert : Vector Any -> Nothing
+ insert values =
+ table_name = case this.context.from_spec of
+ IR.From_Table name _ -> name
+ _ -> Error.throw <| Illegal_State_Error "Inserting can only be performed on tables as returned by `access_table`, any further processing is not allowed."
+ # TODO [RW] before removing the PRIVATE tag, add a check that the table has not been modified as described above
+ pairs = this.internal_columns.zip values col-> value->
+ [col.name, IR.Constant col.sql_type value]
+ query = this.connection.dialect.generate_sql <| IR.Insert table_name pairs
+ affected_rows = this.connection.execute_update query
+ case affected_rows == 1 of
+ False -> Error.throw <| Illegal_State_Error "The update unexpectedly affected "+affected_rows.to_text+" rows."
+ True -> Nothing
+
+
## Represents a table with grouped rows.
type Aggregate_Table
@@ -317,46 +528,31 @@ type Aggregate_Table
count : Column
count =
expr = IR.Operation "COUNT_ROWS" []
- Column "count" this.connection expr this.context
+ Column "count" this.connection Sql.Sql_Type.integer expr this.context

## UNSTABLE

Returns an aggregate column with the given name, contained in this table.
at : Text -> Column !
No_Such_Column_Error
at name =
- internal = this.internal_columns.find (p -> p.first == name)
+ internal = this.internal_columns.find (p -> p.name == name)
this.make_column internal . map_error (_ -> No_Such_Column_Error name)

- ## UNSTABLE
-
- Prints an ASCII-art table with this data to the standard output.
-
- Arguments:
- - show_rows: the number of initial rows that should be displayed.
- print : Integer -> Nothing
- print show_rows=10 = this.values.print show_rows
-
## PRIVATE

Helper to create aggregate columns from internal columns.
- # make_column : [Text, IR.Expression] -> Aggregate_Column
- make_column pair =
- Aggregate_Column pair.first this.connection pair.second this.context
-
-type InternalColumn
- ## PRIVATE
-
- An internal column structure which is a part of the Table.
- type InternalColumn external_name internal_name expression
+ # make_column : Internal_Column -> Aggregate_Column
+ make_column internal =
+ Aggregate_Column internal.name this.connection internal.sql_type internal.expression this.context

-type IntegrityError
+type Integrity_Error
## UNSTABLE

Signals that an operation tried to use objects coming from different
contexts. To use columns from different tables, you must first join them.
- type IntegrityError object_description
+ type Integrity_Error object_description

# Returns a readable description of this error.
to_text : Text
@@ -365,8 +561,74 @@ type IntegrityError
## PRIVATE

Creates a Table out of a connection, a table name and a list of column
names with their SQL types.
-make_table : Connection -> Text -> Vector Text -> Table
-make_table connection table_name column_names =
+ # make_table : Connection -> Text -> Vector [Text, Sql.Sql_Type] -> Table
+make_table connection table_name columns =
ctx = IR.make_ctx_from table_name
- cols = column_names.map (name -> [name, IR.Column table_name name])
+ cols = columns.map (p -> Internal_Column p.first p.second (IR.Column table_name p.first))
Table table_name connection cols ctx
+
+## PRIVATE
+
+ Renders an ASCII-art representation for a Table from a dataframe that
+ contains a fragment of the underlying data and the count of all rows.
+
+ Arguments:
+ - df: the materialized dataframe that contains the data to be displayed;
+ it should have no indices set.
+ - indices_count: indicates how many columns from the materialized dataframe
+ should be treated as indices in the display (index columns will be bold if
+ `format_terminal` is enabled).
+ - all_rows_count: the count of all rows in the underlying Table; if
+ `all_rows_count` is greater than the number of rows in `df`, an additional
+ line will be included saying how many hidden rows there are.
+ - format_terminal: a boolean flag, specifying whether to use ANSI escape
+ codes for rich formatting in the terminal.
+display_dataframe : Materialized_Table.Table -> Integer -> Integer -> Boolean -> Text
+display_dataframe df indices_count all_rows_count format_terminal =
+ cols = Vector.Vector df.java_table.getColumns
+ col_names = cols.map .getName
+ col_vals = cols.map .getStorage
+ display_rows = df.row_count
+ rows = Vector.new display_rows row_num->
+ col_vals.map col->
+ if col.isNa row_num then "Nothing" else Materialized_Column.get_item_string col row_num
+ table = Materialized_Table.print_table col_names rows indices_count format_terminal
+ if display_rows == all_rows_count then table else
+ missing_rows_count = all_rows_count - display_rows
+ missing = '\n\u2026 and ' + missing_rows_count.to_text + ' hidden rows.'
+ table + missing
+
+## PRIVATE
+
+ Creates a list of non-colliding names by merging the two lists and
+ appending suffixes if necessary.
+
+ If unique names cannot be achieved even after appending the suffixes,
+ it throws a panic. It returns two vectors, one for each input list. It
+ assumes that the names within each input list are already unique.
+combine_names left_names right_names left_suffix right_suffix =
+ make_count_map names =
+ map = names.fold Map.empty acc-> name->
+ count = acc.get_or_else name 0 + 1
+ acc.insert name count
+ name-> map.get_or_else name 0
+ original_names_count = make_count_map left_names+right_names
+ add_suffix_if_necessary suffix name = case original_names_count name > 1 of
+ True -> [name, name+suffix]
+ False -> [name, name]
+ left_pairs = left_names.map <| add_suffix_if_necessary left_suffix
+ right_pairs = right_names.map <| add_suffix_if_necessary right_suffix
+
+ new_names_count = make_count_map (left_pairs+right_pairs . map .second)
+ catch_ambiguity pairs = pairs.each pair->
+ original_name = pair.first
+ new_name = pair.second
+ case new_name!=original_name && (new_names_count new_name > 1) of
+ True ->
+ Panic.throw <| Illegal_State_Error "Duplicate column "+original_name+" was about to be renamed to "+new_name+" to disambiguate column names, but a column with name "+new_name+" already exists too. Please rename the columns before joining to avoid ambiguity."
+ False -> Nothing
+ catch_ambiguity left_pairs
+ catch_ambiguity right_pairs
+ new_left_names = left_pairs.map .second
+ new_right_names = right_pairs.map .second
+ [new_left_names, new_right_names]
diff --git a/distribution/std-lib/Database/src/Main.enso b/distribution/std-lib/Database/src/Main.enso
index f85b6a9adb4b..438744f6b78b 100644
--- a/distribution/std-lib/Database/src/Main.enso
+++ b/distribution/std-lib/Database/src/Main.enso
@@ -1,5 +1,14 @@
import Database.Data.Table
+import Database.Data.Column
import Database.Connection.Connection
+import Database.Connection.Database
export Database.Data.Table
+export Database.Data.Column
export Database.Connection.Connection
+from Database.Connection.Database export all
+
+import Table.Data.Table
+import Table.Data.Order_Rule
+from Table.Data.Table export No_Such_Column_Error
+from Table.Data.Order_Rule export Order_Rule
diff --git a/distribution/std-lib/Table/src/Data/Column.enso b/distribution/std-lib/Table/src/Data/Column.enso
index 20d85a70754b..a0d6869e9974 100644
--- a/distribution/std-lib/Table/src/Data/Column.enso
+++ b/distribution/std-lib/Table/src/Data/Column.enso
@@ -25,7 +25,7 @@ type Column
row = if storage.isNa num then "Nothing" else here.get_item_string storage num
[index.ilocString num, row]
- table = Table.print_table [index.getName, col_name] items format_terminal
+ table = Table.print_table [index.getName, col_name] items 1 format_terminal
if num_rows - display_rows <= 0 then table else
missing = '\n\u2026 and ' + (num_rows - display_rows).to_text + ' hidden rows.'
table + missing
@@ -357,7 +357,7 @@ type Column
> Example
Sorting `column` in descending order, placing missing values at the
top of the resulting column.
- table.sort order=Sort_Order.Descending missing_last=False
+ column.sort order=Sort_Order.Descending missing_last=False

> Sorting `column` in ascending order, using a custom comparator
function.
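The renaming scheme implemented by `combine_names` above boils down to: a name receives its side's suffix only when it occurs in both inputs, and if a suffixed name still collides with another resulting name the operation panics, asking the user to rename columns before joining. For readers who find the fold-based Enso version dense, here is a minimal, self-contained Java sketch of the same idea (the class and method names are illustrative only and are not part of this patch):

import java.util.*;

/** Illustrative sketch of the suffix-based disambiguation in combine_names. */
public final class CombineNamesSketch {
    /** Returns [newLeftNames, newRightNames]; throws if disambiguation fails. */
    static List<List<String>> combineNames(
            List<String> left, List<String> right, String leftSuffix, String rightSuffix) {
        // Count how often each name occurs across both inputs.
        Map<String, Integer> counts = new HashMap<>();
        for (String n : left) counts.merge(n, 1, Integer::sum);
        for (String n : right) counts.merge(n, 1, Integer::sum);

        // A name gets its side's suffix only if it occurs more than once overall.
        List<String> newLeft = new ArrayList<>();
        for (String n : left) newLeft.add(counts.get(n) > 1 ? n + leftSuffix : n);
        List<String> newRight = new ArrayList<>();
        for (String n : right) newRight.add(counts.get(n) > 1 ? n + rightSuffix : n);

        // If a renamed column still collides with another resulting name, give up:
        // the user must rename columns before joining.
        Map<String, Integer> newCounts = new HashMap<>();
        for (String n : newLeft) newCounts.merge(n, 1, Integer::sum);
        for (String n : newRight) newCounts.merge(n, 1, Integer::sum);
        for (int i = 0; i < left.size(); i++)
            if (!newLeft.get(i).equals(left.get(i)) && newCounts.get(newLeft.get(i)) > 1)
                throw new IllegalStateException("Ambiguous column: " + newLeft.get(i));
        for (int i = 0; i < right.size(); i++)
            if (!newRight.get(i).equals(right.get(i)) && newCounts.get(newRight.get(i)) > 1)
                throw new IllegalStateException("Ambiguous column: " + newRight.get(i));
        return List.of(newLeft, newRight);
    }

    public static void main(String[] args) {
        // Prints [[id_left, a], [id_right, b]].
        System.out.println(combineNames(
            List.of("id", "a"), List.of("id", "b"), "_left", "_right"));
    }
}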
diff --git a/distribution/std-lib/Table/src/Data/Table.enso b/distribution/std-lib/Table/src/Data/Table.enso
index 887e7da71ea3..c4023de11dc2 100644
--- a/distribution/std-lib/Table/src/Data/Table.enso
+++ b/distribution/std-lib/Table/src/Data/Table.enso
@@ -25,13 +25,13 @@ type Table
index = this.java_table.getIndex
col_names = [index.getName] + cols.map .getName
col_vals = cols.map .getStorage
- num_rows = this.java_table.nrows
+ num_rows = this.row_count
display_rows = Math.min num_rows show_rows
rows = Vector.new display_rows row_num->
cols = col_vals.map col->
if col.isNa row_num then "Nothing" else Column.get_item_string col row_num
[index.ilocString row_num] + cols
- table = here.print_table col_names rows format_terminal
+ table = here.print_table col_names rows 1 format_terminal
if num_rows - display_rows <= 0 then table else
missing = '\n\u2026 and ' + (num_rows - display_rows).to_text + ' hidden rows.'
table + missing
@@ -61,12 +61,12 @@ type Table
to_default_visualization_data : Text
to_default_visualization_data =
max_size = 10
- nrows = ['number_of_rows', this.nrows]
+ row_count = ['number_of_rows', this.row_count]
cols = this.columns.map c->
name = c.name
items = c.to_vector.take_start max_size
Json.from_pairs [['name', name], ['data', items]]
- Json.from_pairs [nrows, ['columns', cols]] . to_text
+ Json.from_pairs [row_count, ['columns', cols]] . to_text

## Returns the column with the given name.
at : Text -> Column ! No_Such_Column_Error
@@ -161,8 +161,8 @@ type Table
Table (Java_Table.new (non_missing.map .java_column . to_array) index)

## Returns the number of rows in this table.
- nrows : Integer
- nrows = this.java_table.nrows
+ row_count : Integer
+ row_count = this.java_table.rowCount

## Returns a Table describing this table's contents.
@@ -171,7 +171,7 @@ type Table
info : Table
info =
cols = this.columns
- here.new [["Column", cols.map .name], ["Items Count", cols.map .count], ["Storage Type", cols.map .storage_type]]
+ here.new [["Column", cols.map .name], ["Items Count", cols.map .count], ["Storage Type", cols.map .storage_type]] . set_index "Column"

## Returns an aggregate table resulting from grouping the elements by the
value of the specified column.
@@ -380,6 +380,9 @@ join tables = tables.reduce .join

## PRIVATE
+
+ Ensures that `txt` has at least `len` characters by appending spaces at
+ the end.
pad txt len =
true_len = txt.characters.length
txt + (" ".repeat (len - true_len))
@@ -392,7 +395,19 @@ ansi_bold enabled txt =
_ -> if enabled then '\e[1m' + txt + '\e[m' else txt

## PRIVATE
-print_table header rows format_term =
+
+ A helper function for creating an ASCII-art representation of tabular data.
+
+ Arguments:
+ - header: a vector of the names of the columns in the table.
+ - rows: a vector of rows, where each row is a vector that contains a text
+ representation of each cell.
+ - indices_count: specifies how many initial columns should be treated as
+ indices; these are rendered in bold if `format_term` is enabled.
+ - format_term: a boolean flag, specifying whether to use ANSI escape codes
+ for rich formatting in the terminal.
+print_table : Vector Text -> (Vector (Vector Text)) -> Integer -> Boolean -> Text
+print_table header rows indices_count format_term =
content_lengths = Vector.new header.length i->
max_row = 0.up_to rows.length . fold 0 a-> j-> Math.max a (rows.at j . at i . characters . length)
Math.max max_row (header.at i . characters . length)
@@ -400,7 +415,8 @@ print_table header rows format_term =
divider = content_lengths .
map (l -> "-".repeat l+2) . join '+' row_lines = rows.map r-> x = r.zip content_lengths here.pad - with_bold_ix = [here.ansi_bold format_term (x.at 0)] + x.drop_start 1 + ixes = x.take_start indices_count . map (here.ansi_bold format_term) + with_bold_ix = ixes + x.drop_start indices_count y = with_bold_ix . join ' | ' " " + y ([" " + header_line, divider] + row_lines).join '\n' diff --git a/distribution/std-lib/Table/src/Internal/Java_Exports.enso b/distribution/std-lib/Table/src/Internal/Java_Exports.enso new file mode 100644 index 000000000000..3d47a8dbe61a --- /dev/null +++ b/distribution/std-lib/Table/src/Internal/Java_Exports.enso @@ -0,0 +1,28 @@ +from Base import all + +polyglot java import org.enso.table.data.table.Column +polyglot java import org.enso.table.data.table.Table +polyglot java import org.enso.table.data.index.DefaultIndex +polyglot java import org.enso.table.data.column.builder.object.InferredBuilder +polyglot java import org.enso.table.data.column.builder.object.NumericBuilder +polyglot java import org.enso.table.data.column.builder.object.BoolBuilder + +## PRIVATE +make_bool_builder = BoolBuilder.new + +## PRIVATE +make_double_builder initial_size = NumericBuilder.createDoubleBuilder initial_size + +## PRIVATE +make_long_builder initial_size = NumericBuilder.createLongBuilder initial_size + +## PRIVATE +make_inferred_builder initial_size = InferredBuilder.new initial_size + +## PRIVATE +make_column name storage = Column.new name storage + +## PRIVATE +make_table_without_columns row_count = + index = DefaultIndex.new row_count + Table.new [].to_array index diff --git a/distribution/std-lib/Test/src/Test.enso b/distribution/std-lib/Test/src/Test.enso index c3792a192512..631b9e28c2a8 100644 --- a/distribution/std-lib/Test/src/Test.enso +++ b/distribution/std-lib/Test/src/Test.enso @@ -19,7 +19,7 @@ Spec.is_fail = this.behaviors.any .is_fail Suite.is_fail = this.specs.any .is_fail ## PRIVATE -type Finished_With_Error err +type Finished_With_Error err stack_trace_text ## PRIVATE type Matched_On_Error err @@ -169,15 +169,16 @@ specify label ~behavior pending=Nothing = ## PRIVATE run_spec ~behavior = recovery = Panic.recover <| - behavior.catch err-> Panic.throw (Finished_With_Error err) + result = behavior + result.catch err-> Panic.throw (Finished_With_Error err result.get_stack_trace_text) Nothing maybeExc = case recovery of _ -> Success result = maybeExc.catch ex-> case ex of Failure _ -> ex - Finished_With_Error x -> - Failure ("An unexpected error was returned: " + x.to_text + '\n' + maybeExc.get_stack_trace_text) + Finished_With_Error err stack_trace_text -> + Failure ("An unexpected error was returned: " + err.to_text + '\n' + stack_trace_text) _ -> Failure ("An unexpected panic was thrown: " + ex.to_text + '\n' + maybeExc.get_stack_trace_text) result diff --git a/engine/runtime/src/main/java/org/enso/interpreter/epb/runtime/PolyglotProxy.java b/engine/runtime/src/main/java/org/enso/interpreter/epb/runtime/PolyglotProxy.java index 0b75b81a92e1..e781f99bce05 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/epb/runtime/PolyglotProxy.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/epb/runtime/PolyglotProxy.java @@ -1,6 +1,7 @@ package org.enso.interpreter.epb.runtime; import com.oracle.truffle.api.dsl.Cached; +import com.oracle.truffle.api.dsl.Cached.Exclusive; import com.oracle.truffle.api.exception.AbstractTruffleException; import com.oracle.truffle.api.interop.ArityException; import com.oracle.truffle.api.interop.ExceptionType; @@ -899,7 
+900,7 @@ boolean hasExceptionMessage( Object getExceptionMessage( @CachedLibrary("this.delegate") InteropLibrary errors, @CachedLibrary("this") InteropLibrary node, - @Cached ContextRewrapNode contextRewrapNode) + @Cached @Exclusive ContextRewrapNode contextRewrapNode) throws UnsupportedMessageException { Object p = enterOrigin(node); try { diff --git a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/error/GetStackTraceTextNode.java b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/error/GetStackTraceTextNode.java index fa494bdfbe6b..ae30610e0b31 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/error/GetStackTraceTextNode.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/error/GetStackTraceTextNode.java @@ -3,14 +3,10 @@ import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.TruffleStackTrace; import com.oracle.truffle.api.TruffleStackTraceElement; -import com.oracle.truffle.api.dsl.CachedContext; -import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.nodes.Node; import java.util.ArrayList; import java.util.Collections; -import org.enso.interpreter.Language; import org.enso.interpreter.dsl.BuiltinMethod; -import org.enso.interpreter.runtime.Context; import org.enso.interpreter.runtime.data.text.Text; import org.enso.interpreter.runtime.error.DataflowError; @@ -38,7 +34,9 @@ String printStackTrace(Throwable throwable) { for (int i = fullStack.size() - 1; i >= 0; i--) { var elem = fullStack.get(i); if (isInit) { - if (elem.getLocation().getRootNode().getLanguageInfo() != null) { + if (elem.getLocation() != null + && elem.getLocation().getRootNode() != null + && elem.getLocation().getRootNode().getLanguageInfo() != null) { isInit = false; } } @@ -56,6 +54,15 @@ String printStackTrace(Throwable throwable) { boolean first = true; for (var errorFrame : stack) { if (errorFrame.getLocation() == null) { + if (errorFrame.getTarget() != null && errorFrame.getTarget().getRootNode() != null) { + var name = errorFrame.getTarget().getRootNode().getName(); + if (first) { + first = false; + } else { + sb.append('\n'); + } + sb.append(" at related to " + name); + } continue; } var rootNode = errorFrame.getLocation().getRootNode(); diff --git a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java index 9512c3ff006b..d920b9eb3754 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java @@ -58,6 +58,10 @@ public boolean isDirectory() { return this.truffleFile.isDirectory(); } + public void createDirectories() throws IOException { + this.truffleFile.createDirectories(); + } + public boolean isRegularFile() { return this.truffleFile.isRegularFile(); } diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java b/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java index baf09952972e..719724608946 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java @@ -11,7 +11,7 @@ public class BoolBuilder extends TypedBuilder { int size = 0; @Override - public void append(Object o) { + public void appendNoGrow(Object o) { if (o == null) { 
isNa.set(size); } else { @@ -22,6 +22,23 @@ public void append(Object o) { size++; } + @Override + public void append(Object o) { + appendNoGrow(o); + } + + /** + * Append a new boolean to this builder. + * + * @param data the boolean to append + */ + public void appendBoolean(boolean data) { + if (data) { + vals.set(size); + } + size++; + } + @Override public void appendNulls(int count) { isNa.set(size, size + count); @@ -38,6 +55,11 @@ public int getCurrentSize() { return size; } + @Override + public int getCurrentCapacity() { + return vals.size(); + } + @Override public void writeTo(Object[] items) { for (int i = 0; i < size; i++) { diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java b/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java index 90a7ac45d59a..1b7f086ad6ee 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java @@ -5,7 +5,17 @@ /** A builder for creating columns dynamically. */ public abstract class Builder { /** - * Append a new item to this builder. + * Append a new item to this builder, assuming that it has enough allocated space. + * + *
<p>
This function should only be used when it is guaranteed that the builder has enough + * capacity, for example if it was initialized with an initial capacity known up-front. + * + * @param o the item to append + */ + public abstract void appendNoGrow(Object o); + + /** + * Append a new item to this builder, increasing the capacity if necessary. * * @param o the item to append */ @@ -25,6 +35,12 @@ public abstract class Builder { /** @return the number of appended elements */ public abstract int getCurrentSize(); + /** + * @return how many elements this builder can hold without growing (including already existing + * elements) + */ + public abstract int getCurrentCapacity(); + /** @return a storage containing all the items appended so far */ public abstract Storage seal(); } diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java b/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java index 42df594300e0..26f091d74985 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java @@ -8,15 +8,67 @@ public class InferredBuilder extends Builder { private TypedBuilder currentBuilder = null; private int currentSize = 0; - private final int size; + private final int initialSize; /** * Creates a new instance of this builder, with the given known result size. * - * @param size the result size + * @param initialSize the result size */ - public InferredBuilder(int size) { - this.size = size; + public InferredBuilder(int initialSize) { + this.initialSize = initialSize; + } + + @Override + public void appendNoGrow(Object o) { + if (currentBuilder == null) { + if (o == null) { + currentSize++; + return; + } else { + initBuilderFor(o); + } + } + if (o == null) { + currentBuilder.appendNoGrow(o); + } else { + switch (currentBuilder.getType()) { + case Storage.Type.BOOL: + if (o instanceof Boolean) { + currentBuilder.appendNoGrow(o); + } else { + retypeAndAppend(o); + } + break; + case Storage.Type.LONG: + if (o instanceof Long) { + currentBuilder.appendNoGrow(o); + } else { + retypeAndAppend(o); + } + break; + case Storage.Type.DOUBLE: + if (o instanceof Double) { + currentBuilder.appendNoGrow(o); + } else if (o instanceof Long) { + currentBuilder.appendNoGrow(((Long) o).doubleValue()); + } else { + retypeAndAppend(o); + } + break; + case Storage.Type.STRING: + if (o instanceof String) { + currentBuilder.appendNoGrow(o); + } else { + retypeAndAppend(o); + } + break; + case Storage.Type.OBJECT: + currentBuilder.appendNoGrow(o); + break; + } + } + currentSize++; } @Override @@ -80,20 +132,19 @@ public void appendNulls(int count) { } private void initBuilderFor(Object o) { + int initialCapacity = Math.max(initialSize, currentSize); if (o instanceof Boolean) { currentBuilder = new BoolBuilder(); } else if (o instanceof Double) { - currentBuilder = NumericBuilder.createDoubleBuilder(size); + currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity); } else if (o instanceof Long) { - currentBuilder = NumericBuilder.createLongBuilder(size); + currentBuilder = NumericBuilder.createLongBuilder(initialCapacity); } else if (o instanceof String) { - currentBuilder = new StringBuilder(size); + currentBuilder = new StringBuilder(initialCapacity); } else { - currentBuilder = new ObjectBuilder(size); - } - for (int i = 0; i < currentSize; i++) { - currentBuilder.append(null); + currentBuilder = new 
ObjectBuilder(initialCapacity); } + currentBuilder.appendNulls(currentSize); } private void retypeAndAppend(Object o) { @@ -114,7 +165,7 @@ private void retypeAndAppend(Object o) { } private void retypeToObject() { - ObjectBuilder objectBuilder = new ObjectBuilder(size); + ObjectBuilder objectBuilder = new ObjectBuilder(initialSize); currentBuilder.writeTo(objectBuilder.getData()); objectBuilder.setCurrentSize(currentBuilder.getCurrentSize()); currentBuilder = objectBuilder; @@ -125,8 +176,16 @@ public int getCurrentSize() { return currentSize; } + @Override + public int getCurrentCapacity() { + return 0; + } + @Override public Storage seal() { + if (currentBuilder == null) { + initBuilderFor(null); + } return currentBuilder.seal(); } } diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java b/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java index 9e1b7e90cafb..df9fae1dc8e2 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java @@ -1,5 +1,6 @@ package org.enso.table.data.column.builder.object; +import java.util.Arrays; import java.util.BitSet; import org.enso.table.data.column.storage.DoubleStorage; import org.enso.table.data.column.storage.LongStorage; @@ -7,14 +8,12 @@ /** A builder for numeric columns. */ public class NumericBuilder extends TypedBuilder { - private final int size; private final BitSet isMissing = new BitSet(); - private final long[] data; + private long[] data; private boolean isDouble; private int currentSize; private NumericBuilder(boolean isDouble, int size) { - this.size = size; this.data = new long[size]; this.isDouble = isDouble; } @@ -64,7 +63,7 @@ public int getType() { } @Override - public void append(Object o) { + public void appendNoGrow(Object o) { if (o == null) { isMissing.set(currentSize++); } else if (isDouble && o instanceof Double) { @@ -76,6 +75,14 @@ public void append(Object o) { } } + @Override + public void append(Object o) { + if (currentSize + 1 > data.length) { + grow(); + } + appendNoGrow(o); + } + @Override public void appendNulls(int count) { isMissing.set(currentSize, currentSize + count); @@ -83,26 +90,66 @@ public void appendNulls(int count) { } /** - * Append a new item in raw form to this builder. + * Append a new item in raw form to this builder, assuming that it has enough allocated space. + * + *
<p>
This function should only be used when it is guaranteed that the builder has enough + * capacity, for example if it was initialized with an initial capacity known up-front. * * @param rawData the raw encoding of the item, for long numbers just the number and for doubles, * its long bytes */ - public void appendRaw(long rawData) { + public void appendRawNoGrow(long rawData) { data[currentSize++] = rawData; } + /** + * Append a new integer to this builder. + * + * @param data the integer to append + */ + public void appendLong(long data) { + if (currentSize + 1 > this.data.length) { + grow(); + } + appendRawNoGrow(data); + } + + /** + * Append a new double to this builder. + * + * @param data the double to append + */ + public void appendDouble(double data) { + if (currentSize + 1 > this.data.length) { + grow(); + } + appendRawNoGrow(Double.doubleToRawLongBits(data)); + } + @Override public int getCurrentSize() { return currentSize; } + @Override + public int getCurrentCapacity() { + return data.length; + } + @Override public Storage seal() { if (isDouble) { - return new DoubleStorage(data, size, isMissing); + return new DoubleStorage(data, currentSize, isMissing); } else { - return new LongStorage(data, size, isMissing); + return new LongStorage(data, currentSize, isMissing); + } + } + + private void grow() { + int desiredCapacity = 3; + if (data.length > 1) { + desiredCapacity = (data.length * 3 / 2); } + this.data = Arrays.copyOf(data, desiredCapacity); } } diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java b/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java index 7b01989c719f..f35b781faa08 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java @@ -1,22 +1,20 @@ package org.enso.table.data.column.builder.object; +import java.util.Arrays; import org.enso.table.data.column.storage.ObjectStorage; import org.enso.table.data.column.storage.Storage; /** A builder for boxed object columns. 
*/ public class ObjectBuilder extends TypedBuilder { - private final Object[] data; - private final int size; + private Object[] data; private int currentSize = 0; public ObjectBuilder(int size) { - this.size = size; this.data = new Object[size]; } - public ObjectBuilder(Object[] data, int size) { + public ObjectBuilder(Object[] data) { this.data = data; - this.size = size; } @Override @@ -39,8 +37,16 @@ public int getType() { return Storage.Type.OBJECT; } + @Override + public void appendNoGrow(Object o) { + data[currentSize++] = o; + } + @Override public void append(Object o) { + if (currentSize + 1 > data.length) { + grow(); + } data[currentSize++] = o; } @@ -54,9 +60,14 @@ public int getCurrentSize() { return currentSize; } + @Override + public int getCurrentCapacity() { + return data.length; + } + @Override public Storage seal() { - return new ObjectStorage(data, size); + return new ObjectStorage(data, currentSize); } public Object[] getData() { @@ -64,6 +75,19 @@ public Object[] getData() { } public void setCurrentSize(int currentSize) { + if (currentSize > data.length) grow(currentSize); this.currentSize = currentSize; } + + private void grow() { + if (data.length > 1) { + grow(data.length * 3 / 2); + } else { + grow(3); + } + } + + private void grow(int desiredCapacity) { + this.data = Arrays.copyOf(data, desiredCapacity); + } } diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java b/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java index 6e0c42987fc2..24f53ba2850e 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java @@ -1,17 +1,16 @@ package org.enso.table.data.column.builder.object; +import java.util.Arrays; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.StringStorage; /** A builder for string columns. 
*/ public class StringBuilder extends TypedBuilder { - private final Object[] data; - private final int size; + private Object[] data; private int currentSize = 0; public StringBuilder(int size) { this.data = new Object[size]; - this.size = size; } @Override @@ -29,7 +28,7 @@ public boolean canRetypeTo(long type) { @Override public TypedBuilder retypeTo(long type) { if (type == Storage.Type.OBJECT) { - ObjectBuilder res = new ObjectBuilder(data, size); + ObjectBuilder res = new ObjectBuilder(data); res.setCurrentSize(currentSize); return res; } else { @@ -42,8 +41,16 @@ public int getType() { return Storage.Type.STRING; } + @Override + public void appendNoGrow(Object o) { + data[currentSize++] = o; + } + @Override public void append(Object o) { + if (currentSize + 1 > data.length) { + grow(); + } data[currentSize++] = o; } @@ -57,8 +64,25 @@ public int getCurrentSize() { return currentSize; } + @Override + public int getCurrentCapacity() { + return 0; + } + @Override public Storage seal() { - return new StringStorage(data, size); + return new StringStorage(data, currentSize); + } + + private void grow() { + if (data.length > 1) { + grow(data.length * 3 / 2); + } else { + grow(3); + } + } + + private void grow(int desiredCapacity) { + this.data = Arrays.copyOf(data, desiredCapacity); } } diff --git a/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java b/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java index bca43b4e9179..eb0f69df9215 100644 --- a/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java +++ b/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java @@ -38,7 +38,7 @@ public FunctionAggregator( public void nextGroup(IntStream positions) { List items = getItems(positions); Object result = aggregateFunction.apply(items); - builder.append(result); + builder.appendNoGrow(result); } private List getItems(IntStream positions) { diff --git a/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java b/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java index 4cb290fcc089..ccaf20546b95 100644 --- a/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java +++ b/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java @@ -92,9 +92,9 @@ private Storage fillMissingDouble(double arg) { long rawArg = Double.doubleToRawLongBits(arg); for (int i = 0; i < size(); i++) { if (isMissing.get(i)) { - builder.appendRaw(rawArg); + builder.appendRawNoGrow(rawArg); } else { - builder.appendRaw(data[i]); + builder.appendRawNoGrow(data[i]); } } return builder.seal(); diff --git a/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java b/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java index 792d47d4a968..05cc967cdd2c 100644 --- a/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java +++ b/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java @@ -142,10 +142,10 @@ private Storage fillMissingDouble(double arg) { long rawArg = Double.doubleToRawLongBits(arg); for (int i = 0; i < size(); i++) { if (isMissing.get(i)) { - builder.appendRaw(rawArg); + builder.appendRawNoGrow(rawArg); } else { double coerced = data[i]; - builder.appendRaw(Double.doubleToRawLongBits(coerced)); + builder.appendRawNoGrow(Double.doubleToRawLongBits(coerced)); } } return builder.seal(); @@ -155,9 +155,9 @@ private Storage fillMissingLong(long 
arg) { final var builder = NumericBuilder.createLongBuilder(size()); for (int i = 0; i < size(); i++) { if (isMissing.get(i)) { - builder.appendRaw(arg); + builder.appendRawNoGrow(arg); } else { - builder.appendRaw(data[i]); + builder.appendRawNoGrow(data[i]); } } return builder.seal(); diff --git a/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/table/src/main/java/org/enso/table/data/column/storage/Storage.java index 2f4d9406e3f7..e30967b9b882 100644 --- a/table/src/main/java/org/enso/table/data/column/storage/Storage.java +++ b/table/src/main/java/org/enso/table/data/column/storage/Storage.java @@ -108,9 +108,9 @@ public final Storage bimap( for (int i = 0; i < size(); i++) { Object it = getItemBoxed(i); if (it == null) { - builder.append(null); + builder.appendNoGrow(null); } else { - builder.append(function.apply(it, argument)); + builder.appendNoGrow(function.apply(it, argument)); } } return builder.seal(); @@ -162,9 +162,9 @@ public final Storage map(String name, Function function) { for (int i = 0; i < size(); i++) { Object it = getItemBoxed(i); if (it == null) { - builder.append(null); + builder.appendNoGrow(null); } else { - builder.append(function.apply(it)); + builder.appendNoGrow(function.apply(it)); } } return builder.seal(); @@ -187,9 +187,9 @@ public final Storage zip(String name, BiFunction functio Object it1 = getItemBoxed(i); Object it2 = i < arg.size() ? arg.getItemBoxed(i) : null; if (it1 == null || it2 == null) { - builder.append(null); + builder.appendNoGrow(null); } else { - builder.append(function.apply(it1, it2)); + builder.appendNoGrow(function.apply(it1, it2)); } } return builder.seal(); @@ -209,9 +209,9 @@ protected final Storage fillMissingHelper(Object arg, Builder builder) { for (int i = 0; i < size(); i++) { Object it = getItemBoxed(i); if (it == null) { - builder.append(arg); + builder.appendNoGrow(arg); } else { - builder.append(it); + builder.appendNoGrow(it); } } return builder.seal(); diff --git a/table/src/main/java/org/enso/table/data/table/Column.java b/table/src/main/java/org/enso/table/data/table/Column.java index bd787295f0b6..cf0f86f06e54 100644 --- a/table/src/main/java/org/enso/table/data/table/Column.java +++ b/table/src/main/java/org/enso/table/data/table/Column.java @@ -32,6 +32,16 @@ public Column(String name, Index index, Storage storage) { this.index = index; } + /** + * Creates a new column. + * + * @param name the column name + * @param storage the underlying storage + */ + public Column(String name, Storage storage) { + this(name, new DefaultIndex(storage.size()), storage); + } + /** * Converts this column to a single-column table. 
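+ *
+ * <p>A short sketch of the convenience constructor added above (illustrative;
+ * it simply derives a {@code DefaultIndex} from the storage size):
+ *
+ * <pre>{@code
+ * Storage storage = someColumn.getStorage(); // someColumn is hypothetical
+ * Column c = new Column("prices", storage);
+ * // equivalent to: new Column("prices", new DefaultIndex(storage.size()), storage)
+ * }</pre>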
* @@ -109,7 +119,7 @@ public Column rename(String name) { public static Column fromItems(String name, List items) { InferredBuilder builder = new InferredBuilder(items.size()); for (Object item : items) { - builder.append(item); + builder.appendNoGrow(item); } return new Column(name, new DefaultIndex(items.size()), builder.seal()); } diff --git a/table/src/main/java/org/enso/table/data/table/Table.java b/table/src/main/java/org/enso/table/data/table/Table.java index 4dfef884af5b..0daad9ce1140 100644 --- a/table/src/main/java/org/enso/table/data/table/Table.java +++ b/table/src/main/java/org/enso/table/data/table/Table.java @@ -4,7 +4,6 @@ import java.util.stream.Collectors; import org.enso.table.data.column.builder.object.InferredBuilder; -import org.enso.table.data.column.builder.string.StorageBuilder; import org.enso.table.data.column.storage.BoolStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.index.DefaultIndex; @@ -39,9 +38,9 @@ public Table(Column[] columns, Index index) { } /** @return the number of rows in this table */ - public int nrows() { + public int rowCount() { if (columns == null || columns.length == 0) { - return 0; + return index.size(); } else { return columns[0].getSize(); } @@ -82,7 +81,7 @@ public Table mask(Column maskCol) { BoolStorage storage = (BoolStorage) maskCol.getStorage(); var mask = BoolStorage.toMask(storage); var localStorageMask = new BitSet(); - localStorageMask.set(0, nrows()); + localStorageMask.set(0, rowCount()); mask.and(localStorageMask); int cardinality = mask.cardinality(); Column[] newColumns = new Column[columns.length]; @@ -194,7 +193,7 @@ public Table join(Table other, boolean dropUnmatched, String on, String lsuffix, // The tables have exactly the same indexes, so they may be just be concatenated horizontally return hconcat(other, lsuffix, rsuffix); } - int s = nrows(); + int s = rowCount(); List[] matches = new List[s]; if (on == null) { for (int i = 0; i < s; i++) { @@ -289,8 +288,8 @@ private String suffixIfNecessary(Set names, String name, String suffix) public Table concat(Table other) { Index newIndex = concatIndexes(index, other.index); List newColumns = new ArrayList<>(); - int leftLen = nrows(); - int rightLen = other.nrows(); + int leftLen = rowCount(); + int rightLen = other.rowCount(); for (Column c : columns) { Column match = other.getColumnByName(c.getName()); Storage storage = @@ -314,10 +313,10 @@ public Table concat(Table other) { private Storage concatStorages(Storage left, Storage right) { InferredBuilder builder = new InferredBuilder(left.size() + right.size()); for (int i = 0; i < left.size(); i++) { - builder.append(left.getItemBoxed(i)); + builder.appendNoGrow(left.getItemBoxed(i)); } for (int j = 0; j < right.size(); j++) { - builder.append(right.getItemBoxed(j)); + builder.appendNoGrow(right.getItemBoxed(j)); } return builder.seal(); } @@ -328,7 +327,7 @@ private Storage nullPad(int nullCount, Storage storage, boolean start) { builder.appendNulls(nullCount); } for (int i = 0; i < storage.size(); i++) { - builder.append(storage.getItemBoxed(i)); + builder.appendNoGrow(storage.getItemBoxed(i)); } if (!start) { builder.appendNulls(nullCount); @@ -342,10 +341,10 @@ private Index concatIndexes(Index left, Index right) { } else { InferredBuilder builder = new InferredBuilder(left.size() + right.size()); for (int i = 0; i < left.size(); i++) { - builder.append(left.iloc(i)); + builder.appendNoGrow(left.iloc(i)); } for (int j = 0; j < right.size(); j++) { - 
builder.append(right.iloc(j)); + builder.appendNoGrow(right.iloc(j)); } Storage storage = builder.seal(); return HashIndex.fromStorage(left.getName(), storage); diff --git a/test/Database_Tests/src/Codegen_Spec.enso b/test/Database_Tests/src/Codegen_Spec.enso index 5d1cc8b7c6ac..fb07c1c4badb 100644 --- a/test/Database_Tests/src/Codegen_Spec.enso +++ b/test/Database_Tests/src/Codegen_Spec.enso @@ -2,145 +2,175 @@ from Base import all from Database import all from Table import No_Such_Column_Error, Order_Rule import Database.Data.Dialect +from Database.Data.Sql import Sql_Type import Test import Database_Tests.Helpers.Fake_Test_Connection spec = + int = Sql_Type.integer + bool = Sql_Type.boolean + str = Sql_Type 424242 test_connection = - table1 = ["T1", ["A", "B", "C"]] - table2 = ["T2", ["D", "E", "F"]] - table3 = ["T3", ["A", "E", "F"]] + table1 = ["T1", [["A", int], ["B", str], ["C", bool]]] + table2 = ["T2", [["D", int], ["E", int], ["F", bool]]] + table3 = ["T3", [["A", int], ["E", bool], ["F", int]]] tables = Map.from_vector [table1, table2, table3] Fake_Test_Connection.make Dialect.sqlite tables t1 = test_connection.access_table "T1" - Test.group "JSON serialization" <| + Test.group "[Codegen] JSON serialization" <| Test.specify "should serialize Tables and Columns to their SQL representation" <| - q1 = t1.where (t1.at "B" == 42) . to_json - part1 = Json.from_pairs [["sql_code", "SELECT T1.A AS A, T1.B AS B, T1.C AS C FROM T1 AS T1 WHERE (T1.B = "]] - part2 = Json.from_pairs [["sql_interpolation", 42]] + q1 = t1.where (t1.at "A" == 42) . to_json + part1 = Json.from_pairs [["sql_code", 'SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" = ']] + interp = Json.from_pairs [["value", 42], ["typeid", int.typeid]] + part2 = Json.from_pairs [["sql_interpolation", interp]] part3 = Json.from_pairs [["sql_code", ")"]] expected = Json.from_pairs [["query", Json.Array [part1, part2, part3]]] q1.should_equal expected q2 = t1.at "A" . to_json - q2.should_equal (Json.from_pairs [["query", Json.Array [Json.from_pairs [["sql_code", "SELECT T1.A AS A FROM T1 AS T1"]]]]]) + q2.should_equal (Json.from_pairs [["query", Json.Array [Json.from_pairs [["sql_code", 'SELECT "T1"."A" AS "A" FROM "T1" AS "T1"']]]]]) - Test.group "Basic Select" <| + Test.group "[Codegen] Basic Select" <| Test.specify "should select columns from a table" <| - t1.to_sql.prepare . should_equal ["SELECT T1.A AS A, T1.B AS B, T1.C AS C FROM T1 AS T1", []] + t1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1"', []] t2 = t1.select ["C", "B", "undefined"] - t2.to_sql.prepare . should_equal ["SELECT T1.C AS C, T1.B AS B FROM T1 AS T1", []] + t2.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B" FROM "T1" AS "T1"', []] foo = t1.at "A" . rename "FOO" - foo.to_sql.prepare . should_equal ["SELECT T1.A AS FOO FROM T1 AS T1", []] + foo.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "FOO" FROM "T1" AS "T1"', []] t3 = t2.set "bar" foo - t3.to_sql.prepare . should_equal ["SELECT T1.C AS C, T1.B AS B, T1.A AS bar FROM T1 AS T1", []] + t3.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B", "T1"."A" AS "bar" FROM "T1" AS "T1"', []] Test.specify "should fail if at is called for a nonexisting column" <| t1.at "undefined" . should_fail_with No_Such_Column_Error - Test.group "Building Expressions" <| + Test.specify "should allow to limit the amount of returned results" <| + t2 = t1.limit 5 + t2.to_sql.prepare . 
should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" LIMIT 5', []] + + Test.specify "should work correctly when there are no columns" <| + empty = t1.select [] + json = Json.from_pairs [["query", Nothing], ["message", "The table has no columns so a query cannot be generated."]] + empty.to_json . should_equal json + empty.columns.length . should_equal 0 + empty.to_sql . should_fail_with Illegal_State_Error + + Test.group "[Codegen] Building Expressions" <| Test.specify "should allow building expressions from columns and constants" <| a = t1.at "A" b = t1.at "B" + c = t1.at "C" arith = (a * 2) + 1 - bool = (a || a.not) && True + logic = (c || c.not) && True cmp = (a / a >= b) && (a - b < a) - arith.to_sql.prepare . should_equal ["SELECT ((T1.A * ?) + ?) AS A FROM T1 AS T1", [2, 1]] - bool.to_sql.prepare . should_equal ["SELECT ((T1.A OR (NOT T1.A)) AND ?) AS A FROM T1 AS T1", [True]] - cmp.to_sql.prepare . should_equal ["SELECT (((T1.A / T1.A) >= T1.B) AND ((T1.A - T1.B) < T1.A)) AS A FROM T1 AS T1", []] + arith.to_sql.prepare . should_equal ['SELECT (("T1"."A" * ?) + ?) AS "A" FROM "T1" AS "T1"', [[2, int], [1, int]]] + logic.to_sql.prepare . should_equal ['SELECT (("T1"."C" OR (NOT "T1"."C")) AND ?) AS "C" FROM "T1" AS "T1"', [[True, bool]]] + cmp.to_sql.prepare . should_equal ['SELECT ((("T1"."A" / "T1"."A") >= "T1"."B") AND (("T1"."A" - "T1"."B") < "T1"."A")) AS "A" FROM "T1" AS "T1"', []] Test.specify "should support simple text operations" <| - a = t1.at "A" - add = a + "SUFFIX" - add.to_sql.prepare . should_equal ["SELECT (T1.A + ?) AS A FROM T1 AS T1", ["SUFFIX"]] + b = t1.at "B" + add = b + "SUFFIX" + add.to_sql.prepare . should_equal ['SELECT ("T1"."B" + ?) AS "B" FROM "T1" AS "T1"', [["SUFFIX", str]]] - ends = a.ends_with "suf" - starts = a.starts_with "pref" - contains = a.contains "inf" - ends.to_sql.prepare . should_equal ["SELECT (T1.A LIKE ('%' || ?)) AS A FROM T1 AS T1", ["suf"]] - starts.to_sql.prepare . should_equal ["SELECT (T1.A LIKE (? || '%')) AS A FROM T1 AS T1", ["pref"]] - contains.to_sql.prepare . should_equal ["SELECT (T1.A LIKE ('%' || ? || '%')) AS A FROM T1 AS T1", ["inf"]] + ends = b.ends_with "suf" + starts = b.starts_with "pref" + contains = b.contains "inf" + ends.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (\'%\' || ?)) AS "B" FROM "T1" AS "T1"', [["suf", str]]] + starts.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (? || \'%\')) AS "B" FROM "T1" AS "T1"', [["pref", str]]] + contains.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (\'%\' || ? || \'%\')) AS "B" FROM "T1" AS "T1"', [["inf", str]]] - Test.group "Masking Tables and Columns" <| + Test.group "[Codegen] Masking Tables and Columns" <| Test.specify "should allow filtering table rows based on a boolean expression" <| t2 = t1.where (t1.at "A" == 42) - t2.to_sql.prepare . should_equal ["SELECT T1.A AS A, T1.B AS B, T1.C AS C FROM T1 AS T1 WHERE (T1.A = ?)", [42]] + t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" = ?)', [[42, int]]] Test.specify "should allow selecting column rows based on a boolean expression" <| c2 = (t1.at "B").where (t1.at "A" == t1.at "C") - c2.to_sql.prepare . should_equal ["SELECT T1.B AS B FROM T1 AS T1 WHERE (T1.A = T1.C)", []] + c2.to_sql.prepare . 
should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" WHERE ("T1"."A" = "T1"."C")', []] - Test.group "Joining Tables" <| + Test.group "[Codegen] Joining Tables" <| t2 = test_connection.access_table "T2" t3 = test_connection.access_table "T3" Test.specify "should allow joining tables index-on-index" <| r1 = t1.set_index 'A' . join (t2.set_index 'D') - r1.to_sql.prepare . should_equal ["SELECT T1.A AS A, T1.B AS B, T1.C AS C, T2.D AS D, T2.E AS E, T2.F AS F FROM (SELECT T1.A AS A, T1.B AS B, T1.C AS C FROM T1 AS T1) AS T1 LEFT JOIN (SELECT T2.D AS D, T2.E AS E, T2.F AS F FROM T2 AS T2) AS T2 ON (T1.A = T2.D)", []] + r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B", "T1"."C" AS "C", "T2"."E" AS "E", "T2"."F" AS "F" FROM (SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1") AS "T1" LEFT JOIN (SELECT "T2"."D" AS "D", "T2"."E" AS "E", "T2"."F" AS "F" FROM "T2" AS "T2") AS "T2" ON ("T1"."A" = "T2"."D")', []] Test.specify "should allow joining tables column-on-index" <| r1 = t1.join (t2.set_index 'D') on='B' drop_unmatched=True - r1.to_sql.prepare . should_equal ["SELECT T1.A AS A, T1.B AS B, T1.C AS C, T2.D AS D, T2.E AS E, T2.F AS F FROM (SELECT T1.A AS A, T1.B AS B, T1.C AS C FROM T1 AS T1) AS T1 INNER JOIN (SELECT T2.D AS D, T2.E AS E, T2.F AS F FROM T2 AS T2) AS T2 ON (T1.B = T2.D)", []] + r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C", "T2"."E" AS "E", "T2"."F" AS "F" FROM (SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1") AS "T1" INNER JOIN (SELECT "T2"."D" AS "D", "T2"."E" AS "E", "T2"."F" AS "F" FROM "T2" AS "T2") AS "T2" ON ("T1"."B" = "T2"."D")', []] Test.specify "should append suffixes to disambiguate column names" <| r1 = t1.join (t3.set_index 'E') on='A' - r1.to_sql.prepare . should_equal ["SELECT T1.A AS A_left, T1.B AS B, T1.C AS C, T3.A AS A_right, T3.E AS E, T3.F AS F FROM (SELECT T1.A AS A, T1.B AS B, T1.C AS C FROM T1 AS T1) AS T1 LEFT JOIN (SELECT T3.A AS A, T3.E AS E, T3.F AS F FROM T3 AS T3) AS T3 ON (T1.A = T3.E)", []] + r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A_left", "T1"."B" AS "B", "T1"."C" AS "C", "T3"."A" AS "A_right", "T3"."F" AS "F" FROM (SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1") AS "T1" LEFT JOIN (SELECT "T3"."E" AS "E", "T3"."A" AS "A", "T3"."F" AS "F" FROM "T3" AS "T3") AS "T3" ON ("T1"."A" = "T3"."E")', []] + + Test.specify "should avoid duplicates when disambiguating column names" <| + connection = + table1 = ["T1", [["X", int], ["A", int], ["A_left", int]]] + table2 = ["T2", [["X", int], ["A", int], ["B", int]]] + tables = Map.from_vector [table1, table2] + Fake_Test_Connection.make Dialect.sqlite tables + t1 = connection.access_table "T1" + t2 = connection.access_table "T2" + (t1.set_index "X").join (t2.set_index "X") . should_fail_with Illegal_State_Error + + Test.specify "should ensure that name suffixes are distinct" <| + err = (t1.set_index 'A').join (t2.set_index 'D') left_suffix='foo' right_suffix='foo' + err . should_fail_with Illegal_State_Error Test.specify "should correctly handle self-joins" <| r1 = t1.join (t1.set_index 'A') on='B' - r1.to_sql.prepare . 
should_equal ["SELECT T1_left.A AS A_left, T1_left.B AS B_left, T1_left.C AS C_left, T1_right.A AS A_right, T1_right.B AS B_right, T1_right.C AS C_right FROM (SELECT T1.A AS A, T1.B AS B, T1.C AS C FROM T1 AS T1) AS T1_left LEFT JOIN (SELECT T1.A AS A, T1.B AS B, T1.C AS C FROM T1 AS T1) AS T1_right ON (T1_left.B = T1_right.A)", []] + r1.to_sql.prepare . should_equal ['SELECT "T1_left"."A" AS "A", "T1_left"."B" AS "B_left", "T1_left"."C" AS "C_left", "T1_right"."B" AS "B_right", "T1_right"."C" AS "C_right" FROM (SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1") AS "T1_left" LEFT JOIN (SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1") AS "T1_right" ON ("T1_left"."B" = "T1_right"."A")', []] - Test.group "Filling Missing Values" <| - Test.specify "should allow to replace missing values in a column with a constant" <| + Test.group "[Codegen] Handling Missing Values" <| + Test.specify "fill_missing should allow to replace missing values in a column with a constant" <| c = t1.at "A" . fill_missing "not-applicable" - c.to_sql.prepare . should_equal ["SELECT COALESCE(T1.A, ?) AS A FROM T1 AS T1", ["not-applicable"]] + c.to_sql.prepare . should_equal ['SELECT COALESCE("T1"."A", ?) AS "A" FROM "T1" AS "T1"', [["not-applicable", int]]] - Test.group "Dropping Missing Values" <| - Test.specify "should drop missing rows in a Column" <| + Test.specify "drop_missing should drop missing rows in a Column" <| col = t1.at "A" . drop_missing - col.to_sql.prepare . should_equal ["SELECT T1.A AS A FROM T1 AS T1 WHERE (NOT (T1.A IS NULL))", []] + col.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" WHERE (NOT ("T1"."A" IS NULL))', []] - Test.specify "should drop rows that contain at least one missing column in a Table" <| + Test.specify "drop_missing_rows should drop rows that contain at least one missing column in a Table" <| t2 = t1.drop_missing_rows - t2.to_sql.prepare . should_equal ["SELECT T1.A AS A, T1.B AS B, T1.C AS C FROM T1 AS T1 WHERE (NOT (T1.A IS NULL)) AND (NOT (T1.B IS NULL)) AND (NOT (T1.C IS NULL))", []] + t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE (NOT ("T1"."A" IS NULL)) AND (NOT ("T1"."B" IS NULL)) AND (NOT ("T1"."C" IS NULL))', []] - Test.group "Aggregation" <| + Test.group "[Codegen] Aggregation" <| agg = t1.group by='A' Test.specify "should allow counting group sizes" <| - agg.count.to_sql.prepare . should_equal ["SELECT COUNT(*) AS count FROM T1 AS T1 GROUP BY T1.A", []] + agg.count.to_sql.prepare . should_equal ['SELECT COUNT(*) AS "count" FROM "T1" AS "T1" GROUP BY "T1"."A"', []] Test.specify "should allow aggregating columns with basic arithmetic aggregators" <| c1 = agg.at 'B' . mean - c1.to_sql.prepare . should_equal ["SELECT AVG(T1.B) AS B_mean FROM T1 AS T1 GROUP BY T1.A", []] + c1.to_sql.prepare . should_equal ['SELECT AVG("T1"."B") AS "B_mean" FROM "T1" AS "T1" GROUP BY "T1"."A"', []] c2 = agg.at 'B' . min - c2.to_sql.prepare . should_equal ["SELECT MIN(T1.B) AS B_min FROM T1 AS T1 GROUP BY T1.A", []] + c2.to_sql.prepare . should_equal ['SELECT MIN("T1"."B") AS "B_min" FROM "T1" AS "T1" GROUP BY "T1"."A"', []] Test.specify "should allow grouping by multiple columns" <| agg = t1.group by=['A','B'] - agg.count.to_sql.prepare . should_equal ["SELECT COUNT(*) AS count FROM T1 AS T1 GROUP BY T1.A, T1.B", []] + agg.count.to_sql.prepare . 
should_equal ['SELECT COUNT(*) AS "count" FROM "T1" AS "T1" GROUP BY "T1"."A", "T1"."B"', []] - Test.group "Sorting" <| + Test.group "[Codegen] Sorting" <| Test.specify "should allow sorting by a single column name" <| r1 = t1.sort by="A" . at "B" - r1.to_sql.prepare . should_equal ["SELECT T1.B AS B FROM T1 AS T1 ORDER BY T1.A ASC NULLS LAST", []] + r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST', []] r2 = t1.sort by="B" missing_last=False order=Sort_Order.Descending . at "A" - r2.to_sql.prepare . should_equal ["SELECT T1.A AS A FROM T1 AS T1 ORDER BY T1.B DESC NULLS FIRST", []] + r2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" ORDER BY "T1"."B" DESC NULLS FIRST', []] Test.specify 'should allow sorting by multiple column names' <| r1 = t1.sort by=['A', 'B'] - r1.to_sql.prepare . should_equal ["SELECT T1.A AS A, T1.B AS B, T1.C AS C FROM T1 AS T1 ORDER BY T1.A ASC NULLS LAST, T1.B ASC NULLS LAST", []] + r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" ASC NULLS LAST', []] Test.specify 'should allow sorting by expressions' <| sum = t1.at 'A' + t1.at 'B' r1 = t1.sort by=sum . at "C" - r1.to_sql.prepare . should_equal ["SELECT T1.C AS C FROM T1 AS T1 ORDER BY (T1.A + T1.B) ASC NULLS LAST", []] + r1.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY ("T1"."A" + "T1"."B") ASC NULLS LAST', []] Test.specify 'should allow sorting with specific by-column rules' <| r1 = t1.sort by=['A', (Order_Rule 'B' order=Sort_Order.Descending)] - r1.to_sql.prepare . should_equal ["SELECT T1.A AS A, T1.B AS B, T1.C AS C FROM T1 AS T1 ORDER BY T1.A ASC NULLS LAST, T1.B DESC NULLS LAST", []] + r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" DESC NULLS LAST', []] Test.specify 'should return dataflow error when passed a non-existent column' <| r = t1.sort by='foobar' diff --git a/test/Database_Tests/src/Database_Spec.enso b/test/Database_Tests/src/Database_Spec.enso new file mode 100644 index 000000000000..645cb66d0a90 --- /dev/null +++ b/test/Database_Tests/src/Database_Spec.enso @@ -0,0 +1,369 @@ +from Base import all +from Database import all +from Database.Connection.Connection import Sql_Error +import Test + +spec prefix connection pending=Nothing = + make_table name column_names column_typenames = Panic.recover <| + # TODO this is a hack with no sanitization, just for testing; it should be removed when proper create table is supported by the library + cols = column_names.zip column_typenames name-> typ-> + name + " " + typ + sql = "CREATE TABLE " + name + " (" + (cols.join ", ") + ")" + Panic.rethrow <| connection.execute_update sql + Panic.rethrow <| connection.access_table name + t1 = make_table "T1" ["A", "B", "C"] ["INT", "INT", "INT"] + t1.insert [1, 2, 3] + t1.insert [4, 5, 6] + Test.group prefix+"Basic Table Access" pending=pending <| + Test.specify "should allow to materialize tables and columns into local memory" <| + df = t1.to_dataframe + a = t1.at 'A' . to_dataframe + df.at 'A' . to_vector . should_equal [1, 4] + a.to_vector . should_equal [1, 4] + Test.specify "should allow to materialize columns directly into a Vector" <| + v = t1.at 'A' . to_vector + v . 
should_equal [1, 4] + Test.specify "should preserve indexes when materializing tables" <| + # TODO add multi indexes when implemented + df = t1.set_index 'A' . to_dataframe + df.at 'B' . to_vector . should_equal [2, 5] + df.columns.map .name . should_equal ['B', 'C'] + ix = df.index + ix.name . should_equal 'A' + ix.to_vector . should_equal [1, 4] + Test.specify "should preserve indexes when materializing columns" <| + # TODO add multi indexes when implemented + b = t1.set_index 'A' . at 'B' + col = b . to_dataframe + col.to_vector . should_equal [2, 5] + + ix = col.index + ix.name . should_equal 'A' + ix.to_vector . should_equal [1, 4] + + ix2 = b.to_table.index + ix2.name . should_equal 'A' + ix2.to_vector . should_equal [1, 4] + Test.specify "should work correctly when there are no columns" <| + empty = t1.select [] + empty.to_dataframe.columns.length . should_equal 0 + empty.to_dataframe.row_count . should_equal empty.row_count + Test.specify "should handle bigger result sets" <| + table = make_table "Big" ["A", "B", "C"] ["INT", "REAL", "VARCHAR"] + n = 1000 + 0.up_to n . each ix-> + table.insert [ix, ix * 3.1415926, ix.to_text] + materialized = table.to_dataframe + materialized.row_count . should_equal n + + Test.group prefix+"Mapping Operations" pending=pending <| + t2 = make_table "T2" ["x", "y", "b"] ["INT", "INT", "BOOLEAN"] + t2.insert [1, 2, False] + t2.insert [4, 3, False] + t2.insert [5, 5, True] + t2.insert [Nothing, Nothing, Nothing] + x = t2.at "x" + y = t2.at "y" + b = t2.at "b" + Test.specify "should allow combining columns with supported operations" <| + (x + y).to_vector . should_equal [3, 7, 10, Nothing] + (x - y).to_vector . should_equal [-1, 1, 0, Nothing] + (x * y).to_vector . should_equal [2, 12, 25, Nothing] + (x / y).to_vector . should_equal [0, 1, 1, Nothing] + (x == y).to_vector . should_equal [False, False, True, Nothing] + (x != y).to_vector . should_equal [True, True, False, Nothing] + (x < y).to_vector . should_equal [True, False, False, Nothing] + (x <= y).to_vector . should_equal [True, False, True, Nothing] + (x > y).to_vector . should_equal (x <= y).not.to_vector + (x >= y).to_vector . should_equal (x < y).not.to_vector + (((x < y) || (x == y)) == (x <= y)).to_vector . should_equal [True, True, True, Nothing] + (b || b.not).to_vector . should_equal [True, True, True, Nothing] + + Test.specify "should allow casting constants to be applied to the whole column" <| + (x + 100).to_vector . should_equal [101, 104, 105, Nothing] + (x * 10).to_vector . should_equal [10, 40, 50, Nothing] + (x / 2).to_vector . should_equal [0, 2, 2, Nothing] + (x - 10).to_vector . should_equal [-9, -6, -5, Nothing] + (x == 4).to_vector . should_equal [False, True, False, Nothing] + (x < 1000).to_vector . should_equal [True, True, True, Nothing] + (b || False).to_vector . should_equal [False, False, True, Nothing] + (b || True).to_vector . should_equal [True, True, True, True] + (b && False).to_vector . should_equal [False, False, False, False] + (x + Nothing).to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] + x.is_missing.to_vector . should_equal [False, False, False, True] + (x == Nothing).to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] + + t3 = make_table "T3" ["s1", "s2"] ["VARCHAR", "VARCHAR"] + t3.insert ["foobar", "foo"] + t3.insert ["bar", "ar" ] + t3.insert ["baz", "a" ] + t3.insert [Nothing, Nothing] + s1 = t3.at "s1" + s2 = t3.at "s2" + Test.specify "should handle Text operations" <| + s1.starts_with s2 . to_vector . 
should_equal [True, False, False, Nothing]
+        s1.starts_with "foo" . to_vector . should_equal [True, False, False, Nothing]
+        s1.starts_with "ba" . to_vector . should_equal [False, True, True, Nothing]
+        s1.starts_with Nothing . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
+
+        s1.contains s2 . to_vector . should_equal [True, True, True, Nothing]
+        s1.contains "a" . to_vector . should_equal [True, True, True, Nothing]
+        s1.contains "oo" . to_vector . should_equal [True, False, False, Nothing]
+        s1.contains Nothing . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
+
+        s1.ends_with s2 . to_vector . should_equal [False, True, False, Nothing]
+        s1.ends_with "ar" . to_vector . should_equal [True, True, False, Nothing]
+        s1.ends_with "a" . to_vector . should_equal [False, False, False, Nothing]
+        s1.ends_with Nothing . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
+
+    Test.group prefix+"Masking Tables" pending=pending <|
+        Test.specify "should allow to select rows from a table or column based on an expression" <|
+            t2 = t1.where (t1.at "A" == 1)
+            df = t2.to_dataframe
+            df.at "A" . to_vector . should_equal [1]
+            df.at "B" . to_vector . should_equal [2]
+            df.at "C" . to_vector . should_equal [3]
+            t2.at "A" . to_vector . should_equal [1]
+            t2.at "B" . to_vector . should_equal [2]
+            t2.at "C" . to_vector . should_equal [3]
+
+    Test.group prefix+"Joining Tables" pending=pending <|
+        a = make_table "TA" ["x", "y"] ["INTEGER", "VARCHAR"]
+        a.insert [0, "foo"]
+        a.insert [1, "bar"]
+        a.insert [7, "baz"]
+        a.insert [3, "spam"]
+        a.insert [6, "eggs"]
+        b = make_table "TB" ["w", "z"] ["INTEGER", "VARCHAR"]
+        b.insert [6, "foo"]
+        b.insert [3, "foo"]
+        b.insert [5, "bar"]
+        b.insert [5, "spam"]
+        b.insert [3, "bar"]
+        b.insert [3, "eggs"]
+        ## The tests below use `sort`, because the SQL backend is not guaranteed
+           to return the rows in any particular order. This is the `sort` from
+           the Dataframes library, so it is independent of the library under
+           testing here.
+        Test.specify "should allow joining tables index-on-index" <|
+            r_1 = a.set_index 'x' . join (b.set_index 'w') . to_dataframe . sort by=['y', 'z']
+            r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam']
+            r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo']
+
+            r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . to_dataframe . sort by=['x', 'w']
+            r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6]
+            r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3]
+
+        Test.specify "should allow joining tables column-on-index" <|
+            r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . sort by=['y', 'z']
+            r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam']
+            r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo']
+            r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . sort by=['x', 'w']
+            r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6]
+            r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3]
+
+        Test.specify "should allow self-joins and append suffixes to disambiguate column names" <|
+            r_1 = a.join (a.set_index 'x') on='x' . to_dataframe . sort by='x'
+            r_1.columns.map .name . should_equal ['x', 'y_left', 'y_right']
+            r_1.at 'x' . to_vector . should_equal [0, 1, 3, 6, 7]
+            expected_y = ['foo', 'bar', 'spam', 'eggs', 'baz']
+            r_1.at 'y_left' . to_vector . should_equal expected_y
+            r_1.at 'y_right' . to_vector .
should_equal expected_y
+
+            r_2 = a.set_index 'x' . join (a.set_index 'x') left_suffix='_old' right_suffix='_new'
+            r_2.columns.map .name . should_equal ['y_old', 'y_new']
+
+    Test.group prefix+"Missing Values" pending=pending <|
+        t4 = make_table "T4" ["A", "B", "C"] ["INT", "BOOLEAN", "VARCHAR"]
+        t4.insert [0, True, ""]
+        t4.insert [1, Nothing, "foo"]
+        t4.insert [Nothing, True, "bar"]
+        t4.insert [42, False, Nothing]
+        t4.insert [Nothing, Nothing, Nothing]
+        Test.specify "fill_missing should replace nulls" <|
+            t4.at 'A' . fill_missing 10 . to_vector . should_equal [0, 1, 10, 42, 10]
+            t4.at 'B' . fill_missing False . to_vector . should_equal [True, False, True, False, False]
+            t4.at 'C' . fill_missing "NA" . to_vector . should_equal ["", "foo", "bar", "NA", "NA"]
+
+        Test.specify "should correctly be counted" <|
+            t4.row_count . should_equal 5
+            col = t4.at 'A'
+            col.length . should_equal 5
+            col.count . should_equal 3
+            col.count_missing . should_equal 2
+
+        Test.specify "drop_missing should drop missing rows in a Column" <|
+            col = t4.at 'A'
+            col.drop_missing.to_vector . should_equal [0, 1, 42]
+
+        Test.specify "drop_missing_rows should drop rows that contain at least one missing column in a Table" <|
+            d = t4.drop_missing_rows.to_dataframe
+            d.at 'A' . to_vector . should_equal [0]
+            d.at 'B' . to_vector . should_equal [True]
+            d.at 'C' . to_vector . should_equal [""]
+
+        Test.specify "drop_missing_columns should drop columns that contain at least one missing row in a Table" <|
+            t5 = make_table "T5" ["A", "B", "C"] ["INT", "BOOLEAN", "VARCHAR"]
+            t5.insert [1, True, "foo"]
+            t5.insert [2, False, Nothing]
+            t5.insert [3, Nothing, "aaa"]
+
+            r = t5.drop_missing_columns
+            r.columns.map .name . should_equal ["A"]
+            r.at "A" . to_vector . should_equal [1, 2, 3]
+
+            empty = t4.drop_missing_columns
+            empty.columns.length . should_equal 0
+            empty.to_dataframe.columns.length . should_equal 0
+
+    Test.group prefix+"Aggregation" pending=pending <|
+        t = make_table "T6" ['name', 'price', 'quantity'] ['VARCHAR', 'DECIMAL', 'INTEGER']
+        t.insert ["foo", 0.4, 10]
+        t.insert ["bar", 3.5, 20]
+        t.insert ["foo", Nothing, 30]
+        t.insert ["baz", 6.7, 40]
+        t.insert ["foo", Nothing, 50]
+        t.insert ["bar", 97, 60]
+        t.insert ["quux", Nothing, 70]
+        agg = t.group by='name'
+        ## A helper which makes sure that the groups are ordered according to
+           the index, using the Table library
+        determinize col =
+            df = col.to_dataframe.to_table
+            df.sort by=df.index . at col.name
+
+        Test.specify "should allow counting group sizes" <|
+            determinize agg.count . to_vector . should_equal [2, 1, 3, 1]
+
+        Test.specify "should allow aggregating columns with basic arithmetic aggregators" <|
+            determinize (agg.at 'price' . mean) . to_vector . should_equal [50.25, 6.7, 0.4, Nothing]
+            determinize (agg.at 'price' . min) . to_vector . should_equal [3.5, 6.7, 0.4, Nothing]
+            determinize (agg.at 'price' . max) . to_vector . should_equal [97, 6.7, 0.4, Nothing]
+
+    Test.group prefix+"Column-wide statistics" pending=pending <|
+        Test.specify 'should allow computing basic column-wide stats' <|
+            t7 = make_table "T7" ['price'] ['DECIMAL']
+            price = t7.at 'price'
+            [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing] .
each x-> + t7.insert [x] + + price.sum.should_equal 107.6 + price.min.should_equal 0.4 + price.max.should_equal 97 + price.mean.should_equal 26.9 + + Test.group prefix+"Sorting" pending=pending <| + df = make_table "clothes" ['Id', 'Name', 'Quantity', 'Rating', 'Price'] ['INTEGER', 'VARCHAR', 'INTEGER', 'DECIMAL', 'DECIMAL'] + df.insert [1,'shoes',20,3.0,37.2] + df.insert [2,'trousers',10,Nothing,42.1] + df.insert [3,'dress',20,7.3,64.1] + df.insert [4,'skirt',10,3.0,87.4] + df.insert [5,'blouse',30,2.2,13.5] + df.insert [6,'t-shirt',30,Nothing,64.2] + + Test.specify "should allow sorting by a single column name" <| + r_1 = df.sort by="Quantity" + r_1.at 'Id' . to_vector . should_equal [2,4,1,3,5,6] + + r_2 = df.sort by="Rating" missing_last=False + r_2.at 'Id' . to_vector . should_equal [2,6,5,1,4,3] + + r_3 = df.sort by="Rating" missing_last=False order=Sort_Order.Descending + r_3.at 'Id' . to_vector . should_equal [2,6,3,1,4,5] + + Test.specify 'should allow sorting by multiple column names' <| + r_1 = df.sort by=['Quantity', 'Rating'] + r_1.at 'Id' . to_vector . should_equal [4,2,1,3,5,6] + + r_2 = df.sort by=['Rating', 'Quantity'] missing_last=False order=Sort_Order.Descending + r_2.at 'Id' . to_vector . should_equal [6,2,3,1,4,5] + + Test.specify 'should allow sorting by external columns' <| + quality_ratio = df.at 'Rating' / df.at 'Price' + + r_1 = df.sort by=quality_ratio + r_1.at 'Id' . to_vector . should_equal [4,1,3,5,2,6] + + r_2 = df.sort by=['Quantity', quality_ratio] + r_2.at 'Id' . to_vector . should_equal [4,2,1,3,5,6] + + Test.specify 'should allow sorting with specific by-column rules' <| + r_1 = df.sort by=['Quantity', (Order_Rule 'Price' order=Sort_Order.Descending)] + r_1.at 'Id' . to_vector . should_equal [4,2,3,1,6,5] + + Test.specify 'should return dataflow error when passed a non-existent column' <| + r = df.sort by='foobar' + r.should_fail_with No_Such_Column_Error + + Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <| + df = make_table "T8" ['ord', 'ints', 'reals', 'bools', 'texts'] ['INTEGER', 'INTEGER', 'DECIMAL', 'BOOLEAN', 'VARCHAR'] + r = df.sort by='ord' + df.insert [0, 1, 1.3, False, "foo"] + df.insert [3, 2, 4.6, False, "foo"] + df.insert [2, 3, 3.2, True, "bar"] + df.insert [4, 4, 5.2, True, "baz"] + df.insert [1, 5, 1.6, False, "spam"] + + ints = [1, 2, 3, 4, 5] + reals = [1.3, 4.6, 3.2, 5.2, 1.6] + bools = [False, False, True, True, False] + texts = ["foo", "foo", "bar", "baz", "spam"] + + r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4] + df.at 'ints' . to_vector . should_equal ints + + r.at 'reals' . to_vector . should_equal [1.3, 1.6, 3.2, 4.6, 5.2] + df.at 'reals' . to_vector . should_equal reals + + r.at 'bools' . to_vector . should_equal [False, False, True, False, True] + df.at 'bools' . to_vector . should_equal bools + + r.at 'texts' . to_vector . should_equal ['foo', 'spam', 'bar', 'foo', 'baz'] + df.at 'texts' . to_vector . 
should_equal texts
+
+        Test.specify 'should sort columns with specified ordering and missing placement' <|
+            c = df.at 'Rating'
+
+            r_1 = c.sort
+            r_1.to_vector.should_equal [2.2, 3.0, 3.0, 7.3, Nothing, Nothing]
+
+            r_2 = c.sort order=Sort_Order.Descending
+            r_2.to_vector.should_equal [7.3, 3.0, 3.0, 2.2, Nothing, Nothing]
+
+            r_3 = c.sort order=Sort_Order.Descending missing_last=False
+            r_3.to_vector.should_equal [Nothing, Nothing, 7.3, 3.0, 3.0, 2.2]
+
+    Test.group prefix+"Info" pending=pending <|
+        Test.specify "should return Table information" <|
+            t = make_table "T9" ["strs", "ints", "bools"] ["VARCHAR", "INTEGER", "BOOLEAN"]
+            t.insert ["a", Nothing, False]
+            t.insert ["abc", Nothing, Nothing]
+            t.insert ["def", 42, True]
+            i = t.info
+            i.index . to_vector . should_equal ["strs", "ints", "bools"]
+            i.at "Items Count" . to_vector . should_equal [3, 1, 2]
+            i.at "SQL Type" . to_vector . should_equal ["VARCHAR", "INTEGER", "BOOLEAN"]
+
+sqlite_specific_spec connection =
+    Test.group "[SQLite] Error Handling" <|
+        Test.specify "should wrap errors" <|
+            connection.execute_query "foobar" . should_fail_with Sql_Error
+            connection.execute_update "foobar" . should_fail_with Sql_Error
+
+            action = connection.execute_query "SELECT A FROM undefined_table"
+            action . should_fail_with Sql_Error
+            action.catch.to_text . should_equal "[SQLITE_ERROR] SQL error or missing database (no such table: undefined_table)"
+
+sqlite_spec =
+    Enso_Project.data.create_directory
+    file = Enso_Project.data / "sqlite_test.db"
+    file.delete_if_exists
+    connection = Database.open_sqlite_file file
+    here.spec "[SQLite] " connection
+    here.sqlite_specific_spec connection
+    connection.close
+    file.delete
+
+postgres_spec =
+    # TODO [RW] use env vars to read tmp DB config
+    connection = Error.throw "PostgreSQL test database is not configured"
+    here.spec "[PostgreSQL] " connection pending="PostgreSQL test database is not configured."
diff --git a/test/Database_Tests/src/Helpers/Fake_Test_Connection.enso b/test/Database_Tests/src/Helpers/Fake_Test_Connection.enso
index 0d5bb3db546e..d1f6902c4f39 100644
--- a/test/Database_Tests/src/Helpers/Fake_Test_Connection.enso
+++ b/test/Database_Tests/src/Helpers/Fake_Test_Connection.enso
@@ -1,15 +1,36 @@
 from Base import all
-from Database.Connection.Connection as Connection_Module import Connection
+import Database.Data.Table as Database_Table
 
 type Fake_Test_Connection
-    # type Fake_Test_Connection (tables : Map Text (Vector Text))
-    #                           (dialect : Text)
+    # type Fake_Test_Connection (tables : Map Text (Vector [Text, Sql_Type]))
+    #                           (dialect : Text)
    type Fake_Test_Connection tables dialect
 
-    fetch_column_names : Text -> Vector Text
-    fetch_column_names name =
-        this.tables.get name
+    ## PRIVATE
+    access_table : Text -> Database_Table
+    access_table name =
+        columns = this.tables.get name
+        Database_Table.make_table this name columns
+
+    ## PRIVATE
+    close : Nothing
+    close = Nothing
+
+    ## PRIVATE
+    explain_query_plan : Sql.Statement -> Text
+    explain_query_plan _ =
+        Error.throw "Materialization not supported on fake connection."
+
+    ## PRIVATE
+    execute_query : Text | Sql.Statement -> Materialized_Table
+    execute_query _ =
+        Error.throw "Materialization not supported on fake connection."
+
+    ## PRIVATE
+    execute_update : Text | Sql.Statement -> Integer
+    execute_update _ =
+        Error.throw "Materialization not supported on fake connection."
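+
+    ## A usage sketch (illustrative, mirroring Codegen_Spec above): tables are
+       described as [name, Sql_Type] column pairs, so queries can be built and
+       inspected without touching a real database.
+
+           tables = Map.from_vector [["T1", [["A", Sql_Type.integer]]]]
+           connection = Fake_Test_Connection.make Dialect.sqlite tables
+           t1 = connection.access_table "T1"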
## PRIVATE make dialect tables = - Connection (Fake_Test_Connection tables dialect) + Fake_Test_Connection tables dialect diff --git a/test/Database_Tests/src/Main.enso b/test/Database_Tests/src/Main.enso index f2fc8d7eccc7..7e4f0e90bd49 100644 --- a/test/Database_Tests/src/Main.enso +++ b/test/Database_Tests/src/Main.enso @@ -1,6 +1,9 @@ from Base import all import Test import Database_Tests.Codegen_Spec +import Database_Tests.Database_Spec main = Test.Suite.runMain <| Codegen_Spec.spec + Database_Spec.sqlite_spec + Database_Spec.postgres_spec diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 07a0b3525afc..177302cda74e 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -319,7 +319,7 @@ spec = c = ["objs", [1, "a", "c", Any]] r = Table.new [a, b, c] i = r.info - i.at "Column" . to_vector . should_equal ["strs", "ints", "objs"] + i.index . to_vector . should_equal ["strs", "ints", "objs"] i.at "Items Count" . to_vector . should_equal [3, 2, 4] i.at "Storage Type" . to_vector . should_equal [Storage.Text, Storage.Integer, Storage.Any] diff --git a/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-add b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-add new file mode 100644 index 000000000000..1e521c0d3688 --- /dev/null +++ b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-add @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2007 David Crawshaw + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ diff --git a/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-ignore b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-ignore new file mode 100644 index 000000000000..d9997fce671f --- /dev/null +++ b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-ignore @@ -0,0 +1,3 @@ +Copyright (c) 2007 David Crawshaw +copyright notice and this permission notice appear in all copies. 
+this work for additional information regarding copyright ownership. diff --git a/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-keep b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-keep new file mode 100644 index 000000000000..719e348e537b --- /dev/null +++ b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-keep @@ -0,0 +1 @@ +Copyright 2009 Taro L. Saito diff --git a/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-keep-context b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-keep-context new file mode 100644 index 000000000000..d0c5b7e34850 --- /dev/null +++ b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/copyright-keep-context @@ -0,0 +1,4 @@ +Copyright 2007 Taro L. Saito +Copyright 2008 Taro L. Saito +Copyright 2010 Taro L. Saito +Copyright 2016 Magnus Reftel diff --git a/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/custom-license b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/custom-license new file mode 100644 index 000000000000..6b1d0bfabc3c --- /dev/null +++ b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/custom-license @@ -0,0 +1 @@ +LICENSE diff --git a/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/files-keep b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/files-keep new file mode 100644 index 000000000000..e724e5582868 --- /dev/null +++ b/tools/legal-review/std-lib-Database/org.xerial.sqlite-jdbc-3.34.0/files-keep @@ -0,0 +1,2 @@ +META-INF/maven/org.xerial/sqlite-jdbc/LICENSE.zentus +META-INF/maven/org.xerial/sqlite-jdbc/LICENSE diff --git a/tools/legal-review/std-lib-Database/report-state b/tools/legal-review/std-lib-Database/report-state index 7c695619527e..fba9da8df831 100644 --- a/tools/legal-review/std-lib-Database/report-state +++ b/tools/legal-review/std-lib-Database/report-state @@ -1,3 +1,3 @@ -BB22995D8FF0335A65990C0B1FD160E5FE693EE2A38D6EF77B2278096009406D -389C2F543AE11424145EEEF058221BA0340BC5598849C76048D25325292F364B +B738F5334F240F21BD20E350ABA4989B0DE0A0C0C0E85A289F0B926E08D0B38E +89EC5A7095BBF020EE88A12B9EEBC317B4096E60B30CBB84C92F165AA443A44E 0