From 57685b432ffabc9bcb64067b55eeca379fbc7913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 5 Mar 2021 19:32:33 +0100 Subject: [PATCH] Implement new builder-based materialization (but it doesn't work yet) --- .../Database/src/Connection/Connection.enso | 94 +++++++++++++++---- .../std-lib/Database/src/Data/Sql.enso | 29 ++++++ .../Table/src/Internal/Java_Exports.enso | 19 ++++ .../org/enso/table/data/table/Column.java | 10 ++ test/Database_Tests/src/Database_Spec.enso | 2 +- 5 files changed, 134 insertions(+), 20 deletions(-) create mode 100644 distribution/std-lib/Table/src/Internal/Java_Exports.enso diff --git a/distribution/std-lib/Database/src/Connection/Connection.enso b/distribution/std-lib/Database/src/Connection/Connection.enso index adec0b29fa423..9e5f4320b9eb8 100644 --- a/distribution/std-lib/Database/src/Connection/Connection.enso +++ b/distribution/std-lib/Database/src/Connection/Connection.enso @@ -6,6 +6,7 @@ import Database.Data.Dialect import Database.Data.Sql import Database.Data.Internal.IR from Database.Data.Sql import Sql_Type +import Table.Internal.Java_Exports polyglot java import java.util.ArrayList polyglot java import java.sql.DriverManager @@ -63,29 +64,19 @@ type JDBC_Connection rs = stmt.executeQuery metadata = rs.getMetaData ncols = metadata.getColumnCount - ## TODO [RW] we wrap everything in objects, in the future we may - want to try a more performant solution for columns of numbers or - booleans - column_names = Vector.new ncols (ix -> metadata.getColumnName ix+1) column_types = if expected_types.is_nothing.not then expected_types else - Vector.new ncols (ix -> Sql.Sql_Type <| metadata.getColumnType ix+1) - columns = column_names.map (name -> [name, ArrayList.new]) + Vector.new ncols (ix -> Sql_Type <| metadata.getColumnType ix+1) + column_builders = column_types.map typ-> + here.create_builder typ go has_next = if has_next.not then Nothing else - columns.map_with_index ix-> pair-> - results = 
pair.second - typ = column_types.at ix - ## getObject handles most types by default correctly, but it - would cast booleans to integers - obj = case typ == Sql.Sql_Type.boolean of - True -> - bool = rs.getBoolean ix+1 - if rs.wasNull then Nothing else bool - False -> rs.getObject ix+1 - results.add obj + column_builders.map_with_index ix-> builder-> + builder.fetch_and_append rs ix+1 go rs.next go rs.next - column_vectors = columns.map (pair -> [pair.first, Vector.Vector pair.second.toArray]) - Materialized_Table.new column_vectors + columns = column_builders.map_with_index ix-> builder-> + name = metadata.getColumnName ix+1 + builder.make_column name + Materialized_Table.from_columns columns ## ADVANCED @@ -146,6 +137,71 @@ type JDBC_Connection [name, Sql_Type typ] Vector.new ncols resolve_column +## PRIVATE + + Creates a builder for a column based on a provided SQL type, trying to infer + the best type for the builder. + + WARNING: Currently it coerces decimals into floating point numbers. +create_builder : Sql_Type -> Builder +create_builder sql_type = + initial_size = 10 + if sql_type.is_definitely_boolean then Builder_Boolean (Java_Exports.make_bool_builder) else + if sql_type.is_definitely_integer then Builder_Long (Java_Exports.make_long_builder initial_size) else + is_double = sql_type.is_definitely_double || sql_type==Sql_Type.decimal + if is_double then Builder_Double (Java_Exports.make_double_builder initial_size) else + Builder_Inferred (Java_Exports.make_inferred_builder initial_size) + +type Builder + ## PRIVATE + type Builder_Inferred java_builder + + ## PRIVATE + type Builder_Double java_builder + + ## PRIVATE + type Builder_Long java_builder + + ## PRIVATE + type Builder_Boolean java_builder + + ## PRIVATE + + Fetches the value of ith column from the current row of the result set + and appends it to the builder. + + Arguments: + - rs: the Java ResultSet from which the value will be fetched. 
+ - i: the index of the column to fetch from (starting from 1 as is the + ResultSet convention). + fetch_and_append rs i = case this of + Builder_Inferred _ -> + obj = rs.getObject i + this.java_builder.append obj + Builder_Boolean _ -> + bool = rs.getBoolean i + case rs.wasNull of + True -> this.java_builder.appendNulls 1 + False -> this.java_builder.appendBoolean bool + Builder_Long _ -> + long = rs.getLong i + case rs.wasNull of + True -> this.java_builder.appendNulls 1 + False -> this.java_builder.appendLong long + Builder_Double _ -> + double = rs.getDouble i + case rs.wasNull of + True -> this.java_builder.appendNulls 1 + False -> this.java_builder.appendDouble double + + ## PRIVATE + + Seals the builder and returns a built Java-column. + make_column : Text -> Java_Exports.Column + make_column name = + storage = this.java_builder.seal + Java_Exports.make_column name storage + ## An error indicating that a supported dialect could not be deduced for the provided URL. type UnsupportedDialect url diff --git a/distribution/std-lib/Database/src/Data/Sql.enso b/distribution/std-lib/Database/src/Data/Sql.enso index f1f0a775a0eb1..562194bff2b73 100644 --- a/distribution/std-lib/Database/src/Data/Sql.enso +++ b/distribution/std-lib/Database/src/Data/Sql.enso @@ -18,6 +18,35 @@ type Sql_Type integer : Sql_Type integer = Sql_Type Types.INTEGER + ## The SQL type representing decimal numbers. + decimal : Sql_Type + decimal = Sql_Type Types.DECIMAL + + ## PRIVATE + + Returns True if this type represents an integer. It only handles the + standard types so it may return false negatives for non-standard ones. + is_definitely_integer : Boolean + is_definitely_integer = + [Types.INTEGER, Types.BIGINT, Types.SMALLINT, Types.TINYINT].contains this.typeid + + ## PRIVATE + + Returns True if this type represents a boolean. It only handles the + standard types so it may return false negatives for non-standard ones. 
+ is_definitely_boolean : Boolean + is_definitely_boolean = + this.typeid == Types.BOOLEAN + + ## PRIVATE + + Returns True if this type represents a floating point number. It only + handles the standard types so it may return false negatives for + non-standard ones. + is_definitely_double : Boolean + is_definitely_double = + [Types.FLOAT, Types.DOUBLE, Types.REAL].contains this.typeid + ## UNSTABLE A fragment of a SQL query. diff --git a/distribution/std-lib/Table/src/Internal/Java_Exports.enso b/distribution/std-lib/Table/src/Internal/Java_Exports.enso new file mode 100644 index 0000000000000..60a5a18717fc0 --- /dev/null +++ b/distribution/std-lib/Table/src/Internal/Java_Exports.enso @@ -0,0 +1,19 @@ +polyglot java import org.enso.table.data.table.Column +polyglot java import org.enso.table.data.column.builder.object.InferredBuilder +polyglot java import org.enso.table.data.column.builder.object.NumericBuilder +polyglot java import org.enso.table.data.column.builder.object.BoolBuilder + +## PRIVATE +make_bool_builder = BoolBuilder.new + +## PRIVATE +make_double_builder initial_size = NumericBuilder.createDoubleBuilder initial_size + +## PRIVATE +make_long_builder initial_size = NumericBuilder.createLongBuilder initial_size + +## PRIVATE +make_inferred_builder initial_size = InferredBuilder.new initial_size + +## PRIVATE +make_column name storage = Column.new name storage diff --git a/table/src/main/java/org/enso/table/data/table/Column.java b/table/src/main/java/org/enso/table/data/table/Column.java index 453a42987dcb1..cf0f86f06e54b 100644 --- a/table/src/main/java/org/enso/table/data/table/Column.java +++ b/table/src/main/java/org/enso/table/data/table/Column.java @@ -32,6 +32,16 @@ public Column(String name, Index index, Storage storage) { this.index = index; } + /** + * Creates a new column. 
+ * + * @param name the column name + * @param storage the underlying storage + */ + public Column(String name, Storage storage) { + this(name, new DefaultIndex(storage.size()), storage); + } + /** * Converts this column to a single-column table. * diff --git a/test/Database_Tests/src/Database_Spec.enso b/test/Database_Tests/src/Database_Spec.enso index d2488b74ccdf8..b880cb555bfb1 100644 --- a/test/Database_Tests/src/Database_Spec.enso +++ b/test/Database_Tests/src/Database_Spec.enso @@ -14,7 +14,7 @@ spec connection = t1 = make_table "T1" ["A", "B", "C"] ["INT", "INT", "INT"] t1.insert [1, 2, 3] t1.insert [4, 5, 6] - Test.group "Basic Table Access" <| + Test.group "Basic Table Access" <| # FIXME [RW] TODO [RW] add test for many rows Test.specify "should allow to materialize tables and columns into local memory" <| df = t1.to_dataframe a = t1.at 'A' . to_dataframe