diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 5139c3d69dfe..41c7a41103c0 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -2158,11 +2158,18 @@ type Table expected_types = self.columns.map .value_type actual_types = materialized_table.columns.map .value_type - expected_types.zip actual_types . fold materialized_table acc-> types_pair-> - expected_type = types_pair.first - actual_type = types_pair.second - if expected_type == actual_type then acc else - Warning.attach (Inexact_Type_Coercion.Warning expected_type actual_type) acc + warnings_builder = Vector.new_builder + expected_types.zip actual_types expected_type-> actual_type-> + if expected_type == actual_type then Nothing else + expected_type_kind = Meta.meta expected_type . constructor + actual_type_kind = Meta.meta actual_type . constructor + ## We ignore simple approximations that our in-memory backend does - things like adding default + timezone (because we do not have Date_Time without timezone in-memory), + or changing Float32 to Float64 are silently ignored. + However, bigger changes, like a Binary type column getting coerced to Mixed - _will_ still be reported. + if expected_type_kind == actual_type_kind then Nothing else + warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type) + Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector materialized_table ## PRIVATE Creates a query corresponding to this table. diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Column_Fetcher.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Column_Fetcher.enso index 0eca147b6ebb..c625e01d6263 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Column_Fetcher.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Column_Fetcher.enso @@ -6,6 +6,8 @@ import Standard.Table.Data.Type.Value_Type.Bits import Standard.Table.Data.Type.Value_Type.Value_Type import Standard.Table.Internal.Java_Exports +from project.Errors import Unsupported_Database_Operation + polyglot java import java.sql.ResultSet polyglot java import java.time.LocalTime as Java_Local_Time @@ -145,6 +147,17 @@ date_time_fetcher = make_builder_from_java_object_builder java_builder Column_Fetcher.Value fetch_value make_builder +## PRIVATE + A column fetcher that fetches the database column without timezone, + interpreting it as LocalDateTime and then converting to Enso Date_Time by + adding the default system timezone. +local_date_time_fetcher = + fetch_value rs i = JDBCUtils.getLocalDateTimeAsZoned rs i + make_builder initial_size _ = + java_builder = Java_Exports.make_date_time_builder initial_size + make_builder_from_java_object_builder java_builder + Column_Fetcher.Value fetch_value make_builder + ## PRIVATE A default implementation that will assign specialized fetchers for the Integer, Float, Char and Boolean value types and a fallback for any other @@ -160,14 +173,15 @@ default_fetcher_for_value_type value_type = Value_Type.Boolean -> boolean_fetcher Value_Type.Time -> time_fetcher # We currently don't distinguish timestamps without a timezone on the Enso value side. 
- Value_Type.Date_Time _ -> date_time_fetcher + Value_Type.Date_Time has_timezone -> + if has_timezone then date_time_fetcher else local_date_time_fetcher # If we can determine that scale = 0 Value_Type.Decimal _ scale -> is_guaranteed_integer = scale.is_nothing.not && scale <= 0 case is_guaranteed_integer of True -> big_integer_fetcher # If we cannot guarantee that the column is integer, we will fall back to Float values, since there is no BigDecimal implementation yet. - # TODO I think we should add a warning somewhere + # In another place this will trigger a Inexact_Type_Coercion warning. False -> double_fetcher _ -> fallback_fetcher diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso index 357902d68fdd..4ded72974923 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso @@ -226,8 +226,8 @@ type JDBC_Connection It is the caller's responsibility to call this method from within a transaction to ensure consistency. - batch_insert : Text -> Statement_Setter -> Materialized_Table -> Integer -> Integer | Nothing -> Nothing - batch_insert self insert_template statement_setter table batch_size row_limit=Nothing = + batch_insert : Text -> Statement_Setter -> Materialized_Table -> Integer -> Vector Value_Type | Nothing -> Integer | Nothing -> Nothing + batch_insert self insert_template statement_setter table batch_size expected_type_hints=Nothing row_limit=Nothing = In_Transaction.ensure_in_transaction <| self.with_connection java_connection-> handle_sql_errors related_query=insert_template <| Managed_Resource.bracket (java_connection.prepareStatement insert_template) .close stmt-> log_sql_if_enabled self insert_template @@ -244,7 +244,7 @@ type JDBC_Connection Panic.throw <| Illegal_State.Error "A single update within the batch unexpectedly affected "+affected_rows.to_text+" rows." 0.up_to num_rows . each row_id-> values = columns.map col-> col.at row_id - set_statement_values stmt statement_setter values + set_statement_values stmt statement_setter values expected_type_hints=expected_type_hints stmt.addBatch if (row_id+1 % batch_size) == 0 then check_rows stmt.executeBatch batch_size if (num_rows % batch_size) != 0 then check_rows stmt.executeBatch (num_rows % batch_size) @@ -298,9 +298,17 @@ handle_sql_errors ~action related_query=Nothing = ## PRIVATE Uses the provided `Statement_Setter` strategy to fill holes in a provided `PreparedStatement`. -set_statement_values stmt statement_setter values = + + A list of expected value types can be passed as `expected_type_hints` to add + these hints to the `Statement_Setter` to customize the behaviour for some + specific target value types. 
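Note: the hint matters mostly for `Date_Time` values, where the same Enso value must be bound differently depending on whether the target column carries a timezone. A minimal Java sketch of that distinction, assuming a plain JDBC `PreparedStatement` (the helper name here is hypothetical; the actual logic in this patch lives in `fill_hole_default` and `JDBCUtils`):

```java
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Types;
import java.time.ZonedDateTime;

public class TypeHintSketch {
  // Hypothetical helper showing how a `Value_Type.Date_Time` hint changes the binding.
  static void bindDateTime(PreparedStatement stmt, int index, ZonedDateTime value,
                           boolean targetHasTimezone) throws SQLException {
    if (targetHasTimezone) {
      // timestamptz target: keep the offset so the instant is preserved.
      stmt.setObject(index, value.toOffsetDateTime(), Types.TIMESTAMP_WITH_TIMEZONE);
    } else {
      // timestamp target: drop the zone and store the wall-clock time only.
      stmt.setObject(index, value.toLocalDateTime(), Types.TIMESTAMP);
    }
  }
}
```

When no hint is available, the zoned form is used by default, which preserves the instant but leaves any timezone-stripping conversion to the database itself.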
+set_statement_values : PreparedStatement -> Statement_Setter -> Vector -> Vector Value_Type | Nothing -> Nothing +set_statement_values stmt statement_setter values expected_type_hints=Nothing = values.each_with_index ix-> value-> - statement_setter.fill_hole stmt (ix + 1) value + type_hint = case expected_type_hints of + Nothing -> Nothing + hints : Vector -> hints.at ix + statement_setter.fill_hole stmt (ix + 1) type_hint value ## PRIVATE A helper that logs performed SQL queries/statements to a file, if an diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso index 1d60727d54a9..818284970ae6 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso @@ -280,7 +280,7 @@ type Postgres_Dialect True -> Value_Type.Decimal 1000 scale # If the needed precision is too big, we cannot set it, so we set the precision to unlimited. This loses scale. False -> Value_Type.Decimal Nothing Nothing - Warning.attach (Inexact_Type_Coercion.Warning base_type new_type) new_type + Warning.attach (Inexact_Type_Coercion.Warning base_type new_type unavailable=False) new_type False -> base_type _ -> base_type diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso index f283e9d8bfa6..fa2eba2413a7 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso @@ -55,6 +55,10 @@ type Postgres_Type_Mapping type_name = if with_timezone then "timestamptz" else "timestamp" SQL_Type.Value Types.TIMESTAMP type_name Value_Type.Binary _ _ -> + ## `bytea` is the compact storage for binary data, but it does not allow setting fixed size. + We currently do not use Binary datatype much, so it is hard to decide which one is more appropriate, + but we may also consider using the standard SQL `bit(n)` and `bit varying(n)` types. + See: https://www.postgresql.org/docs/current/datatype-bit.html SQL_Type.Value Types.BINARY "bytea" precision=max_precision Value_Type.Mixed -> Error.throw (Unsupported_Database_Operation.Error "Postgres tables do not support Mixed types.") diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso index 6f29166d7f85..8a03ef039c46 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso @@ -1,6 +1,8 @@ from Standard.Base import all import Standard.Base.Errors.Illegal_State.Illegal_State +import Standard.Table.Data.Type.Value_Type.Value_Type + polyglot java import java.math.BigDecimal as Java_Big_Decimal polyglot java import java.sql.PreparedStatement polyglot java import java.sql.Types as Java_Types @@ -11,7 +13,7 @@ polyglot java import org.enso.database.JDBCUtils type Statement_Setter ## PRIVATE Encapsulates the logic for filling a hole in a prepared statement. 
- Value (fill_hole : PreparedStatement -> Integer -> Any -> Nothing) + Value (fill_hole : PreparedStatement -> Integer -> Value_Type|Nothing -> Any -> Nothing) ## PRIVATE The default setter that is handling simple commonly supported types. @@ -25,12 +27,13 @@ type Statement_Setter It will panic if called. null : Statement_Setter null = - fill_hole_unexpected _ _ _ = + fill_hole_unexpected _ _ _ _ = Panic.throw (Illegal_State.Error "The associated statement does not expect any values to be set. This is a bug in the Database library.") Statement_Setter.Value fill_hole_unexpected ## PRIVATE -fill_hole_default stmt i value = case value of +fill_hole_default : PreparedStatement -> Integer -> Value_Type|Nothing -> Any -> Nothing +fill_hole_default stmt i type_hint value = case value of Nothing -> stmt.setNull i Java_Types.NULL _ : Boolean -> stmt.setBoolean i value _ : Integer -> case NumericConverter.isBigInteger value of @@ -40,7 +43,16 @@ fill_hole_default stmt i value = case value of False -> stmt.setLong i value _ : Float -> stmt.setDouble i value _ : Text -> stmt.setString i value - _ : Date_Time -> JDBCUtils.setZonedDateTime stmt i value + _ : Date_Time -> + has_timezone = case type_hint of + Value_Type.Date_Time with_timezone -> with_timezone + # We include the timezone by default + _ -> True + case has_timezone of + True -> + JDBCUtils.setZonedDateTime stmt i value + False -> + JDBCUtils.setLocalDateTime stmt i value ## Time_Of_Day and Date sometimes work ok, but sometimes are passed as `org.graalvm.polyglot.Value` to the JDBC driver which is then unable to infer the correct type for them. Instead, we use these helper functions diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso index eecd74ffe87a..b044a6002a97 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso @@ -72,12 +72,25 @@ internal_create_table_structure connection table_name structure primary_key temp ## PRIVATE A helper that can upload a table from any backend to a database. It should be run within a transaction and wrapped in `handle_upload_errors`. -internal_upload_table source_table connection table_name primary_key temporary on_problems=Problem_Behavior.Report_Error row_limit=Nothing = + + Arguments: + - source_table: the table to be uploaded. + If it's a `Database_Table`, the query will be materialized as a new table. + If it's an `In_Memory_Table`, the data will be uploaded to the newly created table. + - connection: the connection to the database. + - table_name: the name of the table to be created. + - primary_key: the primary key of the table to be created. Can be `Nothing` to set no key. + - temporary: if `True`, the table will be created as temporary. + - structure_hint: If set, it can be used to hint what types should be used for the columns of the table. Useful if the types slightly differ from the in-memory source types. + - on_problems: the behavior to be used when reporting problems. + - row_limit: if set, only the first `row_limit` rows will be uploaded. 
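Note: the `structure_hint` is trusted for the column types, but its column names must line up with the source table; `verify_structure_hint` (added further down in this file) panics otherwise. A stand-alone Java sketch of that name check, under the assumption that it is purely positional (class and method names are hypothetical):

```java
import java.util.List;

public class StructureHintCheckSketch {
  // The hint may override column types, but each name must match the
  // corresponding source column, in order.
  static void verify(List<String> sourceNames, List<String> hintNames) {
    int n = Math.min(sourceNames.size(), hintNames.size());
    for (int i = 0; i < n; i++) {
      if (!sourceNames.get(i).equals(hintNames.get(i))) {
        throw new IllegalStateException(
            "The provided structure hint does not match the column names of the source table.");
      }
    }
  }
}
```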
+internal_upload_table : Database_Table | In_Memory_Table -> Connection -> Text -> Nothing | Vector Text -> Boolean -> Vector Column_Description -> Problem_Behavior -> Integer|Nothing -> Database_Table +internal_upload_table source_table connection table_name primary_key temporary structure_hint=Nothing on_problems=Problem_Behavior.Report_Error row_limit=Nothing = case source_table of _ : In_Memory_Table -> - internal_upload_in_memory_table source_table connection table_name primary_key temporary on_problems row_limit + internal_upload_in_memory_table source_table connection table_name primary_key temporary structure_hint on_problems row_limit _ : Database_Table -> - internal_upload_database_table source_table connection table_name primary_key temporary on_problems row_limit + internal_upload_database_table source_table connection table_name primary_key temporary structure_hint on_problems row_limit _ -> Panic.throw <| Illegal_Argument.Error ("Unsupported table type: " + Meta.get_qualified_type_name source_table) @@ -110,9 +123,11 @@ select_into_table_implementation source_table connection table_name primary_key ## PRIVATE It should be run within a transaction and wrapped in `handle_upload_errors`. -internal_upload_in_memory_table source_table connection table_name primary_key temporary on_problems row_limit = +internal_upload_in_memory_table source_table connection table_name primary_key temporary structure_hint on_problems row_limit = In_Transaction.ensure_in_transaction <| - created_table_name = internal_create_table_structure connection table_name structure=source_table primary_key=primary_key temporary=temporary on_problems=on_problems + verify_structure_hint structure_hint source_table.column_names + structure = structure_hint.if_nothing source_table + created_table_name = internal_create_table_structure connection table_name structure=structure primary_key=primary_key temporary=temporary on_problems=on_problems column_names = source_table.column_names ## `created_table_name.if_not_error` is used to ensure that if there are @@ -124,21 +139,24 @@ internal_upload_in_memory_table source_table connection table_name primary_key t insert_template = make_batched_insert_template connection created_table_name column_names statement_setter = connection.dialect.get_statement_setter Panic.rethrow <| - connection.jdbc_connection.batch_insert insert_template statement_setter source_table batch_size=default_batch_size row_limit=row_limit + expected_type_hints = align_structure connection structure . map .value_type + connection.jdbc_connection.batch_insert insert_template statement_setter source_table batch_size=default_batch_size expected_type_hints=expected_type_hints row_limit=row_limit upload_status.if_not_error <| connection.query (SQL_Query.Table_Name created_table_name) ## PRIVATE It should be run within a transaction and wrapped in `handle_upload_errors`. -internal_upload_database_table source_table connection table_name primary_key temporary on_problems row_limit = +internal_upload_database_table source_table connection table_name primary_key temporary structure_hint on_problems row_limit = In_Transaction.ensure_in_transaction <| connection_check = if source_table.connection.jdbc_connection == connection.jdbc_connection then True else Error.throw (Unsupported_Database_Operation.Error "The Database table to be uploaded must be coming from the same connection as the connection on which the new table is being created. Cross-connection uploads are currently not supported. 
To work around this, you can first `.read` the table into memory and then upload it from memory to a different connection.") + verify_structure_hint structure_hint source_table.column_names + structure = structure_hint.if_nothing source_table connection_check.if_not_error <| # Warning: in some DBs, calling a DDL query in a transaction may commit it. We may have to have some special handling for this. - created_table_name = internal_create_table_structure connection table_name structure=source_table primary_key=primary_key temporary=temporary on_problems=on_problems + created_table_name = internal_create_table_structure connection table_name structure=structure primary_key=primary_key temporary=temporary on_problems=on_problems upload_status = created_table_name.if_not_error <| internal_translate_known_upload_errors source_table connection primary_key <| effective_source_table = case row_limit of @@ -155,6 +173,13 @@ internal_upload_database_table source_table connection table_name primary_key te upload_status.if_not_error <| connection.query (SQL_Query.Table_Name created_table_name) +## PRIVATE +verify_structure_hint structure_hint column_names = + if structure_hint.is_nothing.not then + column_names.zip structure_hint expected_name-> column_description-> + if column_description.name != expected_name then + Panic.throw (Illegal_State.Error ("The provided structure hint does not match the column names of the source table. Expected: "+column_names.to_display_text+", got: "+(structure_hint.map .name . to_display_text)+". This is a bug in the Database library.")) + ## PRIVATE Ensures that provided primary key columns are present in the table and that there are no duplicates. @@ -309,7 +334,9 @@ common_update_table (source_table : Database_Table | In_Memory_Table) (target_ta dry_run = Context.Output.is_enabled.not row_limit = if dry_run then dry_run_row_limit else Nothing check_for_null_keys_if_any_keys_set source_table effective_key_columns <| Context.Output.with_enabled <| - tmp_table = internal_upload_table source_table connection tmp_table_name primary_key=effective_key_columns temporary=True on_problems=Problem_Behavior.Report_Error row_limit=row_limit + structure_hint = target_table.select_columns source_table.column_names reorder=True . columns . 
map c-> + Column_Description.Value c.name c.value_type + tmp_table = internal_upload_table source_table connection tmp_table_name primary_key=effective_key_columns structure_hint=structure_hint temporary=True on_problems=Problem_Behavior.Report_Error row_limit=row_limit tmp_table.if_not_error <| resulting_table = append_to_existing_table tmp_table target_table update_action effective_key_columns dry_run=dry_run ## We don't need to drop the table if append panics, because @@ -440,7 +467,7 @@ check_update_arguments_structure_match source_table target_table key_columns upd source_type = source_column.value_type target_type = target_column.value_type if source_type == target_type then [] else - if source_type.can_be_widened_to target_type then [Inexact_Type_Coercion.Warning source_type target_type] else + if source_type.can_be_widened_to target_type then [Inexact_Type_Coercion.Warning source_type target_type unavailable=False] else Error.throw (Column_Type_Mismatch.Error source_column.name target_type source_type) source_columns = Set.from_vector source_table.column_names @@ -544,7 +571,7 @@ prepare_source_table_for_delete_matching (key_values_to_delete : Database_Table Delete_Rows_Source.Value prepared_table Nothing _ : In_Memory_Table -> tmp_table_name = connection.base_connection.table_naming_helper.generate_random_table_name "enso-temp-keys-table-" - uploaded_table = internal_upload_in_memory_table prepared_table connection tmp_table_name primary_key=key_columns temporary=True on_problems=Problem_Behavior.Report_Error row_limit=dry_run_row_limit + uploaded_table = internal_upload_in_memory_table prepared_table connection tmp_table_name primary_key=key_columns temporary=True structure_hint=Nothing on_problems=Problem_Behavior.Report_Error row_limit=dry_run_row_limit row_limit_exceeded = prepared_table.row_count > dry_run_row_limit dry_run_message_suffix = case row_limit_exceeded of diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Storage.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Storage.enso index 16bbad0e8ff7..f1cd4a6f77b4 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Storage.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Storage.enso @@ -61,7 +61,10 @@ closest_storage_type value_type = case value_type of Value_Type.Date_Time _ -> DateTimeType.INSTANCE Value_Type.Time -> TimeOfDayType.INSTANCE Value_Type.Mixed -> AnyObjectType.INSTANCE - Value_Type.Decimal _ _ -> BigIntegerType.INSTANCE + Value_Type.Decimal _ scale -> + is_integer = scale.is_nothing || scale <= 0 + if is_integer then BigIntegerType.INSTANCE else + Error.throw (Illegal_Argument.Error "Columns of type "+value_type.to_display_text+" are currently not supported in the in-memory backend - only Decimal of integer type (scale <= 0) is supported. 
You may cast the column to Float first (lossy conversion).") _ -> Error.throw (Illegal_Argument.Error "Columns of type "+value_type.to_display_text+" are currently not supported in the in-memory backend.") diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso index 7af1c1298fe7..b54fabe86c6c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso @@ -285,6 +285,9 @@ type Value_Type fits_variability = if target_variable_length then True else self_variable_length == target_variable_length fits_variability && (target_size >= self_size) _ -> False + Value_Type.Date_Time _ -> case target_type of + Value_Type.Date_Time _ -> True + _ -> False _ -> False ## PRIVATE @@ -402,7 +405,9 @@ type Value_Type True -> "Char (variable length, max_size=" + size_text + ")" False -> "Char (fixed length, size=" + size_text + ")" Value_Type.Date -> "Date" - Value_Type.Date_Time with_timezone -> "Date_Time (with_timezone=" + with_timezone.to_text + ")" + Value_Type.Date_Time with_timezone -> + tz_suffix = if with_timezone then " (with timezone)" else " (without timezone)" + "Date_Time" + tz_suffix Value_Type.Time -> "Time" Value_Type.Binary size variable_length -> size_text = case size of diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso index ed357b814cdd..1b1281ee2d3c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso @@ -545,14 +545,18 @@ type Inexact_Type_Coercion ## PRIVATE Indicates that the requested `Value_Type` is not available in the given backend, so it was replaced by its closest available type. - Warning (requested_type : Value_Type) (actual_type : Value_Type) + Warning (requested_type : Value_Type) (actual_type : Value_Type) (unavailable:Boolean = True) ## PRIVATE Create a human-readable version of the error. to_display_text : Text to_display_text self = - "The requested type ["+self.requested_type.to_text+"] is not available in the given backend, so it was replaced by its closest available type ["+self.actual_type.to_text+"]." + case self.unavailable of + True -> + "The requested type "+self.requested_type.to_display_text+" is not available in the given backend, so it was replaced by its closest available type "+self.actual_type.to_display_text+"." + False -> + "The type "+self.requested_type.to_display_text+" has been coerced to "+self.actual_type.to_display_text+". Some information may be lost." 
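Note: for the `Date_Time` case that motivates the new wording, the information lost is the instant, not the wall-clock time: storing a zoned value into a `timestamp` (without time zone) column keeps only the local date-time, and reading it back re-attaches the default system zone. A small self-contained Java illustration of that round trip, assuming the JVM default zone differs from the value's original zone:

```java
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;

public class TimestampRoundTripSketch {
  public static void main(String[] args) {
    // A value created in a non-system zone (assumption: the default zone is not US/Hawaii).
    ZonedDateTime original = ZonedDateTime.of(2022, 5, 4, 15, 30, 0, 0, ZoneId.of("US/Hawaii"));

    // Upload path: the zone is stripped and only the wall-clock time is stored.
    LocalDateTime stored = original.toLocalDateTime();            // 2022-05-04T15:30

    // Read path: the stored local time is re-interpreted in the default system zone.
    ZonedDateTime roundTripped = stored.atZone(ZoneId.systemDefault());

    System.out.println(original.toLocalDateTime().equals(roundTripped.toLocalDateTime())); // true
    System.out.println(original.toInstant().equals(roundTripped.toInstant()));             // false, unless the zones agree
  }
}
```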
## PRIVATE diff --git a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoTimeZone.java b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoTimeZone.java index 65f7742e4991..7dc87bc3cf91 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoTimeZone.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoTimeZone.java @@ -15,6 +15,7 @@ import org.enso.interpreter.runtime.EnsoContext; import org.enso.interpreter.runtime.data.text.Text; import org.enso.interpreter.runtime.library.dispatch.TypesLibrary; +import org.enso.polyglot.common_utils.Core_Date_Utils; @ExportLibrary(InteropLibrary.class) @ExportLibrary(TypesLibrary.class) @@ -78,7 +79,7 @@ public static EnsoTimeZone create(long hours, long minutes, long seconds) { autoRegister = false) @CompilerDirectives.TruffleBoundary public static EnsoTimeZone system() { - return new EnsoTimeZone(ZoneId.systemDefault()); + return new EnsoTimeZone(Core_Date_Utils.defaultSystemZone()); } @ExportMessage diff --git a/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java index 02d8105f6ba9..890835b7a477 100644 --- a/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java +++ b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java @@ -116,4 +116,8 @@ public static ZonedDateTime parseZonedDateTime(String dateString, DateTimeFormat "Unable to parse Text '" + dateString + "' to Date_Time due to arithmetic error.", e); } } + + public static ZoneId defaultSystemZone() { + return ZoneId.systemDefault(); + } } diff --git a/std-bits/database/src/main/java/org/enso/database/JDBCUtils.java b/std-bits/database/src/main/java/org/enso/database/JDBCUtils.java index bb6232feaef8..9fbc29c0ce05 100644 --- a/std-bits/database/src/main/java/org/enso/database/JDBCUtils.java +++ b/std-bits/database/src/main/java/org/enso/database/JDBCUtils.java @@ -5,9 +5,11 @@ import java.sql.SQLException; import java.sql.Types; import java.time.LocalDate; +import java.time.LocalDateTime; import java.time.LocalTime; import java.time.OffsetDateTime; import java.time.ZonedDateTime; +import org.enso.polyglot.common_utils.Core_Date_Utils; public class JDBCUtils { @@ -25,12 +27,32 @@ public static ZonedDateTime getZonedDateTime(ResultSet rs, int columnIndex) thro return offsetDateTime.toZonedDateTime(); } + /** + * Gets a ZonedDateTime from a ResultSet, interpreting the result as LocalDateTime and then adding + * the system default timezone. + */ + public static ZonedDateTime getLocalDateTimeAsZoned(ResultSet rs, int columnIndex) + throws SQLException { + LocalDateTime localDateTime = rs.getObject(columnIndex, LocalDateTime.class); + if (localDateTime == null) { + return null; + } + return localDateTime.atZone(Core_Date_Utils.defaultSystemZone()); + } + /** Sets a ZonedDateTime in a PreparedStatement. */ public static void setZonedDateTime( PreparedStatement stmt, int columnIndex, ZonedDateTime zonedDateTime) throws SQLException { stmt.setObject(columnIndex, zonedDateTime.toOffsetDateTime(), Types.TIMESTAMP_WITH_TIMEZONE); } + /** Sets a ZonedDateTime converting it to LocalDateTime in a PreparedStatement. 
*/ + public static void setLocalDateTime( + PreparedStatement stmt, int columnIndex, ZonedDateTime zonedDateTime) throws SQLException { + LocalDateTime localDateTime = zonedDateTime.toLocalDateTime(); + stmt.setObject(columnIndex, localDateTime, Types.TIMESTAMP); + } + /** Sets a LocalTime in a PreparedStatement. */ public static void setLocalTime(PreparedStatement stmt, int columnIndex, LocalTime localTime) throws SQLException { diff --git a/test/Table_Tests/src/Database/Postgres_Spec.enso b/test/Table_Tests/src/Database/Postgres_Spec.enso index 8786391d5a1b..778097e64c27 100644 --- a/test/Table_Tests/src/Database/Postgres_Spec.enso +++ b/test/Table_Tests/src/Database/Postgres_Spec.enso @@ -26,6 +26,7 @@ import project.Database.Upload_Spec import project.Database.Helpers.Name_Generator import project.Database.Types.Postgres_Type_Mapping_Spec import project.Common_Table_Operations +from project.Common_Table_Operations.Util import all from project.Database.Types.Postgres_Type_Mapping_Spec import default_text postgres_specific_spec connection db_name setup = @@ -303,7 +304,9 @@ postgres_specific_spec connection db_name setup = # Unfortunately, performing operations on a Decimal column in postgres can lose information about it being an integer column. t2.at "Y" . value_type . scale . should_equal Nothing t2.at "X" . to_vector . should_equal [10, x] + # Only works by approximation: t2.at "Y" . to_vector . should_equal [20, x+10] + t2.at "Y" . cast Value_Type.Char . to_vector . should_equal ["20", (x+10).to_text] m2 = t2.remove_warnings.read m2.at "X" . value_type . should_be_a (Value_Type.Decimal ...) @@ -338,6 +341,100 @@ postgres_specific_spec connection db_name setup = w4.requested_type . should_equal (Value_Type.Decimal precision=Nothing scale=Nothing) w4.actual_type . should_equal Value_Type.Float + Test.specify "should round-trip timestamptz column, preserving instant but converting to UTC" <| + table_name = Name_Generator.random_name "TimestampTZ" + table = connection.create_table table_name [Column_Description.Value "A" (Value_Type.Date_Time with_timezone=True)] primary_key=[] + + dt1 = Date_Time.new 2022 05 04 15 30 + dt2 = Date_Time.new 2022 05 04 15 30 zone=(Time_Zone.utc) + dt3 = Date_Time.new 2022 05 04 15 30 zone=(Time_Zone.parse "US/Hawaii") + dt4 = Date_Time.new 2022 05 04 15 30 zone=(Time_Zone.parse "Europe/Warsaw") + v = [dt1, dt2, dt3, dt4] + + Problems.assume_no_problems <| + table.update_rows (Table.new [["A", v]]) update_action=Update_Action.Insert + + tz_zero = Time_Zone.parse "Z" + v_at_z = v.map dt-> dt.at_zone tz_zero + table.at "A" . to_vector . should_equal v_at_z + table.at "A" . to_vector . should_equal_tz_agnostic v + + ## We also check how the timestamp column behaves with interpolations: + (See analogous test showing a less nice behaviour without timezones below.) + v.each my_dt-> Test.with_clue my_dt.to_text+" " <| + # Checks if the two date-times represent the same instant in time. + is_same_time_instant dt1 dt2 = + dt1.at_zone tz_zero == dt2.at_zone tz_zero + local_equals = v.filter (is_same_time_instant my_dt) + # Depending on test runner's timezone, the `my_dt` may be equal to 1 or 2 entries in `v`. + [1, 2].should_contain local_equals.length + + Test.with_clue " == "+local_equals.to_text+": " <| + t2 = table.filter "A" (Filter_Condition.Equal to=my_dt) + t2.row_count . should_equal local_equals.length + t2.at "A" . to_vector . 
should_equal_tz_agnostic local_equals + + Test.specify "will round-trip timestamp column without timezone by converting it to UTC" <| + table_name = Name_Generator.random_name "Timestamp" + table = connection.create_table table_name [Column_Description.Value "A" (Value_Type.Date_Time with_timezone=False)] primary_key=[] + Problems.assume_no_problems table + + dt1 = Date_Time.new 2022 05 04 15 30 + dt2 = Date_Time.new 2022 05 04 15 30 zone=(Time_Zone.utc) + dt3 = Date_Time.new 2022 05 04 15 30 zone=(Time_Zone.parse "US/Hawaii") + dt4 = Date_Time.new 2022 05 04 15 30 zone=(Time_Zone.parse "Europe/Warsaw") + v = [dt1, dt2, dt3, dt4] + + source_table = Table.new [["A", v]] + source_table.at "A" . value_type . should_equal (Value_Type.Date_Time with_timezone=True) + w = Problems.expect_only_warning Inexact_Type_Coercion <| + table.update_rows source_table update_action=Update_Action.Insert + w.requested_type . should_equal (source_table.at "A" . value_type) + w.actual_type . should_equal (table.at "A" . value_type) + w.to_display_text . should_equal "The type Date_Time (with timezone) has been coerced to Date_Time (without timezone). Some information may be lost." + + # When uploading we want to just strip the timezone information and treat every timestamp as LocalDateTime. + # This is verified by checking the text representation in the DB: it should show the same local time in all 4 cases, regardless of original timezone. + local_dt = "2022-05-04 15:30:00" + table.at "A" . cast Value_Type.Char . to_vector . should_equal [local_dt, local_dt, local_dt, local_dt] + + # Then when downloaded, it should be interpreted at the 'system default' timezone. + materialized_table = table.read + materialized_table.at "A" . to_vector . should_equal [dt1, dt1, dt1, dt1] + + # The Inexact_Type_Coercion warning is silenced for this case: + Problems.assume_no_problems materialized_table + + # We also check how the timestamp column behaves with interpolations: + v.each my_dt-> Test.with_clue my_dt.to_text+": " <| + t2 = table.filter "A" (Filter_Condition.Equal to=my_dt) + is_in_system_tz = my_dt.at_zone Time_Zone.system . time_of_day == my_dt.time_of_day + ## Unfortunately, this will work in the following way: + - if the date-time represented as local time in the current system default timezone is 15:30, + then it will match _all_ entries (as they do not have a timezone). + - otherwise, the date-time is converted into UTC before being passed to the Database, + and only then the timezone is stripped - so the local time is actually shifted. + + This is not ideal - ideally we'd want the local date time to be extracted from the timestamp directly, + before any date conversions happen - this way in _all_ 4 cases we would get 15:30 local time + and all rows would always be matching. + + That logic is applied when uploading a table. However, in custom queries, we do not currently have + enough metadata to infer that the Date_Time that is being passed to a given `?` hole in the query + should be treated as a local date-time or a zoned date-time. + Thus, we pass it as zoned by default to avoid losing information - and that triggers the conversion + on the Database side. If we want to change that, we would need to add metadata within our operations, + so that an operation like `==` will infer the expected type of the `?` hole based on the type of the + second operand. + case is_in_system_tz of + True -> + my_dt.at_zone Time_Zone.system . time_of_day . to_display_text . should_equal "15:30:00" + t2.row_count . 
should_equal 4 + t2.at "A" . to_vector . should_equal [dt1, dt1, dt1, dt1] + False -> + my_dt.at_zone Time_Zone.system . time_of_day . to_display_text . should_not_equal "15:30:00" + t2.row_count . should_equal 0 + t2.at "A" . to_vector . should_equal [] Test.group "[PostgreSQL] math functions" <| Test.specify "round, trunc, ceil, floor" <| diff --git a/test/Table_Tests/src/Database/Types/Postgres_Type_Mapping_Spec.enso b/test/Table_Tests/src/Database/Types/Postgres_Type_Mapping_Spec.enso index ab7ab6962491..6e5b669f5b7d 100644 --- a/test/Table_Tests/src/Database/Types/Postgres_Type_Mapping_Spec.enso +++ b/test/Table_Tests/src/Database/Types/Postgres_Type_Mapping_Spec.enso @@ -1,10 +1,11 @@ from Standard.Base import all +import Standard.Base.Errors.Illegal_Argument.Illegal_Argument -from Standard.Table import Aggregate_Column, Value_Type +from Standard.Table import Aggregate_Column, Value_Type, Table import Standard.Table.Data.Type.Value_Type.Bits from Standard.Table.Errors import Inexact_Type_Coercion -from Standard.Database import SQL_Query +from Standard.Database import all from Standard.Test import Problems, Test, Test_Suite import Standard.Test.Extensions @@ -128,6 +129,51 @@ spec connection = t2.at "b" . value_type . should_equal (Value_Type.Integer Bits.Bits_16) Problems.expect_warning Inexact_Type_Coercion t2 + Test.group "[PostgreSQL] Type Edge Cases" <| + Test.specify "will fail to read a BigDecimal column and suggest to cast it to Float" <| + table_name = Name_Generator.random_name "BigDecimal" + table = connection.create_table table_name [Column_Description.Value "B" (Value_Type.Decimal precision=100 scale=5)] primary_key=[] + Problems.assume_no_problems table + + Problems.expect_only_warning Inexact_Type_Coercion <| + table.update_rows (Table.new [["B", [1.5, 2.5]]]) update_action=Update_Action.Insert + + table.at "B" . value_type . should_equal (Value_Type.Decimal precision=100 scale=5) + + m2 = table.read + m2.at "B" . value_type . should_equal Value_Type.Float + m2.at "B" . to_vector . should_equal [1.5, 2.5] + w2 = Problems.expect_only_warning Inexact_Type_Coercion m2 + w2.requested_type . should_equal (Value_Type.Decimal precision=100 scale=5) + w2.actual_type . should_equal Value_Type.Float + + Test.specify "should warn when fetching a Binary column and coercing it to Mixed because in-memory does not support Binary" <| + table_name = Name_Generator.random_name "Bin" + table = connection.create_table table_name [Column_Description.Value "B" (Value_Type.Binary size=10)] primary_key=[] + w0 = Problems.expect_only_warning Inexact_Type_Coercion table + w0.requested_type . should_equal (Value_Type.Binary size=10) + w0.actual_type . should_equal (Value_Type.Binary variable_length=True size=2147483647) + table_clean = table.remove_warnings + + Problems.assume_no_problems <| + table_clean.update_rows (connection.query 'SELECT decode(\'ffff\', \'hex\') AS "B"') update_action=Update_Action.Insert + Problems.assume_no_problems <| + table_clean.update_rows (connection.query 'SELECT decode(\'caffee\', \'hex\') AS "B"') update_action=Update_Action.Insert + Problems.assume_no_problems <| + table_clean.update_rows (connection.query 'SELECT decode(\'beef\', \'hex\') AS "B"') update_action=Update_Action.Insert + + materialized_table = table_clean.read + materialized_table.at "B" . value_type . should_equal Value_Type.Mixed + w = Problems.expect_only_warning Inexact_Type_Coercion materialized_table + w.requested_type . should_be_a (Value_Type.Binary ...) + w.actual_type . 
should_equal Value_Type.Mixed + + # The materialized data is represented as array of signed bytes + ffff = [-1, -1] + caffee = [-54, -1, -18] + beef = [-66, -17] + materialized_table.at "B" . to_vector . should_equal [ffff, caffee, beef] + main = Test_Suite.run_main <| connection = create_connection_builder Nothing spec connection diff --git a/test/Table_Tests/src/Helpers/Value_Type_Spec.enso b/test/Table_Tests/src/Helpers/Value_Type_Spec.enso index 646dc443fc1f..294dc0d80192 100644 --- a/test/Table_Tests/src/Helpers/Value_Type_Spec.enso +++ b/test/Table_Tests/src/Helpers/Value_Type_Spec.enso @@ -22,7 +22,7 @@ spec = Value_Type.Date.to_display_text . should_equal "Date" Value_Type.Time.to_display_text . should_equal "Time" - Value_Type.Date_Time.to_display_text . should_equal "Date_Time (with_timezone=True)" + Value_Type.Date_Time.to_display_text . should_equal "Date_Time (with timezone)" Value_Type.Mixed.to_display_text . should_equal "Mixed" Value_Type.Unsupported_Data_Type.to_display_text . should_equal "Unsupported_Data_Type" diff --git a/test/Table_Tests/src/In_Memory/Column_Spec.enso b/test/Table_Tests/src/In_Memory/Column_Spec.enso index bae7477ed55c..734560fc2f94 100644 --- a/test/Table_Tests/src/In_Memory/Column_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Column_Spec.enso @@ -218,7 +218,10 @@ spec = r5 = Column.from_vector "X" [12, Date_Time.new 2023 08 22 18 17 zone=Time_Zone.utc] Value_Type.Integer r5.should_fail_with Invalid_Value_Type - r5.catch.to_display_text.should_contain "Expected type Integer (64 bits), but got a value 2023-08-22 18:17:00[UTC] of type Date_Time (with_timezone=True)" + # Split, because depending on the platform the date time may get a `[UTC]` suffix or not. + r5.catch.to_display_text.should_contain "Expected type Integer (64 bits), but got a value 2023-08-22 18:17:00" + r5.catch.to_display_text.should_contain "of type Date_Time (with timezone)" + r6 = Column.from_vector "X" [12, Date.new 2023 08 22] Value_Type.Byte r6.should_fail_with Invalid_Value_Type
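Note: the expected vectors in the Binary round-trip test above (`[-1, -1]`, `[-54, -1, -18]`, `[-66, -17]`) are simply the hex payloads viewed as Java's signed bytes, as this small snippet shows:

```java
import java.util.Arrays;

public class SignedBytesSketch {
  public static void main(String[] args) {
    // Bytes are signed in Java, so 0xFF prints as -1, 0xCA as -54, 0xEE as -18, etc.
    byte[] ffff   = {(byte) 0xFF, (byte) 0xFF};
    byte[] caffee = {(byte) 0xCA, (byte) 0xFF, (byte) 0xEE};
    byte[] beef   = {(byte) 0xBE, (byte) 0xEF};
    System.out.println(Arrays.toString(ffff));   // [-1, -1]
    System.out.println(Arrays.toString(caffee)); // [-54, -1, -18]
    System.out.println(Arrays.toString(beef));   // [-66, -17]
  }
}
```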