Adding typechecks to Column Operations #6298

Merged: 9 commits, Apr 21, 2023
12 changes: 12 additions & 0 deletions distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso
@@ -86,6 +86,18 @@ type Vector a
from_array : Array -> Vector
from_array array = @Builtin_Method "Vector.from_array"

## PRIVATE
A helper method that takes a vector, an array, or a single element and
returns a vector.

If given a vector or an array, a vector containing the same elements is
returned.
If given a single element, a one-element vector containing it is returned.
unify_vector_or_element value = case value of
vec : Vector -> vec
arr : Array -> Vector.from_polyglot_array arr
single_element -> [single_element]
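The dispatch above can be sketched in Python (a hypothetical model: a list stands in for `Vector`, a tuple for a polyglot `Array`):

```python
def unify_vector_or_element(value):
    # Vectors (modeled as lists) pass through unchanged.
    if isinstance(value, list):
        return value
    # Arrays (modeled as tuples) are converted into a vector.
    if isinstance(value, tuple):
        return list(value)
    # Any other value is wrapped in a one-element vector.
    return [value]
```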

## Creates a new vector of the given length, filling the elements with
the provided constant.

@@ -15,7 +15,7 @@ import project.Nothing.Nothing
> Example
Dropping into a debugging REPL during execution.

Debug.breakpoint
Standard.Base.Runtime.Debug.breakpoint
breakpoint : Nothing
breakpoint = @Builtin_Method "Debug.breakpoint"

226 changes: 120 additions & 106 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso

Large diffs are not rendered by default.

@@ -1,7 +1,7 @@
from Standard.Base import all
import Standard.Base.Errors.Unimplemented.Unimplemented

from Standard.Table import Aggregate_Column, Join_Kind
from Standard.Table import Aggregate_Column, Join_Kind, Value_Type
import Standard.Table.Internal.Naming_Helpers.Naming_Helpers
import Standard.Table.Internal.Problem_Builder.Problem_Builder

@@ -137,15 +137,22 @@ type Dialect
Unimplemented.throw "This is an interface only."

## PRIVATE
Specifies if the cast used to reconcile column types should be done after
performing the union. If `False`, the cast will be done before the union.

Most databases that care about column types will want to do the cast
before the union operation to ensure that types are aligned when merging.
For an SQLite workaround to work, it's better to do the cast after the
union operation.
cast_after_union : Boolean
cast_after_union self =
Specifies if the Database distinguishes a separate `NaN` value for
floating point columns. Some databases are not able to distinguish
NaN from NULL.
supports_separate_nan : Boolean
supports_separate_nan self =
Unimplemented.throw "This is an interface only."

## PRIVATE
Performs any transformations on a column resulting from unifying other
columns.

These transformations depend on the dialect. They can be used to align
the result types, for example.
adapt_unified_column : Internal_Column -> Value_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column
adapt_unified_column self column approximate_result_type infer_result_type_from_database_callback =
_ = [column, approximate_result_type, infer_result_type_from_database_callback]
Unimplemented.throw "This is an interface only."

## PRIVATE
41 changes: 18 additions & 23 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@@ -17,6 +17,7 @@ import Standard.Table.Data.Match_Columns as Match_Columns_Helpers
import Standard.Table.Data.Report_Unmatched.Report_Unmatched
import Standard.Table.Data.Row.Row
import Standard.Table.Data.Table.Table as Materialized_Table
import Standard.Table.Data.Type.Value_Type_Helpers
import Standard.Table.Internal.Aggregate_Column_Helper
import Standard.Table.Internal.Java_Exports
import Standard.Table.Internal.Table_Helpers
@@ -41,7 +42,6 @@ import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
import project.Internal.IR.Query.Query
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
from project.Data.Column import find_argument_type

from project.Errors import Unsupported_Database_Operation, Integrity_Error, Unsupported_Name

@@ -621,7 +621,9 @@ type Table
resolved = case column of
_ : Text -> self.compute column on_problems
_ -> column
renamed = if new_name.is_nothing then resolved else resolved.rename new_name
renamed = case new_name of
Nothing -> resolved
_ : Text -> resolved.rename new_name

renamed.if_not_error <|
index = self.internal_columns.index_of (c -> c.name == renamed.name)
@@ -662,7 +664,7 @@ type Table
get_column name = self.at name
type_mapping = self.connection.dialect.get_type_mapping
make_constant value =
argument_value_type = find_argument_type value
argument_value_type = Value_Type_Helpers.find_argument_type value
sql_type = case argument_value_type of
Nothing -> SQL_Type.null
_ -> type_mapping.value_type_to_sql argument_value_type Problem_Behavior.Ignore
@@ -1151,31 +1153,28 @@ type Table
sql_type.catch Inexact_Type_Coercion error->
Panic.throw <|
Illegal_State.Error "Unexpected inexact type coercion in Union. The union logic should only operate in types supported by the given backend. This is a bug in the Database library. The coercion was: "+error.to_display_text cause=error
[column_set, sql_type]
[column_set, sql_type, result_type]
good_columns = merged_columns.filter r-> r.is_nothing.not
if good_columns.is_empty then Error.throw No_Output_Columns else
problem_builder.attach_problems_before on_problems <|
cast_after_union = dialect.cast_after_union
queries = all_tables.map_with_index i-> t->
columns_to_select = good_columns.map description->
column_set = description.first
result_type = description.second
sql_type = description.second
column_name = column_set.name
input_column = case column_set.column_indices.at i of
case column_set.column_indices.at i of
Nothing ->
typ = SQL_Type_Reference.from_constant SQL_Type.null
expr = SQL_Expression.Literal "NULL"
Internal_Column.Value column_name typ expr
null_column = Internal_Column.Value column_name typ expr
## We assume that the type for this
expression will never be queried - it is
just used internally to build the Union
operation and never exposed externally.
infer_return_type _ = SQL_Type_Reference.null
dialect.make_cast null_column sql_type infer_return_type
corresponding_column_index : Integer ->
t.at corresponding_column_index . as_internal . rename column_name
## We return `null` return type, as this type should
never be queried - we will just put it into the
union and the overall queried type will be taken
from there. This is just needed to create an
internal representation.
infer_return_type _ = SQL_Type_Reference.null
if cast_after_union then input_column else
dialect.make_cast input_column result_type infer_return_type
pairs = columns_to_select.map c->
[c.name, c.expression]
Query.Select pairs t.context
@@ -1191,15 +1190,11 @@
SQL_Type_Reference.new self.connection new_ctx expression
new_columns = good_columns.map description->
column_set = description.first
result_type = description.second
result_type = description.at 2
name = column_set.name
expression = SQL_Expression.Column union_alias name
case cast_after_union of
True ->
input_column = Internal_Column.Value name SQL_Type_Reference.null expression
dialect.make_cast input_column result_type infer_return_type
False ->
Internal_Column.Value name (infer_return_type expression) expression
input_column = Internal_Column.Value name (infer_return_type expression) expression
dialect.adapt_unified_column input_column result_type infer_return_type

Table.Value union_alias self.connection new_columns new_ctx
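The per-table column selection in the union logic above can be sketched as follows (a simplified Python model with hypothetical names; the real code builds SQL IR expressions rather than strings):

```python
def columns_to_select(column_sets, table_index):
    """Pick one expression per unified output column for a single input table.

    column_sets: list of (name, sql_type, indices), where indices[i] is the
    position of the matching column in table i, or None when that table
    lacks the column (a simplified stand-in for the Enso `column_set`).
    """
    out = []
    for name, sql_type, indices in column_sets:
        idx = indices[table_index]
        if idx is None:
            # The column is missing in this table: substitute a NULL
            # literal cast to the unified type, so the backend sees
            # aligned types when the UNION merges the queries.
            out.append((name, f"CAST(NULL AS {sql_type})"))
        else:
            # The column exists: select it under the unified name.
            out.append((name, f"col_{idx}"))
    return out
```

After the union, each dialect's `adapt_unified_column` may still post-process the result columns, as the Postgres and SQLite implementations below show.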

@@ -6,11 +6,14 @@ import Standard.Base.Errors.Unimplemented.Unimplemented
import Standard.Table.Data.Aggregate_Column.Aggregate_Column
import Standard.Table.Internal.Naming_Helpers.Naming_Helpers
import Standard.Table.Internal.Problem_Builder.Problem_Builder
from Standard.Table import Value_Type
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
from Standard.Table.Errors import Inexact_Type_Coercion

import project.Connection.Connection.Connection
import project.Data.Dialect
import project.Data.SQL.Builder
import project.Data.SQL.SQL_Fragment
import project.Data.SQL_Statement.SQL_Statement
import project.Data.SQL_Type.SQL_Type
import project.Data.Table.Table
@@ -148,8 +151,36 @@ type Postgres_Dialect
needs_execute_query_for_type_inference self = False

## PRIVATE
cast_after_union : Boolean
cast_after_union self = False
supports_separate_nan : Boolean
supports_separate_nan self = True

## PRIVATE
There is a bug in Postgres type inference, where if we unify two
fixed-length char columns of length N and M, the result type is said to
be a **fixed-length** column of length max_int4. This is wrong, and in
practice the column is just variable-length text. This method detects
this situation and overrides the type to make it correct.
adapt_unified_column : Internal_Column -> Value_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column
adapt_unified_column self column approximate_result_type infer_result_type_from_database_callback =
needs_char_length_override_check = case approximate_result_type of
Value_Type.Char _ _ -> True
_ -> False
case needs_char_length_override_check of
True ->
type_mapping = self.get_type_mapping
db_type = type_mapping.sql_type_to_value_type column.sql_type_reference.get
case db_type of
Value_Type.Char _ _ -> case db_type == approximate_result_type of
True -> column
False ->
type_override = type_mapping.value_type_to_sql approximate_result_type Problem_Behavior.Report_Error
type_override.catch Inexact_Type_Coercion _->
Panic.throw <|
Illegal_State.Error "The target type ("+db_type.to_display_text+") that we need to cast to seems to not be supported by the Dialect. This is not expected. It is a bug in the Database library."
self.make_cast column type_override infer_result_type_from_database_callback
_ -> Panic.throw <|
Illegal_State.Error "The type computed by our logic is Char, but the Database computed a non-text type ("+db_type.to_display_text+"). This should never happen and should be reported as a bug in the Database library."
False -> column
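The decision flow of the override above can be sketched like this (hypothetical, simplified: value types are modeled as ("char", size) tuples and the cast is an opaque callback):

```python
def adapt_unified_column(db_type, approximate_result_type, make_cast):
    # Only Char results need the length-override check.
    if approximate_result_type[0] != "char":
        return "keep"
    # If our logic computed Char but the database reports a non-text
    # type, that is an internal inconsistency.
    if db_type[0] != "char":
        raise RuntimeError("database computed a non-text type for a Char column")
    # If the database already agrees with the computed type, do nothing.
    if db_type == approximate_result_type:
        return "keep"
    # Otherwise cast, overriding the bogus fixed-length(max_int4) type.
    return make_cast(approximate_result_type)
```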

## PRIVATE
prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
@@ -180,9 +211,52 @@ make_internal_generator_dialect =
stddev_samp = ["STDDEV_SAMP", Base_Generator.make_function "stddev_samp"]
stats = [agg_median, agg_mode, agg_percentile, stddev_pop, stddev_samp]
date_ops = [make_extract_as_int "year" "YEAR", make_extract_as_int "month" "MONTH", make_extract_as_int "day" "DAY"]
my_mappings = text + counts + stats + first_last_aggregators + arith_extensions + bool + date_ops
special_overrides = [is_null, is_empty]
my_mappings = text + counts + stats + first_last_aggregators + arith_extensions + bool + date_ops + special_overrides
Base_Generator.base_dialect . extend_with my_mappings

## PRIVATE
This overrides the default behaviour due to a quirk of Postgres: it tries
to infer the type of the parameter provided to IS NULL.

But when the parameter is NULL, the type is unspecified. This only occurs if
a constant-NULL column is created in an expression builder `make_constant`
when computing an expression. We do not want to give it a default type, as
it needs to be flexible - this NULL column may be used in expressions of
various types. Only with IS NULL, having no type associated will fail with
`ERROR: could not determine data type of parameter`. To circumvent this
issue, we simply check if the parameter to be provided there is a `Nothing`
interpolation. If it is, we will just rewrite the expression to `TRUE` since
that is the expected result of `NULL IS NULL`.

With the IR refactor, this should be done in some preprocess pass that still
works on SQL_Expression and not raw SQL.
is_null = Base_Generator.lift_unary_op "IS_NULL" arg->
if represents_an_untyped_null_expression arg then Builder.code "TRUE" else
Builder.code "(" ++ arg.paren ++ " IS NULL)"

## PRIVATE
See `is_null` above.

It is a heuristic that will match generated expressions coming from
a NULL Literal or a Nothing constant. This should be enough, as any more
complex expression should have some type associated with it.
represents_an_untyped_null_expression arg =
vec = arg.fragments.build
if vec.length != 1 then False else
case vec.first of
SQL_Fragment.Code_Part "NULL" -> True
SQL_Fragment.Interpolation Nothing -> True
_ -> False
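The detection and rewrite performed by `is_null` together with `represents_an_untyped_null_expression` can be sketched as follows (fragments are modeled as ("code", text) / ("interp", value) pairs, mirroring `SQL_Fragment`; names are illustrative):

```python
def lower_is_null(fragments):
    # A single NULL literal or a Nothing interpolation carries no type
    # Postgres could infer, so rewrite `NULL IS NULL` directly to TRUE.
    untyped_null = len(fragments) == 1 and fragments[0] in (
        ("code", "NULL"),
        ("interp", None),
    )
    if untyped_null:
        return "TRUE"
    # Otherwise emit the usual parenthesized IS NULL check, with `?`
    # standing in for interpolated parameters.
    sql = "".join(f[1] if f[0] == "code" else "?" for f in fragments)
    return f"(({sql}) IS NULL)"
```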

## PRIVATE
The same issue as with `is_null` above, but here we can assume that `arg`
represents some `text` value, so we can just CAST it.
is_empty = Base_Generator.lift_unary_op "IS_EMPTY" arg->
is_null = (arg.paren ++ "::text IS NULL").paren
is_empty = (arg ++ " = ''").paren
(is_null ++ " OR " ++ is_empty).paren

## PRIVATE
agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg->
Builder.code "COUNT(CASE WHEN " ++ arg.paren ++ " IS NULL THEN 1 END)"
@@ -9,6 +9,7 @@ import project.Data.SQL_Type.SQL_Type
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.SQL_Type_Mapping
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
from project.Errors import Unsupported_Database_Operation

polyglot java import java.sql.Types

@@ -37,9 +38,11 @@ type Postgres_Type_Mapping
SQL_Type.Value Types.DECIMAL "decimal" precision scale
Value_Type.Char size variable ->
case variable of
True -> case size of
Nothing -> SQL_Type.Value Types.VARCHAR "text"
_ -> SQL_Type.Value Types.VARCHAR "varchar" size
True ->
is_unbounded = size.is_nothing || (size == max_precision)
case is_unbounded of
True -> SQL_Type.Value Types.VARCHAR "text"
False -> SQL_Type.Value Types.VARCHAR "varchar" size
False -> SQL_Type.Value Types.CHAR "char" size
Value_Type.Time ->
SQL_Type.Value Types.TIME "time"
@@ -51,7 +54,7 @@
Value_Type.Binary _ _ ->
SQL_Type.Value Types.BINARY "bytea" precision=max_precision
Value_Type.Mixed ->
Error.throw (Illegal_Argument.Error "Postgres tables do not support Mixed types.")
Error.throw (Unsupported_Database_Operation.Error "Postgres tables do not support Mixed types.")
Value_Type.Unsupported_Data_Type type_name underlying_type ->
underlying_type.if_nothing <|
Error.throw <|
@@ -115,7 +118,8 @@ complex_types_map = Map.from_vector <|
make_decimal sql_type =
Value_Type.Decimal sql_type.precision sql_type.scale
make_varchar sql_type =
Value_Type.Char size=sql_type.precision variable_length=True
effective_size = if sql_type.precision == max_precision then Nothing else sql_type.precision
Value_Type.Char size=effective_size variable_length=True
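The two-way treatment of the maximum precision in the mapping above can be sketched as follows (the exact `MAX_PRECISION` constant is an assumption standing in for whatever limit the type mapping uses):

```python
MAX_PRECISION = 10485760  # stand-in for the dialect's maximum varchar length

def char_to_sql(size):
    # An unbounded variable-length Char (no size, or size equal to the
    # maximum) becomes `text`; any other size becomes `varchar(n)`.
    if size is None or size == MAX_PRECISION:
        return "text"
    return f"varchar({size})"

def varchar_size_to_value_type_size(precision):
    # The database reports the maximum precision for unbounded columns,
    # so map that back to "no declared limit" on the round trip.
    return None if precision == MAX_PRECISION else precision
```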
make_char sql_type =
Value_Type.Char size=sql_type.precision variable_length=False
make_binary variable sql_type =
@@ -2,6 +2,7 @@ from Standard.Base import all

import Standard.Table.Internal.Naming_Helpers.Naming_Helpers
from Standard.Table import Aggregate_Column
from Standard.Table import Value_Type

import project.Connection.Connection.Connection
import project.Data.Dialect
@@ -119,8 +120,14 @@ type Redshift_Dialect
needs_execute_query_for_type_inference self = False

## PRIVATE
cast_after_union : Boolean
cast_after_union self = False
supports_separate_nan : Boolean
supports_separate_nan self = True

## PRIVATE
adapt_unified_column : Internal_Column -> Value_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column
adapt_unified_column self column approximate_result_type infer_result_type_from_database_callback =
_ = [approximate_result_type, infer_result_type_from_database_callback]
column

## PRIVATE
prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
@@ -6,6 +6,7 @@ import Standard.Table.Data.Aggregate_Column.Aggregate_Column
import Standard.Table.Internal.Naming_Helpers.Naming_Helpers
import Standard.Table.Internal.Problem_Builder.Problem_Builder
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
from Standard.Table import Value_Type

import project.Connection.Connection.Connection
import project.Data.SQL.Builder
@@ -153,8 +154,20 @@ type SQLite_Dialect
needs_execute_query_for_type_inference self = True

## PRIVATE
cast_after_union : Boolean
cast_after_union self = True
supports_separate_nan : Boolean
supports_separate_nan self = False

## PRIVATE
SQLite allows mixed type columns, but we want our columns to be uniform.
So after unifying columns with mixed types, we add a cast to ensure that.
adapt_unified_column : Internal_Column -> Value_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column
adapt_unified_column self column approximate_result_type infer_result_type_from_database_callback =
# TODO [RW] This may be revisited with #6281.
case approximate_result_type of
Nothing -> column
_ ->
sql_type = self.get_type_mapping.value_type_to_sql approximate_result_type Problem_Behavior.Ignore
self.make_cast column sql_type infer_result_type_from_database_callback

## PRIVATE
prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement