Snowflake Dialect pt. 6 - Union, Distinct and other improvements (#10576)

- Part of #9486
- Fixes `Table.union`, `merge` and `distinct` tests
- Replaces `distinct_on` in `Context`, which was really a Postgres-specific addition leaking into the base, with a more abstract `Context_Extension` mechanism (a sketch of how a dialect can build on it follows below).
- This allows us to implement the Snowflake-specific `DISTINCT` using `QUALIFY`.
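A minimal sketch of how a dialect could build `DISTINCT` on top of `QUALIFY` via the new mechanism (hypothetical, not code from this commit; the position value 550, chosen to fall between the GROUP BY part at 500 and the ORDER BY part at 600, and the exact QUALIFY predicate are illustrative assumptions):

    ## Hypothetical: emulates DISTINCT by keeping one row per combination of
       the given key expressions, using Snowflake's QUALIFY clause.
    make_qualify_distinct_extension expressions =
        run_generator sql_expressions =
            keys = SQL_Builder.join ", " sql_expressions
            SQL_Builder.code " QUALIFY ROW_NUMBER() OVER (PARTITION BY " ++ keys ++ " ORDER BY " ++ keys ++ ") = 1"
        Context_Extension.Value position=550 expressions=expressions run_generator=run_generator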
radeusgd authored Jul 19, 2024
1 parent 2e0fa89 commit 7fd8701
Showing 49 changed files with 677 additions and 504 deletions.
@@ -1248,6 +1248,10 @@ type Integer
Integer.parse "20220216"
parse text:Text (radix=10:Integer) -> Integer ! Number_Parse_Error = integer_parse text radix

## PRIVATE
fits_in_long self -> Boolean =
self >= Long.MIN_VALUE && self <= Long.MAX_VALUE

## A syntax error when parsing a double.
@Builtin_Type
type Number_Parse_Error
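The new `fits_in_long` helper above is easy to sanity-check at the boundaries: `(10^18).fits_in_long` is `True`, while `(10^19).fits_in_long` is `False`, since `Long.MAX_VALUE` is 2^63 - 1 (about 9.22 * 10^18).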
@@ -578,8 +578,9 @@ type DB_Column
_ : DB_Column -> other.value_type.is_decimal
_ -> False
either_is_decimal = self.value_type.is_decimal || other_is_decimal
if either_is_decimal then self.make_binary_op "DECIMAL_DIV" other else
self.make_binary_op "/" other
new_name = self.naming_helper.binary_operation_name "/" self other
if either_is_decimal then self.make_binary_op "DECIMAL_DIV" other new_name else
self.make_binary_op "/" other new_name

## ALIAS modulo, modulus
GROUP Standard.Base.Operators
19 changes: 6 additions & 13 deletions distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
@@ -1323,9 +1323,10 @@ type DB_Table
distinct self columns=self.column_names case_sensitivity:Case_Sensitivity=..Default on_problems:Problem_Behavior=..Report_Warning =
key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=True on_problems=on_problems . catch No_Output_Columns _->
Error.throw No_Input_Columns_Selected
problem_builder = Problem_Builder.new
new_table = self.connection.dialect.prepare_distinct self key_columns case_sensitivity problem_builder
problem_builder.attach_problems_before on_problems new_table
key_columns.if_not_error <|
problem_builder = Problem_Builder.new
new_table = self.connection.dialect.prepare_distinct self key_columns case_sensitivity problem_builder
problem_builder.attach_problems_before on_problems new_table

## GROUP Standard.Base.Selections
ICON preparation
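(Note: the `key_columns.if_not_error` wrapper in the hunk above makes a `No_Input_Columns_Selected` dataflow error short-circuit immediately, instead of being passed on into `prepare_distinct`.)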
@@ -2612,16 +2613,8 @@ type DB_Table
actual_types = materialized_table.columns.map .value_type
expected_types.zip actual_types expected_type-> actual_type->
if expected_type == actual_type then Nothing else
expected_type_kind = Meta.meta expected_type . constructor
actual_type_kind = Meta.meta actual_type . constructor
## We ignore simple approximations that our in-memory backend does - things like adding default
timezone (because we do not have Date_Time without timezone in-memory),
or changing Float32 to Float64 are silently ignored.
However, bigger changes, like a Binary type column getting coerced to Mixed - _will_ still be reported.
if expected_type_kind == actual_type_kind then Nothing else
# If the reverse was an implicit conversion, undoing it also should not yield warnings:
if self.connection.dialect.get_type_mapping.is_implicit_conversion actual_type expected_type then Nothing else
warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type)
if self.connection.dialect.get_type_mapping.should_warn_on_materialize expected_type actual_type then
warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type)

result = max_rows.attach_warning materialized_table
Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector result
@@ -420,25 +420,46 @@ generate_order dialect order_descriptor =


## PRIVATE

Generates SQL code corresponding to a SELECT statement.

Arguments:
- dialect: The SQL dialect for which the code is being generated.
- columns: The columns to select, or `Nothing` to select all columns (`*`).
- ctx: A description of the SELECT clause.
generate_select_context : Dialect -> Context -> SQL_Builder
generate_select_context dialect ctx =
generate_select : Dialect -> Vector | Nothing -> Context -> SQL_Builder
generate_select dialect columns ctx =
gen_exprs exprs = exprs.map (generate_expression dialect)
gen_column pair = (generate_expression dialect pair.second) ++ alias dialect pair.first

generated_columns = case columns of
Nothing -> SQL_Builder.code "*"
_ -> SQL_Builder.join ", " (columns.map gen_column)

from_part = generate_from_part dialect ctx.from_spec
where_part = (SQL_Builder.join " AND " (gen_exprs ctx.where_filters)) . prefix_if_present " WHERE "
group_part = (SQL_Builder.join ", " (gen_exprs ctx.groups)) . prefix_if_present " GROUP BY "

orders = ctx.orders.map (generate_order dialect)
order_part = (SQL_Builder.join ", " orders) . prefix_if_present " ORDER BY "

limit_part = case ctx.limit of
Nothing -> ""
_ : Integer -> " LIMIT " + ctx.limit.to_text

orders = ctx.orders.map (generate_order dialect)
order_part = (SQL_Builder.join ", " orders) . prefix_if_present " ORDER BY "
(SQL_Builder.code " FROM ") ++ from_part ++ where_part ++ group_part ++ order_part ++ limit_part
extensions = ctx.extensions.map extension->
part = extension.run_generator (gen_exprs extension.expressions)
[extension.position, part]

parts = Vector.build builder->
builder.append [100, SQL_Builder.code "SELECT "]
builder.append [200, generated_columns]
builder.append [300, SQL_Builder.code " FROM " ++ from_part]
builder.append [400, where_part]
builder.append [500, group_part]
builder.append [600, order_part]
builder.append [700, limit_part]
extensions.each builder.append

SQL_Builder.join "" <| parts.sort on=(.first) . map .second

## PRIVATE

@@ -467,18 +488,7 @@ generate_insert_query dialect table_name pairs =
generate_query : Dialect -> Query -> SQL_Builder
generate_query dialect query = case query of
Query.Select columns ctx ->
gen_column pair = (generate_expression dialect pair.second) ++ alias dialect pair.first
cols = case columns of
Nothing -> SQL_Builder.code "*"
_ -> SQL_Builder.join ", " (columns.map gen_column)
prefix = case ctx.distinct_on of
Nothing -> SQL_Builder.code ""
expressions : Vector ->
## TODO I just realised this does not make sense in other backends than Postgres,
so we should probably fail in such cases; probably rewrite into a generic modifier? or a transform?
generated = SQL_Builder.join ", " (expressions.map (generate_expression dialect))
SQL_Builder.code "DISTINCT ON (" ++ generated ++ ") "
SQL_Builder.code "SELECT " ++ prefix ++ cols ++ generate_select_context dialect ctx
generate_select dialect columns ctx
Query.Insert table_name pairs ->
generate_insert_query dialect table_name pairs
Query.Create_Table name columns primary_key temporary ->
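A worked illustration of the new part-assembly (using the positions documented on `Context_Extension` in the next file): for a context with one WHERE filter and the Postgres `DISTINCT ON` extension registered at position 120, the parts sort as SELECT (100), the extension (120), the column list (200), FROM (300) and WHERE (400), so the builder emits a query shaped like:

    SELECT DISTINCT ON ("a") "a", "b" FROM "t" WHERE "b" > 0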
@@ -5,6 +5,7 @@ import project.Internal.IR.From_Spec.From_Spec
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.Order_Descriptor.Order_Descriptor
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.SQL.SQL_Builder

## PRIVATE

@@ -22,7 +23,7 @@ type Context
details.
for_table : Text -> Text -> Any -> Context
for_table table_name alias=table_name internal_temporary_keep_alive_reference=Nothing =
Context.Value (From_Spec.Table table_name alias internal_temporary_keep_alive_reference=internal_temporary_keep_alive_reference) [] [] [] Nothing Nothing
Context.Value (From_Spec.Table table_name alias internal_temporary_keep_alive_reference=internal_temporary_keep_alive_reference) [] [] [] Nothing []

## PRIVATE

@@ -33,7 +34,7 @@ type Context
- alias: An alias name to use for table within the query.
for_query : Text -> Text -> Context
for_query raw_sql alias =
Context.Value (From_Spec.Query raw_sql alias) [] [] [] Nothing Nothing
Context.Value (From_Spec.Query raw_sql alias) [] [] [] Nothing []

## PRIVATE

@@ -43,7 +44,7 @@ type Context
- subquery: The subquery to lift into a context.
for_subquery : From_Spec -> Context
for_subquery subquery =
Context.Value subquery [] [] [] Nothing Nothing
Context.Value subquery [] [] [] Nothing []

## PRIVATE

@@ -66,7 +67,7 @@ type Context
grouped-by columns or aggregate expressions.
- limit: an optional maximum number of elements that the query should
return.
Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (limit : Nothing | Integer) (distinct_on : Nothing | Vector SQL_Expression)
Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (limit : Nothing | Integer) (extensions : Vector Context_Extension)

## PRIVATE

@@ -76,7 +77,7 @@ type Context
- new_filters: The new filters to set in the query.
set_where_filters : Vector SQL_Expression -> Context
set_where_filters self new_filters =
Context.Value self.from_spec new_filters self.orders self.groups self.limit self.distinct_on
Context.Value self.from_spec new_filters self.orders self.groups self.limit self.extensions

## PRIVATE

@@ -87,7 +88,7 @@ type Context
query.
add_where_filters : Vector SQL_Expression -> Context
add_where_filters self new_filters =
Context.Value self.from_spec (self.where_filters+new_filters) self.orders self.groups self.limit self.distinct_on
Context.Value self.from_spec (self.where_filters+new_filters) self.orders self.groups self.limit self.extensions

## PRIVATE

@@ -97,7 +98,7 @@ type Context
- new_orders: The new ordering clauses to set in the query.
set_orders : Vector Order_Descriptor -> Context
set_orders self new_orders =
Context.Value self.from_spec self.where_filters new_orders self.groups self.limit self.distinct_on
Context.Value self.from_spec self.where_filters new_orders self.groups self.limit self.extensions

## PRIVATE

@@ -114,7 +115,7 @@ type Context
- new_orders: The new ordering clauses to add to the query.
add_orders : Vector Order_Descriptor -> Context
add_orders self new_orders =
Context.Value self.from_spec self.where_filters new_orders+self.orders self.groups self.limit self.distinct_on
Context.Value self.from_spec self.where_filters new_orders+self.orders self.groups self.limit self.extensions

## PRIVATE

@@ -124,7 +125,7 @@ type Context
- new_groups: The new grouping clauses to set in the query.
set_groups : Vector SQL_Expression -> Context
set_groups self new_groups =
Context.Value self.from_spec self.where_filters self.orders new_groups self.limit self.distinct_on
Context.Value self.from_spec self.where_filters self.orders new_groups self.limit self.extensions

## PRIVATE

@@ -134,14 +135,13 @@ type Context
- new_limit: The new limit clauses to set in the query.
set_limit : (Nothing | Integer) -> Context
set_limit self new_limit =
Context.Value self.from_spec self.where_filters self.orders self.groups new_limit self.distinct_on
Context.Value self.from_spec self.where_filters self.orders self.groups new_limit self.extensions

## PRIVATE

Returns a copy of the context with changed `distinct_on` expressions.
set_distinct_on : (Nothing | Vector SQL_Expression) -> Context
set_distinct_on self new_distinct_on =
Context.Value self.from_spec self.where_filters self.orders self.groups self.limit new_distinct_on
Returns a copy of the context with an added extension.
add_extension : Context_Extension -> Context
add_extension self extension =
Context.Value self.from_spec self.where_filters self.orders self.groups self.limit (self.extensions + [extension])

## PRIVATE

@@ -176,3 +176,27 @@ type Context
type Subquery_Setup
## PRIVATE
Value (subquery : From_Spec) (new_columns : Vector (Vector Internal_Column))

## PRIVATE
Describes an extension to a Context that can be used to add additional custom
SQL code as part of the query.
type Context_Extension
## A recipe for building the extension.

Arguments:
- position: Determines where the extension code should be inserted.
The positions of the common query parts are as follows:
- 100 - the SELECT keyword
- 200 - the column descriptions
- 300 - the FROM part
- 400 - the WHERE part
- 500 - the GROUP BY part
- 600 - the ORDER BY part
- 700 - the LIMIT part
Setting the position to a value between two of these slots the extension
at the corresponding place in the query.
- expressions: Sub-expressions needed by this part. They will be
generated and the resulting builders will be passed to `run_generator`.
- run_generator: A function that takes the generated expressions and
returns the SQL code that will be inserted at the desired position.
Value (position : Integer) (expressions : Vector SQL_Expression) (run_generator : Vector SQL_Builder -> SQL_Builder)
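As another illustration of the mechanism (a hypothetical sketch, not part of this commit), a dialect could append an `OFFSET` clause just after the LIMIT part:

    ## Hypothetical: emits an OFFSET clause; position 710 is an illustrative
       value placing it directly after the LIMIT part (700).
    make_offset_extension offset =
        run_generator _ =
            SQL_Builder.code (" OFFSET " + offset.to_text)
        Context_Extension.Value position=710 expressions=[] run_generator=run_generator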
@@ -19,6 +19,7 @@ import project.Internal.Common.Database_Distinct_Helper
import project.Internal.Common.Database_Join_Helper
import project.Internal.Error_Mapper.Error_Mapper
import project.Internal.IR.Context.Context
import project.Internal.IR.Context.Context_Extension
import project.Internal.IR.From_Spec.From_Spec
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.Nulls_Order.Nulls_Order
@@ -122,7 +123,7 @@ type Postgres_Dialect
distinct_expressions = new_key_columns.map column->
value_type = type_mapping.sql_type_to_value_type column.sql_type_reference.get
Database_Distinct_Helper.make_distinct_expression case_sensitivity problem_builder column value_type
new_context = Context.for_subquery setup.subquery . set_distinct_on distinct_expressions
new_context = Context.for_subquery setup.subquery . add_extension (make_distinct_extension distinct_expressions)
table.updated_context_and_columns new_context new_columns subquery=True

## PRIVATE
@@ -764,6 +765,12 @@ as_int64 expr =
as_int32 expr =
SQL_Builder.code "(" ++ expr ++ "::int4)"

## PRIVATE
make_distinct_extension expressions =
run_generator sql_expressions =
SQL_Builder.code "DISTINCT ON (" ++ (SQL_Builder.join ", " sql_expressions) ++ ") "
Context_Extension.Value position=120 expressions=expressions run_generator=run_generator

## PRIVATE
The RUNTIME_ERROR operation should allow the query to compile fine and it
will not prevent it from running if the branch including this operation is
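(Design note: position 120 slots the generated `DISTINCT ON (...)` fragment between the `SELECT` keyword at 100 and the column list at 200, so the query reads `SELECT DISTINCT ON (...) <columns> FROM ...`, exactly where the removed `distinct_on` prefix used to be emitted.)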
@@ -131,6 +131,10 @@ type Postgres_Type_Mapping
_ = [source_type, target_type]
False

## PRIVATE
should_warn_on_materialize (db_type : Value_Type) (in_memory_type : Value_Type) -> Boolean =
SQL_Type_Mapping.default_should_warn_on_materialize db_type in_memory_type

## PRIVATE
is_integer_type (value_type : Value_Type) -> Boolean = value_type.is_integer

@@ -99,6 +99,13 @@ type SQL_Type_Mapping
_ = [source_type, target_type]
Unimplemented.throw "This is an interface only."

## PRIVATE
Specifies if the given type coercion should raise an
`Inexact_Type_Coercion` warning when materializing a table into memory.
should_warn_on_materialize (db_type : Value_Type) (in_memory_type : Value_Type) -> Boolean =
_ = [db_type, in_memory_type]
Unimplemented.throw "This is an interface only."

## PRIVATE
Specifies if this backend recognizes the given type as an integer type.

@@ -133,3 +140,11 @@ default_sql_type_to_text sql_type =
if sql_type.scale.is_nothing then "(" + sql_type.precision.to_text + ")" else
" (" + sql_type.precision.to_text + "," + sql_type.scale.to_text + ")"
sql_type.name.trim + suffix

## PRIVATE
default_should_warn_on_materialize (db_type : Value_Type) (in_memory_type : Value_Type) =
## We ignore simple approximations made by our in-memory backend: things like
adding a default timezone (because we do not have a Date_Time without timezone
in-memory) or widening Float32 to Float64 are silently accepted.
However, bigger changes, like a Binary column getting coerced to Mixed, _will_
still be reported.
(Meta.meta db_type . constructor) != (Meta.meta in_memory_type . constructor)
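For example (assuming the standard `Value_Type` constructors): a database `Value_Type.Float Bits.Bits_32` column materialized as the in-memory `Value_Type.Float Bits.Bits_64` compares two `Float` constructors, so no warning is raised, while a `Binary` column coerced to `Mixed` compares different constructors and does report the `Inexact_Type_Coercion` warning.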
@@ -123,6 +123,10 @@ type SQLite_Type_Mapping
_ = [source_type, target_type]
False

## PRIVATE
should_warn_on_materialize (db_type : Value_Type) (in_memory_type : Value_Type) -> Boolean =
SQL_Type_Mapping.default_should_warn_on_materialize db_type in_memory_type

## PRIVATE
is_integer_type (value_type : Value_Type) -> Boolean = value_type.is_integer

@@ -4,6 +4,7 @@ import Standard.Base.Errors.Common.Dry_Run_Operation
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Runtime.Context
from Standard.Base.Logging import all
from Standard.Base.Runtime import assert

import Standard.Table.Internal.Problem_Builder.Problem_Builder
@@ -517,10 +518,15 @@ dry_run_row_limit = 1000
It is a panic, because it is never expected to happen in user code - if it
happens, it is a bug in our code.
check_transaction_ddl_support connection =
supports_ddl = connection.jdbc_connection.with_metadata metadata->
metadata.supportsDataDefinitionAndDataManipulationTransactions
if supports_ddl.not then
Panic.throw (Illegal_State.Error "The connection "+connection.to_text+" does not support transactional DDL statements. Our current implementation of table updates relies on transactional DDL. To support this driver, the logic needs to be amended.")
connection.jdbc_connection.with_metadata metadata->
supports_ddl = metadata.supportsDataDefinitionAndDataManipulationTransactions && metadata.dataDefinitionIgnoredInTransactions.not
if supports_ddl.not then
Panic.throw (Illegal_State.Error "The connection "+connection.to_text+" does not support transactional DDL statements. Our current implementation of table updates relies on transactional DDL. To support this driver, the logic needs to be amended.")
ddl_causes_commit = metadata.dataDefinitionCausesTransactionCommit
if ddl_causes_commit then
# TODO fix for Snowflake support
#Panic.throw (Illegal_State.Error "The connection "+connection.to_text+" does not fully support DDL statements as part of complex transactions - DDL causes a commit, so we cannot compose it. To support this driver, the logic needs to be amended.")
Nothing

## PRIVATE
common_delete_rows target_table key_values_to_delete key_columns allow_duplicate_matches =
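For background: JDBC's `DatabaseMetaData` exposes three related capabilities that this check now distinguishes: `supportsDataDefinitionAndDataManipulationTransactions` (whether DDL may appear inside a transaction at all), `dataDefinitionIgnoredInTransactions` (whether such DDL is silently ignored), and `dataDefinitionCausesTransactionCommit` (whether DDL implicitly commits the enclosing transaction, which is Snowflake's behaviour and why that panic is left commented out with a TODO).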
1 change: 0 additions & 1 deletion distribution/lib/Standard/Image/0.0.0-dev/src/Image.enso
@@ -107,7 +107,6 @@ type Image
_ -> [flags]
int_flags = MatOfInt.new (write_flags.flat_map x-> [x.to_integer, x.value])
write_to_local_file file:File =
IO.println "Writing the image to a file: "+file.path
Panic.catch JException (Java_Codecs.write file.path self.opencv_mat int_flags) _->
Error.throw (File_Error.IO_Error path.file 'Failed to write to the file')
r = if path.is_local then write_to_local_file path.file else