Implement add_row_number for Database backends, fix primary key inference for SQLite (#7174)

Closes #6921 and also closes #7037
enso-org · Jul 3, 2023 · 4ccf356 · 4ccf356
1 parent 3c93c25
commit 4ccf356
Show file tree

Hide file tree

Showing 20 changed files with 397 additions and 146 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -502,6 +502,7 @@
 - [Implemented `Table.update_database_table`.][7035]
 - [Removed `module` argument from `enso_project` and other minor tweaks.][7052]
 - [Integrated Database write operations with Execution Contexts.][7072]
+- [Implemented `add_row_number` for Database tables.][7174]
 
 [debug-shortcuts]:
   https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@@ -726,6 +727,7 @@
 [7035]: https://github.com/enso-org/enso/pull/7035
 [7052]: https://github.com/enso-org/enso/pull/7052
 [7072]: https://github.com/enso-org/enso/pull/7072
+[7174]: https://github.com/enso-org/enso/pull/7174
 
 #### Enso Compiler
 

diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Database/Redshift/Internal/Redshift_Dialect.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Database/Redshift/Internal/Redshift_Dialect.enso
@@ -27,7 +27,7 @@ import Standard.Database.Internal.Postgres.Postgres_Type_Mapping.Postgres_Type_M
 import Standard.Database.Internal.SQL_Type_Mapping.SQL_Type_Mapping
 import Standard.Database.Internal.SQL_Type_Reference.SQL_Type_Reference
 import Standard.Database.Internal.Statement_Setter.Statement_Setter
-from Standard.Database.Errors import Unsupported_Database_Operation
+from Standard.Database.Errors import SQL_Error, Unsupported_Database_Operation
 
 import project.Database.Redshift.Internal.Redshift_Error_Mapper.Redshift_Error_Mapper
 
@@ -167,3 +167,11 @@ type Redshift_Dialect
     ## PRIVATE
     get_error_mapper : Error_Mapper
     get_error_mapper self = Redshift_Error_Mapper
+
+    ## PRIVATE
+       The dialect-dependent strategy to get the Primary Key for a given table.
+
+       Delegates to `Dialect.default_fetch_primary_key`.
+
+       Returns a vector of the key column names, or `Nothing` if the key is
+       not defined.
+    fetch_primary_key : Connection -> Text -> Vector Text | Nothing
+    fetch_primary_key self connection table_name =
+        Dialect.default_fetch_primary_key connection table_name
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso
@@ -1,7 +1,7 @@
 from Standard.Base import all
 import Standard.Base.Errors.Unimplemented.Unimplemented
 
-from Standard.Table import Aggregate_Column, Join_Kind, Value_Type
+from Standard.Table import Aggregate_Column, Join_Kind, Value_Type, Column_Selector
 import Standard.Table.Internal.Naming_Helpers.Naming_Helpers
 import Standard.Table.Internal.Problem_Builder.Problem_Builder
 
@@ -23,7 +23,8 @@ import project.Internal.SQLite.SQLite_Dialect
 import project.Internal.SQL_Type_Mapping.SQL_Type_Mapping
 import project.Internal.SQL_Type_Reference.SQL_Type_Reference
 import project.Internal.Statement_Setter.Statement_Setter
-from project.Errors import Unsupported_Database_Operation
+from project.Errors import SQL_Error, Unsupported_Database_Operation
+from project.Internal.Result_Set import result_set_to_table
 
 ## PRIVATE
 
@@ -213,6 +214,15 @@ type Dialect
     default_table_types self =
         Unimplemented.throw "This is an interface only."
 
+    ## PRIVATE
+       The dialect-dependent strategy to get the Primary Key for a given table.
+
+       Returns a vector of the names of the columns forming the key, or
+       `Nothing` if the key is not defined.
+
+       This is an interface method only: calling it on `Dialect` itself always
+       throws `Unimplemented`.
+    fetch_primary_key : Connection -> Text -> Vector Text | Nothing
+    fetch_primary_key self connection table_name =
+        _ = [connection, table_name]
+        Unimplemented.throw "This is an interface only."
+
 ## PRIVATE
 
    The dialect of SQLite databases.
@@ -229,3 +239,14 @@ postgres = Postgres_Dialect.postgres
 default_fetch_types_query dialect expression context =
     empty_context = context.add_where_filters [SQL_Expression.Literal "FALSE"]
     dialect.generate_sql (Query.Select [["typed_column", expression]] empty_context)
+
+## PRIVATE
+   The default strategy for finding the primary key of a table, which asks
+   the JDBC `DatabaseMetaData` for it.
+
+   Returns the names of the key columns sorted by the `KEY_SEQ` metadata
+   column, or `Nothing` if no key columns were reported.
+default_fetch_primary_key connection table_name =
+    connection.jdbc_connection.with_metadata metadata->
+        primary_keys_result_set = metadata.getPrimaryKeys Nothing Nothing table_name
+        metadata_table = result_set_to_table primary_keys_result_set connection.dialect.make_column_fetcher_for_type
+        # The metadata column names are sometimes lowercase and sometimes
+        # uppercase, hence they are selected by name first.
+        # NOTE(review): this relies on `By_Name` matching case-insensitively by default - confirm.
+        selectors = [Column_Selector.By_Name "COLUMN_NAME", Column_Selector.By_Name "KEY_SEQ"]
+        name_and_sequence = metadata_table.select_columns selectors reorder=True
+        # After reordering, column 0 holds the names and column 1 the sequence numbers.
+        sorted_by_sequence = name_and_sequence.order_by 1
+        names = sorted_by_sequence.at 0 . to_vector
+        if names.is_empty then Nothing else names
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@@ -21,10 +21,11 @@ import Standard.Table.Data.Report_Unmatched.Report_Unmatched
 import Standard.Table.Data.Row.Row
 import Standard.Table.Data.Table.Table as Materialized_Table
 import Standard.Table.Data.Type.Value_Type_Helpers
+import Standard.Table.Internal.Add_Row_Number
 import Standard.Table.Internal.Aggregate_Column_Helper
+import Standard.Table.Internal.Problem_Builder.Problem_Builder
 import Standard.Table.Internal.Table_Helpers
 import Standard.Table.Internal.Table_Helpers.Table_Column_Helper
-import Standard.Table.Internal.Problem_Builder.Problem_Builder
 import Standard.Table.Internal.Unique_Name_Strategy.Unique_Name_Strategy
 import Standard.Table.Internal.Widget_Helpers
 from Standard.Table.Data.Column import get_item_string, normalize_string_for_display
@@ -36,15 +37,17 @@ import project.Data.Column.Column
 import project.Data.SQL_Query.SQL_Query
 import project.Data.SQL_Statement.SQL_Statement
 import project.Data.SQL_Type.SQL_Type
-import project.Internal.Helpers
 import project.Internal.Aggregate_Helper
+import project.Internal.Base_Generator
 import project.Internal.Common.Database_Join_Helper
+import project.Internal.Helpers
 import project.Internal.IR.Context.Context
-import project.Internal.IR.SQL_Expression.SQL_Expression
 import project.Internal.IR.From_Spec.From_Spec
 import project.Internal.IR.Internal_Column.Internal_Column
-import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
+import project.Internal.IR.Order_Descriptor.Order_Descriptor
 import project.Internal.IR.Query.Query
+import project.Internal.IR.SQL_Expression.SQL_Expression
+import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
 import project.Internal.SQL_Type_Reference.SQL_Type_Reference
 
 from project.Errors import Unsupported_Database_Operation, Integrity_Error, Unsupported_Name, Table_Not_Found
@@ -559,10 +562,43 @@ type Table
     @group_by Widget_Helpers.make_column_name_vector_selector
     @order_by Widget_Helpers.make_order_by_selector
     add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Column_Selector) | Text | Integer -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
-    add_row_number self name="Row" from=1 step=1 group_by=[] order_by=[] on_problems=Problem_Behavior.Report_Warning =
-        _ = [name, from, step, group_by, order_by, on_problems]
-        msg = "`Table.add_row_number` is not yet implemented in the Database backend."
-        Error.throw (Unsupported_Database_Operation.Error msg)
+    add_row_number self (name:Text = "Row") (from:Integer = 1) (step:Integer = 1) group_by=[] order_by=[] on_problems=Problem_Behavior.Report_Warning =
+        # Missing columns are always treated as errors by this problem builder.
+        problem_builder = Problem_Builder.new error_on_missing_columns=True
+        grouping_columns = self.columns_helper.select_columns_helper group_by True problem_builder
+        # Grouping by a floating-point column relies on float equality, so a
+        # warning is reported for each such grouping column.
+        grouping_columns.each internal_column->
+            column = self.make_column internal_column
+            if column.value_type.is_floating_point then
+                problem_builder.report_other_warning (Floating_Point_Equality.Error column.name)
+        ordering = Table_Helpers.resolve_order_by self.columns order_by problem_builder
+        problem_builder.attach_problems_before on_problems <|
+            # An explicit `order_by` takes precedence; otherwise the table's
+            # default ordering is used, and if there is none the operation
+            # fails with an `Illegal_Argument` error.
+            order_descriptors = case ordering.is_empty of
+                False -> ordering.map element->
+                    column = element.column
+                    associated_selector = element.associated_selector
+                    self.connection.dialect.prepare_order_descriptor column associated_selector.direction text_ordering=Nothing
+                True -> case self.default_ordering of
+                    Nothing -> Error.throw (Illegal_Argument.Error "No `order_by` is specified and the table has no existing ordering (e.g. from an `order_by` operation or a primary key). Some ordering is required for `add_row_number` in Database tables.")
+                    descriptors -> descriptors
+            grouping_expressions = grouping_columns.map .expression
+
+            # The operation takes a flat list of parameters, so a separator
+            # literal marks where the ordering expressions end and the
+            # grouping expressions begin.
+            separator = SQL_Expression.Literal Base_Generator.row_number_parameter_separator
+            # The SQL row_number() counts from 1, so we adjust the offset.
+            offset = from - step
+            params = [SQL_Expression.Constant offset, SQL_Expression.Constant step] + order_descriptors + [separator] + grouping_expressions
+            new_expr = SQL_Expression.Operation "ROW_NUMBER" params
+
+            # The type of the new column is obtained from the dialect's type
+            # mapping, which may use the callback to refer to the database.
+            type_mapping = self.connection.dialect.get_type_mapping
+            infer_from_database_callback expression =
+                SQL_Type_Reference.new self.connection self.context expression
+            new_type_ref = type_mapping.infer_return_type infer_from_database_callback "ROW_NUMBER" [] new_expr
+
+            new_column = Internal_Column.Value name new_type_ref new_expr
+
+            # NOTE(review): `rename_columns_if_needed` presumably renames
+            # existing columns that clash with `name` - confirm in
+            # `Add_Row_Number`. The new column is then appended as the last one.
+            rebuild_table columns =
+                self.updated_columns (columns.map .as_internal)
+            renamed_table = Add_Row_Number.rename_columns_if_needed self name on_problems rebuild_table
+            renamed_table.updated_columns (renamed_table.internal_columns + [new_column])
+
 
     ## UNSTABLE
 
@@ -825,22 +861,51 @@ type Table
               table.order_by [(Sort_Column.Select_By_Name "a.*" use_regex=True case_sensitivity=Case_Sensitivity.Insensitive)]
     @columns Widget_Helpers.make_order_by_selector
     order_by : Vector (Text | Sort_Column) | Text -> Text_Ordering -> Boolean -> Problem_Behavior -> Table  ! Incomparable_Values | No_Input_Columns_Selected | Missing_Input_Columns
-    order_by self (columns = ([(Sort_Column.Name (self.columns.at 0 . name))])) text_ordering=Text_Ordering.Default error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning = Panic.handle_wrapped_dataflow_error <|
+    order_by self (columns = ([(Sort_Column.Name (self.columns.at 0 . name))])) text_ordering=Text_Ordering.Default error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
         problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns types_to_always_throw=[No_Input_Columns_Selected]
         columns_for_ordering = Table_Helpers.prepare_order_by self.columns columns problem_builder
         problem_builder.attach_problems_before on_problems <|
             new_order_descriptors = columns_for_ordering.map selected_column->
                 column = selected_column.column
                 associated_selector = selected_column.associated_selector
+                # The text ordering is only passed on for text columns; other
+                # columns get `Nothing`.
                 effective_text_ordering = if column.value_type.is_text then text_ordering else Nothing
-                ## FIXME [RW] this is only needed because `Vector.map` does not
-                   propagate dataflow errors correctly. See:
-                   https://www.pivotaltracker.com/story/show/181057718
-                Panic.throw_wrapped_if_error <|
-                    self.connection.dialect.prepare_order_descriptor column associated_selector.direction effective_text_ordering
+                self.connection.dialect.prepare_order_descriptor column associated_selector.direction effective_text_ordering
+            # The descriptors are passed to `add_orders` of the current query
+            # context and a table using the resulting context is returned.
             new_ctx = self.context.add_orders new_order_descriptors
             self.updated_context new_ctx
 
+    ## PRIVATE
+       Determines the ordering that operations like `add_row_number` or `take`
+       fall back to when none is given explicitly.
+
+       An ordering already present in the query context (e.g. set by
+       `order_by`) takes precedence. Failing that, the table is ordered
+       ascending by its primary key columns, if a primary key is available.
+       If neither is available, `Nothing` is returned.
+    default_ordering : Vector Order_Descriptor | Nothing
+    default_ordering self =
+        context_orders = self.context.orders
+        case context_orders.is_empty of
+            False -> context_orders
+            True -> case self.get_primary_key of
+                Nothing -> Nothing
+                key_columns : Vector -> case self.context.from_spec of
+                    From_Spec.Table _ table_alias _ -> key_columns.map key_column->
+                        Order_Descriptor.Value (SQL_Expression.Column table_alias key_column) Sort_Direction.Ascending
+                    _ -> Nothing
+
+    ## PRIVATE
+       Returns the primary key defined for the table, if applicable.
+
+       `Nothing` is returned when the query is not based directly on a
+       database table, when it has been grouped, or when fetching the key
+       fails for any reason.
+    get_primary_key : Vector Text | Nothing
+    get_primary_key self = case self.context.from_spec of
+        From_Spec.Table table_name _ _ ->
+            # The primary key may not be valid anymore after grouping, so it
+            # is only looked up when no groups are set.
+            case self.context.groups.is_empty of
+                False -> Nothing
+                True ->
+                    fetched = self.connection.dialect.fetch_primary_key self.connection table_name
+                    fetched.catch Any _->Nothing
+        # If the table is a result of a join, union or a subquery then it has no notion of primary key.
+        _ -> Nothing
+
     ## Returns the distinct set of rows within the specified columns from the
        input table.
 

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso
@@ -187,7 +187,8 @@ base_dialect =
     nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
     contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
     types = [simple_cast]
-    base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains + types)
+    windows = [["ROW_NUMBER", make_row_number]]
+    base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains + types + windows)
     Internal_Dialect.Value base_map wrap_in_quotes
 
 ## PRIVATE
@@ -265,6 +266,39 @@ make_is_in_column arguments = case arguments.length of
         Builder.code "CASE WHEN " ++ expr ++ " IS NULL THEN " ++ has_nulls ++ " ELSE " ++ is_in ++ " END"
     _ -> Error.throw <| Illegal_State.Error ("The operation IS_IN_COLUMN requires at exactly 3 arguments: the expression, the IN subquery, the subquery checking for nulls.")
 
+## PRIVATE
+   Generates the SQL code for the `ROW_NUMBER` operation.
+
+   Arguments:
+   - arguments: the already generated SQL fragments, in order: the offset, the
+     step, one or more ordering expressions, the separator literal
+     (`row_number_parameter_separator`) and finally zero or more grouping
+     expressions.
+
+   Returns an `Illegal_State` error if the arguments do not follow that layout.
+make_row_number : Vector Builder -> Builder
+make_row_number arguments = if arguments.length < 4 then Error.throw (Illegal_State.Error "Wrong amount of parameters in ROW_NUMBER IR. This is a bug in the Database library.") else
+    offset = arguments.at 0
+    step = arguments.at 1
+
+    separator_ix = arguments.index_of code->
+        code.build.prepare.first == row_number_parameter_separator
+    # Positions 0 and 1 hold the offset and the step, so a valid separator
+    # sits at index 3 or later, leaving at least one ordering expression.
+    # A missing separator (`Nothing`) would otherwise fail obscurely below and
+    # an empty ordering would generate invalid SQL (`ORDER BY` with nothing).
+    is_layout_valid = if separator_ix.is_nothing then False else separator_ix >= 3
+    if is_layout_valid.not then Error.throw (Illegal_State.Error "Missing parameter separator or ordering in ROW_NUMBER IR. This is a bug in the Database library.") else
+        ordering = arguments.take (Range.new 2 separator_ix)
+        grouping = arguments.drop (separator_ix+1)
+
+        # The PARTITION BY clause is only emitted when grouping is requested.
+        group_part = if grouping.length == 0 then "" else
+            Builder.code "PARTITION BY " ++ Builder.join ", " grouping
+        Builder.code "(row_number() OVER (" ++ group_part ++ " ORDER BY " ++ Builder.join ", " ordering ++ ") * " ++ step.paren ++ " + " ++ offset.paren ++ ")"
+
+## PRIVATE
+   A marker separating the groups of arguments of the `ROW_NUMBER` operation.
+
+   This is a terrible hack, but I could not figure out a decent way to have an
+   operation take a variable number of arguments of multiple kinds (here both
+   groups and orders are varying).
+
+   Currently, the IR only allows passing a flat list of parameters to an
+   operation, and they are all converted into SQL code before being passed to
+   the particular operation builder. So at this stage there is no way to
+   distinguish the arguments.
+
+   So to distinguish different groups of arguments, we use this 'fake' parameter
+   to act as a separator. This parameter is not supposed to end up in the
+   generated SQL code.
+
+   This is yet another argument for the IR redesign.
+row_number_parameter_separator =
+    "--<!PARAMETER_SEPARATOR!>--"
+
 ## PRIVATE
 
    Builds code for an expression.

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso
@@ -36,7 +36,7 @@ import project.Internal.Postgres.Postgres_Error_Mapper.Postgres_Error_Mapper
 import project.Internal.SQL_Type_Mapping.SQL_Type_Mapping
 import project.Internal.SQL_Type_Reference.SQL_Type_Reference
 import project.Internal.Statement_Setter.Statement_Setter
-from project.Errors import Unsupported_Database_Operation
+from project.Errors import SQL_Error, Unsupported_Database_Operation
 
 ## PRIVATE
 
@@ -219,6 +219,14 @@ type Postgres_Dialect
     get_error_mapper : Error_Mapper
     get_error_mapper self = Postgres_Error_Mapper
 
+    ## PRIVATE
+       The dialect-dependent strategy to get the Primary Key for a given table.
+
+       Delegates to `Dialect.default_fetch_primary_key`.
+
+       Returns a vector of the key column names, or `Nothing` if the key is
+       not defined.
+    fetch_primary_key : Connection -> Text -> Vector Text | Nothing
+    fetch_primary_key self connection table_name =
+        Dialect.default_fetch_primary_key connection table_name
+
 ## PRIVATE
 make_internal_generator_dialect =
     cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso
@@ -33,7 +33,7 @@ import project.Internal.SQL_Type_Reference.SQL_Type_Reference
 import project.Internal.SQLite.SQLite_Type_Mapping.SQLite_Type_Mapping
 import project.Internal.SQLite.SQLite_Error_Mapper.SQLite_Error_Mapper
 import project.Internal.Statement_Setter.Statement_Setter
-from project.Errors import Unsupported_Database_Operation
+from project.Errors import SQL_Error, Unsupported_Database_Operation
 
 ## PRIVATE
 
@@ -239,6 +239,24 @@ type SQLite_Dialect
     get_error_mapper : Error_Mapper
     get_error_mapper self = SQLite_Error_Mapper
 
+    ## PRIVATE
+       The dialect-dependent strategy to get the Primary Key for a given table.
+
+       Returns a vector of the key column names, ordered by their position in
+       the key, or `Nothing` if the key is not defined.
+
+       Custom handling is required, because the default DatabaseMetaData
+       implementation does not correctly handle temporary tables.
+    fetch_primary_key : Connection -> Text -> Vector Text | Nothing
+    fetch_primary_key self connection table_name =
+        wrapped_name = self.internal_generator_dialect.wrap_identifier table_name
+        query = Builder.code "pragma table_info(" ++ wrapped_name ++ ")"
+        info_table = connection.read_statement query.build
+        ## The `pk` field is non-zero if the column is part of the primary key.
+           The column value indicates the position in the key.
+           See: https://www.sqlite.org/pragma.html#pragma_table_info
+        v = info_table.filter "pk" (>0) . order_by "pk" . at "name" . to_vector
+        if v.is_empty then Nothing else v
+
 ## PRIVATE
 make_internal_generator_dialect =
     text = [starts_with, contains, ends_with, make_case_sensitive]+concat_ops+trim_ops

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso
@@ -182,7 +182,7 @@ operations_map =
     always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_EMPTY", "LIKE", "IS_IN", "IS_IN_COLUMN", "starts_with", "ends_with", "contains", "BOOL_OR", "IS_INF"]
     always_floating_ops = ["/", "mod", "AVG", "STDDEV_POP", "STDDEV_SAMP", "ROUND"]
     always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM"]
-    always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS"]
+    always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "ROW_NUMBER"]
     same_as_first = ["TRUNCATE", "CEIL", "FLOOR"]
     arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"]
     merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FILL_NULL", "COALESCE"]