Skip to content

Commit

Permalink
Implement add_row_number for Database backends, fix primary key inf…
Browse files Browse the repository at this point in the history
…erence for SQLite (#7174)

Closes #6921 and also closes #7037
  • Loading branch information
radeusgd authored Jul 3, 2023
1 parent 3c93c25 commit 4ccf356
Show file tree
Hide file tree
Showing 20 changed files with 397 additions and 146 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@
- [Implemented `Table.update_database_table`.][7035]
- [Removed `module` argument from `enso_project` and other minor tweaks.][7052]
- [Integrated Database write operations with Execution Contexts.][7072]
- [Implemented `add_row_number` for Database tables.][7174]

[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
Expand Down Expand Up @@ -726,6 +727,7 @@
[7035]: https://github.com/enso-org/enso/pull/7035
[7052]: https://github.com/enso-org/enso/pull/7052
[7072]: https://github.com/enso-org/enso/pull/7072
[7174]: https://github.com/enso-org/enso/pull/7174

#### Enso Compiler

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import Standard.Database.Internal.Postgres.Postgres_Type_Mapping.Postgres_Type_M
import Standard.Database.Internal.SQL_Type_Mapping.SQL_Type_Mapping
import Standard.Database.Internal.SQL_Type_Reference.SQL_Type_Reference
import Standard.Database.Internal.Statement_Setter.Statement_Setter
from Standard.Database.Errors import Unsupported_Database_Operation
from Standard.Database.Errors import SQL_Error, Unsupported_Database_Operation

import project.Database.Redshift.Internal.Redshift_Error_Mapper.Redshift_Error_Mapper

Expand Down Expand Up @@ -167,3 +167,11 @@ type Redshift_Dialect
## PRIVATE
get_error_mapper : Error_Mapper
get_error_mapper self = Redshift_Error_Mapper

## PRIVATE
   The dialect-dependent strategy to get the Primary Key for a given table.

   This dialect delegates to `Dialect.default_fetch_primary_key`.

   Arguments:
   - connection: the connection passed on to the default implementation.
   - table_name: the name of the table whose primary key is looked up.

   Returns `Nothing` if the key is not defined.
fetch_primary_key : Connection -> Text -> Vector Text | Nothing
fetch_primary_key self connection table_name =
    Dialect.default_fetch_primary_key connection table_name
25 changes: 23 additions & 2 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from Standard.Base import all
import Standard.Base.Errors.Unimplemented.Unimplemented

from Standard.Table import Aggregate_Column, Join_Kind, Value_Type
from Standard.Table import Aggregate_Column, Join_Kind, Value_Type, Column_Selector
import Standard.Table.Internal.Naming_Helpers.Naming_Helpers
import Standard.Table.Internal.Problem_Builder.Problem_Builder

Expand All @@ -23,7 +23,8 @@ import project.Internal.SQLite.SQLite_Dialect
import project.Internal.SQL_Type_Mapping.SQL_Type_Mapping
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
import project.Internal.Statement_Setter.Statement_Setter
from project.Errors import Unsupported_Database_Operation
from project.Errors import SQL_Error, Unsupported_Database_Operation
from project.Internal.Result_Set import result_set_to_table

## PRIVATE

Expand Down Expand Up @@ -213,6 +214,15 @@ type Dialect
default_table_types self =
Unimplemented.throw "This is an interface only."

## PRIVATE
   The dialect-dependent strategy to get the Primary Key for a given table.

   Returns `Nothing` if the key is not defined.

   This is the interface declaration only: calling it always throws
   `Unimplemented`.
fetch_primary_key : Connection -> Text -> Vector Text | Nothing
fetch_primary_key self connection table_name =
    # The arguments are intentionally unused by this stub.
    _ = [connection, table_name]
    Unimplemented.throw "This is an interface only."

## PRIVATE

The dialect of SQLite databases.
Expand All @@ -229,3 +239,14 @@ postgres = Postgres_Dialect.postgres
default_fetch_types_query dialect expression context =
    # The `FALSE` filter is added on top of the filters already present in
    # `context`; presumably so that the query yields no rows and only the type
    # of `typed_column` is inspected - confirm against the callers.
    always_false = SQL_Expression.Literal "FALSE"
    filtered_context = context.add_where_filters [always_false]
    typed_column = ["typed_column", expression]
    dialect.generate_sql (Query.Select [typed_column] filtered_context)

## PRIVATE
   Default primary key lookup relying on the JDBC `DatabaseMetaData`.

   Returns the values of the `COLUMN_NAME` metadata column after sorting the
   metadata rows, or `Nothing` if the metadata contains no rows for the table.
default_fetch_primary_key connection table_name =
    connection.jdbc_connection.with_metadata metadata->
        result_set = metadata.getPrimaryKeys Nothing Nothing table_name
        primary_keys = result_set_to_table result_set connection.dialect.make_column_fetcher_for_type
        # The names of the columns are sometimes lowercase and sometimes uppercase, so we do a case insensitive select first.
        wanted_columns = [Column_Selector.By_Name "COLUMN_NAME", Column_Selector.By_Name "KEY_SEQ"]
        name_and_sequence = primary_keys.select_columns wanted_columns reorder=True
        # Thanks to `reorder=True` the names are at position 0; the rows are
        # sorted via `order_by 1` (presumably the `KEY_SEQ` column) first.
        sorted_keys = name_and_sequence.order_by 1
        names = sorted_keys.at 0 . to_vector
        if names.is_empty then Nothing else names
93 changes: 79 additions & 14 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ import Standard.Table.Data.Report_Unmatched.Report_Unmatched
import Standard.Table.Data.Row.Row
import Standard.Table.Data.Table.Table as Materialized_Table
import Standard.Table.Data.Type.Value_Type_Helpers
import Standard.Table.Internal.Add_Row_Number
import Standard.Table.Internal.Aggregate_Column_Helper
import Standard.Table.Internal.Problem_Builder.Problem_Builder
import Standard.Table.Internal.Table_Helpers
import Standard.Table.Internal.Table_Helpers.Table_Column_Helper
import Standard.Table.Internal.Problem_Builder.Problem_Builder
import Standard.Table.Internal.Unique_Name_Strategy.Unique_Name_Strategy
import Standard.Table.Internal.Widget_Helpers
from Standard.Table.Data.Column import get_item_string, normalize_string_for_display
Expand All @@ -36,15 +37,17 @@ import project.Data.Column.Column
import project.Data.SQL_Query.SQL_Query
import project.Data.SQL_Statement.SQL_Statement
import project.Data.SQL_Type.SQL_Type
import project.Internal.Helpers
import project.Internal.Aggregate_Helper
import project.Internal.Base_Generator
import project.Internal.Common.Database_Join_Helper
import project.Internal.Helpers
import project.Internal.IR.Context.Context
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.From_Spec.From_Spec
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
import project.Internal.IR.Order_Descriptor.Order_Descriptor
import project.Internal.IR.Query.Query
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
import project.Internal.SQL_Type_Reference.SQL_Type_Reference

from project.Errors import Unsupported_Database_Operation, Integrity_Error, Unsupported_Name, Table_Not_Found
Expand Down Expand Up @@ -559,10 +562,43 @@ type Table
@group_by Widget_Helpers.make_column_name_vector_selector
@order_by Widget_Helpers.make_order_by_selector
add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Column_Selector) | Text | Integer -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
add_row_number self name="Row" from=1 step=1 group_by=[] order_by=[] on_problems=Problem_Behavior.Report_Warning =
_ = [name, from, step, group_by, order_by, on_problems]
msg = "`Table.add_row_number` is not yet implemented in the Database backend."
Error.throw (Unsupported_Database_Operation.Error msg)
add_row_number self (name:Text = "Row") (from:Integer = 1) (step:Integer = 1) group_by=[] order_by=[] on_problems=Problem_Behavior.Report_Warning =
# Problems found while resolving the columns are gathered here and attached
# to the result (or raised) according to `on_problems` further below.
problem_builder = Problem_Builder.new error_on_missing_columns=True
grouping_columns = self.columns_helper.select_columns_helper group_by True problem_builder
# Grouping by a floating point column is reported as a
# `Floating_Point_Equality` warning.
grouping_columns.each internal_column->
column = self.make_column internal_column
if column.value_type.is_floating_point then
problem_builder.report_other_warning (Floating_Point_Equality.Error column.name)
ordering = Table_Helpers.resolve_order_by self.columns order_by problem_builder
problem_builder.attach_problems_before on_problems <|
# An explicit `order_by` takes precedence; otherwise the table's
# `default_ordering` is used and it is an error if there is none.
order_descriptors = case ordering.is_empty of
False -> ordering.map element->
column = element.column
associated_selector = element.associated_selector
self.connection.dialect.prepare_order_descriptor column associated_selector.direction text_ordering=Nothing
True -> case self.default_ordering of
Nothing -> Error.throw (Illegal_Argument.Error "No `order_by` is specified and the table has no existing ordering (e.g. from an `order_by` operation or a primary key). Some ordering is required for `add_row_number` in Database tables.")
descriptors -> descriptors
grouping_expressions = grouping_columns.map .expression

# The operation takes a flat parameter list, so a separator literal marks
# where the ordering parameters end and the grouping parameters begin.
separator = SQL_Expression.Literal Base_Generator.row_number_parameter_separator
# The SQL row_number() counts from 1, so we adjust the offset.
offset = from - step
# Parameter layout: offset, step, order descriptors, separator, grouping expressions.
params = [SQL_Expression.Constant offset, SQL_Expression.Constant step] + order_descriptors + [separator] + grouping_expressions
new_expr = SQL_Expression.Operation "ROW_NUMBER" params

# The type of the new column is inferred through the dialect's type mapping.
type_mapping = self.connection.dialect.get_type_mapping
infer_from_database_callback expression =
SQL_Type_Reference.new self.connection self.context expression
new_type_ref = type_mapping.infer_return_type infer_from_database_callback "ROW_NUMBER" [] new_expr

new_column = Internal_Column.Value name new_type_ref new_expr

# `rename_columns_if_needed` receives a callback that rebuilds this table from
# a list of columns (presumably used to rename existing columns clashing with
# `name` - confirm in `Add_Row_Number`). The new column is appended last.
rebuild_table columns =
self.updated_columns (columns.map .as_internal)
renamed_table = Add_Row_Number.rename_columns_if_needed self name on_problems rebuild_table
renamed_table.updated_columns (renamed_table.internal_columns + [new_column])


## UNSTABLE

Expand Down Expand Up @@ -825,22 +861,51 @@ type Table
table.order_by [(Sort_Column.Select_By_Name "a.*" use_regex=True case_sensitivity=Case_Sensitivity.Insensitive)]
@columns Widget_Helpers.make_order_by_selector
order_by : Vector (Text | Sort_Column) | Text -> Text_Ordering -> Boolean -> Problem_Behavior -> Table ! Incomparable_Values | No_Input_Columns_Selected | Missing_Input_Columns
order_by self (columns = ([(Sort_Column.Name (self.columns.at 0 . name))])) text_ordering=Text_Ordering.Default error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning = Panic.handle_wrapped_dataflow_error <|
order_by self (columns = ([(Sort_Column.Name (self.columns.at 0 . name))])) text_ordering=Text_Ordering.Default error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns types_to_always_throw=[No_Input_Columns_Selected]
columns_for_ordering = Table_Helpers.prepare_order_by self.columns columns problem_builder
problem_builder.attach_problems_before on_problems <|
new_order_descriptors = columns_for_ordering.map selected_column->
column = selected_column.column
associated_selector = selected_column.associated_selector
effective_text_ordering = if column.value_type.is_text then text_ordering else Nothing
## FIXME [RW] this is only needed because `Vector.map` does not
propagate dataflow errors correctly. See:
https://www.pivotaltracker.com/story/show/181057718
Panic.throw_wrapped_if_error <|
self.connection.dialect.prepare_order_descriptor column associated_selector.direction effective_text_ordering
self.connection.dialect.prepare_order_descriptor column associated_selector.direction effective_text_ordering
new_ctx = self.context.add_orders new_order_descriptors
self.updated_context new_ctx

## PRIVATE
   Determines the ordering that operations like `add_row_number` or `take`
   fall back to.

   An ordering already present in the context (e.g. set by `order_by`) takes
   precedence. Otherwise, an ascending ordering over the primary key columns
   is used, if a primary key is available. If neither is available, `Nothing`
   is returned.
default_ordering : Vector Order_Descriptor | Nothing
default_ordering self =
    context_orders = self.context.orders
    case context_orders.is_empty of
        False -> context_orders
        True -> case self.get_primary_key of
            Nothing -> Nothing
            key_names : Vector -> case self.context.from_spec of
                From_Spec.Table _ alias _ -> key_names.map key_name->
                    key_expression = SQL_Expression.Column alias key_name
                    Order_Descriptor.Value key_expression Sort_Direction.Ascending
                _ -> Nothing

## PRIVATE
   Looks up the primary key of the table backing this query, if there is one.

   Returns `Nothing` when the query is not a plain table, when it has been
   grouped, or when fetching the key fails for any reason.
get_primary_key : Vector Text | Nothing
get_primary_key self = case self.context.from_spec of
    From_Spec.Table table_name _ _ ->
        # After grouping, the primary key may no longer be valid.
        has_grouping = self.context.groups.is_empty.not
        if has_grouping then Nothing else
            fetched = self.connection.dialect.fetch_primary_key self.connection table_name
            # Any failure while fetching the key is treated as 'no key'.
            fetched.catch Any _->Nothing
    # A table that is a result of a join, union or a subquery has no notion of primary key.
    _ -> Nothing

## Returns the distinct set of rows within the specified columns from the
input table.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,8 @@ base_dialect =
nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
types = [simple_cast]
base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains + types)
windows = [["ROW_NUMBER", make_row_number]]
base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains + types + windows)
Internal_Dialect.Value base_map wrap_in_quotes

## PRIVATE
Expand Down Expand Up @@ -265,6 +266,39 @@ make_is_in_column arguments = case arguments.length of
Builder.code "CASE WHEN " ++ expr ++ " IS NULL THEN " ++ has_nulls ++ " ELSE " ++ is_in ++ " END"
_ -> Error.throw <| Illegal_State.Error ("The operation IS_IN_COLUMN requires at exactly 3 arguments: the expression, the IN subquery, the subquery checking for nulls.")

## PRIVATE
   Builds the SQL code for the `ROW_NUMBER` operation.

   The arguments are expected in the following order: the offset, the step,
   the ordering expressions, the `row_number_parameter_separator` marker and
   finally the grouping expressions (possibly none).

   The generated expression has the form `row_number() * step + offset`,
   computed over a window ordered by the ordering expressions and, if any
   grouping expressions are present, partitioned by them.

   Throws an `Illegal_State` error if fewer than 4 arguments are provided or
   if the separator marker cannot be found among them.
make_row_number : Vector Builder -> Builder
make_row_number arguments = if arguments.length < 4 then Error.throw (Illegal_State.Error "Wrong amount of parameters in ROW_NUMBER IR. This is a bug in the Database library.") else
    offset = arguments.at 0
    step = arguments.at 1

    separator_ix = arguments.index_of code->
        code.build.prepare.first == row_number_parameter_separator
    # `index_of` yields `Nothing` if no argument matches; fail with a clear
    # message instead of passing `Nothing` on to `Range.new` below.
    if separator_ix.is_nothing then Error.throw (Illegal_State.Error "Missing parameter separator in ROW_NUMBER IR. This is a bug in the Database library.") else
        ordering = arguments.take (Range.new 2 separator_ix)
        grouping = arguments.drop (separator_ix+1)

        group_part = if grouping.length == 0 then "" else
            Builder.code "PARTITION BY " ++ Builder.join ", " grouping
        Builder.code "(row_number() OVER (" ++ group_part ++ " ORDER BY " ++ Builder.join ", " ordering ++ ") * " ++ step.paren ++ " + " ++ offset.paren ++ ")"

## PRIVATE
   A 'fake' parameter used as a marker that separates groups of arguments
   passed to an operation.

   The IR only allows an operation to take a flat list of parameters, and all
   of them are converted into SQL code before being passed to the particular
   operation builder. At that stage the builder cannot tell the kinds of
   arguments apart, which is a problem for an operation taking a variable
   number of arguments of multiple kinds (here both groups and orders are
   varying).

   This marker is placed between the groups of arguments to work around that.
   It is a hack and it is not supposed to end up in the generated SQL code.

   This is yet another argument for the IR redesign.
row_number_parameter_separator = "--<!PARAMETER_SEPARATOR!>--"

## PRIVATE

Builds code for an expression.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ import project.Internal.Postgres.Postgres_Error_Mapper.Postgres_Error_Mapper
import project.Internal.SQL_Type_Mapping.SQL_Type_Mapping
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
import project.Internal.Statement_Setter.Statement_Setter
from project.Errors import Unsupported_Database_Operation
from project.Errors import SQL_Error, Unsupported_Database_Operation

## PRIVATE

Expand Down Expand Up @@ -219,6 +219,14 @@ type Postgres_Dialect
get_error_mapper : Error_Mapper
get_error_mapper self = Postgres_Error_Mapper

## PRIVATE
   The dialect-dependent strategy to get the Primary Key for a given table.

   This dialect delegates to `Dialect.default_fetch_primary_key`.

   Arguments:
   - connection: the connection passed on to the default implementation.
   - table_name: the name of the table whose primary key is looked up.

   Returns `Nothing` if the key is not defined.
fetch_primary_key : Connection -> Text -> Vector Text | Nothing
fetch_primary_key self connection table_name =
    Dialect.default_fetch_primary_key connection table_name

## PRIVATE
make_internal_generator_dialect =
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import project.Internal.SQL_Type_Reference.SQL_Type_Reference
import project.Internal.SQLite.SQLite_Type_Mapping.SQLite_Type_Mapping
import project.Internal.SQLite.SQLite_Error_Mapper.SQLite_Error_Mapper
import project.Internal.Statement_Setter.Statement_Setter
from project.Errors import Unsupported_Database_Operation
from project.Errors import SQL_Error, Unsupported_Database_Operation

## PRIVATE

Expand Down Expand Up @@ -239,6 +239,24 @@ type SQLite_Dialect
get_error_mapper : Error_Mapper
get_error_mapper self = SQLite_Error_Mapper

## PRIVATE
   The dialect-dependent strategy to get the Primary Key for a given table.

   Returns `Nothing` if the key is not defined.

   Custom handling is required, because the default DatabaseMetaData
   implementation does not correctly handle temporary tables.

   Arguments:
   - connection: the connection used to run the `pragma table_info` query.
   - table_name: the name of the table whose primary key is looked up; it is
     wrapped as an identifier before being put into the query.
fetch_primary_key : Connection -> Text -> Vector Text | Nothing
fetch_primary_key self connection table_name =
    wrapped_name = self.internal_generator_dialect.wrap_identifier table_name
    query = Builder.code "pragma table_info(" ++ wrapped_name ++ ")"
    info_table = connection.read_statement query.build
    ## The `pk` field is non-zero if the column is part of the primary key.
       The column value indicates the position in the key.
       See: https://www.sqlite.org/pragma.html#pragma_table_info
    v = info_table.filter "pk" (>0) . order_by "pk" . at "name" . to_vector
    if v.is_empty then Nothing else v

## PRIVATE
make_internal_generator_dialect =
text = [starts_with, contains, ends_with, make_case_sensitive]+concat_ops+trim_ops
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ operations_map =
always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_EMPTY", "LIKE", "IS_IN", "IS_IN_COLUMN", "starts_with", "ends_with", "contains", "BOOL_OR", "IS_INF"]
always_floating_ops = ["/", "mod", "AVG", "STDDEV_POP", "STDDEV_SAMP", "ROUND"]
always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM"]
always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS"]
always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "ROW_NUMBER"]
same_as_first = ["TRUNCATE", "CEIL", "FLOOR"]
arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"]
merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FILL_NULL", "COALESCE"]
Expand Down
Loading

0 comments on commit 4ccf356

Please sign in to comment.