Snowflake Dialect pt. 6 - Union, Distinct and other improvements (#10576)

- Part of #9486
- Fixes `Table.union`, `merge` and `distinct` tests
- Replaces `distinct_on` in `Context`, which was really a Postgres-specific addition leaking into the base, with a more abstract `Context_Extension` mechanism (a sketch of how a dialect can build on it follows below).
- This allows us to implement the Snowflake-specific `DISTINCT` using `QUALIFY`.
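A minimal sketch of how a dialect could build `DISTINCT` on top of `QUALIFY` via the new mechanism (hypothetical, not code from this commit; the position value 550, chosen to fall between the GROUP BY part at 500 and the ORDER BY part at 600, and the exact QUALIFY predicate are illustrative assumptions):

    ## Hypothetical: emulates DISTINCT by keeping one row per combination of
       the given key expressions, using Snowflake's QUALIFY clause.
    make_qualify_distinct_extension expressions =
        run_generator sql_expressions =
            keys = SQL_Builder.join ", " sql_expressions
            SQL_Builder.code " QUALIFY ROW_NUMBER() OVER (PARTITION BY " ++ keys ++ " ORDER BY " ++ keys ++ ") = 1"
        Context_Extension.Value position=550 expressions=expressions run_generator=run_generator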
radeusgd authored Jul 19, 2024
1 parent 2e0fa89 commit 7fd8701
Showing 49 changed files with 677 additions and 504 deletions.
@@ -1248,6 +1248,10 @@ type Integer
Integer.parse "20220216"
parse text:Text (radix=10:Integer) -> Integer ! Number_Parse_Error = integer_parse text radix

## PRIVATE
fits_in_long self -> Boolean =
self >= Long.MIN_VALUE && self <= Long.MAX_VALUE

## A syntax error when parsing a double.
@Builtin_Type
type Number_Parse_Error
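The new `fits_in_long` helper above is easy to sanity-check at the boundaries: `(10^18).fits_in_long` is `True`, while `(10^19).fits_in_long` is `False`, since `Long.MAX_VALUE` is 2^63 - 1 (about 9.22 * 10^18).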
@@ -578,8 +578,9 @@ type DB_Column
_ : DB_Column -> other.value_type.is_decimal
_ -> False
either_is_decimal = self.value_type.is_decimal || other_is_decimal
if either_is_decimal then self.make_binary_op "DECIMAL_DIV" other else
self.make_binary_op "/" other
new_name = self.naming_helper.binary_operation_name "/" self other
if either_is_decimal then self.make_binary_op "DECIMAL_DIV" other new_name else
self.make_binary_op "/" other new_name

## ALIAS modulo, modulus
GROUP Standard.Base.Operators
19 changes: 6 additions & 13 deletions distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
@@ -1323,9 +1323,10 @@ type DB_Table
distinct self columns=self.column_names case_sensitivity:Case_Sensitivity=..Default on_problems:Problem_Behavior=..Report_Warning =
key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=True on_problems=on_problems . catch No_Output_Columns _->
Error.throw No_Input_Columns_Selected
problem_builder = Problem_Builder.new
new_table = self.connection.dialect.prepare_distinct self key_columns case_sensitivity problem_builder
problem_builder.attach_problems_before on_problems new_table
key_columns.if_not_error <|
problem_builder = Problem_Builder.new
new_table = self.connection.dialect.prepare_distinct self key_columns case_sensitivity problem_builder
problem_builder.attach_problems_before on_problems new_table

## GROUP Standard.Base.Selections
ICON preparation
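(Note: the `key_columns.if_not_error` wrapper in the hunk above makes a `No_Input_Columns_Selected` dataflow error short-circuit immediately, instead of being passed on into `prepare_distinct`.)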
@@ -2612,16 +2613,8 @@ type DB_Table
actual_types = materialized_table.columns.map .value_type
expected_types.zip actual_types expected_type-> actual_type->
if expected_type == actual_type then Nothing else
expected_type_kind = Meta.meta expected_type . constructor
actual_type_kind = Meta.meta actual_type . constructor
## We ignore simple approximations that our in-memory backend does - things like adding default
timezone (because we do not have Date_Time without timezone in-memory),
or changing Float32 to Float64 are silently ignored.
However, bigger changes, like a Binary type column getting coerced to Mixed - _will_ still be reported.
if expected_type_kind == actual_type_kind then Nothing else
# If the reverse was an implicit conversion, undoing it also should not yield warnings:
if self.connection.dialect.get_type_mapping.is_implicit_conversion actual_type expected_type then Nothing else
warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type)
if self.connection.dialect.get_type_mapping.should_warn_on_materialize expected_type actual_type then
warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type)

result = max_rows.attach_warning materialized_table
Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector result
@@ -420,25 +420,46 @@ generate_order dialect order_descriptor =


## PRIVATE

Generates SQL code corresponding to a SELECT statement.

Arguments:
- dialect: The SQL dialect for which the code is being generated.
- columns: The columns to select, or `Nothing` to select all columns (`*`).
- ctx: A description of the SELECT clause.
generate_select_context : Dialect -> Context -> SQL_Builder
generate_select_context dialect ctx =
generate_select : Dialect -> Vector | Nothing -> Context -> SQL_Builder
generate_select dialect columns ctx =
gen_exprs exprs = exprs.map (generate_expression dialect)
gen_column pair = (generate_expression dialect pair.second) ++ alias dialect pair.first

generated_columns = case columns of
Nothing -> SQL_Builder.code "*"
_ -> SQL_Builder.join ", " (columns.map gen_column)

from_part = generate_from_part dialect ctx.from_spec
where_part = (SQL_Builder.join " AND " (gen_exprs ctx.where_filters)) . prefix_if_present " WHERE "
group_part = (SQL_Builder.join ", " (gen_exprs ctx.groups)) . prefix_if_present " GROUP BY "

orders = ctx.orders.map (generate_order dialect)
order_part = (SQL_Builder.join ", " orders) . prefix_if_present " ORDER BY "

limit_part = case ctx.limit of
Nothing -> ""
_ : Integer -> " LIMIT " + ctx.limit.to_text

orders = ctx.orders.map (generate_order dialect)
order_part = (SQL_Builder.join ", " orders) . prefix_if_present " ORDER BY "
(SQL_Builder.code " FROM ") ++ from_part ++ where_part ++ group_part ++ order_part ++ limit_part
extensions = ctx.extensions.map extension->
part = extension.run_generator (gen_exprs extension.expressions)
[extension.position, part]

parts = Vector.build builder->
builder.append [100, SQL_Builder.code "SELECT "]
builder.append [200, generated_columns]
builder.append [300, SQL_Builder.code " FROM " ++ from_part]
builder.append [400, where_part]
builder.append [500, group_part]
builder.append [600, order_part]
builder.append [700, limit_part]
extensions.each builder.append

SQL_Builder.join "" <| parts.sort on=(.first) . map .second

## PRIVATE

@@ -467,18 +488,7 @@ generate_insert_query dialect table_name pairs =
generate_query : Dialect -> Query -> SQL_Builder
generate_query dialect query = case query of
Query.Select columns ctx ->
gen_column pair = (generate_expression dialect pair.second) ++ alias dialect pair.first
cols = case columns of
Nothing -> SQL_Builder.code "*"
_ -> SQL_Builder.join ", " (columns.map gen_column)
prefix = case ctx.distinct_on of
Nothing -> SQL_Builder.code ""
expressions : Vector ->
## TODO I just realised this does not make sense in other backends than Postgres,
so we should probably fail in such cases; probably rewrite into a generic modifier? or a transform?
generated = SQL_Builder.join ", " (expressions.map (generate_expression dialect))
SQL_Builder.code "DISTINCT ON (" ++ generated ++ ") "
SQL_Builder.code "SELECT " ++ prefix ++ cols ++ generate_select_context dialect ctx
generate_select dialect columns ctx
Query.Insert table_name pairs ->
generate_insert_query dialect table_name pairs
Query.Create_Table name columns primary_key temporary ->
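A worked illustration of the new part-assembly (using the positions documented on `Context_Extension` in the next file): for a context with one WHERE filter and the Postgres `DISTINCT ON` extension registered at position 120, the parts sort as SELECT (100), the extension (120), the column list (200), FROM (300) and WHERE (400), so the builder emits a query shaped like:

    SELECT DISTINCT ON ("a") "a", "b" FROM "t" WHERE "b" > 0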
@@ -5,6 +5,7 @@ import project.Internal.IR.From_Spec.From_Spec
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.Order_Descriptor.Order_Descriptor
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.SQL.SQL_Builder

## PRIVATE

@@ -22,7 +23,7 @@ type Context
details.
for_table : Text -> Text -> Any -> Context
for_table table_name alias=table_name internal_temporary_keep_alive_reference=Nothing =
Context.Value (From_Spec.Table table_name alias internal_temporary_keep_alive_reference=internal_temporary_keep_alive_reference) [] [] [] Nothing Nothing
Context.Value (From_Spec.Table table_name alias internal_temporary_keep_alive_reference=internal_temporary_keep_alive_reference) [] [] [] Nothing []

## PRIVATE

@@ -33,7 +34,7 @@ type Context
- alias: An alias name to use for table within the query.
for_query : Text -> Text -> Context
for_query raw_sql alias =
Context.Value (From_Spec.Query raw_sql alias) [] [] [] Nothing Nothing
Context.Value (From_Spec.Query raw_sql alias) [] [] [] Nothing []

## PRIVATE

@@ -43,7 +44,7 @@ type Context
- subquery: The subquery to lift into a context.
for_subquery : From_Spec -> Context
for_subquery subquery =
Context.Value subquery [] [] [] Nothing Nothing
Context.Value subquery [] [] [] Nothing []

## PRIVATE

@@ -66,7 +67,7 @@ type Context
grouped-by columns or aggregate expressions.
- limit: an optional maximum number of elements that the query should
return.
Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (limit : Nothing | Integer) (distinct_on : Nothing | Vector SQL_Expression)
Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (limit : Nothing | Integer) (extensions : Vector Context_Extension)

## PRIVATE

@@ -76,7 +77,7 @@ type Context
- new_filters: The new filters to set in the query.
set_where_filters : Vector SQL_Expression -> Context
set_where_filters self new_filters =
Context.Value self.from_spec new_filters self.orders self.groups self.limit self.distinct_on
Context.Value self.from_spec new_filters self.orders self.groups self.limit self.extensions

## PRIVATE

@@ -87,7 +88,7 @@ type Context
query.
add_where_filters : Vector SQL_Expression -> Context
add_where_filters self new_filters =
Context.Value self.from_spec (self.where_filters+new_filters) self.orders self.groups self.limit self.distinct_on
Context.Value self.from_spec (self.where_filters+new_filters) self.orders self.groups self.limit self.extensions

## PRIVATE

@@ -97,7 +98,7 @@ type Context
- new_orders: The new ordering clauses to set in the query.
set_orders : Vector Order_Descriptor -> Context
set_orders self new_orders =
Context.Value self.from_spec self.where_filters new_orders self.groups self.limit self.distinct_on
Context.Value self.from_spec self.where_filters new_orders self.groups self.limit self.extensions

## PRIVATE

@@ -114,7 +115,7 @@ type Context
- new_orders: The new ordering clauses to add to the query.
add_orders : Vector Order_Descriptor -> Context
add_orders self new_orders =
Context.Value self.from_spec self.where_filters new_orders+self.orders self.groups self.limit self.distinct_on
Context.Value self.from_spec self.where_filters new_orders+self.orders self.groups self.limit self.extensions

## PRIVATE

@@ -124,7 +125,7 @@ type Context
- new_groups: The new grouping clauses to set in the query.
set_groups : Vector SQL_Expression -> Context
set_groups self new_groups =
Context.Value self.from_spec self.where_filters self.orders new_groups self.limit self.distinct_on
Context.Value self.from_spec self.where_filters self.orders new_groups self.limit self.extensions

## PRIVATE

@@ -134,14 +135,13 @@ type Context
- new_limit: The new limit clauses to set in the query.
set_limit : (Nothing | Integer) -> Context
set_limit self new_limit =
Context.Value self.from_spec self.where_filters self.orders self.groups new_limit self.distinct_on
Context.Value self.from_spec self.where_filters self.orders self.groups new_limit self.extensions

## PRIVATE

Returns a copy of the context with changed `distinct_on` expressions.
set_distinct_on : (Nothing | Vector SQL_Expression) -> Context
set_distinct_on self new_distinct_on =
Context.Value self.from_spec self.where_filters self.orders self.groups self.limit new_distinct_on
Returns a copy of the context with an added extension.
add_extension : Context_Extension -> Context
add_extension self extension =
Context.Value self.from_spec self.where_filters self.orders self.groups self.limit (self.extensions + [extension])

## PRIVATE

@@ -176,3 +176,27 @@ type Context
type Subquery_Setup
## PRIVATE
Value (subquery : From_Spec) (new_columns : Vector (Vector Internal_Column))

## PRIVATE
Describes an extension to a Context that can be used to add additional custom
SQL code as part of the query.
type Context_Extension
## A recipe for building the extension.

Arguments:
- position: Determines where the extension code should be inserted.
The positions of the common query parts are as follows:
- 100 - the SELECT keyword
- 200 - the column descriptions
- 300 - the FROM part
- 400 - the WHERE part
- 500 - the GROUP BY part
- 600 - the ORDER BY part
- 700 - the LIMIT part
Setting the position to a value between two of these slots the extension
at the corresponding place in the query.
- expressions: Sub-expressions needed by this part. They will be
generated and the resulting builders will be passed to `run_generator`.
- run_generator: A function that takes the generated expressions and
returns the SQL code that will be inserted at the desired position.
Value (position : Integer) (expressions : Vector SQL_Expression) (run_generator : Vector SQL_Builder -> SQL_Builder)
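As another illustration of the mechanism (a hypothetical sketch, not part of this commit), a dialect could append an `OFFSET` clause just after the LIMIT part:

    ## Hypothetical: emits an OFFSET clause; position 710 is an illustrative
       value placing it directly after the LIMIT part (700).
    make_offset_extension offset =
        run_generator _ =
            SQL_Builder.code (" OFFSET " + offset.to_text)
        Context_Extension.Value position=710 expressions=[] run_generator=run_generator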
@@ -19,6 +19,7 @@ import project.Internal.Common.Database_Distinct_Helper
import project.Internal.Common.Database_Join_Helper
import project.Internal.Error_Mapper.Error_Mapper
import project.Internal.IR.Context.Context
import project.Internal.IR.Context.Context_Extension
import project.Internal.IR.From_Spec.From_Spec
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.Nulls_Order.Nulls_Order
@@ -122,7 +123,7 @@ type Postgres_Dialect
distinct_expressions = new_key_columns.map column->
value_type = type_mapping.sql_type_to_value_type column.sql_type_reference.get
Database_Distinct_Helper.make_distinct_expression case_sensitivity problem_builder column value_type
new_context = Context.for_subquery setup.subquery . set_distinct_on distinct_expressions
new_context = Context.for_subquery setup.subquery . add_extension (make_distinct_extension distinct_expressions)
table.updated_context_and_columns new_context new_columns subquery=True

## PRIVATE
@@ -764,6 +765,12 @@ as_int64 expr =
as_int32 expr =
SQL_Builder.code "(" ++ expr ++ "::int4)"

## PRIVATE
make_distinct_extension expressions =
run_generator sql_expressions =
SQL_Builder.code "DISTINCT ON (" ++ (SQL_Builder.join ", " sql_expressions) ++ ") "
Context_Extension.Value position=120 expressions=expressions run_generator=run_generator

## PRIVATE
The RUNTIME_ERROR operation should allow the query to compile fine and it
will not prevent it from running if the branch including this operation is
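(Design note: position 120 slots the generated `DISTINCT ON (...)` fragment between the `SELECT` keyword at 100 and the column list at 200, so the query reads `SELECT DISTINCT ON (...) <columns> FROM ...`, exactly where the removed `distinct_on` prefix used to be emitted.)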
@@ -131,6 +131,10 @@ type Postgres_Type_Mapping
_ = [source_type, target_type]
False

## PRIVATE
should_warn_on_materialize (db_type : Value_Type) (in_memory_type : Value_Type) -> Boolean =
SQL_Type_Mapping.default_should_warn_on_materialize db_type in_memory_type

## PRIVATE
is_integer_type (value_type : Value_Type) -> Boolean = value_type.is_integer

@@ -99,6 +99,13 @@ type SQL_Type_Mapping
_ = [source_type, target_type]
Unimplemented.throw "This is an interface only."

## PRIVATE
Specifies if the given type coercion should raise an
`Inexact_Type_Coercion` warning when materializing a table into memory.
should_warn_on_materialize (db_type : Value_Type) (in_memory_type : Value_Type) -> Boolean =
_ = [db_type, in_memory_type]
Unimplemented.throw "This is an interface only."

## PRIVATE
Specifies if this backend recognizes the given type as an integer type.

@@ -133,3 +140,11 @@ default_sql_type_to_text sql_type =
if sql_type.scale.is_nothing then "(" + sql_type.precision.to_text + ")" else
" (" + sql_type.precision.to_text + "," + sql_type.scale.to_text + ")"
sql_type.name.trim + suffix

## PRIVATE
default_should_warn_on_materialize (db_type : Value_Type) (in_memory_type : Value_Type) =
## We ignore simple approximations made by our in-memory backend: things like
adding a default timezone (because we do not have a Date_Time without timezone
in-memory) or widening Float32 to Float64 are silently accepted.
However, bigger changes, like a Binary column getting coerced to Mixed, _will_
still be reported.
(Meta.meta db_type . constructor) != (Meta.meta in_memory_type . constructor)
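For example (assuming the standard `Value_Type` constructors): a database `Value_Type.Float Bits.Bits_32` column materialized as the in-memory `Value_Type.Float Bits.Bits_64` compares two `Float` constructors, so no warning is raised, while a `Binary` column coerced to `Mixed` compares different constructors and does report the `Inexact_Type_Coercion` warning.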
@@ -123,6 +123,10 @@ type SQLite_Type_Mapping
_ = [source_type, target_type]
False

## PRIVATE
should_warn_on_materialize (db_type : Value_Type) (in_memory_type : Value_Type) -> Boolean =
SQL_Type_Mapping.default_should_warn_on_materialize db_type in_memory_type

## PRIVATE
is_integer_type (value_type : Value_Type) -> Boolean = value_type.is_integer

@@ -4,6 +4,7 @@ import Standard.Base.Errors.Common.Dry_Run_Operation
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Runtime.Context
from Standard.Base.Logging import all
from Standard.Base.Runtime import assert

import Standard.Table.Internal.Problem_Builder.Problem_Builder
@@ -517,10 +518,15 @@ dry_run_row_limit = 1000
It is a panic, because it is never expected to happen in user code - if it
happens, it is a bug in our code.
check_transaction_ddl_support connection =
supports_ddl = connection.jdbc_connection.with_metadata metadata->
metadata.supportsDataDefinitionAndDataManipulationTransactions
if supports_ddl.not then
Panic.throw (Illegal_State.Error "The connection "+connection.to_text+" does not support transactional DDL statements. Our current implementation of table updates relies on transactional DDL. To support this driver, the logic needs to be amended.")
connection.jdbc_connection.with_metadata metadata->
supports_ddl = metadata.supportsDataDefinitionAndDataManipulationTransactions && metadata.dataDefinitionIgnoredInTransactions.not
if supports_ddl.not then
Panic.throw (Illegal_State.Error "The connection "+connection.to_text+" does not support transactional DDL statements. Our current implementation of table updates relies on transactional DDL. To support this driver, the logic needs to be amended.")
ddl_causes_commit = metadata.dataDefinitionCausesTransactionCommit
if ddl_causes_commit then
# TODO fix for Snowflake support
#Panic.throw (Illegal_State.Error "The connection "+connection.to_text+" does not fully support DDL statements as part of complex transactions - DDL causes a commit, so we cannot compose it. To support this driver, the logic needs to be amended.")
Nothing

## PRIVATE
common_delete_rows target_table key_values_to_delete key_columns allow_duplicate_matches =
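For background: JDBC's `DatabaseMetaData` exposes three related capabilities that this check now distinguishes: `supportsDataDefinitionAndDataManipulationTransactions` (whether DDL may appear inside a transaction at all), `dataDefinitionIgnoredInTransactions` (whether such DDL is silently ignored), and `dataDefinitionCausesTransactionCommit` (whether DDL implicitly commits the enclosing transaction, which is Snowflake's behaviour and why that panic is left commented out with a TODO).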
1 change: 0 additions & 1 deletion distribution/lib/Standard/Image/0.0.0-dev/src/Image.enso
@@ -107,7 +107,6 @@ type Image
_ -> [flags]
int_flags = MatOfInt.new (write_flags.flat_map x-> [x.to_integer, x.value])
write_to_local_file file:File =
IO.println "Writing the image to a file: "+file.path
Panic.catch JException (Java_Codecs.write file.path self.opencv_mat int_flags) _->
Error.throw (File_Error.IO_Error path.file 'Failed to write to the file')
r = if path.is_local then write_to_local_file path.file else