From 0f4f8a0542a246b3d83f461d4ad26f759a24d5ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Thu, 6 Apr 2023 18:49:14 +0200 Subject: [PATCH] Full-joins in SQLite (#6215) Closes #5254 In #6189 the SQLite version was bumped to a newer release which has builtin support for Full and Right joins, so a workaround is no longer needed. --- .../Database/0.0.0-dev/src/Data/Dialect.enso | 8 --- .../Database/0.0.0-dev/src/Data/Table.enso | 4 +- .../Internal/Common/Database_Join_Helper.enso | 6 -- .../Internal/Postgres/Postgres_Dialect.enso | 7 --- .../Internal/Redshift/Redshift_Dialect.enso | 7 --- .../src/Internal/SQLite/SQLite_Dialect.enso | 16 ----- .../Join/Join_Spec.enso | 62 ++++++++----------- .../src/Common_Table_Operations/Main.enso | 5 +- .../Table_Tests/src/Database/SQLite_Spec.enso | 2 +- 9 files changed, 30 insertions(+), 87 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso index 645455bfa5e7..4733a1948b47 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso @@ -61,14 +61,6 @@ type Dialect _ = [internal_column, sort_direction, text_ordering] Unimplemented.throw "This is an interface only." - ## PRIVATE - Prepares a join operation, returning a new table instance encapsulating a - proper query. - prepare_join : Connection -> Join_Kind -> Text -> From_Spec -> From_Spec -> Vector -> Vector -> Vector -> Table - prepare_join self connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select = - _ = [connection, join_kind, new_table_name, left_subquery, right_subquery, on_expressions, where_expressions, columns_to_select] - Unimplemented.throw "This is an interface only." - ## PRIVATE Prepares a distinct operation. 
prepare_distinct : Table -> Vector -> Case_Sensitivity -> Problem_Builder -> Table diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index f6a8a4fd9d83..0d68dcfbd83d 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -952,7 +952,9 @@ type Table Join_Kind.Right_Exclusive -> SQL_Join_Kind.Right problem_builder.attach_problems_before on_problems <| - self.connection.dialect.prepare_join self.connection sql_join_kind new_table_name left_setup.subquery right_setup.subquery on_expressions where_expressions columns_to_select=result_columns + new_from = From_Spec.Join sql_join_kind left_setup.subquery right_setup.subquery on_expressions + new_ctx = Context.for_subquery new_from . set_where_filters where_expressions + Table.Value new_table_name self.connection result_columns new_ctx ## ALIAS Cartesian Join Joins tables by pairing every row of the left table with every row of the diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Join_Helper.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Join_Helper.enso index 69a775717055..bcd871267e45 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Join_Helper.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Join_Helper.enso @@ -15,12 +15,6 @@ import project.Internal.IR.Internal_Column.Internal_Column import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.SQL_Type_Reference.SQL_Type_Reference -## PRIVATE -default_prepare_join connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select = - new_from = From_Spec.Join join_kind left_subquery right_subquery on_expressions - new_ctx = Context.for_subquery new_from . 
set_where_filters where_expressions - Table.Value new_table_name connection columns_to_select new_ctx - ## PRIVATE make_join_helpers left_table right_table left_column_mapping right_column_mapping = ## Resolves the column in the original table and finds the expression diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso index 58020da1c791..a698436946f6 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso @@ -80,13 +80,6 @@ type Postgres_Dialect prepare_order_descriptor self internal_column sort_direction text_ordering = make_order_descriptor internal_column sort_direction text_ordering - ## PRIVATE - Prepares a join operation, returning a new table instance encapsulating a - proper query. - prepare_join : Connection -> SQL_Join_Kind -> Text -> From_Spec -> From_Spec -> Vector -> Vector -> Vector -> Table - prepare_join self connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select = - Database_Join_Helper.default_prepare_join connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select - ## PRIVATE Prepares a distinct operation. 
prepare_distinct : Table -> Vector -> Case_Sensitivity -> Problem_Builder -> Table diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Redshift/Redshift_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Redshift/Redshift_Dialect.enso index ab314cf34b2a..fa5521dc5cd2 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Redshift/Redshift_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Redshift/Redshift_Dialect.enso @@ -73,13 +73,6 @@ type Redshift_Dialect prepare_order_descriptor self internal_column sort_direction text_ordering = Postgres_Dialect.make_order_descriptor internal_column sort_direction text_ordering - ## PRIVATE - Prepares a join operation, returning a new table instance encapsulating a - proper query. - prepare_join : Connection -> SQL_Join_Kind -> Text -> From_Spec -> From_Spec -> Vector -> Vector -> Vector -> Table - prepare_join self connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select = - Database_Join_Helper.default_prepare_join connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select - ## PRIVATE A heuristic used by `Connection.query` to determine if a given text looks like a SQL query for the given dialect or is rather a table name. 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso index 50b91d48f865..41dd8548206b 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso @@ -90,22 +90,6 @@ type SQLite_Dialect True -> Order_Descriptor.Value internal_column.expression sort_direction collation="NOCASE" - ## PRIVATE - Prepares a join operation, returning a new table instance encapsulating a - proper query. - prepare_join : Connection -> SQL_Join_Kind -> Text -> From_Spec -> From_Spec -> Vector -> Vector -> Vector -> Table - prepare_join self connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select = case join_kind of - SQL_Join_Kind.Right -> - # We just do a left join with swapped order of sub-queries, while keeping the original order of columns. - Database_Join_Helper.default_prepare_join connection SQL_Join_Kind.Left new_table_name right_subquery left_subquery on_expressions where_expressions columns_to_select - SQL_Join_Kind.Full -> - ## TODO workaround for full joins by left outer + right-anti union - https://www.pivotaltracker.com/story/show/184090548 - Error.throw (Unsupported_Database_Operation.Error "Full outer joins are not YET supported by the SQLite backend. You may need to materialize the Table to perform this operation.") - _ -> - # Other kinds of joins just fall back to the default logic. - Database_Join_Helper.default_prepare_join connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select - ## PRIVATE Prepares a distinct operation. 
prepare_distinct : Table -> Vector -> Case_Sensitivity -> Problem_Builder -> Table diff --git a/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso index 3a7801fbb3d7..7288eb388836 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso @@ -46,16 +46,11 @@ spec setup = Test.specify "should allow Full join" <| t3 = t1.join t2 join_kind=Join_Kind.Full |> materialize |> _.order_by ["X", "W"] - case setup.test_selection.supports_full_join of - True -> - expect_column_names ["X", "Y", "Z", "W"] t3 - t3.at "X" . to_vector . should_equal [Nothing, 1, 2, 2, 3] - t3.at "Y" . to_vector . should_equal [Nothing, 4, 5, 5, 6] - t3.at "Z" . to_vector . should_equal [4, Nothing, 2, 2, 3] - t3.at "W" . to_vector . should_equal [7, Nothing, 4, 6, 5] - False -> - t3.should_fail_with Unsupported_Database_Operation - + expect_column_names ["X", "Y", "Z", "W"] t3 + t3.at "X" . to_vector . should_equal [Nothing, 1, 2, 2, 3] + t3.at "Y" . to_vector . should_equal [Nothing, 4, 5, 5, 6] + t3.at "Z" . to_vector . should_equal [4, Nothing, 2, 2, 3] + t3.at "W" . to_vector . should_equal [7, Nothing, 4, 6, 5] Test.specify "should allow Left Outer join" <| t4 = t1.join t2 join_kind=Join_Kind.Left_Outer |> materialize |> _.order_by ["X", "W"] @@ -263,17 +258,14 @@ spec setup = t2.at "Right_A" . to_vector . should_equal ["B", "C", "C", "D"] t3 = t1.join t1 join_kind=Join_Kind.Full on=(Join_Condition.Equals left="X" right="Y") |> materialize |> _.order_by ["X", "Y", "Right_X"] - case setup.test_selection.supports_full_join of - True -> - expect_column_names ["X", "Y", "A", "Right_X", "Right_Y", "Right_A"] t3 - t3.at "X" . to_vector . should_equal [Nothing, Nothing, 0, 1, 2, 2, 3] - t3.at "Right_Y" . to_vector . should_equal [100, 4, Nothing, 1, 2, 2, 3] - - t3.at "Y" . to_vector . 
should_equal [Nothing, Nothing, 1, 2, 3, 100, 4] - t3.at "A" . to_vector . should_equal [Nothing, Nothing, "B", "C", "D", "X", "E"] - t3.at "Right_X" . to_vector . should_equal [2, 3, Nothing, 0, 1, 1, 2] - t3.at "Right_A" . to_vector . should_equal ["X", "E", Nothing, "B", "C", "C", "D"] - False -> Nothing + expect_column_names ["X", "Y", "A", "Right_X", "Right_Y", "Right_A"] t3 + t3.at "X" . to_vector . should_equal [Nothing, Nothing, 0, 1, 2, 2, 3] + t3.at "Right_Y" . to_vector . should_equal [100, 4, Nothing, 1, 2, 2, 3] + + t3.at "Y" . to_vector . should_equal [Nothing, Nothing, 1, 2, 3, 100, 4] + t3.at "A" . to_vector . should_equal [Nothing, Nothing, "B", "C", "D", "X", "E"] + t3.at "Right_X" . to_vector . should_equal [2, 3, Nothing, 0, 1, 1, 2] + t3.at "Right_A" . to_vector . should_equal ["X", "E", Nothing, "B", "C", "C", "D"] t4 = table_builder [["X", [Nothing, "a", "B"]], ["Y", ["ą", "b", Nothing]], ["Z", [1, 2, 3]]] t5 = t4.join t4 on=(Join_Condition.Equals_Ignore_Case left="Y" right="X") |> materialize |> _.order_by ["Y"] @@ -487,21 +479,17 @@ spec setup = r3.at 4 . should_equal [2, 3, 2, 5] t4 = t1.join t2 on=[Join_Condition.Equals "A" "C"] join_kind=Join_Kind.Full - case setup.test_selection.supports_full_join of - True -> - expect_column_names ["A", "B", "C", "D"] t4 - r4 = materialize t4 . order_by ["A", "B", "D", "C"] . rows . map .to_vector - within_table t4 <| - r4.length . should_equal 7 - r4.at 0 . should_equal [Nothing, Nothing, Nothing, Nothing] - r4.at 1 . should_equal [Nothing, Nothing, 4, Nothing] - r4.at 2 . should_equal [Nothing, Nothing, Nothing, 6] - r4.at 3 . should_equal [Nothing, 4, Nothing, Nothing] - r4.at 4 . should_equal [Nothing, 4, Nothing, 6] - r4.at 5 . should_equal [1, 7, Nothing, Nothing] - r4.at 6 . should_equal [2, 3, 2, 5] - False -> - (materialize t4) . should_fail_with Unsupported_Database_Operation + expect_column_names ["A", "B", "C", "D"] t4 + r4 = materialize t4 . order_by ["A", "B", "D", "C"] . rows . 
map .to_vector + within_table t4 <| + r4.length . should_equal 7 + r4.at 0 . should_equal [Nothing, Nothing, Nothing, Nothing] + r4.at 1 . should_equal [Nothing, Nothing, 4, Nothing] + r4.at 2 . should_equal [Nothing, Nothing, Nothing, 6] + r4.at 3 . should_equal [Nothing, 4, Nothing, Nothing] + r4.at 4 . should_equal [Nothing, 4, Nothing, 6] + r4.at 5 . should_equal [1, 7, Nothing, Nothing] + r4.at 6 . should_equal [2, 3, 2, 5] t4_2 = t1.join t2 on=[Join_Condition.Equals "A" "C"] join_kind=Join_Kind.Left_Outer expect_column_names ["A", "B", "C", "D"] t4_2 @@ -617,7 +605,7 @@ spec setup = r2.at 0 . should_equal [2, 20, 5, 5, 100] r2.at 1 . should_equal [3, 30, 7, 7, 200] - Test.specify "should allow full joins with more complex join conditions" pending=(if setup.test_selection.supports_full_join.not then "Full join workaround is not implemented for this backend yet.") <| + Test.specify "should allow full joins with more complex join conditions" <| t1 = table_builder [["X", ["a", "b", "c"]], ["Y", [10, 20, 30]]] t2 = table_builder [["X", ["Ć", "A", "b"]], ["Z", [100, 200, 300]]] diff --git a/test/Table_Tests/src/Common_Table_Operations/Main.enso b/test/Table_Tests/src/Common_Table_Operations/Main.enso index b0447b634060..d6cd891ebc9d 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Main.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Main.enso @@ -81,9 +81,6 @@ type Test_Selection - is_nan_and_nothing_distinct: Specifies if the backend is able to distinguish between a decimal NaN value and a missing value (Enso's Nothing, or SQL's NULL). If `False`, NaN is treated as a NULL. - - supports_full_join: Specifies if the backend supports full joins. - SQLite doesn't so we need to disable them until we implement a proper - workaround. - distinct_returns_first_row_from_group_if_ordered: If `order_by` was applied before, the distinct operation will return the first row from each group. 
Guaranteed in the in-memory backend, but may not be @@ -91,7 +88,7 @@ type Test_Selection - date_time: Specifies if the backend supports date/time operations. - fixed_length_text_columns: Specifies if the backend supports fixed length text columns. - Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True supports_full_join=True distinct_returns_first_row_from_group_if_ordered=True date_time=True fixed_length_text_columns=False + Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True distinct_returns_first_row_from_group_if_ordered=True date_time=True fixed_length_text_columns=False spec setup = Core_Spec.spec setup diff --git a/test/Table_Tests/src/Database/SQLite_Spec.enso b/test/Table_Tests/src/Database/SQLite_Spec.enso index cc0e7efb942a..fae2e551beb8 100644 --- a/test/Table_Tests/src/Database/SQLite_Spec.enso +++ b/test/Table_Tests/src/Database/SQLite_Spec.enso @@ -120,7 +120,7 @@ sqlite_spec connection prefix = Common_Spec.spec prefix connection sqlite_specific_spec prefix connection - common_selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=False order_by=True natural_ordering=False case_insensitive_ordering=True case_insensitive_ascii_only=True take_drop=False is_nan_and_nothing_distinct=False supports_full_join=False date_time=False + common_selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=False order_by=True natural_ordering=False 
case_insensitive_ordering=True case_insensitive_ascii_only=True take_drop=False is_nan_and_nothing_distinct=False date_time=False ## For now `advanced_stats`, `first_last`, `text_shortest_longest` and `multi_distinct` remain disabled, because SQLite does not provide the