diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial index 1644a64ca128..3189599ae398 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial +++ b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial @@ -165,7 +165,7 @@ AND (ST_DFullyWithin(rtable.geom, ltable.geom1, 100) OR ST_Intersects('POINT(1.0 ---- 1 13 -# These queries perform semi-joins, which are converted to inner joins by the +# These queries perform semi-joins, which are converted to paired joins by the # optimizer. query I SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom)) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist index a506569d95c3..bdd639f71b65 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist +++ b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist @@ -79,13 +79,13 @@ AND (ST_DFullyWithin(rtable.geom, ltable.geom1, 100) OR ST_Intersects('POINT(1.0 ---- 
https://cockroachdb.github.io/distsqlplan/decode.html#eJzsVUFv2jAYve9XWN-lIBmIE6DUp1QrTKlo0gGHTRWqMuJ1WVM7s52uVcV_n5JUKwlgUk27wQnb7_l9ed_35BdQvxKgMB9Pxx8XKJMJmsyCK3Qz_nI9Pfd81Lrw5ov552kbvUKSe4zkfYlKdPgtYegy8Hwki__uHRMPtzGP2BMKfNRS-jbmmknFVlq1Snw3xxD8yihWbXTuXyClb1fikckK0K4CS2R-b_Q9S5Ln37H-EfPWBgSjqgyxrDYKZqhaysl14PmLFulaiHSt9klNZQkYuIiYHz4wBfQGCGCwAYMDSwypFCumlJD50UsB9KInoBaGmKeZzreXGFZCMqAvoGOdMKCwyAVmLIyY7FmAIWI6jJPi-rJkN5XxQyifAcM8DbmiqNOzYbnGIDL9dq_S4R0DSta4ubbHH5nULLoUMWey51Tlt5oH-C9j_JTKeiNdGyN3UO-a6xS7e1pUcop-dC1K6WQanC9Gu1pjEYtYW7-J5Uzq6xNK6adxcDVezL6W2oAhyDRFLsGujV0Hu_29BtrvMTA37rV3w53mvfVuKsR9lqKfIuZIcIrcfl7VVhoKQ4Y7TRzuNXGIUc77jyYOqybut8_Za9-baxkXMmKSRRXLlusdBvuiI9LeWQ24W7pfkSbNY0eaxa5nd3pOw-AdUK8Fr38MXj14BwzcCN7pMXjbwbObT7_dcPqdTsPZP6Bdm_3Bcfbrs3_AwI3ZHx1n3_zo7LBvxlQquGKN3hQrf5RYdMfKF0yJTK7YtRSrQqZcBgWv2IiY0uUpKRceL4_yAjfJxEi2zWTbSHYqZFInO-ayLbN038gemMkDI3loJg__5aNPjeSRWXlkJJ-ZyWfvKnu5_vAnAAD__96MMnU= -# This query performs a semi-join, which is converted to an inner join by the +# This query performs a semi-join, which is converted to paired joins by the # optimizer. 
query T SELECT url FROM [EXPLAIN (DISTSQL) SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))] ---- -https://cockroachdb.github.io/distsqlplan/decode.html#eJzElVFP2zAQx9_3Kax7opu71klaIE-ZRqZ16lrWIg0JVSg0J8gIcWY7Ewj1u09JKtKG1k4Ho49J7uf7-ey_8gjydwwuTP2h__mMZCImXybj7-TCPz8dfhqMyMHJYHo2_TFskWVJfFtWxCq4ipH8_OpPfOKf51XkYFnzviwRqyVSXUaJQiFxruRBSX-8Rn5n0WVh8dRqzYBCwkMcBXcowb0ABhQsoGDDjEIq-Byl5CL_9FgUDsJ7cLsUoiTNVP56RmHOBYL7CCpSMYILZ3mDCQYhik4XKISogiguli9VvFREd4F4AArTNEikS9qdvOk4Uy7xGPVsmC0o8ExVTaQKrhFctqDNRQbJHxQKw288SlB07HWXchBePojLKAnxHugT4d-nojZFz6LEc1qrmpbO1NrFNDdcTqy_0bKa2JDz2ywlv3iUEJ64xMu3NR5tsu2t225VtXdRPYmkipK56hyvi3r51RmLEAWGecNat2qBqwdyE8ibZ_RsURk5W42qdXjZq77Oh3KhrdrMMng_jWzrvHoN7LJkk99GtRFv87TDerXKzb37a71Z8ySyZknsWO0iKDtn0aBSy6KzxywaTFeyeLjvLBpUq0vdfbMwslcNo_3KYbSaB8JqGAi7_S9xMIjU4tDbYxwMpitxONp3HAyq1bVibxYH61Xj4PzHf9OGxhOUKU8kNvrzdHN1DK-x3KrkmZjjqeDzok35OC644kWIUpVfWfkwSMpPueAqzLSwpYctLWzrYbsOs1XYWYPZbjDrvoju6WlHu2sD3NMfVl8_s76WPtTDh1r4SA8faeFjPXz8kqPWw6ajNtCG02L6bJlofbiYIV1MHy9myBd7dsvXcceAP7vmuxyagTadmgk3DV6fsjo9W7z7GwAA__9Kzz7E +https://cockroachdb.github.io/distsqlplan/decode.html#eJzUlN9v2j4Uxd-_f4V1n8pXZpAEaJunTFuqpaLQAdIqVVGVxXdV1tTObGeiQvzvkxM2fghM2F7aR8fn5J7P1ZEXoH7k4MM0HIYfZqSUObmajG_IfXh3O3wfjcjZx2g6m34etshKkj_VilwnX3MkXz6Fk5CEd0ZFzlaa_2uJ3JQo_ZBxjVJhqtVZ7X73iOLZpSthdWq1YqDABcNR8owK_HtwgIILFDyIKRRSpKiUkOZqUQkjNge_SyHjRanN55hCKiSCvwCd6RzBh5kZMMGEoex0gQJDnWR59fs6SlDI7DmRL0BhWiRc-aTdMUPHpfZJ4NDAg3hJQZR6PUTp5BHBd5a0eZCI_0SpkV2LjKPseNtZ6kUEZhEPGWc4B_rHEc4LubPFwKUk6LWAwrdMKk2-i4yTjJMiySSytjluIrg08GjQPwjingJiAFYLHWxDzF4K9MkwvJqRaXgTketxNAL6m22956EQT2VRhxbcJ4FZxni0j3FgGBWmgjM75EE27yDbGqnkQjKUyLZ44uUe-pFoi6JzuSPcP7q3NdppXlSnWVE7brvq0clVPRJlp6q911vVIyAbVT1_c1V1m_fFbdgXr_03bTkSZKct_dfbliMgG225eHNtOfJoT1AVgits9G51zcOH7BHrV1KJUqZ4K0VajamP48pXfWCodH3r1IeI11cm4KbZsZpdu9m1mr0ts7Nr9uyxu_bRPau7bzf3reaB3Tz4F-hzq_nCPvnCar60my9Pih0v__sVAAD__xtCcVg= # Left joins are converted to paired joins by the optimizer. 
query T @@ -199,7 +199,7 @@ SELECT url FROM [EXPLAIN (DISTSQL) SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable2@geom_index WHERE ST_Intersects(ltable.geom1, rtable2.geom)) ORDER BY lk] ---- -https://cockroachdb.github.io/distsqlplan/decode.html#eJzMlVFP2zwUhu-_X2GdK_rNXeskLZCrbCPTOnUta5HGhCoUGgsyQpzZzgRC_e-Tk440KbXT0QGXic_j8-bYj3IP4mcMLkz9of_hBGU8Rh8n4y_ozD89Hr4bjNDe0WB6Mv06bKFlSXxdVMQyuIgp-vbJn_jIP1VVaG9Z839RwvMSy7uk7OY8SkJ6uywX8jxKJOWCzqXYK3Z6q6oI_gPlj60WGk-O_Al6_x3F1zPAkLCQjoIbKsA9AwIYLMBgwwxDytmcCsG4WrrPCwfhLbhdDFGSZlK9nmGYM07BvQcZyZiCCyeq24QGIeWdLmAIqQyiON--yOWlPLoJ-B1gmKZBIlzU7qim40y6yCPYs2C2wMAyWTYRMrik4JIFbh5kkPyiXNLwM4sSyjt2Ncv6KAE_IP5tymsz9SyMPKdVyYk9G3u9jWmtbdKqlMup9R9PWo5tyNh1lqIfLEoQS1ykYjgq2eix0P1Ws-Ha28Q9ioSMkrnsHFbDeuoKjXlIOQ1Vw1q3coOLO3QViKs1erYoEzkbE5X7sKJXfZ83xUYbYxPLkPthZBvn1dt5uhFrs7RDepXyTe37lfakuZSkmZQdq507s7WWhig1LZ0X1tKQdkXL_degpSFueb-7z-Yl2amX9o69tJqLYTUUw27_jRaGIDUtei-shSHtihYHr0ELQ9zyepFn08LaqRbOP_xdPdJ4QkXKEkEb_Ym6KjoNL2nxqYJlfE6POZvnbYrHcc7lL0IqZLFKiodBUiypgKsw0cKWHra0sK2H7TpMVmGnApPtYNJ9Et3T0472qw1wT39Yff3M-lp6Xw_va-EDPXyghQ_18OFTjloPm47aQBtOi-jdMtF6uYjBLqLXixj8Imu3vIo7Bnztmm9zaAbadGom3DR4vWV1erb473cAAAD__7TDS8A= 
+https://cockroachdb.github.io/distsqlplan/decode.html#eJzUlFFP2z4Uxd__n8K6T_Q_d22StkCeso2gBZWWtZXGhCKUxXcoI9iZ7Uwg1O8-OemAdNRNtxd4dHxO7vldHfke1I8cfJiH4_DDgpQyJ8ez6Sm5CM_Pxu-iCdk7iuaL-adxh6wk-XWtyHXyNUfy-WM4C0l4blRkb6X5v5bISuIGVyhuLjPO8HYlV_oy4xqlwlSrvfpPb43Kob9N1bHTIdPZUTgj77-Q_DoGClwwnCQ3qMC_AAcouEDBg5hCIUWKSglpru4rYcRuwe9TyHhRavM5ppAKieDfg850juDDwkybYcJQ9vpAgaFOsrz6fZ0rKGR2k8g7oDAvEq580u2ZodNS-yRwaOBCvKQgSv04ROnkCsF3lrR9kIj_RKmRnYiMo-x5zSx_rhLogyW8LeTaTgOXkmDQAQrfMqk0-S4yTjJOiiSTyLrm2GCggUeDIQ1GG2HcXWAMxGqpoybI4q5An4zD4wWZh6cROZlGE6APfI_LHgtxXRZ1cMF9YgIOTObJc6j7BlVhKjizs27k8zbyPWIJyVAiaxIFzhuIl88sYSK6ougdNtSbpg8a0532nXXadbbndqtK7dzaLVHWWjt42a3dAvOktfuvsrVu-964LXvjdf-mNVuCrLVm-LJbswXmSWsOXmVrtrzlM1SF4ApbvWN98xAiu8L64VSilCmeSZFWY-rjtPJVHxgqXd869SHi9ZUJ-NTsWM2u3exazV7D7KybPXvsvn30wOoe2s1Dq3lkN4_-BXrfaj6wTz6wmg_t5sOdYsfL_34FAAD__7Tvflc= query T SELECT url FROM [EXPLAIN (DISTSQL) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain index 560f4afe043c..e784ca832795 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain +++ b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain @@ -146,39 +146,34 @@ project · · · table ltable@primary · · · spans FULL SCAN · · -# This query performs a semi-join, which is converted to an inner join by the +# This query performs a semi-join, which is converted to paired joins by the # optimizer. 
query TTTTT EXPLAIN (VERBOSE) SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom)) ---- -· distribution local · · -· vectorized true · · -project · · (lk) · - │ estimated row count 10 (missing stats) · · - └── distinct · · (lk, geom2) · - │ estimated row count 1000 (missing stats) · · - │ distinct on lk · · - │ order key lk · · - └── project · · (lk, geom2) +lk - └── project · · (lk, geom2, geom) +lk - │ estimated row count 9801 (missing stats) · · - └── lookup join (inner) · · (lk, geom2, rk1, rk2, geom) +lk - │ table rtable@primary · · - │ equality (rk1, rk2) = (rk1,rk2) · · - │ equality cols are key · · · - │ pred st_intersects(geom2, geom) · · - └── project · · (lk, geom2, rk1, rk2) +lk - │ estimated row count 10000 (missing stats) · · - └── inverted join (inner) · · (lk, geom2, rk1, rk2, geom_inverted_key) +lk - │ table rtable@geom_index · · - │ inverted expr st_intersects(geom2, geom_inverted_key) · · - └── scan · · (lk, geom2) +lk -· estimated row count 1000 (missing stats) · · -· table ltable@primary · · -· spans FULL SCAN · · +· distribution local · · +· vectorized true · · +project · · (lk) · + │ estimated row count 10 (missing stats) · · + └── project · · (lk, geom2) · + │ estimated row count 10 (missing stats) · · + └── lookup join (semi) · · (lk, geom2, rk1, rk2, cont) · + │ table rtable@primary · · + │ equality (rk1, rk2) = (rk1,rk2) · · + │ equality cols are key · · · + │ pred st_intersects(geom2, geom) · · + └── project · · (lk, geom2, rk1, rk2, cont) · + │ estimated row count 10000 (missing stats) · · + └── inverted join (inner) · · (lk, geom2, rk1, rk2, geom_inverted_key, cont) · + │ table rtable@geom_index · · + │ inverted expr st_intersects(geom2, geom_inverted_key) · · + └── scan · · (lk, geom2) · +· estimated row count 1000 (missing stats) · · +· table ltable@primary · · +· spans FULL SCAN · · -# Left joins are also converted to an inner join by the optimizer. 
+# Left outer joins are also converted to paired joins by the optimizer. query TTTTT EXPLAIN (VERBOSE) SELECT lk, rk1 FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom) diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index 88edfb70213e..a5bb358eff16 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -785,59 +785,36 @@ query T EXPLAIN (OPT, VERBOSE) SELECT * FROM geo_table2 WHERE EXISTS (SELECT * FROM geo_table@geom_index WHERE ST_Intersects(geo_table2.geom, geo_table.geom)) ---- -project +semi-join (lookup geo_table) ├── columns: k:1 geom:2 + ├── key columns: [5] = [5] + ├── lookup columns are key ├── immutable ├── stats: [rows=10] - ├── cost: 112690.199 + ├── cost: 112684.05 ├── key: (1) ├── fd: (1)-->(2) ├── prune: (1) - └── distinct-on - ├── columns: geo_table2.k:1 geo_table2.geom:2 - ├── grouping columns: geo_table2.k:1 - ├── internal-ordering: +1 - ├── immutable - ├── stats: [rows=999.947218, distinct(1)=999.947218, null(1)=0] - ├── cost: 112690.089 - ├── key: (1) - ├── fd: (1)-->(2) - ├── inner-join (lookup geo_table) - │ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.geom:6 - │ ├── key columns: [5] = [5] - │ ├── lookup columns are key - │ ├── immutable - │ ├── stats: [rows=9801, distinct(1)=999.947218, null(1)=0] - │ ├── cost: 112484.05 - │ ├── fd: (1)-->(2) - │ ├── ordering: +1 - │ ├── prune: (1) - │ ├── interesting orderings: (+1) - │ ├── inner-join (inverted-lookup geo_table@geom_index) - │ │ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.k:5 - │ │ ├── inverted-expr - │ │ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:6) - │ │ ├── stats: [rows=10000, distinct(1)=999.956829, null(1)=0] - │ │ ├── cost: 41784.03 - │ │ ├── key: (1,5) - │ │ ├── fd: (1)-->(2) - │ │ ├── ordering: +1 - │ │ ├── scan geo_table2 - │ │ │ ├── columns: geo_table2.k:1 geo_table2.geom:2 - │ │ │ 
├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=10] - │ │ │ ├── cost: 1084.02 - │ │ │ ├── key: (1) - │ │ │ ├── fd: (1)-->(2) - │ │ │ ├── ordering: +1 - │ │ │ ├── prune: (1,2) - │ │ │ ├── interesting orderings: (+1) - │ │ │ └── unfiltered-cols: (1-4) - │ │ └── filters (true) - │ └── filters - │ └── st_intersects(geo_table2.geom:2, geo_table.geom:6) [outer=(2,6), immutable, constraints=(/2: (/NULL - ]; /6: (/NULL - ])] - └── aggregations - └── const-agg [as=geo_table2.geom:2, outer=(2)] - └── geo_table2.geom:2 + ├── inner-join (inverted-lookup geo_table@geom_index) + │ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.k:5 continuation:11 + │ ├── inverted-expr + │ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:6) + │ ├── stats: [rows=10000, distinct(1)=999.956829, null(1)=0] + │ ├── cost: 41984.03 + │ ├── key: (1,5) + │ ├── fd: (1)-->(2), (5)-->(11) + │ ├── scan geo_table2 + │ │ ├── columns: geo_table2.k:1 geo_table2.geom:2 + │ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=10] + │ │ ├── cost: 1084.02 + │ │ ├── key: (1) + │ │ ├── fd: (1)-->(2) + │ │ ├── prune: (1,2) + │ │ ├── interesting orderings: (+1) + │ │ └── unfiltered-cols: (1-4) + │ └── filters (true) + └── filters + └── st_intersects(geo_table2.geom:2, geo_table.geom:6) [outer=(2,6), immutable, constraints=(/2: (/NULL - ]; /6: (/NULL - ])] query T EXPLAIN (OPT, VERBOSE) SELECT * FROM geo_table2 WHERE NOT EXISTS (SELECT * FROM geo_table@geom_index diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index d030f817f6f6..809ce407351b 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -403,10 +403,12 @@ func (c *CustomFuncs) GenerateLookupJoins( }) } -// constructContinuationColumnForPairedLeftJoin constructs a continuation -// column ID for the paired-joiners used for left joins when the first join -// generates false positives (due to inverted index or non-covering index). 
-func (c *CustomFuncs) constructContinuationColumnForPairedLeftJoin() opt.ColumnID { +// constructContinuationColumnForPairedJoin constructs a continuation column +// ID for the paired-joiners used for left outer/semi/anti joins when the +// first join generates false positives (due to an inverted index or +// non-covering index). The first join will be either a left outer join or +// an inner join. +func (c *CustomFuncs) constructContinuationColumnForPairedJoin() opt.ColumnID { return c.e.f.Metadata().AddColumn("continuation", c.BoolType()) } @@ -447,19 +449,6 @@ func (c *CustomFuncs) GenerateInvertedJoins( if scanPrivate.Flags.NoIndexJoin { return } - if joinType == opt.SemiJoinOp { - // We cannot use a non-covering index for semi join. Note that - // since the semi join doesn't pass through any columns, "non - // covering" here means that not all columns in the ON condition are - // available. - // - // For semi joins, we may still be able to generate an inverted join - // by converting it to an inner join using the ConvertSemiToInnerJoin - // rule. Any semi join that could use an inverted index would already be - // transformed into an inner join by ConvertSemiToInnerJoin, so semi - // joins can be ignored here. - return - } if pkCols == nil { tab := c.e.mem.Metadata().Table(scanPrivate.Table) @@ -477,11 +466,16 @@ func (c *CustomFuncs) GenerateInvertedJoins( continuationCol := opt.ColumnID(0) invertedJoinType := joinType - // Anti joins are converted to a pair consisting of a left join and - // anti join. + // Anti joins are converted to a pair consisting of a left inverted join + // and an anti lookup join. 
if joinType == opt.LeftJoinOp || joinType == opt.AntiJoinOp { - continuationCol = c.constructContinuationColumnForPairedLeftJoin() + continuationCol = c.constructContinuationColumnForPairedJoin() invertedJoinType = opt.LeftJoinOp + } else if joinType == opt.SemiJoinOp { + // Semi joins are converted to a pair consisting of an inner inverted + // join and a semi lookup join. + continuationCol = c.constructContinuationColumnForPairedJoin() + invertedJoinType = opt.InnerJoinOp } invertedJoin := memo.InvertedJoinExpr{Input: input} invertedJoin.JoinPrivate = *joinPrivate @@ -491,7 +485,7 @@ func (c *CustomFuncs) GenerateInvertedJoins( invertedJoin.InvertedExpr = invertedExpr invertedJoin.InvertedCol = scanPrivate.Table.IndexColumnID(index, 0) invertedJoin.Cols = indexCols.Union(inputCols) - if invertedJoinType == opt.LeftJoinOp { + if continuationCol != 0 { invertedJoin.Cols.Add(continuationCol) invertedJoin.IsFirstJoinInPairedJoiner = true invertedJoin.ContinuationCol = continuationCol @@ -516,7 +510,7 @@ func (c *CustomFuncs) GenerateInvertedJoins( indexJoin.KeyCols = pkCols indexJoin.Cols = scanPrivate.Cols.Union(inputCols) indexJoin.LookupColsAreTableKey = true - if invertedJoinType == opt.LeftJoinOp { + if continuationCol != 0 { indexJoin.IsSecondJoinInPairedJoiner = true } diff --git a/pkg/sql/opt/xform/rules/join.opt b/pkg/sql/opt/xform/rules/join.opt index 838206ae4efb..2605de086843 100644 --- a/pkg/sql/opt/xform/rules/join.opt +++ b/pkg/sql/opt/xform/rules/join.opt @@ -94,7 +94,9 @@ # # Similar to CommuteSemiJoin, this rule allows semi joins to be commuted. This # rule is also useful because it allows us to generate lookup joins and -# inverted lookup joins for cases where the index is not covering. +# inverted lookup joins for cases where the index is not covering. For inverted +# lookup joins, the GenerateInvertedJoins* rules will also apply, and should +# be lower cost since they avoid the DistinctOn operation. 
[ConvertSemiToInnerJoin, Explore] (SemiJoin $left:* @@ -145,10 +147,8 @@ # GenerateInvertedJoins creates InvertedJoin operators for all inverted # indexes (of the Scan table) which allow it. See the GenerateInvertedJoins # custom function for more details. -# TODO(rytaft): Add support for SemiJoin. Currently it is supported by first -# converting it to InnerJoin using the rule ConvertSemiToInnerJoin. [GenerateInvertedJoins, Explore] -(InnerJoin | LeftJoin | AntiJoin +(InnerJoin | LeftJoin | SemiJoin | AntiJoin $left:* (Scan $scanPrivate:*) & (IsCanonicalScan $scanPrivate) & @@ -162,7 +162,7 @@ # GenerateInvertedJoinsFromSelect is similar to GenerateInvertedJoins, but # applies when the input is a Select. [GenerateInvertedJoinsFromSelect, Explore] -(InnerJoin | LeftJoin | AntiJoin +(InnerJoin | LeftJoin | SemiJoin | AntiJoin $left:* (Select (Scan $scanPrivate:*) & diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index bc7f0f6bd7ff..4643d0f1cca4 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -4055,68 +4055,35 @@ project └── filters └── st_covers(c.geom:10, n.geom:16) OR (name:15 LIKE 'Upper%') [outer=(10,15,16), immutable] -# Semi-joins are supported by converting them to an inner join wrapped in -# a DistinctOn and a Project. -opt expect=(GenerateInvertedJoins,ConvertSemiToInnerJoin) +# Semi-joins are supported by converting them to a paired-join consisting of +# an inner inverted join followed by a left semi lookup join. 
+opt expect=GenerateInvertedJoins SELECT * FROM nyc_census_blocks AS c WHERE EXISTS ( SELECT * FROM nyc_neighborhoods@nyc_neighborhoods_geo_idx AS n WHERE ST_Covers(c.geom, n.geom) ) ---- -project +semi-join (lookup nyc_neighborhoods [as=n]) ├── columns: gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 boroname:9 geom:10 + ├── key columns: [13] = [13] + ├── lookup columns are key ├── immutable ├── key: (1) ├── fd: (1)-->(2-10) - └── distinct-on - ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10!null - ├── grouping columns: c.gid:1!null - ├── internal-ordering: +1 - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2-10) - ├── inner-join (lookup nyc_neighborhoods [as=n]) - │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10!null n.geom:16!null - │ ├── key columns: [13] = [13] - │ ├── lookup columns are key - │ ├── immutable - │ ├── fd: (1)-->(2-10) - │ ├── ordering: +1 - │ ├── inner-join (inverted-lookup nyc_neighborhoods@nyc_neighborhoods_geo_idx [as=n]) - │ │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10 n.gid:13!null - │ │ ├── inverted-expr - │ │ │ └── st_covers(c.geom:10, n.geom:16) - │ │ ├── key: (1,13) - │ │ ├── fd: (1)-->(2-10) - │ │ ├── ordering: +1 - │ │ ├── scan nyc_census_blocks [as=c] - │ │ │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10 - │ │ │ ├── key: (1) - │ │ │ ├── fd: (1)-->(2-10) - │ │ │ └── ordering: +1 - │ │ └── filters (true) - │ └── filters - │ └── st_covers(c.geom:10, n.geom:16) [outer=(10,16), immutable, constraints=(/10: (/NULL - ]; /16: (/NULL - ])] - └── aggregations - ├── const-agg [as=blkid:2, outer=(2)] - │ └── blkid:2 - ├── const-agg 
[as=popn_total:3, outer=(3)] - │ └── popn_total:3 - ├── const-agg [as=popn_white:4, outer=(4)] - │ └── popn_white:4 - ├── const-agg [as=popn_black:5, outer=(5)] - │ └── popn_black:5 - ├── const-agg [as=popn_nativ:6, outer=(6)] - │ └── popn_nativ:6 - ├── const-agg [as=popn_asian:7, outer=(7)] - │ └── popn_asian:7 - ├── const-agg [as=popn_other:8, outer=(8)] - │ └── popn_other:8 - ├── const-agg [as=c.boroname:9, outer=(9)] - │ └── c.boroname:9 - └── const-agg [as=c.geom:10, outer=(10)] - └── c.geom:10 + ├── inner-join (inverted-lookup nyc_neighborhoods@nyc_neighborhoods_geo_idx [as=n]) + │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10 n.gid:13!null continuation:19 + │ ├── inverted-expr + │ │ └── st_covers(c.geom:10, n.geom:16) + │ ├── key: (1,13) + │ ├── fd: (1)-->(2-10), (13)-->(19) + │ ├── scan nyc_census_blocks [as=c] + │ │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10 + │ │ ├── key: (1) + │ │ └── fd: (1)-->(2-10) + │ └── filters (true) + └── filters + └── st_covers(c.geom:10, n.geom:16) [outer=(10,16), immutable, constraints=(/10: (/NULL - ]; /16: (/NULL - ])] # Anti-joins are supported by converting them to a paired-join consisting of # a left outer inverted join followed by a left anti lookup join. @@ -4509,67 +4476,34 @@ project └── n.boroname:14 = 'Manhattan' [outer=(14), constraints=(/14: [/'Manhattan' - /'Manhattan']; tight), fd=()-->(14)] # Inverted "semi-join". 
-opt expect=(GenerateInvertedJoinsFromSelect,ConvertSemiToInnerJoin) +opt expect=GenerateInvertedJoinsFromSelect SELECT * FROM nyc_census_blocks AS c WHERE EXISTS ( SELECT * FROM nyc_neighborhoods@nyc_neighborhoods_geo_idx AS n WHERE ST_Covers(c.geom, n.geom) AND n.boroname = 'Manhattan' ) ---- -project +semi-join (lookup nyc_neighborhoods [as=n]) ├── columns: gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 boroname:9 geom:10 + ├── key columns: [13] = [13] + ├── lookup columns are key ├── immutable ├── key: (1) ├── fd: (1)-->(2-10) - └── distinct-on - ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10!null - ├── grouping columns: c.gid:1!null - ├── internal-ordering: +1 opt(14) - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2-10) - ├── inner-join (lookup nyc_neighborhoods [as=n]) - │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10!null n.boroname:14!null n.geom:16!null - │ ├── key columns: [13] = [13] - │ ├── lookup columns are key - │ ├── immutable - │ ├── fd: ()-->(14), (1)-->(2-10) - │ ├── ordering: +1 opt(14) [actual: +1] - │ ├── inner-join (inverted-lookup nyc_neighborhoods@nyc_neighborhoods_geo_idx,partial [as=n]) - │ │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10 n.gid:13!null - │ │ ├── inverted-expr - │ │ │ └── st_covers(c.geom:10, n.geom:16) - │ │ ├── key: (1,13) - │ │ ├── fd: (1)-->(2-10) - │ │ ├── ordering: +1 - │ │ ├── scan nyc_census_blocks [as=c] - │ │ │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10 - │ │ │ ├── key: (1) - │ │ │ ├── fd: (1)-->(2-10) - │ │ │ └── ordering: +1 - │ │ └── filters (true) - │ └── filters - │ ├── st_covers(c.geom:10, n.geom:16) 
[outer=(10,16), immutable, constraints=(/10: (/NULL - ]; /16: (/NULL - ])] - │ └── n.boroname:14 = 'Manhattan' [outer=(14), constraints=(/14: [/'Manhattan' - /'Manhattan']; tight), fd=()-->(14)] - └── aggregations - ├── const-agg [as=blkid:2, outer=(2)] - │ └── blkid:2 - ├── const-agg [as=popn_total:3, outer=(3)] - │ └── popn_total:3 - ├── const-agg [as=popn_white:4, outer=(4)] - │ └── popn_white:4 - ├── const-agg [as=popn_black:5, outer=(5)] - │ └── popn_black:5 - ├── const-agg [as=popn_nativ:6, outer=(6)] - │ └── popn_nativ:6 - ├── const-agg [as=popn_asian:7, outer=(7)] - │ └── popn_asian:7 - ├── const-agg [as=popn_other:8, outer=(8)] - │ └── popn_other:8 - ├── const-agg [as=c.boroname:9, outer=(9)] - │ └── c.boroname:9 - └── const-agg [as=c.geom:10, outer=(10)] - └── c.geom:10 + ├── inner-join (inverted-lookup nyc_neighborhoods@nyc_neighborhoods_geo_idx,partial [as=n]) + │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10 n.gid:13!null continuation:20 + │ ├── inverted-expr + │ │ └── st_covers(c.geom:10, n.geom:16) + │ ├── key: (1,13) + │ ├── fd: (1)-->(2-10), (13)-->(20) + │ ├── scan nyc_census_blocks [as=c] + │ │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10 + │ │ ├── key: (1) + │ │ └── fd: (1)-->(2-10) + │ └── filters (true) + └── filters + ├── st_covers(c.geom:10, n.geom:16) [outer=(10,16), immutable, constraints=(/10: (/NULL - ]; /16: (/NULL - ])] + └── n.boroname:14 = 'Manhattan' [outer=(14), constraints=(/14: [/'Manhattan' - /'Manhattan']; tight), fd=()-->(14)] # Inverted "anti-join". opt expect=GenerateInvertedJoinsFromSelect