Skip to content

Commit

Permalink
opt: use paired joins for left semi inverted joins
Browse files Browse the repository at this point in the history
Semi joins using the inverted index used to be converted
to an inner inverted join followed by an inner lookup join
and a distinct for de-duplication. They are now converted
to paired joins consisting of an inner inverted join
followed by a left semi lookup join, which is more
efficient.

Release note: None
  • Loading branch information
sumeerbhola committed Oct 27, 2020
1 parent 5e3c201 commit dd16fc3
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 208 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ AND (ST_DFullyWithin(rtable.geom, ltable.geom1, 100) OR ST_Intersects('POINT(1.0
----
1 13

-# These queries perform semi-joins, which are converted to inner joins by the
+# These queries perform semi-joins, which are converted to paired joins by the
# optimizer.
query I
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,13 @@ AND (ST_DFullyWithin(rtable.geom, ltable.geom1, 100) OR ST_Intersects('POINT(1.0
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzsVUFv2jAYve9XWN-lIBmIE6DUp1QrTKlo0gGHTRWqMuJ1WVM7s52uVcV_n5JUKwlgUk27wQnb7_l9ed_35BdQvxKgMB9Pxx8XKJMJmsyCK3Qz_nI9Pfd81Lrw5ov552kbvUKSe4zkfYlKdPgtYegy8Hwki__uHRMPtzGP2BMKfNRS-jbmmknFVlq1Snw3xxD8yihWbXTuXyClb1fikckK0K4CS2R-b_Q9S5Ln37H-EfPWBgSjqgyxrDYKZqhaysl14PmLFulaiHSt9klNZQkYuIiYHz4wBfQGCGCwAYMDSwypFCumlJD50UsB9KInoBaGmKeZzreXGFZCMqAvoGOdMKCwyAVmLIyY7FmAIWI6jJPi-rJkN5XxQyifAcM8DbmiqNOzYbnGIDL9dq_S4R0DSta4ubbHH5nULLoUMWey51Tlt5oH-C9j_JTKeiNdGyN3UO-a6xS7e1pUcop-dC1K6WQanC9Gu1pjEYtYW7-J5Uzq6xNK6adxcDVezL6W2oAhyDRFLsGujV0Hu_29BtrvMTA37rV3w53mvfVuKsR9lqKfIuZIcIrcfl7VVhoKQ4Y7TRzuNXGIUc77jyYOqybut8_Za9-baxkXMmKSRRXLlusdBvuiI9LeWQ24W7pfkSbNY0eaxa5nd3pOw-AdUK8Fr38MXj14BwzcCN7pMXjbwbObT7_dcPqdTsPZP6Bdm_3Bcfbrs3_AwI3ZHx1n3_zo7LBvxlQquGKN3hQrf5RYdMfKF0yJTK7YtRSrQqZcBgWv2IiY0uUpKRceL4_yAjfJxEi2zWTbSHYqZFInO-ayLbN038gemMkDI3loJg__5aNPjeSRWXlkJJ-ZyWfvKnu5_vAnAAD__96MMnU=

-# This query performs a semi-join, which is converted to an inner join by the
+# This query performs a semi-join, which is converted to paired joins by the
# optimizer.
query T
SELECT url FROM [EXPLAIN (DISTSQL)
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzElVFP2zAQx9_3Kax7opu71klaIE-ZRqZ16lrWIg0JVSg0J8gIcWY7Ewj1u09JKtKG1k4Ho49J7uf7-ey_8gjydwwuTP2h__mMZCImXybj7-TCPz8dfhqMyMHJYHo2_TFskWVJfFtWxCq4ipH8_OpPfOKf51XkYFnzviwRqyVSXUaJQiFxruRBSX-8Rn5n0WVh8dRqzYBCwkMcBXcowb0ABhQsoGDDjEIq-Byl5CL_9FgUDsJ7cLsUoiTNVP56RmHOBYL7CCpSMYILZ3mDCQYhik4XKISogiguli9VvFREd4F4AArTNEikS9qdvOk4Uy7xGPVsmC0o8ExVTaQKrhFctqDNRQbJHxQKw288SlB07HWXchBePojLKAnxHugT4d-nojZFz6LEc1qrmpbO1NrFNDdcTqy_0bKa2JDz2ywlv3iUEJ64xMu3NR5tsu2t225VtXdRPYmkipK56hyvi3r51RmLEAWGecNat2qBqwdyE8ibZ_RsURk5W42qdXjZq77Oh3KhrdrMMng_jWzrvHoN7LJkk99GtRFv87TDerXKzb37a71Z8ySyZknsWO0iKDtn0aBSy6KzxywaTFeyeLjvLBpUq0vdfbMwslcNo_3KYbSaB8JqGAi7_S9xMIjU4tDbYxwMpitxONp3HAyq1bVibxYH61Xj4PzHf9OGxhOUKU8kNvrzdHN1DK-x3KrkmZjjqeDzok35OC644kWIUpVfWfkwSMpPueAqzLSwpYctLWzrYbsOs1XYWYPZbjDrvoju6WlHu2sD3NMfVl8_s76WPtTDh1r4SA8faeFjPXz8kqPWw6ajNtCG02L6bJlofbiYIV1MHy9myBd7dsvXcceAP7vmuxyagTadmgk3DV6fsjo9W7z7GwAA__9Kzz7E
https://cockroachdb.github.io/distsqlplan/decode.html#eJzUlN9v2j4Uxd-_f4V1n8pXZpAEaJunTFuqpaLQAdIqVVGVxXdV1tTObGeiQvzvkxM2fghM2F7aR8fn5J7P1ZEXoH7k4MM0HIYfZqSUObmajG_IfXh3O3wfjcjZx2g6m34etshKkj_VilwnX3MkXz6Fk5CEd0ZFzlaa_2uJ3JQo_ZBxjVJhqtVZ7X73iOLZpSthdWq1YqDABcNR8owK_HtwgIILFDyIKRRSpKiUkOZqUQkjNge_SyHjRanN55hCKiSCvwCd6RzBh5kZMMGEoex0gQJDnWR59fs6SlDI7DmRL0BhWiRc-aTdMUPHpfZJ4NDAg3hJQZR6PUTp5BHBd5a0eZCI_0SpkV2LjKPseNtZ6kUEZhEPGWc4B_rHEc4LubPFwKUk6LWAwrdMKk2-i4yTjJMiySSytjluIrg08GjQPwjingJiAFYLHWxDzF4K9MkwvJqRaXgTketxNAL6m22956EQT2VRhxbcJ4FZxni0j3FgGBWmgjM75EE27yDbGqnkQjKUyLZ44uUe-pFoi6JzuSPcP7q3NdppXlSnWVE7brvq0clVPRJlp6q911vVIyAbVT1_c1V1m_fFbdgXr_03bTkSZKct_dfbliMgG225eHNtOfJoT1AVgits9G51zcOH7BHrV1KJUqZ4K0VajamP48pXfWCodH3r1IeI11cm4KbZsZpdu9m1mr0ts7Nr9uyxu_bRPau7bzf3reaB3Tz4F-hzq_nCPvnCar60my9Pih0v__sVAAD__xtCcVg=

# Left joins are converted to paired joins by the optimizer.
query T
Expand Down Expand Up @@ -199,7 +199,7 @@ SELECT url FROM [EXPLAIN (DISTSQL)
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable2@geom_index
WHERE ST_Intersects(ltable.geom1, rtable2.geom)) ORDER BY lk]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzMlVFP2zwUhu-_X2GdK_rNXeskLZCrbCPTOnUta5HGhCoUGgsyQpzZzgRC_e-Tk440KbXT0QGXic_j8-bYj3IP4mcMLkz9of_hBGU8Rh8n4y_ozD89Hr4bjNDe0WB6Mv06bKFlSXxdVMQyuIgp-vbJn_jIP1VVaG9Z839RwvMSy7uk7OY8SkJ6uywX8jxKJOWCzqXYK3Z6q6oI_gPlj60WGk-O_Al6_x3F1zPAkLCQjoIbKsA9AwIYLMBgwwxDytmcCsG4WrrPCwfhLbhdDFGSZlK9nmGYM07BvQcZyZiCCyeq24QGIeWdLmAIqQyiON--yOWlPLoJ-B1gmKZBIlzU7qim40y6yCPYs2C2wMAyWTYRMrik4JIFbh5kkPyiXNLwM4sSyjt2Ncv6KAE_IP5tymsz9SyMPKdVyYk9G3u9jWmtbdKqlMup9R9PWo5tyNh1lqIfLEoQS1ykYjgq2eix0P1Ws-Ha28Q9ioSMkrnsHFbDeuoKjXlIOQ1Vw1q3coOLO3QViKs1erYoEzkbE5X7sKJXfZ83xUYbYxPLkPthZBvn1dt5uhFrs7RDepXyTe37lfakuZSkmZQdq507s7WWhig1LZ0X1tKQdkXL_degpSFueb-7z-Yl2amX9o69tJqLYTUUw27_jRaGIDUtei-shSHtihYHr0ELQ9zyepFn08LaqRbOP_xdPdJ4QkXKEkEb_Ym6KjoNL2nxqYJlfE6POZvnbYrHcc7lL0IqZLFKiodBUiypgKsw0cKWHra0sK2H7TpMVmGnApPtYNJ9Et3T0472qw1wT39Yff3M-lp6Xw_va-EDPXyghQ_18OFTjloPm47aQBtOi-jdMtF6uYjBLqLXixj8Imu3vIo7Bnztmm9zaAbadGom3DR4vWV1erb473cAAAD__7TDS8A=
https://cockroachdb.github.io/distsqlplan/decode.html#eJzUlFFP2z4Uxd__n8K6T_Q_d22StkCeso2gBZWWtZXGhCKUxXcoI9iZ7Uwg1O8-OemAdNRNtxd4dHxO7vldHfke1I8cfJiH4_DDgpQyJ8ez6Sm5CM_Pxu-iCdk7iuaL-adxh6wk-XWtyHXyNUfy-WM4C0l4blRkb6X5v5bISuIGVyhuLjPO8HYlV_oy4xqlwlSrvfpPb43Kob9N1bHTIdPZUTgj77-Q_DoGClwwnCQ3qMC_AAcouEDBg5hCIUWKSglpru4rYcRuwe9TyHhRavM5ppAKieDfg850juDDwkybYcJQ9vpAgaFOsrz6fZ0rKGR2k8g7oDAvEq580u2ZodNS-yRwaOBCvKQgSv04ROnkCsF3lrR9kIj_RKmRnYiMo-x5zSx_rhLogyW8LeTaTgOXkmDQAQrfMqk0-S4yTjJOiiSTyLrm2GCggUeDIQ1GG2HcXWAMxGqpoybI4q5An4zD4wWZh6cROZlGE6APfI_LHgtxXRZ1cMF9YgIOTObJc6j7BlVhKjizs27k8zbyPWIJyVAiaxIFzhuIl88sYSK6ougdNtSbpg8a0532nXXadbbndqtK7dzaLVHWWjt42a3dAvOktfuvsrVu-964LXvjdf-mNVuCrLVm-LJbswXmSWsOXmVrtrzlM1SF4ApbvWN98xAiu8L64VSilCmeSZFWY-rjtPJVHxgqXd869SHi9ZUJ-NTsWM2u3exazV7D7KybPXvsvn30wOoe2s1Dq3lkN4_-BXrfaj6wTz6wmg_t5sOdYsfL_34FAAD__7Tvflc=

query T
SELECT url FROM [EXPLAIN (DISTSQL)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,39 +146,34 @@ project · ·
· table ltable@primary · ·
· spans FULL SCAN · ·

-# This query performs a semi-join, which is converted to an inner join by the
+# This query performs a semi-join, which is converted to paired joins by the
# optimizer.
query TTTTT
EXPLAIN (VERBOSE)
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))
----
-· distribution local · ·
-· vectorized true · ·
-project · · (lk) ·
-│ estimated row count 10 (missing stats) · ·
-└── distinct · · (lk, geom2) ·
-│ estimated row count 1000 (missing stats) · ·
-│ distinct on lk · ·
-│ order key lk · ·
-└── project · · (lk, geom2) +lk
-└── project · · (lk, geom2, geom) +lk
-│ estimated row count 9801 (missing stats) · ·
-└── lookup join (inner) · · (lk, geom2, rk1, rk2, geom) +lk
-│ table rtable@primary · ·
-│ equality (rk1, rk2) = (rk1,rk2) · ·
-│ equality cols are key · · ·
-│ pred st_intersects(geom2, geom) · ·
-└── project · · (lk, geom2, rk1, rk2) +lk
-│ estimated row count 10000 (missing stats) · ·
-└── inverted join (inner) · · (lk, geom2, rk1, rk2, geom_inverted_key) +lk
-│ table rtable@geom_index · ·
-│ inverted expr st_intersects(geom2, geom_inverted_key) · ·
-└── scan · · (lk, geom2) +lk
-· estimated row count 1000 (missing stats) · ·
-· table ltable@primary · ·
-· spans FULL SCAN · ·
+· distribution local · ·
+· vectorized true · ·
+project · · (lk) ·
+│ estimated row count 10 (missing stats) · ·
+└── project · · (lk, geom2) ·
+│ estimated row count 10 (missing stats) · ·
+└── lookup join (semi) · · (lk, geom2, rk1, rk2, cont) ·
+│ table rtable@primary · ·
+│ equality (rk1, rk2) = (rk1,rk2) · ·
+│ equality cols are key · · ·
+│ pred st_intersects(geom2, geom) · ·
+└── project · · (lk, geom2, rk1, rk2, cont) ·
+│ estimated row count 10000 (missing stats) · ·
+└── inverted join (inner) · · (lk, geom2, rk1, rk2, geom_inverted_key, cont) ·
+│ table rtable@geom_index · ·
+│ inverted expr st_intersects(geom2, geom_inverted_key) · ·
+└── scan · · (lk, geom2) ·
+· estimated row count 1000 (missing stats) · ·
+· table ltable@primary · ·
+· spans FULL SCAN · ·

-# Left joins are also converted to an inner join by the optimizer.
+# Left outer joins are also converted to paired joins by the optimizer.
query TTTTT
EXPLAIN (VERBOSE)
SELECT lk, rk1 FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom)
Expand Down
71 changes: 24 additions & 47 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -785,59 +785,36 @@ query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM geo_table2 WHERE EXISTS (SELECT * FROM geo_table@geom_index
WHERE ST_Intersects(geo_table2.geom, geo_table.geom))
----
-project
+semi-join (lookup geo_table)
 ├── columns: k:1 geom:2
+├── key columns: [5] = [5]
+├── lookup columns are key
 ├── immutable
 ├── stats: [rows=10]
-├── cost: 112690.199
+├── cost: 112684.05
 ├── key: (1)
 ├── fd: (1)-->(2)
 ├── prune: (1)
-└── distinct-on
-├── columns: geo_table2.k:1 geo_table2.geom:2
-├── grouping columns: geo_table2.k:1
-├── internal-ordering: +1
-├── immutable
-├── stats: [rows=999.947218, distinct(1)=999.947218, null(1)=0]
-├── cost: 112690.089
-├── key: (1)
-├── fd: (1)-->(2)
-├── inner-join (lookup geo_table)
-│ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.geom:6
-│ ├── key columns: [5] = [5]
-│ ├── lookup columns are key
-│ ├── immutable
-│ ├── stats: [rows=9801, distinct(1)=999.947218, null(1)=0]
-│ ├── cost: 112484.05
-│ ├── fd: (1)-->(2)
-│ ├── ordering: +1
-│ ├── prune: (1)
-│ ├── interesting orderings: (+1)
-│ ├── inner-join (inverted-lookup geo_table@geom_index)
-│ │ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.k:5
-│ │ ├── inverted-expr
-│ │ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:6)
-│ │ ├── stats: [rows=10000, distinct(1)=999.956829, null(1)=0]
-│ │ ├── cost: 41784.03
-│ │ ├── key: (1,5)
-│ │ ├── fd: (1)-->(2)
-│ │ ├── ordering: +1
-│ │ ├── scan geo_table2
-│ │ │ ├── columns: geo_table2.k:1 geo_table2.geom:2
-│ │ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=10]
-│ │ │ ├── cost: 1084.02
-│ │ │ ├── key: (1)
-│ │ │ ├── fd: (1)-->(2)
-│ │ │ ├── ordering: +1
-│ │ │ ├── prune: (1,2)
-│ │ │ ├── interesting orderings: (+1)
-│ │ │ └── unfiltered-cols: (1-4)
-│ │ └── filters (true)
-│ └── filters
-│ └── st_intersects(geo_table2.geom:2, geo_table.geom:6) [outer=(2,6), immutable, constraints=(/2: (/NULL - ]; /6: (/NULL - ])]
-└── aggregations
-└── const-agg [as=geo_table2.geom:2, outer=(2)]
-└── geo_table2.geom:2
+├── inner-join (inverted-lookup geo_table@geom_index)
+│ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.k:5 continuation:11
+│ ├── inverted-expr
+│ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:6)
+│ ├── stats: [rows=10000, distinct(1)=999.956829, null(1)=0]
+│ ├── cost: 41984.03
+│ ├── key: (1,5)
+│ ├── fd: (1)-->(2), (5)-->(11)
+│ ├── scan geo_table2
+│ │ ├── columns: geo_table2.k:1 geo_table2.geom:2
+│ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=10]
+│ │ ├── cost: 1084.02
+│ │ ├── key: (1)
+│ │ ├── fd: (1)-->(2)
+│ │ ├── prune: (1,2)
+│ │ ├── interesting orderings: (+1)
+│ │ └── unfiltered-cols: (1-4)
+│ └── filters (true)
+└── filters
+└── st_intersects(geo_table2.geom:2, geo_table.geom:6) [outer=(2,6), immutable, constraints=(/2: (/NULL - ]; /6: (/NULL - ])]

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM geo_table2 WHERE NOT EXISTS (SELECT * FROM geo_table@geom_index
Expand Down
Loading

0 comments on commit dd16fc3

Please sign in to comment.