Skip to content

Commit

Permalink
opt: use paired joins for left semi inverted joins
Browse files Browse the repository at this point in the history
Semi joins that use an inverted index were previously converted
to an inner inverted join followed by an inner lookup join
and a distinct for de-duplication. They are now converted
to paired joins consisting of a left outer inverted join
followed by a left semi lookup join, which can be more
efficient.

Release note: None
  • Loading branch information
sumeerbhola committed Oct 26, 2020
1 parent ac5825b commit 5863460
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 200 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ AND (ST_DFullyWithin(rtable.geom, ltable.geom1, 100) OR ST_Intersects('POINT(1.0
----
1 13

# These queries perform semi-joins, which are converted to inner joins by the
# These queries perform semi-joins, which are converted to paired joins by the
# optimizer.
query I
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,13 @@ AND (ST_DFullyWithin(rtable.geom, ltable.geom1, 100) OR ST_Intersects('POINT(1.0
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzsVUFv2jAYve9XWN-lIBmIE6DUp1QrTKlo0gGHTRWqMuJ1WVM7s52uVcV_n5JUKwlgUk27wQnb7_l9ed_35BdQvxKgMB9Pxx8XKJMJmsyCK3Qz_nI9Pfd81Lrw5ov552kbvUKSe4zkfYlKdPgtYegy8Hwki__uHRMPtzGP2BMKfNRS-jbmmknFVlq1Snw3xxD8yihWbXTuXyClb1fikckK0K4CS2R-b_Q9S5Ln37H-EfPWBgSjqgyxrDYKZqhaysl14PmLFulaiHSt9klNZQkYuIiYHz4wBfQGCGCwAYMDSwypFCumlJD50UsB9KInoBaGmKeZzreXGFZCMqAvoGOdMKCwyAVmLIyY7FmAIWI6jJPi-rJkN5XxQyifAcM8DbmiqNOzYbnGIDL9dq_S4R0DSta4ubbHH5nULLoUMWey51Tlt5oH-C9j_JTKeiNdGyN3UO-a6xS7e1pUcop-dC1K6WQanC9Gu1pjEYtYW7-J5Uzq6xNK6adxcDVezL6W2oAhyDRFLsGujV0Hu_29BtrvMTA37rV3w53mvfVuKsR9lqKfIuZIcIrcfl7VVhoKQ4Y7TRzuNXGIUc77jyYOqybut8_Za9-baxkXMmKSRRXLlusdBvuiI9LeWQ24W7pfkSbNY0eaxa5nd3pOw-AdUK8Fr38MXj14BwzcCN7pMXjbwbObT7_dcPqdTsPZP6Bdm_3Bcfbrs3_AwI3ZHx1n3_zo7LBvxlQquGKN3hQrf5RYdMfKF0yJTK7YtRSrQqZcBgWv2IiY0uUpKRceL4_yAjfJxEi2zWTbSHYqZFInO-ayLbN038gemMkDI3loJg__5aNPjeSRWXlkJJ-ZyWfvKnu5_vAnAAD__96MMnU=

# This query performs a semi-join, which is converted to an inner join by the
# This query performs a semi-join, which is converted to paired joins by the
# optimizer.
query T
SELECT url FROM [EXPLAIN (DISTSQL)
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzElVFP2zAQx9_3Kax7opu71klaIE-ZRqZ16lrWIg0JVSg0J8gIcWY7Ewj1u09JKtKG1k4Ho49J7uf7-ey_8gjydwwuTP2h__mMZCImXybj7-TCPz8dfhqMyMHJYHo2_TFskWVJfFtWxCq4ipH8_OpPfOKf51XkYFnzviwRqyVSXUaJQiFxruRBSX-8Rn5n0WVh8dRqzYBCwkMcBXcowb0ABhQsoGDDjEIq-Byl5CL_9FgUDsJ7cLsUoiTNVP56RmHOBYL7CCpSMYILZ3mDCQYhik4XKISogiguli9VvFREd4F4AArTNEikS9qdvOk4Uy7xGPVsmC0o8ExVTaQKrhFctqDNRQbJHxQKw288SlB07HWXchBePojLKAnxHugT4d-nojZFz6LEc1qrmpbO1NrFNDdcTqy_0bKa2JDz2ywlv3iUEJ64xMu3NR5tsu2t225VtXdRPYmkipK56hyvi3r51RmLEAWGecNat2qBqwdyE8ibZ_RsURk5W42qdXjZq77Oh3KhrdrMMng_jWzrvHoN7LJkk99GtRFv87TDerXKzb37a71Z8ySyZknsWO0iKDtn0aBSy6KzxywaTFeyeLjvLBpUq0vdfbMwslcNo_3KYbSaB8JqGAi7_S9xMIjU4tDbYxwMpitxONp3HAyq1bVibxYH61Xj4PzHf9OGxhOUKU8kNvrzdHN1DK-x3KrkmZjjqeDzok35OC644kWIUpVfWfkwSMpPueAqzLSwpYctLWzrYbsOs1XYWYPZbjDrvoju6WlHu2sD3NMfVl8_s76WPtTDh1r4SA8faeFjPXz8kqPWw6ajNtCG02L6bJlofbiYIV1MHy9myBd7dsvXcceAP7vmuxyagTadmgk3DV6fsjo9W7z7GwAA__9Kzz7E
https://cockroachdb.github.io/distsqlplan/decode.html#eJzUlFFvmz4Uxd__n8K6T81fzhIgSVuemDaqUaVJlzCtUoUqhu8qVmoz20ytqnz3yRCtCWocsj3l0fgc7vldHfkF1M8CfFiG0_BDTCpZkIvF_IrchjfX0_fRjJx8jJbx8vO0R9aS4qFRFDr9ViD5-ilchCS8MSpystb830jkpkTpu5xrlAozrU4a97t7FI8uXQvrU6-XAAUuGM7SR1Tg34IDFFyg4EFCoZQiQ6WENFcvtTBiT-APKeS8rLT5nFDIhETwX0DnukDwITYDFpgylIMhUGCo07yof99ECUqZP6byGSgsy5Qrn_QHZui80j4JHBp4kKwoiEq_DlE6vUfwnRXtHiTiv1BqZJci5ygH3naW-LlEn0zDi5jMv8ThglzOoxlQaBYUmAXd5ZzhE9A_fwqfStnabuBSEox6QOF7LpUmP0TOSc5JmeYSWd8cN9FcGng0GO8EdA8BNGDrRU92wi3Dq6jF9rr_qRAPVdmEFtwngVnSfPYW48QwKswEZ3bInWzeTrZXpIoLyVAi2-JJVm_Qz0RflIPzlvDt0aOt0U73AjvdCjxw-3W_Dq7wniitCo-Or8J7ADcqfHp0FXa798jt2COv_zct2hOk1aLx8bVoD-BGi86OrkV7HvkFqlJwhZ3euaF5KJHdY_OqKlHJDK-lyOoxzXFe--oPDJVubp3mEPHmygTcNDtWs2s3u1azt2V22mbPHntoHz2yusd289hqntjNk3-BPrWaz-yTz6zmc7v5_KDYyeq_3wEAAP___8eFGg==

# Left joins are converted to paired joins by the optimizer.
query T
Expand Down Expand Up @@ -199,7 +199,7 @@ SELECT url FROM [EXPLAIN (DISTSQL)
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable2@geom_index
WHERE ST_Intersects(ltable.geom1, rtable2.geom)) ORDER BY lk]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzMlVFP2zwUhu-_X2GdK_rNXeskLZCrbCPTOnUta5HGhCoUGgsyQpzZzgRC_e-Tk440KbXT0QGXic_j8-bYj3IP4mcMLkz9of_hBGU8Rh8n4y_ozD89Hr4bjNDe0WB6Mv06bKFlSXxdVMQyuIgp-vbJn_jIP1VVaG9Z839RwvMSy7uk7OY8SkJ6uywX8jxKJOWCzqXYK3Z6q6oI_gPlj60WGk-O_Al6_x3F1zPAkLCQjoIbKsA9AwIYLMBgwwxDytmcCsG4WrrPCwfhLbhdDFGSZlK9nmGYM07BvQcZyZiCCyeq24QGIeWdLmAIqQyiON--yOWlPLoJ-B1gmKZBIlzU7qim40y6yCPYs2C2wMAyWTYRMrik4JIFbh5kkPyiXNLwM4sSyjt2Ncv6KAE_IP5tymsz9SyMPKdVyYk9G3u9jWmtbdKqlMup9R9PWo5tyNh1lqIfLEoQS1ykYjgq2eix0P1Ws-Ha28Q9ioSMkrnsHFbDeuoKjXlIOQ1Vw1q3coOLO3QViKs1erYoEzkbE5X7sKJXfZ83xUYbYxPLkPthZBvn1dt5uhFrs7RDepXyTe37lfakuZSkmZQdq507s7WWhig1LZ0X1tKQdkXL_degpSFueb-7z-Yl2amX9o69tJqLYTUUw27_jRaGIDUtei-shSHtihYHr0ELQ9zyepFn08LaqRbOP_xdPdJ4QkXKEkEb_Ym6KjoNL2nxqYJlfE6POZvnbYrHcc7lL0IqZLFKiodBUiypgKsw0cKWHra0sK2H7TpMVmGnApPtYNJ9Et3T0472qw1wT39Yff3M-lp6Xw_va-EDPXyghQ_18OFTjloPm47aQBtOi-jdMtF6uYjBLqLXixj8Imu3vIo7Bnztmm9zaAbadGom3DR4vWV1erb473cAAAD__7TDS8A=
https://cockroachdb.github.io/distsqlplan/decode.html#eJzUlFFv0z4Uxd__n8K6T-sflzZJ2215CrBMZOrakRYxNFVTiC9TWGYH20Gbpn535KSwpaxuCk99dHyO7_k5R34E9T0HH2bhOHw3J6XMyWk8PSdX4eXF-E00IQcn0Ww--zDukJUkv60VuU6-5Eg-vQ_jkISXRkUOVpr_a4msJG5wg-LuOuMM71dypa8zrlEqTLU6qE96bVQO_WWqlp0OmcYnYUzefib57QIocMFwktyhAv8KHKDgAgUPFhQKKVJUSkiz9VgJI3YPfp9CxotSm88LCqmQCP4j6EznCD7MzbQYE4ay1wcKDHWS5dXxda6gkNldIh-AwqxIuPJJt2eGTkvtk8ChgQuLJQVR6qchSic3CL6zpO2DRPwHSo3sTGQcZc9rZpk_FOiTcXg6J9OP8zAmZ9NoAhT-vGKgv48K7wu5dteBS0kw6ACFr5lUmnwTGScZJ0WSSWRds2yw0cCjwZAGo42Q7i6QBm512aONgLPwPFrne_oJYyFuy6IOLrhPTMCByTx5CfXQoCpMBWd21o183ka-JywhGUpkTaLAeQWL5QuXMBFdUfSOG-pN0weN6U77Ljvtutxzu1XVdm7zlihrbR7sZ5u3QD5r8-Fettlt3ye3ZZ-87t-0aUuQtTYN97NNWyCfteloL9u05e2PURWCK2z17vXNw4nsBuuHVolSpnghRVqNqZfTyld9YKh0vevUi4jXWybgc7NjNbt2s2s1ew2zs2727LH79tEDq3toNw-t5pHdPPoX6EOr-cg--chqPrabj3eKvVj-9zMAAP__o_mSGQ==

query T
SELECT url FROM [EXPLAIN (DISTSQL)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,39 +146,34 @@ project · ·
· table ltable@primary · ·
· spans FULL SCAN · ·

# This query performs a semi-join, which is converted to an inner join by the
# This query performs a semi-join, which is converted to paired joins by the
# optimizer.
query TTTTT
EXPLAIN (VERBOSE)
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))
----
· distribution local · ·
· vectorized true · ·
project · · (lk) ·
│ estimated row count 10 (missing stats) · ·
└── distinct · · (lk, geom2) ·
│ estimated row count 1000 (missing stats) · ·
│ distinct on lk · ·
│ order key lk · ·
└── project · · (lk, geom2) +lk
└── project · · (lk, geom2, geom) +lk
│ estimated row count 9801 (missing stats) · ·
└── lookup join (inner) · · (lk, geom2, rk1, rk2, geom) +lk
│ table rtable@primary · ·
│ equality (rk1, rk2) = (rk1,rk2) · ·
│ equality cols are key · · ·
│ pred st_intersects(geom2, geom) · ·
└── project · · (lk, geom2, rk1, rk2) +lk
│ estimated row count 10000 (missing stats) · ·
└── inverted join (inner) · · (lk, geom2, rk1, rk2, geom_inverted_key) +lk
│ table rtable@geom_index · ·
│ inverted expr st_intersects(geom2, geom_inverted_key) · ·
└── scan · · (lk, geom2) +lk
· estimated row count 1000 (missing stats) · ·
· table ltable@primary · ·
· spans FULL SCAN · ·
· distribution local · ·
· vectorized true · ·
project · · (lk) ·
│ estimated row count 10 (missing stats) · ·
└── project · · (lk, geom2) ·
│ estimated row count 10 (missing stats) · ·
└── lookup join (semi) · · (lk, geom2, rk1, rk2, cont) ·
│ table rtable@primary · ·
│ equality (rk1, rk2) = (rk1,rk2) · ·
│ equality cols are key · · ·
│ pred st_intersects(geom2, geom) · ·
└── project · · (lk, geom2, rk1, rk2, cont) ·
│ estimated row count 10000 (missing stats) · ·
└── inverted join (left outer) · · (lk, geom2, rk1, rk2, geom_inverted_key, cont) ·
│ table rtable@geom_index · ·
│ inverted expr st_intersects(geom2, geom_inverted_key) · ·
└── scan · · (lk, geom2) ·
· estimated row count 1000 (missing stats) · ·
· table ltable@primary · ·
· spans FULL SCAN · ·

# Left joins are also converted to an inner join by the optimizer.
# Left outer joins are also converted to paired joins by the optimizer.
query TTTTT
EXPLAIN (VERBOSE)
SELECT lk, rk1 FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom)
Expand Down
71 changes: 24 additions & 47 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -785,59 +785,36 @@ query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM geo_table2 WHERE EXISTS (SELECT * FROM geo_table@geom_index
WHERE ST_Intersects(geo_table2.geom, geo_table.geom))
----
project
semi-join (lookup geo_table)
├── columns: k:1 geom:2
├── key columns: [5] = [5]
├── lookup columns are key
├── immutable
├── stats: [rows=10]
├── cost: 112690.199
├── cost: 112684.05
├── key: (1)
├── fd: (1)-->(2)
├── prune: (1)
└── distinct-on
├── columns: geo_table2.k:1 geo_table2.geom:2
├── grouping columns: geo_table2.k:1
├── internal-ordering: +1
├── immutable
├── stats: [rows=999.947218, distinct(1)=999.947218, null(1)=0]
├── cost: 112690.089
├── key: (1)
├── fd: (1)-->(2)
├── inner-join (lookup geo_table)
│ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.geom:6
│ ├── key columns: [5] = [5]
│ ├── lookup columns are key
│ ├── immutable
│ ├── stats: [rows=9801, distinct(1)=999.947218, null(1)=0]
│ ├── cost: 112484.05
│ ├── fd: (1)-->(2)
│ ├── ordering: +1
│ ├── prune: (1)
│ ├── interesting orderings: (+1)
│ ├── inner-join (inverted-lookup geo_table@geom_index)
│ │ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.k:5
│ │ ├── inverted-expr
│ │ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:6)
│ │ ├── stats: [rows=10000, distinct(1)=999.956829, null(1)=0]
│ │ ├── cost: 41784.03
│ │ ├── key: (1,5)
│ │ ├── fd: (1)-->(2)
│ │ ├── ordering: +1
│ │ ├── scan geo_table2
│ │ │ ├── columns: geo_table2.k:1 geo_table2.geom:2
│ │ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=10]
│ │ │ ├── cost: 1084.02
│ │ │ ├── key: (1)
│ │ │ ├── fd: (1)-->(2)
│ │ │ ├── ordering: +1
│ │ │ ├── prune: (1,2)
│ │ │ ├── interesting orderings: (+1)
│ │ │ └── unfiltered-cols: (1-4)
│ │ └── filters (true)
│ └── filters
│ └── st_intersects(geo_table2.geom:2, geo_table.geom:6) [outer=(2,6), immutable, constraints=(/2: (/NULL - ]; /6: (/NULL - ])]
└── aggregations
└── const-agg [as=geo_table2.geom:2, outer=(2)]
└── geo_table2.geom:2
├── left-join (inverted-lookup geo_table@geom_index)
│ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.k:5 continuation:11
│ ├── inverted-expr
│ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:6)
│ ├── stats: [rows=10000, distinct(1)=1000, null(1)=0]
│ ├── cost: 41984.03
│ ├── key: (1,5)
│ ├── fd: (1)-->(2), (5)-->(11)
│ ├── scan geo_table2
│ │ ├── columns: geo_table2.k:1 geo_table2.geom:2
│ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=10]
│ │ ├── cost: 1084.02
│ │ ├── key: (1)
│ │ ├── fd: (1)-->(2)
│ │ ├── prune: (1,2)
│ │ ├── interesting orderings: (+1)
│ │ └── unfiltered-cols: (1-4)
│ └── filters (true)
└── filters
└── st_intersects(geo_table2.geom:2, geo_table.geom:6) [outer=(2,6), immutable, constraints=(/2: (/NULL - ]; /6: (/NULL - ])]

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM geo_table2 WHERE NOT EXISTS (SELECT * FROM geo_table@geom_index
Expand Down
19 changes: 3 additions & 16 deletions pkg/sql/opt/xform/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -447,19 +447,6 @@ func (c *CustomFuncs) GenerateInvertedJoins(
if scanPrivate.Flags.NoIndexJoin {
return
}
if joinType == opt.SemiJoinOp {
// We cannot use a non-covering index for semi join. Note that
// since the semi join doesn't pass through any columns, "non
// covering" here means that not all columns in the ON condition are
// available.
//
// For semi joins, we may still be able to generate an inverted join
// by converting it to an inner join using the ConvertSemiToInnerJoin
// rule. Any semi join that could use an inverted index would already be
// transformed into an inner join by ConvertSemiToInnerJoin, so semi
// joins can be ignored here.
return
}

if pkCols == nil {
tab := c.e.mem.Metadata().Table(scanPrivate.Table)
Expand All @@ -477,9 +464,9 @@ func (c *CustomFuncs) GenerateInvertedJoins(

continuationCol := opt.ColumnID(0)
invertedJoinType := joinType
// Anti joins are converted to a pair consisting of a left join and
// anti join.
if joinType == opt.LeftJoinOp || joinType == opt.AntiJoinOp {
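// Left, anti, and semi joins are planned as paired joins: the inverted
// join becomes a left join that produces a continuation column, and
// anti/semi joins are paired with an anti/semi lookup join.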
if joinType == opt.LeftJoinOp || joinType == opt.AntiJoinOp || joinType == opt.SemiJoinOp {
continuationCol = c.constructContinuationColumnForPairedLeftJoin()
invertedJoinType = opt.LeftJoinOp
}
Expand Down
Loading

0 comments on commit 5863460

Please sign in to comment.