Skip to content

Commit

Permalink
Merge #39294
Browse files Browse the repository at this point in the history
39294: exec: fix output batches of LEFT SEMI for hash and merge joiners r=yuzefovich a=yuzefovich

Previously, the merge joiner's output batch always had columns
corresponding to both the left and the right sides, even for LEFT SEMI
and LEFT ANTI joins, where the right side's output is never used. This
is incorrect; the merge joiner now outputs batches that contain only
the left-side columns for those join types.

A similar issue was present in the LEFT SEMI hash joiner and is now
fixed.

Release note: None

Co-authored-by: Yahor Yuzefovich <[email protected]>
  • Loading branch information
craig[bot] and yuzefovich committed Aug 15, 2019
2 parents 02f239d + 5a7955f commit 1c7351e
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 18 deletions.
13 changes: 12 additions & 1 deletion pkg/sql/exec/hashjoiner.go
Original file line number Diff line number Diff line change
Expand Up @@ -750,10 +750,21 @@ func makeHashJoinProber(
var outColTypes []coltypes.T
var buildColOffset, probeColOffset uint32
if buildRightSide {
outColTypes = append(probe.sourceTypes, build.sourceTypes...)
if len(build.outCols) == 0 {
// We do not have output columns from the right side in case of LEFT SEMI
// and LEFT ANTI joins, and we should not have the corresponding columns
// in the output batch, so we only have the types from the left side in
// outColTypes.
outColTypes = probe.sourceTypes
} else {
outColTypes = append(probe.sourceTypes, build.sourceTypes...)
}
buildColOffset = uint32(len(probe.sourceTypes))
probeColOffset = 0
} else {
// Note that we don't need to check whether probe.outCols is non-empty
// before populating outColTypes because LEFT SEMI and LEFT ANTI joins will
// always build the right side.
outColTypes = append(build.sourceTypes, probe.sourceTypes...)
buildColOffset = 0
probeColOffset = nBuildCols
Expand Down
10 changes: 9 additions & 1 deletion pkg/sql/exec/mergejoiner.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,15 @@ func (o *mergeJoinBase) Init() {
func (o *mergeJoinBase) initWithBatchSize(outBatchSize uint16) {
outColTypes := make([]coltypes.T, len(o.left.sourceTypes)+len(o.right.sourceTypes))
copy(outColTypes, o.left.sourceTypes)
copy(outColTypes[len(o.left.sourceTypes):], o.right.sourceTypes)
if len(o.right.outCols) == 0 {
// We do not have output columns from the right input in case of LEFT SEMI
// and LEFT ANTI joins, and we should not have the corresponding columns in
// the output batch, so we only have the types from the left input in
// outColTypes.
outColTypes = outColTypes[:len(o.left.sourceTypes)]
} else {
copy(outColTypes[len(o.left.sourceTypes):], o.right.sourceTypes)
}

o.output = coldata.NewMemBatchWithSize(outColTypes, int(outBatchSize))
o.left.source.Init()
Expand Down
30 changes: 30 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/exec_hash_join_dist
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# LogicTest: 5node-dist-vec

statement ok
CREATE TABLE t (k INT, v INT)

statement ok
INSERT INTO t VALUES (1, 10), (2, 20), (3, 30)

statement ok
ALTER TABLE t EXPERIMENTAL_RELOCATE VALUES (ARRAY[3], 1)

statement ok
CREATE TABLE xy (x INT PRIMARY KEY, y INT)

statement ok
INSERT INTO xy VALUES (2, 200), (3, 300), (4, 400)

statement ok
ALTER TABLE t SPLIT AT VALUES (3), (4)

statement ok
ALTER TABLE t EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 2), (ARRAY[2], 3), (ARRAY[3], 4)

# Test that LEFT SEMI hash join outputs batches only with the columns from the
# left side.
query II rowsort
SELECT * FROM t WHERE EXISTS(SELECT * FROM xy WHERE x=t.k)
----
2 20
3 30
41 changes: 25 additions & 16 deletions pkg/sql/logictest/testdata/logic_test/exec_merge_join_dist
Original file line number Diff line number Diff line change
Expand Up @@ -3,50 +3,59 @@
# Regression test for #39317.

statement ok
CREATE TABLE l (a INT PRIMARY KEY)
CREATE TABLE l (a INT PRIMARY KEY, b INT)

statement ok
CREATE TABLE r (a INT PRIMARY KEY)
CREATE TABLE r (a INT PRIMARY KEY, b INT)

statement ok
INSERT INTO l VALUES (1), (2)
INSERT INTO l VALUES (1, 10), (2, 20), (3, 30)

statement ok
INSERT INTO r VALUES (2), (3)
INSERT INTO r VALUES (2, 200), (3, 300), (4, 400)

statement ok
ALTER TABLE l SPLIT AT VALUES (2)
ALTER TABLE l SPLIT AT VALUES (2), (3)

statement ok
ALTER TABLE r SPLIT AT VALUES (2)
ALTER TABLE r SPLIT AT VALUES (2), (3)

statement ok
ALTER TABLE l EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 1), (ARRAY[2], 2)
ALTER TABLE l EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 1), (ARRAY[2], 2), (ARRAY[3], 3)

statement ok
ALTER TABLE r EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 2), (ARRAY[2], 1)
ALTER TABLE r EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 2), (ARRAY[2], 3), (ARRAY[3], 1)

query TTTI colnames
SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE l]
SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE l] ORDER BY lease_holder
----
start_key end_key replicas lease_holder
NULL /2 {1} 1
/2 NULL {2} 2
/2 /3 {2} 2
/3 NULL {3} 3

query TTTI colnames
SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE r]
SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE r] ORDER BY lease_holder
----
start_key end_key replicas lease_holder
/2 NULL {1} 1
NULL /2 {2} 2
/2 /3 {1} 1
/3 NULL {2} 2
NULL /2 {3} 3

query T
SELECT url FROM [EXPLAIN (DISTSQL) SELECT * FROM l LEFT OUTER JOIN r USING(a) WHERE a = 2]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzEk09rgzAYxu_7FPLusrEUjXWXwKCXDlq2Ojp3Gh5S884K1kgSYaX43Yc6aHXWtWxsN_Pn97zPkwd3kEmBC75BDewVKBBwISSQKxmh1lJV282lmXgH5hBIsrww1XZIIJIKge3AJCZFYBDwVYpL5AKVXWkJNDxJa-lcJRuuthMFBJ5znmlm2e7Idu1LCEsCsjCfsnu11dZac71u60wohGVIQBseIzC3JEfs7XWKTCqBCkVLKazI7670ZHxEFeNcJhkqe9z2FmxzZNbD9D6w_JdgurTm_mwBBFJ8M1cTenN9p5J43XwCAb8wzKrzHObfZxv_IFuP8YUcydz2uo_QO9prjaant-70t57-buv0qL2_aN39p9Z7bC1R5zLTeFKpThUMRYzNQ2lZqAiflIzqMc3Sr7n6TQVq05x6zWKW1Uf1L3c6TIdh2oWdQ9htwbQLu4PwbQt2uvD4jMxfJg_DdBj2zsoclhcfAQAA___49Nlj
https://cockroachdb.github.io/distsqlplan/decode.html#eJzEk8Fro0AUxu_7V8jbyy47QWfiXgYWvGQhYTeW1J6Kh4nzagTjyMwIDcH_vaiFVGtsQkt7c2be73vvex8eoVAS12KPBvg9UCDAICZQapWgMUo3113RUj4C9whkRVnZ5jomkCiNwI9gM5sjcIjENscNConabbQkWpHlrXSps73Qh0ADgdtSFIY7Lpu5zP0OcU1AVfZZ9qS2PTg7YXZ9nYBCXMcEjBUpAmc1OTPeSacqlJaoUfaU4oZ8q2TE43_UKa5UVqB25_3ZokOJ3Pm3-Bs54V202DircLkGAjk-2B8B_fXzj87SXfcJBMLKciegJGAk8AdbODmcv8PhyPhrNVOl6w9XMdra77Wml2fvjWeff2z29Ox4n5E9-9LsR4bboClVYfCiaL3GHsoUu3UZVekEb7RK2jbdMWy5drMSje1e_e6wLNqn9ve7HKbTMB3C3kuY9WA6hNkk_LsHe0N4foXnV52nYToN-1d5jutvTwEAAP__eVLb3w==

query I
query III
SELECT * FROM l LEFT OUTER JOIN r USING(a) WHERE a = 2
----
2
2 20 200

# Test that LEFT SEMI merge join outputs batches only with the columns from the
# left side.
query II rowsort
SELECT * FROM l WHERE EXISTS(SELECT * FROM r WHERE r.a=l.a)
----
2 20
3 30

0 comments on commit 1c7351e

Please sign in to comment.