Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
32216: exec: plan the hash joiner r=changangela a=changangela

This adds planning for hash joins to `experimental_vectorize`. Joins in this experiment only work if the left join table is distinct.

```
root@:26257/tpch> set experimental_vectorize=false;
SET

Time: 351µs

root@:26257/tpch> select count(*) from supplier join partsupp on ps_suppkey=s_suppkey;
  count
+--------+
  800000
(1 row)

Time: 546.155ms

root@:26257/tpch> set experimental_vectorize=true;
SET

Time: 357µs

root@:26257/tpch> select count(*) from supplier join partsupp on ps_suppkey=s_suppkey;
  count
+--------+
  800000
(1 row)

Time: 309.745ms
```

Co-authored-by: changangela <[email protected]>
  • Loading branch information
craig[bot] and changangela committed Nov 13, 2018
2 parents 61f5a6b + ecf92c1 commit 728dd55
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 6 deletions.
53 changes: 52 additions & 1 deletion pkg/sql/distsqlrun/column_exec_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"

"github.com/cockroachdb/cockroach/pkg/sql/exec/types"

"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/util"

"github.com/pkg/errors"
Expand Down Expand Up @@ -90,6 +90,57 @@ func newColOperator(
typs := types.FromColumnTypes(spec.Input[0].ColumnTypes)
op, err = exec.NewOrderedDistinct(inputs[0], core.Distinct.OrderedColumns, typs)

case core.HashJoiner != nil:
if err := checkNumIn(inputs, 2); err != nil {
return nil, err
}

if !core.HashJoiner.OnExpr.Empty() {
return nil, errors.New("can't plan hash join with on expressions")
}

if core.HashJoiner.Type != sqlbase.JoinType_INNER {
return nil, errors.Errorf("hash join of type %s not supported", core.HashJoiner.Type)
}

leftTypes := types.FromColumnTypes(spec.Input[0].ColumnTypes)
rightTypes := types.FromColumnTypes(spec.Input[1].ColumnTypes)

nLeftCols := uint32(len(leftTypes))
nRightCols := uint32(len(rightTypes))

leftOutCols := make([]uint32, 0)
rightOutCols := make([]uint32, 0)

if post.Projection {
for _, col := range post.OutputColumns {
if col < nLeftCols {
leftOutCols = append(leftOutCols, col)
} else {
rightOutCols = append(rightOutCols, col-nLeftCols)
}
}
} else {
for i := uint32(0); i < nLeftCols; i++ {
leftOutCols = append(leftOutCols, i)
}

for i := uint32(0); i < nRightCols; i++ {
rightOutCols = append(rightOutCols, i)
}
}

op, err = exec.NewEqInnerDistinctHashJoiner(
inputs[0],
inputs[1],
core.HashJoiner.LeftEqColumns,
core.HashJoiner.RightEqColumns,
leftOutCols,
rightOutCols,
leftTypes,
rightTypes,
)

default:
return nil, errors.Errorf("unsupported processor core %s", core)
}
Expand Down
58 changes: 53 additions & 5 deletions pkg/sql/exec/hashjoiner.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,6 @@ func (hj *hashJoinEqInnerDistinctOp) Init() {
hj.spec.left.source.Init()
hj.spec.right.source.Init()

nOutCols := len(hj.spec.left.outCols) + len(hj.spec.right.outCols)
if nOutCols == 0 {
panic("no output columns specified for hash joiner")
}

// Prepare the hashTable using the specified side as the build table. Prepare
// the prober using the other side as the probe table.
if hj.spec.buildRightSide {
Expand Down Expand Up @@ -630,3 +625,56 @@ func (prober *hashJoinProber) collectResults(batch ColBatch, batchSize uint16, s

prober.batch.SetLength(nResults)
}

// NewEqInnerDistinctHashJoiner builds an inner equality hash join operator
// over the leftSource and rightSource inputs.
//
// leftEqCols and rightEqCols list the column indices compared for equality on
// each side; leftOutCols and rightOutCols list the column indices each side
// contributes to the output. leftTypes and rightTypes are stored as the column
// types of the respective sources.
//
// The column index slices are copied (and widened from uint32 to int), so the
// caller's slices are not retained. No validation is performed here and the
// returned error is always nil.
func NewEqInnerDistinctHashJoiner(
	leftSource Operator,
	rightSource Operator,
	leftEqCols []uint32,
	rightEqCols []uint32,
	leftOutCols []uint32,
	rightOutCols []uint32,
	leftTypes []types.T,
	rightTypes []types.T,
) (Operator, error) {
	// toInts returns a freshly allocated []int holding the same indices as
	// cols. The result is always non-nil, even when cols is nil or empty.
	toInts := func(cols []uint32) []int {
		converted := make([]int, len(cols))
		for idx := range cols {
			converted[idx] = int(cols[idx])
		}
		return converted
	}

	leftSpec := hashJoinerSourceSpec{
		eqCols:      toInts(leftEqCols),
		outCols:     toInts(leftOutCols),
		sourceTypes: leftTypes,
		source:      leftSource,
	}

	rightSpec := hashJoinerSourceSpec{
		eqCols:      toInts(rightEqCols),
		outCols:     toInts(rightOutCols),
		sourceTypes: rightTypes,
		source:      rightSource,
	}

	joiner := &hashJoinEqInnerDistinctOp{
		spec: hashJoinerSpec{
			left:  leftSpec,
			right: rightSpec,

			// NOTE(review): presumably this makes the left input the build
			// table and the right input the probe table — confirm against Init.
			buildRightSide: false,
		},
	}

	return joiner, nil
}
70 changes: 70 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/exec_hash_join
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# LogicTest: local local-vec

# Test that the exec HashJoiner follows SQL NULL semantics for ON predicate
# equivalence. The use of sorts here forces the planning of merge join.

statement ok
SET experimental_vectorize = true;

statement ok
CREATE TABLE t1 (k INT PRIMARY KEY, v INT)

statement ok
INSERT INTO t1 VALUES (0, 4), (2, 1), (5, 4), (3, 4)

statement ok
CREATE TABLE t2 (x INT PRIMARY KEY, y INT)

statement ok
INSERT INTO t2 VALUES (1, 3), (4, 6), (0, 5), (3, 2)

statement ok
CREATE TABLE a (k INT, v INT)

statement ok
INSERT INTO a VALUES (0, 1), (1, 2), (2, 0)

statement ok
CREATE TABLE b (a INT, b INT, c STRING)

statement ok
INSERT INTO b VALUES (0, 1, 'a'), (2, 1, 'b'), (0, 2, 'c')

statement ok
CREATE TABLE c (a INT, b STRING)

statement ok
INSERT INTO c VALUES (1, 'a'), (1, 'b'), (2, 'c')

query IIII
SELECT * FROM t1 JOIN t2 ON t1.k = t2.x
----
0 4 0 5
3 4 3 2

query IIII rowsort
SELECT * FROM a AS a1 JOIN a AS a2 ON a1.k = a2.v
----
0 1 2 0
1 2 0 1
2 0 1 2

query IIII rowsort
SELECT * FROM a AS a2 JOIN a AS a1 ON a1.k = a2.v
----
0 1 1 2
1 2 2 0
2 0 0 1

query II
SELECT t2.y, t1.v FROM t1 JOIN t2 ON t1.k = t2.x
----
5 4
2 4

query ITI
SELECT b.a, b.c, c.a FROM b JOIN c ON b.b = c.a AND b.c = c.b
----
0 a 1
2 b 1
0 c 2

0 comments on commit 728dd55

Please sign in to comment.