diff --git a/pkg/sql/exec/coldata/nulls.go b/pkg/sql/exec/coldata/nulls.go index 59d934b3606e..bd1810912f10 100644 --- a/pkg/sql/exec/coldata/nulls.go +++ b/pkg/sql/exec/coldata/nulls.go @@ -232,3 +232,28 @@ func (n *Nulls) SetNullBitmap(bm []uint64) { } } } + +// Or returns a new Nulls vector where NullAt(i) iff n.NullAt(i) or +// n2.NullAt(i). +func (n *Nulls) Or(n2 *Nulls) *Nulls { + // For simplicity, enforce that len(n.nulls) <= len(n2.nulls). + if len(n.nulls) > len(n2.nulls) { + n, n2 = n2, n + } + nulls := make([]uint64, len(n2.nulls)) + if n.hasNulls && n2.hasNulls { + for i := 0; i < len(n.nulls); i++ { + nulls[i] = n.nulls[i] | n2.nulls[i] + } + // If n2 is longer, we can just copy the remainder. + copy(nulls[len(n.nulls):], n2.nulls[len(n.nulls):]) + } else if n.hasNulls { + copy(nulls, n.nulls) + } else if n2.hasNulls { + copy(nulls, n2.nulls) + } + return &Nulls{ + hasNulls: n.hasNulls || n2.hasNulls, + nulls: nulls, + } +} diff --git a/pkg/sql/exec/coldata/nulls_test.go b/pkg/sql/exec/coldata/nulls_test.go new file mode 100644 index 000000000000..d583d6b8da2d --- /dev/null +++ b/pkg/sql/exec/coldata/nulls_test.go @@ -0,0 +1,45 @@ +// Copyright 2019 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. 
+ +package coldata + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNullsOr(t *testing.T) { + length1, length2 := uint16(300), uint16(400) + n1, n2 := NewNulls(int(length1)), NewNulls(int(length2)) + for i := uint16(0); i < length1; i++ { + if i%3 == 0 { + n1.SetNull(i) + } + } + for i := uint16(0); i < length2; i++ { + if i%5 == 0 { + n2.SetNull(i) + } + } + or := n1.Or(&n2) + require.True(t, or.hasNulls) + for i := uint16(0); i < length2; i++ { + if i < length1 && n1.NullAt(i) || i < length2 && n2.NullAt(i) { + require.True(t, or.NullAt(i), "or.NullAt(%d) should be true", i) + } else { + require.False(t, or.NullAt(i), "or.NullAt(%d) should be false", i) + } + } +} diff --git a/pkg/sql/exec/execgen/cmd/execgen/like_ops_gen.go b/pkg/sql/exec/execgen/cmd/execgen/like_ops_gen.go index 75b16e05888d..164954f2ab61 100644 --- a/pkg/sql/exec/execgen/cmd/execgen/like_ops_gen.go +++ b/pkg/sql/exec/execgen/cmd/execgen/like_ops_gen.go @@ -43,7 +43,7 @@ import ( ` func genLikeOps(wr io.Writer) error { - tmpl := template.New("like_ops") + tmpl := template.New("like_ops").Funcs(template.FuncMap{"buildDict": buildDict}) var err error tmpl, err = tmpl.Parse(selTemplate) if err != nil { diff --git a/pkg/sql/exec/execgen/cmd/execgen/selection_ops_gen.go b/pkg/sql/exec/execgen/cmd/execgen/selection_ops_gen.go index 2a9ae00756b1..05bdfebf2713 100644 --- a/pkg/sql/exec/execgen/cmd/execgen/selection_ops_gen.go +++ b/pkg/sql/exec/execgen/cmd/execgen/selection_ops_gen.go @@ -40,6 +40,59 @@ import ( {{define "opConstName"}}sel{{.Name}}{{.LTyp}}{{.RTyp}}ConstOp{{end}} {{define "opName"}}sel{{.Name}}{{.LTyp}}{{.RTyp}}Op{{end}} +{{define "selConstLoop"}} +if sel := batch.Selection(); sel != nil { + sel = sel[:n] + for _, i := range sel { + var cmp bool + {{(.Global.Assign "cmp" "col[i]" "p.constArg")}} + if cmp {{if .HasNulls}}&& !nulls.NullAt(i) {{end}}{ + sel[idx] = i + idx++ + } + } +} else { + batch.SetSelection(true) + sel := batch.Selection() + col = 
col[:n] + for i := range col { + var cmp bool + {{(.Global.Assign "cmp" "col[i]" "p.constArg")}} + if cmp {{if .HasNulls}}&& !nulls.NullAt(uint16(i)) {{end}}{ + sel[idx] = uint16(i) + idx++ + } + } +} +{{end}} + +{{define "selLoop"}} +if sel := batch.Selection(); sel != nil { + sel = sel[:n] + for _, i := range sel { + var cmp bool + {{(.Global.Assign "cmp" "col1[i]" "col2[i]")}} + if cmp {{if .HasNulls}}&& !nulls.NullAt(i) {{end}}{ + sel[idx] = i + idx++ + } + } +} else { + batch.SetSelection(true) + sel := batch.Selection() + col1 = col1[:n] + col2 = col2[:len(col1)] + for i := range col1 { + var cmp bool + {{(.Global.Assign "cmp" "col1[i]" "col2[i]")}} + if cmp {{if .HasNulls}}&& !nulls.NullAt(uint16(i)) {{end}}{ + sel[idx] = uint16(i) + idx++ + } + } +} +{{end}} + {{define "selConstOp"}} type {{template "opConstName" .}} struct { input Operator @@ -55,31 +108,15 @@ func (p *{{template "opConstName" .}}) Next(ctx context.Context) coldata.Batch { return batch } - col := batch.ColVec(p.colIdx).{{.LTyp}}()[:coldata.BatchSize] + vec := batch.ColVec(p.colIdx) + col := vec.{{.LTyp}}()[:coldata.BatchSize] var idx uint16 n := batch.Length() - if sel := batch.Selection(); sel != nil { - sel = sel[:n] - for _, i := range sel { - var cmp bool - {{(.Assign "cmp" "col[i]" "p.constArg")}} - if cmp { - sel[idx] = i - idx++ - } - } + if vec.HasNulls() { + nulls := vec.Nulls() + {{template "selConstLoop" buildDict "Global" . "HasNulls" true }} } else { - batch.SetSelection(true) - sel := batch.Selection() - col = col[:n] - for i := range col { - var cmp bool - {{(.Assign "cmp" "col[i]" "p.constArg")}} - if cmp { - sel[idx] = uint16(i) - idx++ - } - } + {{template "selConstLoop" buildDict "Global" . "HasNulls" false }} } if idx > 0 { batch.SetLength(idx) @@ -93,13 +130,7 @@ func (p {{template "opConstName" .}}) Init() { } {{end}} -{{/* The outer range is a types.T, and the inner is the overloads associated - with that type. 
*/}} -{{range .}} -{{range .}} - -{{template "selConstOp" .}} - +{{define "selOp"}} type {{template "opName" .}} struct { input Operator @@ -114,34 +145,18 @@ func (p *{{template "opName" .}}) Next(ctx context.Context) coldata.Batch { return batch } - col1 := batch.ColVec(p.col1Idx).{{.LTyp}}()[:coldata.BatchSize] - col2 := batch.ColVec(p.col2Idx).{{.RTyp}}()[:coldata.BatchSize] + vec1 := batch.ColVec(p.col1Idx) + vec2 := batch.ColVec(p.col2Idx) + col1 := vec1.{{.LTyp}}()[:coldata.BatchSize] + col2 := vec2.{{.RTyp}}()[:coldata.BatchSize] n := batch.Length() var idx uint16 - if sel := batch.Selection(); sel != nil { - sel = sel[:n] - for _, i := range sel { - var cmp bool - {{(.Assign "cmp" "col1[i]" "col2[i]")}} - if cmp { - sel[idx] = i - idx++ - } - } + if vec1.HasNulls() || vec2.HasNulls() { + nulls := vec1.Nulls().Or(vec2.Nulls()) + {{template "selLoop" buildDict "Global" . "HasNulls" true }} } else { - batch.SetSelection(true) - sel := batch.Selection() - col1 = col1[:n] - col2 = col2[:len(col1)] - for i := range col1 { - var cmp bool - {{(.Assign "cmp" "col1[i]" "col2[i]")}} - if cmp { - sel[idx] = uint16(i) - idx++ - } - } + {{template "selLoop" buildDict "Global" . "HasNulls" false }} } if idx > 0 { batch.SetLength(idx) @@ -153,7 +168,14 @@ func (p *{{template "opName" .}}) Next(ctx context.Context) coldata.Batch { func (p {{template "opName" .}}) Init() { p.input.Init() } +{{end}} +{{/* The outer range is a types.T, and the inner is the overloads associated + with that type. 
*/}} +{{range .}} +{{range .}} +{{template "selConstOp" .}} +{{template "selOp" .}} {{end}} {{end}} @@ -228,7 +250,9 @@ func genSelectionOps(wr io.Writer) error { typ := overload.LTyp typToOverloads[typ] = append(typToOverloads[typ], overload) } - tmpl, err := template.New("selection_ops").Parse(selTemplate) + tmpl := template.New("selection_ops").Funcs(template.FuncMap{"buildDict": buildDict}) + var err error + tmpl, err = tmpl.Parse(selTemplate) if err != nil { return err } diff --git a/pkg/sql/exec/selection_ops_test.go b/pkg/sql/exec/selection_ops_test.go index d185608fd2d4..ca49b0e00f3c 100644 --- a/pkg/sql/exec/selection_ops_test.go +++ b/pkg/sql/exec/selection_ops_test.go @@ -17,7 +17,7 @@ package exec import ( "context" "fmt" - "math" + "math/rand" "reflect" "testing" @@ -25,12 +25,16 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/exec/types" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" semtypes "github.com/cockroachdb/cockroach/pkg/sql/types" - "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate" ) +const ( + selectivity = .5 + nullProbability = .1 +) + func TestSelLTInt64Int64ConstOp(t *testing.T) { - tups := tuples{{0}, {1}, {2}} + tups := tuples{{0}, {1}, {2}, {nil}} runTests(t, []tuples{tups}, func(t *testing.T, input []Operator) { op := selLTInt64Int64ConstOp{ input: input[0], @@ -51,6 +55,9 @@ func TestSelLTInt64Int64(t *testing.T) { {0, 1}, {1, 0}, {1, 1}, + {nil, 1}, + {-1, nil}, + {nil, nil}, } runTests(t, []tuples{tups}, func(t *testing.T, input []Operator) { op := selLTInt64Int64Op{ @@ -98,19 +105,24 @@ func TestGetSelectionOperator(t *testing.T) { } } -func benchmarkSelLTInt64Int64ConstOp(b *testing.B, useSelectionVector bool) { - rng, _ := randutil.NewPseudoRand() +func benchmarkSelLTInt64Int64ConstOp(b *testing.B, useSelectionVector bool, hasNulls bool) { ctx := context.Background() - // We need to generate such a batch that selection operator will output at - // least 
one tuple - otherwise, the benchmark will be stuck in an infinite - // loop, so we put MinInt64 as the first element and make sure that constArg - // is not MinInt64. batch := coldata.NewMemBatch([]types.T{types.Int64}) col := batch.ColVec(0).Int64() - col[0] = math.MinInt64 - for i := int64(1); i < coldata.BatchSize; i++ { - col[i] = rng.Int63() + for i := int64(0); i < coldata.BatchSize; i++ { + if float64(i) < coldata.BatchSize*selectivity { + col[i] = -1 + } else { + col[i] = 1 + } + } + if hasNulls { + for i := 0; i < coldata.BatchSize; i++ { + if rand.Float64() < nullProbability { + batch.ColVec(0).Nulls().SetNull(uint16(i)) + } + } } batch.SetLength(coldata.BatchSize) if useSelectionVector { @@ -120,18 +132,13 @@ func benchmarkSelLTInt64Int64ConstOp(b *testing.B, useSelectionVector bool) { sel[i] = uint16(i) } } - constArg := rng.Int63() - for constArg == math.MinInt64 { - constArg = rng.Int63() - } - source := newRepeatableBatchSource(batch) source.Init() plusOp := &selLTInt64Int64ConstOp{ input: source, colIdx: 0, - constArg: constArg, + constArg: 0, } plusOp.Init() @@ -144,26 +151,36 @@ func benchmarkSelLTInt64Int64ConstOp(b *testing.B, useSelectionVector bool) { func BenchmarkSelLTInt64Int64ConstOp(b *testing.B) { for _, useSel := range []bool{true, false} { - b.Run(fmt.Sprintf("useSel=%t", useSel), func(b *testing.B) { - benchmarkSelLTInt64Int64ConstOp(b, useSel) - }) + for _, hasNulls := range []bool{true, false} { + b.Run(fmt.Sprintf("useSel=%t,hasNulls=%t", useSel, hasNulls), func(b *testing.B) { + benchmarkSelLTInt64Int64ConstOp(b, useSel, hasNulls) + }) + } } } -func benchmarkSelLTInt64Int64Op(b *testing.B, useSelectionVector bool) { - rng, _ := randutil.NewPseudoRand() +func benchmarkSelLTInt64Int64Op(b *testing.B, useSelectionVector bool, hasNulls bool) { ctx := context.Background() batch := coldata.NewMemBatch([]types.T{types.Int64, types.Int64}) col1 := batch.ColVec(0).Int64() col2 := batch.ColVec(1).Int64() - // We need to generate such a 
batch that selection operator will output at - // least one tuple - otherwise, the benchmark will be stuck in an infinite - // loop, so we put 0 and 1 as the first tuple of the batch. - col1[0], col2[0] = 0, 1 - for i := int64(1); i < coldata.BatchSize; i++ { - col1[i] = rng.Int63() - col2[i] = rng.Int63() + for i := int64(0); i < coldata.BatchSize; i++ { + if float64(i) < coldata.BatchSize*selectivity { + col1[i], col2[i] = -1, 1 + } else { + col1[i], col2[i] = 1, -1 + } + } + if hasNulls { + for i := 0; i < coldata.BatchSize; i++ { + if rand.Float64() < nullProbability { + batch.ColVec(0).Nulls().SetNull(uint16(i)) + } + if rand.Float64() < nullProbability { + batch.ColVec(1).Nulls().SetNull(uint16(i)) + } + } } batch.SetLength(coldata.BatchSize) if useSelectionVector { @@ -192,8 +209,10 @@ func benchmarkSelLTInt64Int64Op(b *testing.B, useSelectionVector bool) { func BenchmarkSelLTInt64Int64Op(b *testing.B) { for _, useSel := range []bool{true, false} { - b.Run(fmt.Sprintf("useSel=%t", useSel), func(b *testing.B) { - benchmarkSelLTInt64Int64Op(b, useSel) - }) + for _, hasNulls := range []bool{true, false} { + b.Run(fmt.Sprintf("useSel=%t,hasNulls=%t", useSel, hasNulls), func(b *testing.B) { + benchmarkSelLTInt64Int64Op(b, useSel, hasNulls) + }) + } } } diff --git a/pkg/sql/logictest/testdata/logic_test/vectorize b/pkg/sql/logictest/testdata/logic_test/vectorize index 91c18700d10b..043fba7abb3f 100644 --- a/pkg/sql/logictest/testdata/logic_test/vectorize +++ b/pkg/sql/logictest/testdata/logic_test/vectorize @@ -13,6 +13,12 @@ INSERT INTO a SELECT g//2, g FROM generate_series(0,2000) g(g) statement ok CREATE TABLE bools (b BOOL, i INT, PRIMARY KEY (b, i)); INSERT INTO bools VALUES (true, 0), (false, 1), (true, 2), (false, 3); +statement ok +CREATE TABLE nulls (a INT, b INT) + +statement ok +INSERT INTO nulls VALUES (NULL, NULL), (NULL, 1), (1, NULL), (1, 1) + query I SELECT count(*) FROM a ---- @@ -59,6 +65,19 @@ SELECT b FROM a WHERE b < 3 1 2 +# Simple filter 
with nulls. +query I +SELECT a FROM nulls WHERE a < 2 +---- +1 +1 + +query II +SELECT a, b FROM nulls WHERE a <= b +---- +1 1 + + # Filter on the result of a projection. query II SELECT a, b FROM a WHERE a * 2 < b LIMIT 5