Skip to content

Commit

Permalink
sql: vectorized data encoding package
Browse files Browse the repository at this point in the history
colenc is a new package that encodes table rows into kv.Batch's, taking
coldata.Batch's as the input. Every attempt was made to avoid code
duplication and to delegate to the existing row encoding code where possible.

Epic: CRDB-18892
Informs: #91831
Release note: None
  • Loading branch information
cucaroach committed Feb 22, 2023
1 parent 3ad8eeb commit 2962e00
Show file tree
Hide file tree
Showing 15 changed files with 2,492 additions and 19 deletions.
4 changes: 4 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ ALL_TESTS = [
"//pkg/sql/colcontainer:colcontainer_test",
"//pkg/sql/colconv:colconv_disallowed_imports_test",
"//pkg/sql/colconv:colconv_test",
"//pkg/sql/colenc:colenc_test",
"//pkg/sql/colencoding:colencoding_test",
"//pkg/sql/colexec/colbuilder:colbuilder_test",
"//pkg/sql/colexec/colexecagg:colexecagg_disallowed_imports_test",
Expand Down Expand Up @@ -1541,6 +1542,8 @@ GO_TARGETS = [
"//pkg/sql/colcontainer:colcontainer_test",
"//pkg/sql/colconv:colconv",
"//pkg/sql/colconv:colconv_test",
"//pkg/sql/colenc:colenc",
"//pkg/sql/colenc:colenc_test",
"//pkg/sql/colencoding:colencoding",
"//pkg/sql/colencoding:colencoding_test",
"//pkg/sql/colexec/colbuilder:colbuilder",
Expand Down Expand Up @@ -2839,6 +2842,7 @@ GET_X_DATA_TARGETS = [
"//pkg/sql/clusterunique:get_x_data",
"//pkg/sql/colcontainer:get_x_data",
"//pkg/sql/colconv:get_x_data",
"//pkg/sql/colenc:get_x_data",
"//pkg/sql/colencoding:get_x_data",
"//pkg/sql/colexec:get_x_data",
"//pkg/sql/colexec/colbuilder:get_x_data",
Expand Down
9 changes: 9 additions & 0 deletions pkg/col/coldata/nulls.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,15 @@ func (n *Nulls) NullAt(i int) bool {
return n.nulls[i>>3]&bitMask[i&7] == 0
}

// NullAtChecked reports whether the ith value of the column is null. Unlike
// NullAt, it tolerates an uninitialized Nulls (nil bitmap), which is treated
// as containing no nulls.
func (n *Nulls) NullAtChecked(i int) bool {
	if n.nulls == nil {
		// No bitmap has been allocated, so nothing can be null.
		return false
	}
	// A cleared bit marks a null value.
	return n.nulls[i>>3]&bitMask[i&7] == 0
}

// SetNull sets the ith value of the column to null.
func (n *Nulls) SetNull(i int) {
n.maybeHasNulls = true
Expand Down
92 changes: 92 additions & 0 deletions pkg/sql/colenc/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data")
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

# Library target for the colenc package, importable as
# github.com/cockroachdb/cockroach/pkg/sql/colenc and visible to all packages.
go_library(
name = "colenc",
srcs = [
"encode.go",
"inverted.go",
"key.go",
"legacy.go",
"value.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/sql/colenc",
visibility = ["//visibility:public"],
# Direct dependencies of the sources listed above.
deps = [
"//pkg/col/coldata",
"//pkg/keys",
"//pkg/roachpb",
"//pkg/settings",
"//pkg/sql/catalog",
"//pkg/sql/catalog/catalogkeys",
"//pkg/sql/catalog/catenumpb",
"//pkg/sql/catalog/descpb",
"//pkg/sql/catalog/fetchpb",
"//pkg/sql/row",
"//pkg/sql/rowenc",
"//pkg/sql/rowenc/keyside",
"//pkg/sql/rowenc/valueside",
"//pkg/sql/rowinfra",
"//pkg/sql/sem/catid",
"//pkg/sql/sem/tree",
"//pkg/sql/sqlerrors",
"//pkg/sql/types",
"//pkg/util/encoding",
"//pkg/util/intsets",
"//pkg/util/json",
"//pkg/util/uuid",
"@com_github_cockroachdb_errors//:errors",
],
)

# Test target for the colenc package; it depends on the :colenc library rather
# than embedding it.
go_test(
name = "colenc_test",
srcs = [
"bench_test.go",
"encode_test.go",
"main_test.go",
],
# Passes -test.timeout=295s to the test binary.
args = ["-test.timeout=295s"],
# libgeos from //c-deps is made available to the tests at run time.
data = ["//c-deps:libgeos"],
deps = [
":colenc",
"//pkg/base",
"//pkg/ccl",
"//pkg/ccl/utilccl",
"//pkg/cli/clisqlclient",
"//pkg/col/coldata",
"//pkg/col/coldataext",
"//pkg/keys",
"//pkg/roachpb",
"//pkg/security/securityassets",
"//pkg/security/securitytest",
"//pkg/security/username",
"//pkg/server",
"//pkg/settings",
"//pkg/settings/cluster",
"//pkg/sql/catalog",
"//pkg/sql/catalog/catalogkeys",
"//pkg/sql/catalog/descpb",
"//pkg/sql/catalog/desctestutils",
"//pkg/sql/colconv",
"//pkg/sql/colexecerror",
"//pkg/sql/randgen",
"//pkg/sql/row",
"//pkg/sql/sem/eval",
"//pkg/sql/sem/tree",
"//pkg/sql/tests",
"//pkg/sql/types",
"//pkg/testutils/serverutils",
"//pkg/testutils/sqlutils",
"//pkg/testutils/testcluster",
"//pkg/util",
"//pkg/util/leaktest",
"//pkg/util/log",
"//pkg/util/randutil",
"//pkg/util/timeutil/pgdate",
"@com_github_cockroachdb_apd_v3//:apd",
"@com_github_stretchr_testify//require",
],
)

# Invokes the get_x_data macro loaded from
# //build/bazelutil/unused_checker:unused.bzl for this package.
get_x_data(name = "get_x_data")
152 changes: 152 additions & 0 deletions pkg/sql/colenc/bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package colenc_test

import (
"context"
"io"
"net/url"
"testing"

"github.com/cockroachdb/cockroach/pkg/cli/clisqlclient"
"github.com/cockroachdb/cockroach/pkg/col/coldata"
"github.com/cockroachdb/cockroach/pkg/col/coldataext"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security/username"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/desctestutils"
"github.com/cockroachdb/cockroach/pkg/sql/colenc"
"github.com/cockroachdb/cockroach/pkg/sql/randgen"
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/tests"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/stretchr/testify/require"
)

// lineitemSchema is the CREATE TABLE statement for the benchmark's lineitem
// table: seventeen columns, a two-column primary key and eight secondary
// indexes. The column order must stay in sync with lineitemTypes.
var lineitemSchema = `CREATE TABLE lineitem (
l_orderkey INT8 NOT NULL,
l_partkey INT8 NOT NULL,
l_suppkey INT8 NOT NULL,
l_linenumber INT8 NOT NULL,
l_quantity DECIMAL(15,2) NOT NULL,
l_extendedprice DECIMAL(15,2) NOT NULL,
l_discount DECIMAL(15,2) NOT NULL,
l_tax DECIMAL(15,2) NOT NULL,
l_returnflag CHAR(1) NOT NULL,
l_linestatus CHAR(1) NOT NULL,
l_shipdate DATE NOT NULL,
l_commitdate DATE NOT NULL,
l_receiptdate DATE NOT NULL,
l_shipinstruct CHAR(25) NOT NULL,
l_shipmode CHAR(10) NOT NULL,
l_comment VARCHAR(44) NOT NULL,
l_dummy CHAR,
PRIMARY KEY (l_orderkey, l_linenumber),
INDEX l_ok (l_orderkey ASC),
INDEX l_pk (l_partkey ASC),
INDEX l_sk (l_suppkey ASC),
INDEX l_sd (l_shipdate ASC),
INDEX l_cd (l_commitdate ASC),
INDEX l_rd (l_receiptdate ASC),
INDEX l_pk_sk (l_partkey ASC, l_suppkey ASC),
INDEX l_sk_pk (l_suppkey ASC, l_partkey ASC))`

// lineitemTypes lists one type per lineitem column, in the same order as the
// columns are declared in lineitemSchema. It is used to allocate the
// coldata.Batch that the benchmark encodes.
var lineitemTypes = []*types.T{
types.Int, // l_orderkey
types.Int, // l_partkey
types.Int, // l_suppkey
types.Int, // l_linenumber
types.Decimal, // l_quantity
types.Decimal, // l_extendedprice
types.Decimal, // l_discount
types.Decimal, // l_tax
types.String, // l_returnflag
types.String, // l_linestatus
types.Date, // l_shipdate
types.Date, // l_commitdate
types.Date, // l_receiptdate
types.String, // l_shipinstruct
types.String, // l_shipmode
types.String, // l_comment
types.String, // l_dummy
}

// BenchmarkTCPHLineItem measures how long the colenc encoder takes to turn a
// 1000-row coldata.Batch, shaped like the lineitem table declared in
// lineitemSchema (primary index plus eight secondary indexes), into KV writes.
// The writes are sent to a noopPutter so that only the encoding work is timed.
func BenchmarkTCPHLineItem(b *testing.B) {
	defer leaktest.AfterTest(b)()
	defer log.Scope(b).Close(b)
	ctx := context.Background()

	params, _ := tests.CreateTestServerParams()
	s, _, kvdb := serverutils.StartServer(b, params)
	defer s.Stopper().Stop(ctx)

	// Named pgURL rather than url so that the net/url package is not shadowed
	// for the rest of the function.
	pgURL, cleanup := sqlutils.PGUrl(b, s.ServingSQLAddr(), "copytest", url.User(username.RootUser))
	defer cleanup()
	var sqlConnCtx clisqlclient.Context
	conn := sqlConnCtx.MakeSQLConn(io.Discard, io.Discard, pgURL.String())

	// Create the table so that its descriptor can be looked up below.
	require.NoError(b, conn.Exec(ctx, lineitemSchema))

	// Make benchmark stable by using a constant seed.
	rng := randutil.NewTestRandWithSeed(0)
	st := cluster.MakeTestingClusterSettings()
	evalCtx := eval.NewTestingEvalContext(st)
	factory := coldataext.NewExtendedColumnFactory(evalCtx)

	// Fill every column of the batch with random data of the matching type.
	numRows := 1000
	cb := coldata.NewMemBatchWithCapacity(lineitemTypes, numRows, factory)
	for i, t := range lineitemTypes {
		vec := cb.ColVec(i)
		for row := 0; row < numRows; row++ {
			switch t.Family() {
			case types.IntFamily:
				vec.Int64()[row] = int64(randutil.RandIntInRange(rng, 0, 10000))
			case types.DecimalFamily:
				d := randgen.RandDatum(rng, t, false)
				vec.Decimal().Set(row, d.(*tree.DDecimal).Decimal)
			case types.StringFamily:
				l := randutil.RandIntInRange(rng, 10, 20)
				vec.Bytes().Set(row, []byte(randutil.RandString(rng, l, "asdf")))
			case types.DateFamily:
				// Dates live in the Int64 vector, holding the value returned
				// by UnixEpochDaysWithOrig.
				d := randgen.RandDatum(rng, t, false)
				vec.Int64()[row] = d.(*tree.DDate).UnixEpochDaysWithOrig()
			}
		}
	}
	cb.SetLength(numRows)

	desc := desctestutils.TestingGetTableDescriptor(kvdb, keys.SystemSQLCodec, "defaultdb", "public", "lineitem")
	enc := colenc.MakeEncoder(keys.SystemSQLCodec, desc, &st.SV, cb, desc.PublicColumns(),
		nil /*metrics*/, nil /*partialIndexMap*/, func() bool { return false })

	// Everything above is setup; only the encoding loop is timed.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Check the error on every iteration. Checking once after the loop
		// would only see the last call's result and silently drop any earlier
		// failure, leaving a benchmark number for work that did not succeed.
		if err := enc.PrepareBatch(ctx, &noopPutter{}, 0, cb.Length()); err != nil {
			b.Fatal(err)
		}
	}
}

// noopPutter discards every write it is handed: all of its methods have empty
// bodies. The benchmark passes it to PrepareBatch so that no KV batch is
// actually built while encoding is being measured.
// NOTE(review): presumably this satisfies the row.Putter interface; confirm
// against PrepareBatch's signature.
type noopPutter struct{}

// Single key/value operations.

func (*noopPutter) CPut(_, _ interface{}, _ []byte) {}
func (*noopPutter) Put(_, _ interface{})            {}
func (*noopPutter) InitPut(_, _ interface{}, _ bool) {}
func (*noopPutter) Del(_ ...interface{})            {}

// Bulk operations over parallel slices of keys and values.

func (*noopPutter) CPutValues(_ []roachpb.Key, _ []roachpb.Value) {}
func (*noopPutter) CPutTuples(_ []roachpb.Key, _ [][]byte)        {}
func (*noopPutter) PutBytes(_ []roachpb.Key, _ [][]byte)          {}
func (*noopPutter) InitPutBytes(_ []roachpb.Key, _ [][]byte)      {}
func (*noopPutter) PutTuples(_ []roachpb.Key, _ [][]byte)         {}
func (*noopPutter) InitPutTuples(_ []roachpb.Key, _ [][]byte)     {}
Loading

0 comments on commit 2962e00

Please sign in to comment.