-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sql: vectorized data encoding package
colenc is a new package that produces kv.Batch requests for encoding tables, using coldata.Batch values as the input. Every attempt was made to avoid code duplication and to delegate to the row encoding code where possible. Epic: CRDB-18892. Informs: #91831. Release note: None.
- Loading branch information
Showing
15 changed files
with
2,492 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") | ||
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") | ||
|
||
go_library( | ||
name = "colenc", | ||
srcs = [ | ||
"encode.go", | ||
"inverted.go", | ||
"key.go", | ||
"legacy.go", | ||
"value.go", | ||
], | ||
importpath = "github.com/cockroachdb/cockroach/pkg/sql/colenc", | ||
visibility = ["//visibility:public"], | ||
deps = [ | ||
"//pkg/col/coldata", | ||
"//pkg/keys", | ||
"//pkg/roachpb", | ||
"//pkg/settings", | ||
"//pkg/sql/catalog", | ||
"//pkg/sql/catalog/catalogkeys", | ||
"//pkg/sql/catalog/catenumpb", | ||
"//pkg/sql/catalog/descpb", | ||
"//pkg/sql/catalog/fetchpb", | ||
"//pkg/sql/row", | ||
"//pkg/sql/rowenc", | ||
"//pkg/sql/rowenc/keyside", | ||
"//pkg/sql/rowenc/valueside", | ||
"//pkg/sql/rowinfra", | ||
"//pkg/sql/sem/catid", | ||
"//pkg/sql/sem/tree", | ||
"//pkg/sql/sqlerrors", | ||
"//pkg/sql/types", | ||
"//pkg/util/encoding", | ||
"//pkg/util/intsets", | ||
"//pkg/util/json", | ||
"//pkg/util/uuid", | ||
"@com_github_cockroachdb_errors//:errors", | ||
], | ||
) | ||
|
||
go_test( | ||
name = "colenc_test", | ||
srcs = [ | ||
"bench_test.go", | ||
"encode_test.go", | ||
"main_test.go", | ||
], | ||
args = ["-test.timeout=295s"], | ||
data = ["//c-deps:libgeos"], | ||
deps = [ | ||
":colenc", | ||
"//pkg/base", | ||
"//pkg/ccl", | ||
"//pkg/ccl/utilccl", | ||
"//pkg/cli/clisqlclient", | ||
"//pkg/col/coldata", | ||
"//pkg/col/coldataext", | ||
"//pkg/keys", | ||
"//pkg/roachpb", | ||
"//pkg/security/securityassets", | ||
"//pkg/security/securitytest", | ||
"//pkg/security/username", | ||
"//pkg/server", | ||
"//pkg/settings", | ||
"//pkg/settings/cluster", | ||
"//pkg/sql/catalog", | ||
"//pkg/sql/catalog/catalogkeys", | ||
"//pkg/sql/catalog/descpb", | ||
"//pkg/sql/catalog/desctestutils", | ||
"//pkg/sql/colconv", | ||
"//pkg/sql/colexecerror", | ||
"//pkg/sql/randgen", | ||
"//pkg/sql/row", | ||
"//pkg/sql/sem/eval", | ||
"//pkg/sql/sem/tree", | ||
"//pkg/sql/tests", | ||
"//pkg/sql/types", | ||
"//pkg/testutils/serverutils", | ||
"//pkg/testutils/sqlutils", | ||
"//pkg/testutils/testcluster", | ||
"//pkg/util", | ||
"//pkg/util/leaktest", | ||
"//pkg/util/log", | ||
"//pkg/util/randutil", | ||
"//pkg/util/timeutil/pgdate", | ||
"@com_github_cockroachdb_apd_v3//:apd", | ||
"@com_github_stretchr_testify//require", | ||
], | ||
) | ||
|
||
get_x_data(name = "get_x_data") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
// Copyright 2023 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package colenc_test | ||
|
||
import ( | ||
"context" | ||
"io" | ||
"net/url" | ||
"testing" | ||
|
||
"github.com/cockroachdb/cockroach/pkg/cli/clisqlclient" | ||
"github.com/cockroachdb/cockroach/pkg/col/coldata" | ||
"github.com/cockroachdb/cockroach/pkg/col/coldataext" | ||
"github.com/cockroachdb/cockroach/pkg/keys" | ||
"github.com/cockroachdb/cockroach/pkg/roachpb" | ||
"github.com/cockroachdb/cockroach/pkg/security/username" | ||
"github.com/cockroachdb/cockroach/pkg/settings/cluster" | ||
"github.com/cockroachdb/cockroach/pkg/sql/catalog/desctestutils" | ||
"github.com/cockroachdb/cockroach/pkg/sql/colenc" | ||
"github.com/cockroachdb/cockroach/pkg/sql/randgen" | ||
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval" | ||
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree" | ||
"github.com/cockroachdb/cockroach/pkg/sql/tests" | ||
"github.com/cockroachdb/cockroach/pkg/sql/types" | ||
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils" | ||
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" | ||
"github.com/cockroachdb/cockroach/pkg/util/leaktest" | ||
"github.com/cockroachdb/cockroach/pkg/util/log" | ||
"github.com/cockroachdb/cockroach/pkg/util/randutil" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
// lineitemSchema is the CREATE TABLE statement for the lineitem table used by
// BenchmarkTCPHLineItem: seventeen columns, a two-column primary key, and
// eight secondary indexes. The column order here must stay in step with
// lineitemTypes.
var lineitemSchema = `CREATE TABLE lineitem (
l_orderkey INT8 NOT NULL,
l_partkey INT8 NOT NULL,
l_suppkey INT8 NOT NULL,
l_linenumber INT8 NOT NULL,
l_quantity DECIMAL(15,2) NOT NULL,
l_extendedprice DECIMAL(15,2) NOT NULL,
l_discount DECIMAL(15,2) NOT NULL,
l_tax DECIMAL(15,2) NOT NULL,
l_returnflag CHAR(1) NOT NULL,
l_linestatus CHAR(1) NOT NULL,
l_shipdate DATE NOT NULL,
l_commitdate DATE NOT NULL,
l_receiptdate DATE NOT NULL,
l_shipinstruct CHAR(25) NOT NULL,
l_shipmode CHAR(10) NOT NULL,
l_comment VARCHAR(44) NOT NULL,
l_dummy CHAR,
PRIMARY KEY (l_orderkey, l_linenumber),
INDEX l_ok (l_orderkey ASC),
INDEX l_pk (l_partkey ASC),
INDEX l_sk (l_suppkey ASC),
INDEX l_sd (l_shipdate ASC),
INDEX l_cd (l_commitdate ASC),
INDEX l_rd (l_receiptdate ASC),
INDEX l_pk_sk (l_partkey ASC, l_suppkey ASC),
INDEX l_sk_pk (l_suppkey ASC, l_partkey ASC))`
|
||
var lineitemTypes = []*types.T{ | ||
types.Int, | ||
types.Int, | ||
types.Int, | ||
types.Int, | ||
types.Decimal, | ||
types.Decimal, | ||
types.Decimal, | ||
types.Decimal, | ||
types.String, | ||
types.String, | ||
types.Date, | ||
types.Date, | ||
types.Date, | ||
types.String, | ||
types.String, | ||
types.String, | ||
types.String, | ||
} | ||
|
||
func BenchmarkTCPHLineItem(b *testing.B) { | ||
defer leaktest.AfterTest(b)() | ||
defer log.Scope(b).Close(b) | ||
ctx := context.Background() | ||
|
||
params, _ := tests.CreateTestServerParams() | ||
s, _, kvdb := serverutils.StartServer(b, params) | ||
defer s.Stopper().Stop(ctx) | ||
|
||
url, cleanup := sqlutils.PGUrl(b, s.ServingSQLAddr(), "copytest", url.User(username.RootUser)) | ||
defer cleanup() | ||
var sqlConnCtx clisqlclient.Context | ||
conn := sqlConnCtx.MakeSQLConn(io.Discard, io.Discard, url.String()) | ||
|
||
err := conn.Exec(ctx, lineitemSchema) | ||
require.NoError(b, err) | ||
// Make benchmark stable by using a constant seed. | ||
rng := randutil.NewTestRandWithSeed(0) | ||
st := cluster.MakeTestingClusterSettings() | ||
evalCtx := eval.NewTestingEvalContext(st) | ||
factory := coldataext.NewExtendedColumnFactory(evalCtx) | ||
numRows := 1000 | ||
cb := coldata.NewMemBatchWithCapacity(lineitemTypes, numRows, factory) | ||
for i, t := range lineitemTypes { | ||
vec := cb.ColVec(i) | ||
for row := 0; row < numRows; row++ { | ||
switch t.Family() { | ||
case types.IntFamily: | ||
vec.Int64()[row] = int64(randutil.RandIntInRange(rng, 0, 10000)) | ||
case types.DecimalFamily: | ||
d := randgen.RandDatum(rng, t, false) | ||
vec.Decimal().Set(row, d.(*tree.DDecimal).Decimal) | ||
case types.StringFamily: | ||
l := randutil.RandIntInRange(rng, 10, 20) | ||
vec.Bytes().Set(row, []byte(randutil.RandString(rng, l, "asdf"))) | ||
case types.DateFamily: | ||
d := randgen.RandDatum(rng, t, false) | ||
vec.Int64()[row] = d.(*tree.DDate).UnixEpochDaysWithOrig() | ||
} | ||
} | ||
} | ||
cb.SetLength(numRows) | ||
desc := desctestutils.TestingGetTableDescriptor(kvdb, keys.SystemSQLCodec, "defaultdb", "public", "lineitem") | ||
enc := colenc.MakeEncoder(keys.SystemSQLCodec, desc, &st.SV, cb, desc.PublicColumns(), | ||
nil /*metrics*/, nil /*partialIndexMap*/, func() bool { return false }) | ||
b.ResetTimer() | ||
for i := 0; i < b.N; i++ { | ||
err = enc.PrepareBatch(ctx, &noopPutter{}, 0, cb.Length()) | ||
} | ||
require.NoError(b, err) | ||
} | ||
|
||
type noopPutter struct{} | ||
|
||
func (n *noopPutter) CPut(key, value interface{}, expValue []byte) {} | ||
func (n *noopPutter) Put(key, value interface{}) {} | ||
func (n *noopPutter) InitPut(key, value interface{}, failOnTombstones bool) {} | ||
func (n *noopPutter) Del(key ...interface{}) {} | ||
func (n *noopPutter) CPutValues(kys []roachpb.Key, values []roachpb.Value) {} | ||
func (n *noopPutter) CPutTuples(kys []roachpb.Key, values [][]byte) {} | ||
func (n *noopPutter) PutBytes(kys []roachpb.Key, values [][]byte) {} | ||
func (n *noopPutter) InitPutBytes(kys []roachpb.Key, values [][]byte) {} | ||
func (n *noopPutter) PutTuples(kys []roachpb.Key, values [][]byte) {} | ||
func (n *noopPutter) InitPutTuples(kys []roachpb.Key, values [][]byte) {} |
Oops, something went wrong.