From f4cad41d338ed5d0abd308f68fce82a9342582e5 Mon Sep 17 00:00:00 2001
From: Tommy Reilly
Date: Wed, 1 Jun 2022 15:17:54 -0400
Subject: [PATCH] sql: COPY benchmark roachtest

Simple roachtest benchmark performing a single node copy of a 750k line
CSV (TPCH lineitem table). On my gceworker CRDB does it in 5773 rows/s
and PG does it in 36945 rows/s.

Fixes: #81725

Release note: none
---
 pkg/cmd/roachtest/tests/BUILD.bazel |   1 +
 pkg/cmd/roachtest/tests/copyfrom.go | 181 ++++++++++++++++++++++++++++
 pkg/cmd/roachtest/tests/registry.go |   1 +
 3 files changed, 183 insertions(+)
 create mode 100644 pkg/cmd/roachtest/tests/copyfrom.go

diff --git a/pkg/cmd/roachtest/tests/BUILD.bazel b/pkg/cmd/roachtest/tests/BUILD.bazel
index 3c42062d6ff4..9ecfc621b6fd 100644
--- a/pkg/cmd/roachtest/tests/BUILD.bazel
+++ b/pkg/cmd/roachtest/tests/BUILD.bazel
@@ -27,6 +27,7 @@ go_library(
         "cluster_init.go",
         "connection_latency.go",
         "copy.go",
+        "copyfrom.go",
         "costfuzz.go",
         "decommission.go",
         "decommission_self.go",
diff --git a/pkg/cmd/roachtest/tests/copyfrom.go b/pkg/cmd/roachtest/tests/copyfrom.go
new file mode 100644
index 000000000000..407774f538c5
--- /dev/null
+++ b/pkg/cmd/roachtest/tests/copyfrom.go
@@ -0,0 +1,181 @@
+// Copyright 2022 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package tests
+
+import (
+	"context"
+	"fmt"
+	"runtime"
+	"strings"
+
+	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
+	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
+	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
+	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
+	"github.com/cockroachdb/cockroach/pkg/roachprod/install"
+	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
+	"github.com/stretchr/testify/require"
+)
+
+// tpchLineitemFmt is the URL of the TPC-H lineitem CSV fixture; the %d verb
+// takes the scale factor.
+const tpchLineitemFmt = `https://storage.googleapis.com/cockroach-fixtures/tpch-csv/sf-%d/lineitem.tbl.1`
+
+// lineitemSchema creates the lineitem table and its secondary indexes. The
+// extra l_dummy column is there because every line of the CSV file above
+// ends with a delimiter, and standard CSV behavior is to interpret that as
+// one more column.
+var lineitemSchema string = `
+CREATE TABLE lineitem (
+	l_orderkey INT8 NOT NULL,
+	l_partkey INT8 NOT NULL,
+	l_suppkey INT8 NOT NULL,
+	l_linenumber INT8 NOT NULL,
+	l_quantity DECIMAL(15,2) NOT NULL,
+	l_extendedprice DECIMAL(15,2) NOT NULL,
+	l_discount DECIMAL(15,2) NOT NULL,
+	l_tax DECIMAL(15,2) NOT NULL,
+	l_returnflag CHAR(1) NOT NULL,
+	l_linestatus CHAR(1) NOT NULL,
+	l_shipdate DATE NOT NULL,
+	l_commitdate DATE NOT NULL,
+	l_receiptdate DATE NOT NULL,
+	l_shipinstruct CHAR(25) NOT NULL,
+	l_shipmode CHAR(10) NOT NULL,
+	l_comment VARCHAR(44) NOT NULL,
+	l_dummy CHAR(1),
+	PRIMARY KEY (l_orderkey, l_linenumber));
+CREATE INDEX l_ok ON lineitem (l_orderkey);
+CREATE INDEX l_pk ON lineitem (l_partkey);
+CREATE INDEX l_sk ON lineitem (l_suppkey);
+CREATE INDEX l_sd ON lineitem (l_shipdate);
+CREATE INDEX l_cd ON lineitem (l_commitdate);
+CREATE INDEX l_rd ON lineitem (l_receiptdate);
+CREATE INDEX l_pk_sk ON lineitem (l_partkey, l_suppkey);
+CREATE INDEX l_sk_pk ON lineitem (l_suppkey, l_partkey);
+`
+
+// initTest refreshes apt, installs the postgresql package on all nodes, and
+// downloads the lineitem CSV for scale factor sf to /tmp/lineitem-table.csv on
+// node 1. All of it is gated on runtime.GOOS, i.e. the OS of the process
+// running the test rather than of the cluster nodes; elsewhere it is a no-op.
+func initTest(ctx context.Context, t test.Test, c cluster.Cluster, sf int) {
+	if runtime.GOOS == "linux" {
+		if err := repeatRunE(
+			ctx, t, c, c.All(), "update apt-get", `sudo apt-get -qq update`,
+		); err != nil {
+			t.Fatal(err)
+		}
+		if err := repeatRunE(
+			ctx,
+			t,
+			c,
+			c.All(),
+			"install dependencies",
+			`sudo apt-get install -qq postgresql`,
+		); err != nil {
+			t.Fatal(err)
+		}
+		csv := fmt.Sprintf(tpchLineitemFmt, sf)
+		c.Run(ctx, c.Node(1), "rm -f /tmp/lineitem-table.csv")
+		c.Run(ctx, c.Node(1), fmt.Sprintf("curl '%s' -o /tmp/lineitem-table.csv", csv))
+	}
+}
+
+// runTest pipes /tmp/lineitem-table.csv on node 1 into the psql command line
+// pg as a COPY lineitem FROM STDIN, timing the whole remote command. The
+// resulting rate in rows/s is logged and written to stats.json in the perf
+// artifacts directory. A failed COPY or unparsable output fails the test.
+func runTest(ctx context.Context, t test.Test, c cluster.Cluster, pg string) {
+	start := timeutil.Now()
+	det, err := c.RunWithDetailsSingleNode(ctx, t.L(), c.Node(1), fmt.Sprintf(`cat /tmp/lineitem-table.csv | %s -c "COPY lineitem FROM STDIN WITH CSV DELIMITER '|';"`, pg))
+	if err != nil {
+		t.L().Printf("stdout:\n%v\n", det.Stdout)
+		t.L().Printf("stderr:\n%v\n", det.Stderr)
+		t.Fatal(err)
+	}
+	dur := timeutil.Since(start)
+	t.L().Printf("%v\n", det.Stdout)
+	rows := 0
+	copyTag := ""
+	// On success psql is expected to print "COPY <row count>"; scan both tokens.
+	_, err = fmt.Sscan(det.Stdout, &copyTag, &rows)
+	require.NoError(t, err)
+	rate := int(float64(rows) / dur.Seconds())
+	t.L().Printf("results: %d rows/s", rate)
+	// Write the copy rate into the stats.json file to be used by roachperf.
+	c.Run(ctx, c.Node(1), "mkdir", t.PerfArtifactsDir())
+	cmd := fmt.Sprintf(
+		`echo '{ "copy_rate": %d }' > %s/stats.json`,
+		rate, t.PerfArtifactsDir(),
+	)
+	c.Run(ctx, c.Node(1), cmd)
+}
+
+// runCopyFromPG runs the COPY benchmark on node 1 through psql invoked as the
+// postgres user, dropping and recreating the lineitem table first.
+func runCopyFromPG(ctx context.Context, t test.Test, c cluster.Cluster, sf int) {
+	initTest(ctx, t, c, sf)
+	c.Run(ctx, c.Node(1), "sudo -i -u postgres psql -c 'DROP TABLE IF EXISTS lineitem'")
+	c.Run(ctx, c.Node(1), fmt.Sprintf("sudo -i -u postgres psql -c '%s'", lineitemSchema))
+	runTest(ctx, t, c, "sudo -i -u postgres psql")
+}
+
+// runCopyFromCRDB puts the cockroach binary on all nodes, starts them, and
+// runs the COPY benchmark through psql on node 1 against defaultdb.
+func runCopyFromCRDB(ctx context.Context, t test.Test, c cluster.Cluster, sf int) {
+	c.Put(ctx, t.Cockroach(), "./cockroach", c.All())
+	c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.All())
+	initTest(ctx, t, c, sf)
+	urls, err := c.InternalPGUrl(ctx, t.L(), c.Node(1))
+	require.NoError(t, err)
+	m := c.NewMonitor(ctx, c.All())
+	m.Go(func(ctx context.Context) error {
+		// psql with a URL as its first argument doesn't support a --db arg, so put the database in the URL.
+		url := strings.Replace(urls[0], "?", "/defaultdb?", 1)
+		c.Run(ctx, c.Node(1), fmt.Sprintf("psql %s -c 'SELECT 1'", url))
+		c.Run(ctx, c.Node(1), fmt.Sprintf("psql %s -c '%s'", url, lineitemSchema))
+		runTest(ctx, t, c, fmt.Sprintf("psql '%s'", url))
+		return nil
+	})
+	m.Wait()
+}
+
+// registerCopyFrom registers a CockroachDB and a PostgreSQL flavor of the COPY
+// benchmark for each test case (scale factor sf, cluster size nodes).
+func registerCopyFrom(r registry.Registry) {
+	testcases := []struct {
+		sf    int
+		nodes int
+	}{
+		{sf: 1, nodes: 1},
+	}
+
+	for _, tc := range testcases {
+		tc := tc
+		r.Add(registry.TestSpec{
+			Name:    fmt.Sprintf("copyfrom/crdb/sf=%d/nodes=%d", tc.sf, tc.nodes),
+			Owner:   registry.OwnerKV,
+			Cluster: r.MakeClusterSpec(tc.nodes),
+			Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
+				runCopyFromCRDB(ctx, t, c, tc.sf)
+			},
+		})
+		r.Add(registry.TestSpec{
+			Name:    fmt.Sprintf("copyfrom/pg/sf=%d/nodes=%d", tc.sf, tc.nodes),
+			Owner:   registry.OwnerKV,
+			Cluster: r.MakeClusterSpec(tc.nodes),
+			Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
+				runCopyFromPG(ctx, t, c, tc.sf)
+			},
+		})
+	}
+}
diff --git a/pkg/cmd/roachtest/tests/registry.go b/pkg/cmd/roachtest/tests/registry.go
index c8f4ce4824d8..d5009e8d24d5 100644
--- a/pkg/cmd/roachtest/tests/registry.go
+++ b/pkg/cmd/roachtest/tests/registry.go
@@ -31,6 +31,7 @@ func RegisterTests(r registry.Registry) {
 	registerClockMonotonicTests(r)
 	registerConnectionLatencyTest(r)
 	registerCopy(r)
+	registerCopyFrom(r)
 	registerCostFuzz(r)
 	registerDecommission(r)
 	registerDiskFull(r)