Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: COPY benchmark roachtest #82348

Merged
merged 1 commit into from
Jun 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ go_library(
"cluster_init.go",
"connection_latency.go",
"copy.go",
"copyfrom.go",
"costfuzz.go",
"decommission.go",
"decommission_self.go",
Expand Down
162 changes: 162 additions & 0 deletions pkg/cmd/roachtest/tests/copyfrom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package tests

import (
"context"
"fmt"
"runtime"
"strings"

"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/stretchr/testify/require"
)

// tpchLineitemFmt is the URL template for the TPC-H lineitem fixture file
// (lineitem.tbl.1). Its single %d verb is filled in with the scale factor.
const tpchLineitemFmt = `https://storage.googleapis.com/cockroach-fixtures/tpch-csv/sf-%d/lineitem.tbl.1`

// lineitemSchema is the DDL for the benchmark table: the lineitem table and
// its secondary indexes.
//
// The trailing l_dummy column is deliberate. Every line of the fixture file
// ends with the delimiter, and standard CSV parsing interprets that trailing
// delimiter as one more (empty) column.
//
// The statements are embedded in single-quoted shell arguments by the code
// that runs them, so they must not contain a single quote.
var lineitemSchema = `
CREATE TABLE lineitem (
l_orderkey INT8 NOT NULL,
l_partkey INT8 NOT NULL,
l_suppkey INT8 NOT NULL,
l_linenumber INT8 NOT NULL,
l_quantity DECIMAL(15,2) NOT NULL,
l_extendedprice DECIMAL(15,2) NOT NULL,
l_discount DECIMAL(15,2) NOT NULL,
l_tax DECIMAL(15,2) NOT NULL,
l_returnflag CHAR(1) NOT NULL,
l_linestatus CHAR(1) NOT NULL,
l_shipdate DATE NOT NULL,
l_commitdate DATE NOT NULL,
l_receiptdate DATE NOT NULL,
l_shipinstruct CHAR(25) NOT NULL,
l_shipmode CHAR(10) NOT NULL,
l_comment VARCHAR(44) NOT NULL,
l_dummy CHAR(1),
PRIMARY KEY (l_orderkey, l_linenumber));
CREATE INDEX l_ok ON lineitem (l_orderkey);
CREATE INDEX l_pk ON lineitem (l_partkey);
CREATE INDEX l_sk ON lineitem (l_suppkey);
CREATE INDEX l_sd ON lineitem (l_shipdate);
CREATE INDEX l_cd ON lineitem (l_commitdate);
CREATE INDEX l_rd ON lineitem (l_receiptdate);
CREATE INDEX l_pk_sk ON lineitem (l_partkey, l_suppkey);
CREATE INDEX l_sk_pk ON lineitem (l_suppkey, l_partkey);
`

func initTest(ctx context.Context, t test.Test, c cluster.Cluster, sf int) {
if runtime.GOOS == "linux" {
if err := repeatRunE(
ctx, t, c, c.All(), "update apt-get", `sudo apt-get -qq update`,
); err != nil {
t.Fatal(err)
}
if err := repeatRunE(
ctx,
t,
c,
c.All(),
"install dependencies",
`sudo apt-get install -qq postgresql`,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you think it's worth using a stable postgresql version?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't think of a reason why, ubuntu LTS should be stable enough no? Open to suggestions...

Copy link
Contributor

@otan otan Jun 3, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the past we've found that the package archive updates the versions if you do sudo apt-get install and then you get result changes that don't correlate.

If you wanted to be a bit more particular I'd install from source, but I am not going to hold this pr down for it.

); err != nil {
t.Fatal(err)
}
csv := fmt.Sprintf(tpchLineitemFmt, sf)
c.Run(ctx, c.Node(1), "rm -f /tmp/lineitem-table.csv")
c.Run(ctx, c.Node(1), fmt.Sprintf("curl '%s' -o /tmp/lineitem-table.csv", csv))
}
}

// runTest times a single COPY of /tmp/lineitem-table.csv into the lineitem
// table on node 1 and records the resulting rate.
//
// pg is the psql invocation (command plus connection arguments) that the file
// is piped into. The row count is parsed from psql's output, which is expected
// to be two whitespace-separated fields: a word followed by an integer. The
// rate in rows per second is logged and written as {"copy_rate": N} to
// stats.json in the perf artifacts directory on node 1, for roachperf.
func runTest(ctx context.Context, t test.Test, c cluster.Cluster, pg string) {
	copyCmd := fmt.Sprintf(
		`cat /tmp/lineitem-table.csv | %s -c "COPY lineitem FROM STDIN WITH CSV DELIMITER '|';"`,
		pg,
	)

	start := timeutil.Now()
	det, err := c.RunWithDetailsSingleNode(ctx, t.L(), c.Node(1), copyCmd)
	if err != nil {
		// Surface the command output before failing; it is the only clue to
		// why the COPY was rejected.
		t.L().Printf("stdout:\n%v\n", det.Stdout)
		t.L().Printf("stderr:\n%v\n", det.Stderr)
		t.Fatal(err)
	}
	dur := timeutil.Since(start)
	t.L().Printf("%v\n", det.Stdout)

	// Scan the leading word and the row count that follows it. The word is
	// discarded; it is only read so that Sscan reaches the number.
	var (
		tag  string
		rows int
	)
	_, err = fmt.Sscan(det.Stdout, &tag, &rows)
	require.NoError(t, err)

	rate := int(float64(rows) / dur.Seconds())
	t.L().Printf("results: %d rows/s", rate)

	// Write the copy rate into the stats.json file to be used by roachperf.
	// -p keeps mkdir from failing when the directory is already present.
	c.Run(ctx, c.Node(1), "mkdir", "-p", t.PerfArtifactsDir())
	cmd := fmt.Sprintf(
		`echo '{ "copy_rate": %d }' > %s/stats.json`,
		rate, t.PerfArtifactsDir(),
	)
	c.Run(ctx, c.Node(1), cmd)
}

// runCopyFromPG runs the COPY benchmark against PostgreSQL on node 1. After
// the common setup it drops any existing lineitem table, recreates it from
// lineitemSchema, and then measures the COPY. Every psql invocation runs as
// the postgres user.
func runCopyFromPG(ctx context.Context, t test.Test, c cluster.Cluster, sf int) {
	initTest(ctx, t, c, sf)

	// Reset the table so each run loads into an empty lineitem.
	dropTable := "sudo -i -u postgres psql -c 'DROP TABLE IF EXISTS lineitem'"
	createTable := fmt.Sprintf("sudo -i -u postgres psql -c '%s'", lineitemSchema)
	for _, stmt := range []string{dropTable, createTable} {
		c.Run(ctx, c.Node(1), stmt)
	}

	runTest(ctx, t, c, "sudo -i -u postgres psql")
}

func runCopyFromCRDB(ctx context.Context, t test.Test, c cluster.Cluster, sf int) {
c.Put(ctx, t.Cockroach(), "./cockroach", c.All())
c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.All())
initTest(ctx, t, c, sf)
urls, err := c.InternalPGUrl(ctx, t.L(), c.Node(1))
require.NoError(t, err)
m := c.NewMonitor(ctx, c.All())
m.Go(func(ctx context.Context) error {
// psql w/ url first are doesn't support --db arg so have to do this.
otan marked this conversation as resolved.
Show resolved Hide resolved
url := strings.Replace(urls[0], "?", "/defaultdb?", 1)
c.Run(ctx, c.Node(1), fmt.Sprintf("psql %s -c 'SELECT 1'", url))
c.Run(ctx, c.Node(1), fmt.Sprintf("psql %s -c '%s'", url, lineitemSchema))
runTest(ctx, t, c, fmt.Sprintf("psql '%s'", url))
return nil
})
m.Wait()
}

// registerCopyFrom adds the COPY benchmarks to the registry. For each
// (scale factor, node count) configuration it registers two tests owned by
// KV: first the CockroachDB variant, then the PostgreSQL variant.
func registerCopyFrom(r registry.Registry) {
	// The two benchmark targets, in registration order.
	targets := []struct {
		nameFmt string
		run     func(ctx context.Context, t test.Test, c cluster.Cluster, sf int)
	}{
		{nameFmt: "copyfrom/crdb/sf=%d/nodes=%d", run: runCopyFromCRDB},
		{nameFmt: "copyfrom/pg/sf=%d/nodes=%d", run: runCopyFromPG},
	}

	configs := []struct {
		sf    int
		nodes int
	}{
		{sf: 1, nodes: 1},
	}

	for _, cfg := range configs {
		for _, target := range targets {
			// Copy what the closure needs so it does not capture the loop
			// variables themselves.
			sf, run := cfg.sf, target.run
			r.Add(registry.TestSpec{
				Name:    fmt.Sprintf(target.nameFmt, cfg.sf, cfg.nodes),
				Owner:   registry.OwnerKV,
				Cluster: r.MakeClusterSpec(cfg.nodes),
				Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
					run(ctx, t, c, sf)
				},
			})
		}
	}
}
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ func RegisterTests(r registry.Registry) {
registerClockMonotonicTests(r)
registerConnectionLatencyTest(r)
registerCopy(r)
registerCopyFrom(r)
registerCostFuzz(r)
registerDecommission(r)
registerDiskFull(r)
Expand Down