forked from cockroachdb/cockroach
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Simple roachtest benchmark performing a single node copy of a 750k line CSV (TPCH lineitem table). On my gceworker CRDB does it in 5773 rows/s and PG does it in 36945 rows/s. Fixes: cockroachdb#81725 Release note: none
- Loading branch information
Showing
3 changed files
with
164 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
// Copyright 2022 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package tests | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"runtime" | ||
"strings" | ||
"time" | ||
|
||
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" | ||
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" | ||
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" | ||
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" | ||
"github.com/cockroachdb/cockroach/pkg/roachprod/install" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
// tpchLineitemFmt is a format string for the URL of the first chunk of the
// TPC-H lineitem table in '|'-delimited form. Its single %d verb is filled in
// with the scale factor.
const tpchLineitemFmt = `https://storage.googleapis.com/cockroach-fixtures/tpch-csv/sf-%d/lineitem.tbl.1`
|
||
// lineitemSchema is the DDL that creates the lineitem table and its secondary
// indexes.
//
// The trailing l_dummy column exists because every line of the input file ends
// with the delimiter, and standard CSV behavior is to interpret that as one
// more (empty) column.
//
// NOTE: callers embed this string inside a single-quoted shell argument, so it
// must not contain single quotes.
var lineitemSchema = `
CREATE TABLE lineitem (
	l_orderkey      INT8 NOT NULL,
	l_partkey       INT8 NOT NULL,
	l_suppkey       INT8 NOT NULL,
	l_linenumber    INT8 NOT NULL,
	l_quantity      DECIMAL(15,2) NOT NULL,
	l_extendedprice DECIMAL(15,2) NOT NULL,
	l_discount      DECIMAL(15,2) NOT NULL,
	l_tax           DECIMAL(15,2) NOT NULL,
	l_returnflag    CHAR(1) NOT NULL,
	l_linestatus    CHAR(1) NOT NULL,
	l_shipdate      DATE NOT NULL,
	l_commitdate    DATE NOT NULL,
	l_receiptdate   DATE NOT NULL,
	l_shipinstruct  CHAR(25) NOT NULL,
	l_shipmode      CHAR(10) NOT NULL,
	l_comment       VARCHAR(44) NOT NULL,
	l_dummy         CHAR(1),
	PRIMARY KEY (l_orderkey, l_linenumber));
CREATE INDEX l_ok ON lineitem (l_orderkey);
CREATE INDEX l_pk ON lineitem (l_partkey);
CREATE INDEX l_sk ON lineitem (l_suppkey);
CREATE INDEX l_sd ON lineitem (l_shipdate);
CREATE INDEX l_cd ON lineitem (l_commitdate);
CREATE INDEX l_rd ON lineitem (l_receiptdate);
CREATE INDEX l_pk_sk ON lineitem (l_partkey, l_suppkey);
CREATE INDEX l_sk_pk ON lineitem (l_suppkey, l_partkey);
`
|
||
func initTest(ctx context.Context, t test.Test, c cluster.Cluster, sf int) { | ||
if runtime.GOOS == "linux" { | ||
if err := repeatRunE( | ||
ctx, t, c, c.All(), "update apt-get", `sudo apt-get -qq update`, | ||
); err != nil { | ||
t.Fatal(err) | ||
} | ||
if err := repeatRunE( | ||
ctx, | ||
t, | ||
c, | ||
c.All(), | ||
"install dependencies", | ||
`sudo apt-get install -qq postgresql`, | ||
); err != nil { | ||
t.Fatal(err) | ||
} | ||
csv := fmt.Sprintf(tpchLineitemFmt, sf) | ||
c.Run(ctx, c.Node(1), "rm -f /tmp/lineitem-table.csv") | ||
c.Run(ctx, c.Node(1), fmt.Sprintf("curl '%s' -o /tmp/lineitem-table.csv", csv)) | ||
} | ||
} | ||
|
||
func runTest(ctx context.Context, t test.Test, c cluster.Cluster, pg string) { | ||
start := time.Now() | ||
det, err := c.RunWithDetailsSingleNode(ctx, t.L(), c.Node(1), fmt.Sprintf(`cat /tmp/lineitem-table.csv | %s -c "COPY lineitem FROM STDIN WITH CSV DELIMITER '|';"`, pg)) | ||
if err != nil { | ||
t.L().Printf("%v\n", det.Stdout) | ||
t.L().Printf("%v\n", det.Stderr) | ||
t.Fatal(err) | ||
} | ||
dur := time.Since(start) | ||
t.L().Printf("%v\n", det.Stdout) | ||
rows := 0 | ||
copy := "" | ||
_, err = fmt.Sscan(det.Stdout, ©, &rows) | ||
require.NoError(t, err) | ||
rate := int(float64(rows) / dur.Seconds()) | ||
t.L().Printf("results: %d rows/s", rate) | ||
// Write the copy rate into the stats.json file to be used by roachperf. | ||
c.Run(ctx, c.Node(1), "mkdir", t.PerfArtifactsDir()) | ||
cmd := fmt.Sprintf( | ||
`echo '{ "copy_rate": %d }' > %s/stats.json`, | ||
rate, t.PerfArtifactsDir(), | ||
) | ||
c.Run(ctx, c.Node(1), cmd) | ||
} | ||
|
||
func runCopyFromPG(ctx context.Context, t test.Test, c cluster.Cluster, sf int) { | ||
initTest(ctx, t, c, sf) | ||
c.Run(ctx, c.Node(1), "sudo -i -u postgres psql -c 'DROP TABLE IF EXISTS lineitem'") | ||
c.Run(ctx, c.Node(1), fmt.Sprintf("sudo -i -u postgres psql -c '%s'", lineitemSchema)) | ||
runTest(ctx, t, c, "sudo -i -u postgres psql") | ||
} | ||
|
||
func runCopyFromCRDB(ctx context.Context, t test.Test, c cluster.Cluster, sf int) { | ||
c.Put(ctx, t.Cockroach(), "./cockroach", c.All()) | ||
c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.All()) | ||
initTest(ctx, t, c, sf) | ||
urls, err := c.InternalPGUrl(ctx, t.L(), c.Node(1)) | ||
require.NoError(t, err) | ||
m := c.NewMonitor(ctx, c.All()) | ||
m.Go(func(ctx context.Context) error { | ||
// psql w/ url first are doesn't support --db arg so have to do this. | ||
url := strings.Replace(urls[0], "?", "/defaultdb?", 1) | ||
c.Run(ctx, c.Node(1), fmt.Sprintf("psql %s -c 'SELECT 1'", url)) | ||
c.Run(ctx, c.Node(1), fmt.Sprintf("psql %s -c '%s'", url, lineitemSchema)) | ||
runTest(ctx, t, c, fmt.Sprintf("psql '%s'", url)) | ||
return nil | ||
}) | ||
m.Wait() | ||
} | ||
|
||
func registerCopyFrom(r registry.Registry) { | ||
testcases := []struct { | ||
sf int | ||
nodes int | ||
}{ | ||
{sf: 1, nodes: 1}, | ||
} | ||
|
||
for _, tc := range testcases { | ||
tc := tc | ||
r.Add(registry.TestSpec{ | ||
Name: fmt.Sprintf("copyfrom/crdb/sf=%d,nodes=%d", tc.sf, tc.nodes), | ||
Owner: registry.OwnerKV, | ||
Cluster: r.MakeClusterSpec(tc.nodes), | ||
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { | ||
runCopyFromCRDB(ctx, t, c, tc.sf) | ||
}, | ||
}) | ||
r.Add(registry.TestSpec{ | ||
Name: fmt.Sprintf("copyfrom/pg/sf=%d,nodes=%d", tc.sf, tc.nodes), | ||
Owner: registry.OwnerKV, | ||
Cluster: r.MakeClusterSpec(tc.nodes), | ||
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { | ||
runCopyFromPG(ctx, t, c, tc.sf) | ||
}, | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters