From c1a574cc5428e116d9af5cb7d1a25da3a582a7f5 Mon Sep 17 00:00:00 2001 From: Daniel Harrison Date: Tue, 26 Dec 2017 16:29:01 -0500 Subject: [PATCH] workload: move sampledataccl abstraction and add query workloads The sampledataccl package was started to centralize logic for testdata generation in BACKUP/RESTORE, as well as provide comparable benchmarks between operations running at the sql and kv layers. It has proven useful and is ready to graduate out of ccl. Before sampledataccl, many ccl/... tests rolled their own test data in one of various forms: sql INSERTS, csv, []string, []engine.MVCCKeyValue, ssts, RocksDB batch reprs. Insidiously the kv testdata generation was different from test to test and recreated just enough of our sql key encoding to get that test to pass. The sampledataccl.Bank abstraction was built to provide exactly the same schema in all of these forms, converting between them using actual production code, so nothing rotted. This had the wonderful side benefit of making the throughput measurement of benchmarks at the sql and kv layers comparable, which helped considerably during BACKUP/RESTORE performance tuning work. This problem is not exclusive to ccl; many places in the codebase do this recreation of just enough of our key encoding to get a particular test to pass, which is often subtly incorrect and requires boilerplate that detracts from what the test is doing. This commit moves the abstraction out of ccl so it can start to be used more widely. This opportunity is also taken to add support for more than one table as well as the ability to run query workloads (hence the new workload package name). Upcoming commits will add tooling to run these workloads against clusters (think of the loadgen/kv tool), with significantly less effort than is currently required to add a new loadgen. We'll also be able to have a single tool for making large test fixtures and to keep this tool tested so it doesn't rot. 
This has traditionally been done ad-hoc and was a huge obstruction in BACKUP/RESTORE production testing. Still TODO is teasing the final ccl deps out of the remaining sampledataccl features. Release note: None --- pkg/ccl/sqlccl/backup_test.go | 15 +- pkg/ccl/sqlccl/bench_test.go | 20 +- pkg/ccl/sqlccl/load_test.go | 20 +- pkg/ccl/storageccl/bench_test.go | 7 +- pkg/ccl/utilccl/sampledataccl/bankdata.go | 205 +----------------- .../utilccl/sampledataccl/bankdata_test.go | 90 +------- pkg/testutils/workload/bank/bank.go | 142 ++++++++++++ pkg/testutils/workload/bank/bank_test.go | 71 ++++++ pkg/testutils/workload/bank/main_test.go | 37 ++++ pkg/testutils/workload/main_test.go | 37 ++++ pkg/testutils/workload/opts.go | 50 +++++ pkg/testutils/workload/workload.go | 161 ++++++++++++++ pkg/testutils/workload/workload_test.go | 80 +++++++ 13 files changed, 622 insertions(+), 313 deletions(-) create mode 100644 pkg/testutils/workload/bank/bank.go create mode 100644 pkg/testutils/workload/bank/bank_test.go create mode 100644 pkg/testutils/workload/bank/main_test.go create mode 100644 pkg/testutils/workload/main_test.go create mode 100644 pkg/testutils/workload/opts.go create mode 100644 pkg/testutils/workload/workload.go create mode 100644 pkg/testutils/workload/workload_test.go diff --git a/pkg/ccl/sqlccl/backup_test.go b/pkg/ccl/sqlccl/backup_test.go index 9e53de08c5a0..a4e334d24d4c 100644 --- a/pkg/ccl/sqlccl/backup_test.go +++ b/pkg/ccl/sqlccl/backup_test.go @@ -48,6 +48,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils/jobutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" + "github.com/cockroachdb/cockroach/pkg/testutils/workload" + "github.com/cockroachdb/cockroach/pkg/testutils/workload/bank" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/cockroach/pkg/util/randutil" @@ -92,12 +94,19 @@ func 
backupRestoreTestSetupWithParams( if numAccounts == 0 { splits = 0 } - bankData := sampledataccl.Bank(numAccounts, payloadSize, splits) + bankData := bank.FromConfig(numAccounts, payloadSize, splits) sqlDB = sqlutils.MakeSQLRunner(tc.Conns[0]) - if err := sampledataccl.Setup(sqlDB.DB, bankData); err != nil { + sqlDB.Exec(t, `CREATE DATABASE data`) + sqlDB.Exec(t, `USE data`) + const insertBatchSize = 1000 + if _, err := workload.Setup(sqlDB.DB, bankData.Tables(), insertBatchSize); err != nil { t.Fatalf("%+v", err) } + if err := bank.Split(sqlDB.DB, bankData); err != nil { + // This occasionally flakes, so ignore errors. + t.Logf("failed to split: %+v", err) + } if err := tc.WaitForFullReplication(); err != nil { t.Fatal(err) @@ -1788,7 +1797,7 @@ func TestAsOfSystemTimeOnRestoredData(t *testing.T) { sqlDB.Exec(t, `DROP TABLE data.bank`) const numAccounts = 10 - bankData := sampledataccl.BankRows(numAccounts) + bankData := bank.FromRows(numAccounts).Tables()[0] if _, err := sampledataccl.ToBackup(t, bankData, filepath.Join(dir, "foo")); err != nil { t.Fatalf("%+v", err) } diff --git a/pkg/ccl/sqlccl/bench_test.go b/pkg/ccl/sqlccl/bench_test.go index 9f0d0b0b633e..a6bf15f48ccf 100644 --- a/pkg/ccl/sqlccl/bench_test.go +++ b/pkg/ccl/sqlccl/bench_test.go @@ -18,19 +18,17 @@ import ( "github.com/cockroachdb/cockroach/pkg/ccl/sqlccl" "github.com/cockroachdb/cockroach/pkg/ccl/utilccl/sampledataccl" + "github.com/cockroachdb/cockroach/pkg/testutils/workload/bank" "github.com/cockroachdb/cockroach/pkg/util/hlc" ) func bankBuf(numAccounts int) *bytes.Buffer { - bankData := sampledataccl.BankRows(numAccounts) + bankData := bank.FromRows(numAccounts).Tables()[0] var buf bytes.Buffer - fmt.Fprintf(&buf, "CREATE TABLE %s %s;\n", bankData.Name(), bankData.Schema()) - for { - row, ok := bankData.NextRow() - if !ok { - break - } - fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name(), strings.Join(row, `,`)) + fmt.Fprintf(&buf, "CREATE TABLE %s %s;\n", bankData.Name, 
bankData.Schema) + for rowIdx := 0; rowIdx < bankData.InitialRowCount; rowIdx++ { + row := bankData.InitialRowFn(rowIdx) + fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name, strings.Join(row, `,`)) } return &buf } @@ -44,7 +42,7 @@ func BenchmarkClusterBackup(b *testing.B) { defer cleanupFn() sqlDB.Exec(b, `DROP TABLE data.bank`) - bankData := sampledataccl.BankRows(b.N) + bankData := bank.FromRows(b.N).Tables()[0] loadDir := filepath.Join(dir, "load") if _, err := sampledataccl.ToBackup(b, bankData, loadDir); err != nil { b.Fatalf("%+v", err) @@ -77,7 +75,7 @@ func BenchmarkClusterRestore(b *testing.B) { defer cleanup() sqlDB.Exec(b, `DROP TABLE data.bank`) - bankData := sampledataccl.BankRows(b.N) + bankData := bank.FromRows(b.N).Tables()[0] backup, err := sampledataccl.ToBackup(b, bankData, filepath.Join(dir, "foo")) if err != nil { b.Fatalf("%+v", err) @@ -146,7 +144,7 @@ func BenchmarkClusterEmptyIncrementalBackup(b *testing.B) { restoreDir := filepath.Join(localFoo, "restore") fullDir := filepath.Join(localFoo, "full") - bankData := sampledataccl.BankRows(numStatements) + bankData := bank.FromRows(numStatements).Tables()[0] _, err := sampledataccl.ToBackup(b, bankData, restoreDir) if err != nil { b.Fatalf("%+v", err) diff --git a/pkg/ccl/sqlccl/load_test.go b/pkg/ccl/sqlccl/load_test.go index a21de5c052ea..c188ae15dd74 100644 --- a/pkg/ccl/sqlccl/load_test.go +++ b/pkg/ccl/sqlccl/load_test.go @@ -15,8 +15,8 @@ import ( "testing" "github.com/cockroachdb/cockroach/pkg/ccl/sqlccl" - "github.com/cockroachdb/cockroach/pkg/ccl/utilccl/sampledataccl" "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/testutils/workload/bank" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/leaktest" ) @@ -47,21 +47,15 @@ func TestImportOutOfOrder(t *testing.T) { ctx, _, sqlDB, dir, cleanupFn := backupRestoreTestSetup(t, singleNode, 0, initNone) defer cleanupFn() - bankData := 
sampledataccl.Bank(2, 0, 0) - row1, ok := bankData.NextRow() - if !ok { - t.Fatalf("expected 2 rows") - } - row2, ok := bankData.NextRow() - if !ok { - t.Fatalf("expected 2 rows") - } + bankData := bank.FromRows(2).Tables()[0] + row1 := bankData.InitialRowFn(0) + row2 := bankData.InitialRowFn(1) var buf bytes.Buffer - fmt.Fprintf(&buf, "CREATE TABLE %s %s;\n", bankData.Name(), bankData.Schema()) + fmt.Fprintf(&buf, "CREATE TABLE %s %s;\n", bankData.Name, bankData.Schema) // Intentionally write the rows out of order. - fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name(), strings.Join(row2, `,`)) - fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name(), strings.Join(row1, `,`)) + fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name, strings.Join(row2, `,`)) + fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name, strings.Join(row1, `,`)) ts := hlc.Timestamp{WallTime: hlc.UnixNano()} _, err := sqlccl.Load(ctx, sqlDB.DB, &buf, "data", localFoo, ts, 0, dir) diff --git a/pkg/ccl/storageccl/bench_test.go b/pkg/ccl/storageccl/bench_test.go index fab5b35e7bd8..d61b26a3d4c8 100644 --- a/pkg/ccl/storageccl/bench_test.go +++ b/pkg/ccl/storageccl/bench_test.go @@ -25,6 +25,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage/engine" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" + "github.com/cockroachdb/cockroach/pkg/testutils/workload/bank" "github.com/cockroachdb/cockroach/pkg/util/protoutil" ) @@ -34,7 +35,7 @@ func BenchmarkAddSSTable(b *testing.B) { for _, numEntries := range []int{100, 1000, 10000, 300000} { b.Run(fmt.Sprintf("numEntries=%d", numEntries), func(b *testing.B) { - bankData := sampledataccl.BankRows(numEntries) + bankData := bank.FromRows(numEntries).Tables()[0] backupDir := filepath.Join(tempDir, strconv.Itoa(numEntries)) backup, err := sampledataccl.ToBackup(b, bankData, backupDir) if err != nil { @@ -92,7 +93,7 @@ func 
BenchmarkWriteBatch(b *testing.B) { for _, numEntries := range []int{100, 1000, 10000} { b.Run(fmt.Sprintf("numEntries=%d", numEntries), func(b *testing.B) { - bankData := sampledataccl.BankRows(numEntries) + bankData := bank.FromRows(numEntries).Tables()[0] backupDir := filepath.Join(tempDir, strconv.Itoa(numEntries)) backup, err := sampledataccl.ToBackup(b, bankData, backupDir) if err != nil { @@ -143,7 +144,7 @@ func BenchmarkImport(b *testing.B) { for _, numEntries := range []int{1, 100, 10000, 300000} { b.Run(fmt.Sprintf("numEntries=%d", numEntries), func(b *testing.B) { - bankData := sampledataccl.BankRows(numEntries) + bankData := bank.FromRows(numEntries).Tables()[0] subdir := strconv.Itoa(numEntries) backupDir := filepath.Join(tempDir, subdir) backup, err := sampledataccl.ToBackup(b, bankData, backupDir) diff --git a/pkg/ccl/utilccl/sampledataccl/bankdata.go b/pkg/ccl/utilccl/sampledataccl/bankdata.go index 2e493c6013ba..267b5f074558 100644 --- a/pkg/ccl/utilccl/sampledataccl/bankdata.go +++ b/pkg/ccl/utilccl/sampledataccl/bankdata.go @@ -11,13 +11,10 @@ package sampledataccl import ( "bytes" "context" - gosql "database/sql" "fmt" "io" "io/ioutil" - "math/rand" "path/filepath" - "strconv" "strings" "testing" @@ -32,116 +29,17 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage/engine" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/workload" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/protoutil" - "github.com/cockroachdb/cockroach/pkg/util/randutil" ) -// TODO(dan): Once the interface for this has settled a bit, move it out of ccl. - -// Data is a representation of a sql table, used for creating test data in -// various forms (sql rows, kvs, enterprise backup). All data tables live in the -// `data` database. 
-type Data interface { - // Name returns the fully-qualified name of the represented table. - Name() string - - // Schema returns the schema for the represented table, such that it can be - // used in fmt.Sprintf(`CREATE TABLE %s %s`, data.Name(), data.Schema`())`. - Schema() string - - // NextRow iterates through the rows in the represented table, returning one - // per call. The row is represented as a slice of strings, each string one - // of the columns in the row. When no more rows are available, the bool - // returned is false. - NextRow() ([]string, bool) - - // NextRow iterates through the split points in the represented table, - // returning one per call. The split is represented as a slice of strings, - // each string one of the columns in the split. When no more splits are - // available, the bool returned is false. - NextSplit() ([]string, bool) -} - -// InsertBatched inserts all rows represented by `data` into `db` in batches of -// `batchSize` rows. The table must exist. -func InsertBatched(db *gosql.DB, data Data, batchSize int) error { - var stmt bytes.Buffer - for { - stmt.Reset() - fmt.Fprintf(&stmt, `INSERT INTO %s VALUES `, data.Name()) - i := 0 - done := false - for ; i < batchSize; i++ { - row, ok := data.NextRow() - if !ok { - done = true - break - } - if i != 0 { - stmt.WriteRune(',') - } - fmt.Fprintf(&stmt, `(%s)`, strings.Join(row, `,`)) - } - if i > 0 { - if _, err := db.Exec(stmt.String()); err != nil { - return err - } - } - if done { - return nil - } - continue - } -} - -// Split creates the configured number of ranges in an already created created -// version of the table represented by `data`. 
-func Split(db *gosql.DB, data Data) error { - var stmt bytes.Buffer - fmt.Fprintf(&stmt, `ALTER TABLE %s SPLIT AT VALUES `, data.Name()) - i := 0 - for ; ; i++ { - split, ok := data.NextSplit() - if !ok { - break - } - if i != 0 { - stmt.WriteRune(',') - } - fmt.Fprintf(&stmt, `(%s)`, strings.Join(split, `,`)) - } - if i > 0 { - if _, err := db.Exec(stmt.String()); err != nil { - return err - } - } - return nil -} - -// Setup creates a table in `db` with all the rows and splits of `data`. -func Setup(db *gosql.DB, data Data) error { - if _, err := db.Exec(`CREATE DATABASE IF NOT EXISTS data`); err != nil { - return err - } - if _, err := db.Exec(fmt.Sprintf(`CREATE TABLE %s %s`, data.Name(), data.Schema())); err != nil { - return err - } - const batchSize = 1000 - if err := InsertBatched(db, data, batchSize); err != nil { - return err - } - // This occasionally flakes, so ignore errors. - _ = Split(db, data) - return nil -} - // ToBackup creates an enterprise backup in `dir`. -func ToBackup(t testing.TB, data Data, dir string) (*Backup, error) { +func ToBackup(t testing.TB, data workload.Table, dir string) (*Backup, error) { return toBackup(t, data, dir, 0) } -func toBackup(t testing.TB, data Data, dir string, chunkBytes int64) (*Backup, error) { +func toBackup(t testing.TB, data workload.Table, dir string, chunkBytes int64) (*Backup, error) { tempDir, dirCleanupFn := testutils.TempDir(t) defer dirCleanupFn() @@ -154,13 +52,10 @@ func toBackup(t testing.TB, data Data, dir string, chunkBytes int64) (*Backup, e } var stmts bytes.Buffer - fmt.Fprintf(&stmts, "CREATE TABLE %s %s;\n", data.Name(), data.Schema()) - for { - row, ok := data.NextRow() - if !ok { - break - } - fmt.Fprintf(&stmts, "INSERT INTO %s VALUES (%s);\n", data.Name(), strings.Join(row, `,`)) + fmt.Fprintf(&stmts, "CREATE TABLE %s %s;\n", data.Name, data.Schema) + for rowIdx := 0; rowIdx < data.InitialRowCount; rowIdx++ { + row := data.InitialRowFn(rowIdx) + fmt.Fprintf(&stmts, "INSERT INTO %s VALUES 
(%s);\n", data.Name, strings.Join(row, `,`)) } // TODO(dan): The csv load will be less overhead, use it when we have it. @@ -301,89 +196,3 @@ func (b *Backup) NextKeyValues( } return kvs, span, nil } - -type bankData struct { - Rows int - PayloadBytes int - Ranges int - Rng *rand.Rand - - rowIdx int - splitIdx int -} - -var _ Data = &bankData{} - -// bankConfigDefault will trigger the default for any of the parameters for -// `Bank`. -const bankConfigDefault = -1 - -// BankRows returns Bank testdata with the given number of rows and default -// payload size and range count. -func BankRows(rows int) Data { - return Bank(rows, bankConfigDefault, bankConfigDefault) -} - -// Bank returns a bank table with three columns: an `id INT PRIMARY KEY` -// representing an account number, a `balance` INT, and a `payload` BYTES to pad -// the size of the rows for various tests. -func Bank(rows int, payloadBytes int, ranges int) Data { - // TODO(dan): This interface is a little wonky, but it's a bit annoying to - // replace it with a struct because that would make most of the callsites - // wrap. Complicating things is that there needs to be a distinction between - // the default value and an explicit 0 for ranges and payloadBytes. - if rows == bankConfigDefault { - rows = 1000 - } - if payloadBytes == bankConfigDefault { - payloadBytes = 100 - } - if ranges == bankConfigDefault { - ranges = 10 - } - if ranges > rows { - ranges = rows - } - rng, _ := randutil.NewPseudoRand() - return &bankData{Rows: rows, PayloadBytes: payloadBytes, Ranges: ranges, Rng: rng} -} - -// Name implements the Data interface. -func (d *bankData) Name() string { - return `data.bank` -} - -// Schema implements the Data interface. -func (d *bankData) Schema() string { - return `( - id INT PRIMARY KEY, - balance INT, - payload STRING, - FAMILY (id, balance, payload) - )` -} - -// NextRow implements the Data interface. 
-func (d *bankData) NextRow() ([]string, bool) { - if d.rowIdx >= d.Rows { - return nil, false - } - payload := fmt.Sprintf(`'initial-%x'`, randutil.RandBytes(d.Rng, d.PayloadBytes)) - row := []string{ - strconv.Itoa(d.rowIdx), // id - `0`, // balance - payload, // payload - } - d.rowIdx++ - return row, true -} - -// NextSplit implements the Data interface. -func (d *bankData) NextSplit() ([]string, bool) { - if d.splitIdx+1 >= d.Ranges { - return nil, false - } - split := []string{strconv.Itoa((d.splitIdx + 1) * (d.Rows / d.Ranges))} - d.splitIdx++ - return split, true -} diff --git a/pkg/ccl/utilccl/sampledataccl/bankdata_test.go b/pkg/ccl/utilccl/sampledataccl/bankdata_test.go index a76e0ffc60b8..12ec15bb3fb3 100644 --- a/pkg/ccl/utilccl/sampledataccl/bankdata_test.go +++ b/pkg/ccl/utilccl/sampledataccl/bankdata_test.go @@ -23,91 +23,10 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" + "github.com/cockroachdb/cockroach/pkg/testutils/workload/bank" "github.com/cockroachdb/cockroach/pkg/util/leaktest" ) -func TestInsertBatched(t *testing.T) { - defer leaktest.AfterTest(t)() - - tests := []struct { - rows int - batchSize int - }{ - {10, 1}, - {10, 9}, - {10, 10}, - {10, 100}, - } - - ctx := context.Background() - s, db, _ := serverutils.StartServer(t, base.TestServerArgs{}) - defer s.Stopper().Stop(ctx) - - for _, test := range tests { - t.Run(fmt.Sprintf("rows=%d/batch=%d", test.rows, test.batchSize), func(t *testing.T) { - sqlDB := sqlutils.MakeSQLRunner(db) - - data := Bank(test.rows, 0, bankConfigDefault) - sqlDB.Exec(t, `CREATE DATABASE IF NOT EXISTS data`) - sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE IF EXISTS %s`, data.Name())) - sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE %s %s`, data.Name(), data.Schema())) - - if err := InsertBatched(sqlDB.DB, data, test.batchSize); err != nil { - t.Fatalf("%+v", err) - } - - var rowCount int - 
sqlDB.QueryRow(t, fmt.Sprintf(`SELECT COUNT(*) FROM %s`, data.Name())).Scan(&rowCount) - if rowCount != test.rows { - t.Errorf("got %d rows expected %d", rowCount, test.rows) - } - }) - } -} - -func TestSplit(t *testing.T) { - defer leaktest.AfterTest(t)() - - tests := []struct { - rows int - ranges int - expectedRanges int - }{ - {10, 0, 1}, // We always have at least one range. - {10, 1, 1}, - {10, 9, 9}, - {10, 10, 10}, - {10, 100, 10}, // Don't make more ranges than rows. - } - - ctx := context.Background() - s, db, _ := serverutils.StartServer(t, base.TestServerArgs{}) - defer s.Stopper().Stop(ctx) - - for _, test := range tests { - t.Run(fmt.Sprintf("rows=%d/ranges=%d", test.rows, test.ranges), func(t *testing.T) { - sqlDB := sqlutils.MakeSQLRunner(db) - - data := Bank(test.rows, bankConfigDefault, test.ranges) - sqlDB.Exec(t, `CREATE DATABASE IF NOT EXISTS data`) - sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE IF EXISTS %s`, data.Name())) - sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE %s %s`, data.Name(), data.Schema())) - - if err := Split(sqlDB.DB, data); err != nil { - t.Fatalf("%+v", err) - } - - var rangeCount int - sqlDB.QueryRow(t, - fmt.Sprintf(`SELECT COUNT(*) FROM [SHOW TESTING_RANGES FROM TABLE %s]`, data.Name()), - ).Scan(&rangeCount) - if rangeCount != test.expectedRanges { - t.Errorf("got %d ranges expected %d", rangeCount, test.expectedRanges) - } - }) - } -} - func TestToBackup(t *testing.T) { defer leaktest.AfterTest(t)() @@ -118,7 +37,7 @@ func TestToBackup(t *testing.T) { s, db, _ := serverutils.StartServer(t, base.TestServerArgs{ExternalIODir: outerDir}) defer s.Stopper().Stop(ctx) - const payloadBytes = 100 + const payloadBytes, ranges = 100, 10 chunkBytesSizes := []int64{ 0, // 0 means default ~32MB 3 * payloadBytes, // a number that will not evently divide the number of rows @@ -128,7 +47,7 @@ func TestToBackup(t *testing.T) { for _, chunkBytes := range chunkBytesSizes { t.Run(fmt.Sprintf("rows=%d/chunk=%d", rows, chunkBytes), func(t *testing.T) { 
dir := fmt.Sprintf("%d-%d", rows, chunkBytes) - data := Bank(rows, payloadBytes, bankConfigDefault) + data := bank.FromConfig(rows, payloadBytes, ranges).Tables()[0] backup, err := toBackup(t, data, filepath.Join(outerDir, dir), chunkBytes) if err != nil { t.Fatalf("%+v", err) @@ -138,10 +57,11 @@ func TestToBackup(t *testing.T) { sqlDB := sqlutils.MakeSQLRunner(db) sqlDB.Exec(t, `DROP DATABASE IF EXISTS data CASCADE`) sqlDB.Exec(t, `CREATE DATABASE data`) + sqlDB.Exec(t, `USE data`) sqlDB.Exec(t, `RESTORE data.* FROM $1`, `nodelocal:///`+dir) var rowCount int - sqlDB.QueryRow(t, fmt.Sprintf(`SELECT COUNT(*) FROM %s`, data.Name())).Scan(&rowCount) + sqlDB.QueryRow(t, fmt.Sprintf(`SELECT COUNT(*) FROM %s`, data.Name)).Scan(&rowCount) if rowCount != rows { t.Errorf("got %d rows expected %d", rowCount, rows) } diff --git a/pkg/testutils/workload/bank/bank.go b/pkg/testutils/workload/bank/bank.go new file mode 100644 index 000000000000..da1cb0cb109f --- /dev/null +++ b/pkg/testutils/workload/bank/bank.go @@ -0,0 +1,142 @@ +// Copyright 2017 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. 
+ +package bank + +import ( + "bytes" + gosql "database/sql" + "encoding/hex" + "fmt" + "math/rand" + "strconv" + + "github.com/pkg/errors" + + "github.com/cockroachdb/cockroach/pkg/testutils/workload" + "github.com/cockroachdb/cockroach/pkg/util/randutil" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" +) + +const ( + defaultRows = 1000 + defaultPayloadBytes = 100 + defaultRanges = 10 +) + +type bank struct { + seed int64 + rows, payloadBytes, ranges int +} + +func init() { + workload.Register(fromOpts) +} + +func fromOpts(opts map[string]string) (workload.Generator, error) { + o := workload.NewOpts(opts) + b := &bank{ + seed: int64(o.Int(`seed`, 1)), + rows: o.Int(`rows`, defaultRows), + payloadBytes: o.Int(`payload-bytes`, defaultPayloadBytes), + ranges: o.Int(`ranges`, defaultRanges), + } + return b, o.Err() +} + +// FromRows returns Bank testdata with the given number of rows and default +// payload size and range count. +func FromRows(rows int) workload.Generator { + return FromConfig(rows, defaultPayloadBytes, defaultRanges) +} + +// FromConfig returns a one table testdata with three columns: an `id INT +// PRIMARY KEY` representing an account number, a `balance` INT, and a `payload` +// BYTES to pad the size of the rows for various tests. +func FromConfig(rows int, payloadBytes int, ranges int) workload.Generator { + if ranges > rows { + ranges = rows + } + return &bank{ + seed: timeutil.Now().UnixNano(), + rows: rows, + payloadBytes: payloadBytes, + ranges: ranges, + } +} + +// Name implements the Generator interface. +func (b bank) Name() string { + return `bank` +} + +// Tables implements the Generator interface. 
+func (b bank) Tables() []workload.Table { + rng := rand.New(rand.NewSource(b.seed)) + table := workload.Table{ + Name: `bank`, + Schema: `( + id INT PRIMARY KEY, + balance INT, + payload STRING, + FAMILY (id, balance, payload) + )`, + InitialRowCount: b.rows, + InitialRowFn: func(rowIdx int) []string { + const initialPrefix = `initial-` + bytes := hex.EncodeToString(randutil.RandBytes(rng, b.payloadBytes/2)) + // Minus 2 for the single quotes + bytes = bytes[:b.payloadBytes-len(initialPrefix)-2] + return []string{ + strconv.Itoa(rowIdx), // id + `0`, // balance + fmt.Sprintf(`'%s%s'`, initialPrefix, bytes), // payload + } + }, + } + return []workload.Table{table} +} + +// Ops implements the Generator interface. +func (b bank) Ops() []workload.Operation { + // TODO(dan): Move the various queries in the backup/restore tests here. + return nil +} + +// Split creates the configured number of ranges in an already created version +// of the table represented by `g`. +func Split(db *gosql.DB, g workload.Generator) error { + // TODO(dan): Make this general and move it into the workload package. + b, ok := g.(*bank) + if !ok { + return errors.Errorf("don't know how to split: %T", g) + } + bankTable := b.Tables()[0] + + var stmt bytes.Buffer + fmt.Fprintf(&stmt, `ALTER TABLE %s SPLIT AT VALUES `, bankTable.Name) + splitIdx, splits := 0, b.ranges-1 + for ; splitIdx < splits; splitIdx++ { + if splitIdx != 0 { + stmt.WriteRune(',') + } + fmt.Fprintf(&stmt, `(%d)`, (splitIdx+1)*(b.rows/b.ranges)) + } + if splitIdx > 0 { + if _, err := db.Exec(stmt.String()); err != nil { + return err + } + } + return nil +} diff --git a/pkg/testutils/workload/bank/bank_test.go b/pkg/testutils/workload/bank/bank_test.go new file mode 100644 index 000000000000..a7099c9ddecd --- /dev/null +++ b/pkg/testutils/workload/bank/bank_test.go @@ -0,0 +1,71 @@ +// Copyright 2017 The Cockroach Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package bank + +import ( + "context" + "fmt" + "testing" + + "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" +) + +func TestSplit(t *testing.T) { + defer leaktest.AfterTest(t)() + + tests := []struct { + rows int + ranges int + expectedRanges int + }{ + {10, 0, 1}, // we always have at least one range + {10, 1, 1}, + {10, 9, 9}, + {10, 10, 10}, + {10, 100, 10}, // don't make more ranges than rows + } + + ctx := context.Background() + s, db, _ := serverutils.StartServer(t, base.TestServerArgs{}) + defer s.Stopper().Stop(ctx) + + for _, test := range tests { + t.Run(fmt.Sprintf("rows=%d/ranges=%d", test.rows, test.ranges), func(t *testing.T) { + sqlDB := sqlutils.MakeSQLRunner(db) + + bank := FromConfig(test.rows, defaultPayloadBytes, test.ranges) + bankTable := bank.Tables()[0] + sqlDB.Exec(t, `DROP DATABASE IF EXISTS data CASCADE`) + sqlDB.Exec(t, `CREATE DATABASE data`) + sqlDB.Exec(t, `USE data`) + sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE %s %s`, bankTable.Name, bankTable.Schema)) + + if err := Split(sqlDB.DB, bank); err != nil { + t.Fatalf("%+v", err) + } + + var rangeCount int + sqlDB.QueryRow(t, + fmt.Sprintf(`SELECT COUNT(*) FROM [SHOW TESTING_RANGES FROM TABLE %s]`, bankTable.Name), + ).Scan(&rangeCount) + if rangeCount != 
test.expectedRanges { + t.Errorf("got %d ranges expected %d", rangeCount, test.expectedRanges) + } + }) + } +} diff --git a/pkg/testutils/workload/bank/main_test.go b/pkg/testutils/workload/bank/main_test.go new file mode 100644 index 000000000000..ccaafa61ee39 --- /dev/null +++ b/pkg/testutils/workload/bank/main_test.go @@ -0,0 +1,37 @@ +// Copyright 2017 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package bank + +import ( + "os" + "testing" + + "github.com/cockroachdb/cockroach/pkg/security" + "github.com/cockroachdb/cockroach/pkg/security/securitytest" + "github.com/cockroachdb/cockroach/pkg/server" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" + "github.com/cockroachdb/cockroach/pkg/util/randutil" +) + +func TestMain(m *testing.M) { + security.SetAssetLoader(securitytest.EmbeddedAssets) + randutil.SeedForTests() + serverutils.InitTestServerFactory(server.TestServerFactory) + serverutils.InitTestClusterFactory(testcluster.TestClusterFactory) + os.Exit(m.Run()) +} + +//go:generate ../../../util/leaktest/add-leaktest.sh *_test.go diff --git a/pkg/testutils/workload/main_test.go b/pkg/testutils/workload/main_test.go new file mode 100644 index 000000000000..c5bfc4c2c4b1 --- /dev/null +++ b/pkg/testutils/workload/main_test.go @@ -0,0 +1,37 @@ +// Copyright 2017 The Cockroach Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package workload + +import ( + "os" + "testing" + + "github.com/cockroachdb/cockroach/pkg/security" + "github.com/cockroachdb/cockroach/pkg/security/securitytest" + "github.com/cockroachdb/cockroach/pkg/server" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" + "github.com/cockroachdb/cockroach/pkg/util/randutil" +) + +func TestMain(m *testing.M) { + security.SetAssetLoader(securitytest.EmbeddedAssets) + randutil.SeedForTests() + serverutils.InitTestServerFactory(server.TestServerFactory) + serverutils.InitTestClusterFactory(testcluster.TestClusterFactory) + os.Exit(m.Run()) +} + +//go:generate ../../util/leaktest/add-leaktest.sh *_test.go diff --git a/pkg/testutils/workload/opts.go b/pkg/testutils/workload/opts.go new file mode 100644 index 000000000000..5ba15ff96f2f --- /dev/null +++ b/pkg/testutils/workload/opts.go @@ -0,0 +1,50 @@ +// Copyright 2017 The Cockroach Authors. +// +// Licensed as a CockroachDB Enterprise file under the Cockroach Community +// License (the "License"); you may not use this file except in compliance with +// the License. 
// NOTE(review): this file carries the CCL (Cockroach Community License)
// header but lives in pkg/testutils/workload, outside ccl/ — confirm whether
// it should use the Apache header like the sibling files in this package.

// Opts is a helper for parsing structured options from the string to string
// map used to configure workloads. After the various parse methods are
// called, the Err method should be checked for any errors.
//
// Only the first error encountered is retained; later parse failures are
// ignored so call sites can parse all options unconditionally and check Err
// exactly once at the end.
type Opts struct {
	raw map[string]string
	err error
}

// NewOpts makes an Opts from the raw map.
func NewOpts(raw map[string]string) *Opts {
	return &Opts{raw: raw}
}

// Int parses an int from the option with the given name, falling back to the
// given default if it's not present. The value is parsed with base 0, so
// strconv's prefix rules apply (e.g. "0x" selects hex). On a malformed value,
// or a value that overflows int on platforms where int is narrower than 64
// bits, an error is recorded (retrievable via Err) and the truncated value is
// returned.
//
// Fixes the original implementation, which parsed into `i` but checked and
// returned an undeclared variable `x` and therefore did not compile.
func (o *Opts) Int(name string, defaultValue int) int {
	x := int64(defaultValue)
	if opt, ok := o.raw[name]; ok {
		var err error
		x, err = strconv.ParseInt(opt, 0, 64)
		if err != nil && o.err == nil {
			o.err = errors.New("parsing '" + name + "' option: " + opt + ": " + err.Error())
		}
	}
	// Guard against silent truncation when int is narrower than int64.
	if int64(int(x)) != x && o.err == nil {
		o.err = errors.New("parsing int '" + name + "' option overflowed: " + strconv.FormatInt(x, 10))
	}
	return int(x)
}

// Err returns the first error encountered during options parsing, if any.
func (o *Opts) Err() error {
	return o.err
}
// Generator represents one or more sql query loads and associated initial data.
//
// A set of options are used to configure this Generator. Any randomness must be
// deterministic from these options so that table data initialization, query
// work, etc can be distributed by serializing only this options map.
//
// TODO(dan): To support persisted test fixtures, we'll need versioning of
// Generators.
type Generator interface {
	// Name returns a unique and descriptive name for this generator.
	Name() string

	// Tables returns the set of tables for this generator, including schemas
	// and initial data.
	Tables() []Table

	// Ops returns the work functions for this generator. The tables are
	// required to have been created and initialized before running these.
	Ops() []Operation
}

// GeneratorFn is used to register a Generator at init time. A map of string to
// string options are used for configuration, see Generator for details.
type GeneratorFn func(opts map[string]string) (Generator, error)

// Table represents a single table in a Generator. Included is a name, schema,
// and initial data.
type Table struct {
	// Name is the unqualified table name, pre-escaped for use directly in SQL.
	Name string
	// Schema is the SQL formatted schema for this table, with the `CREATE TABLE
	// <name>` prefix omitted.
	Schema string
	// InitialRowCount is the initial number of rows that will be present in the
	// table after setup is completed.
	InitialRowCount int
	// InitialRowFn is a function to deterministically compute the datums in a
	// row of the table's initial data given its index. They are returned as
	// strings and must be pre-escaped for use directly in SQL/CSVs.
	InitialRowFn func(int) []string
}

// Operation represents some SQL query workload performable on a database
// initialized with the requisite tables.
//
// TODO(dan): Finish nailing down the invariants of Operation as more workloads
// are ported to this framework. TPCC in particular should be informative.
type Operation struct {
	// Name is a name for the work performed by this Operation.
	Name string
	// Fn returns a function to be called once per unit of work to be done.
	// Various generator tools use this to track progress.
	Fn func(*gosql.DB) (func(context.Context) error, error)
}

// registered maps generator names to their registration functions. It is only
// mutated by Register, which is intended to be called from package init.
var registered = make(map[string]GeneratorFn)

// Register is a hook for init-time registration of Generator implementations.
// This allows only the necessary generators to be compiled into a given binary.
// It panics on a failed construction or a duplicate name, surfacing
// programmer error at startup.
func Register(fn GeneratorFn) {
	// Instantiate with empty options to learn the generator's name; every
	// generator must therefore be constructible from defaults.
	g, err := fn(map[string]string{})
	if err != nil {
		// Uses the stdlib fmt instead of pkg/errors; the message format
		// ("could not register: <T>: <err>") is unchanged.
		panic(fmt.Errorf("could not register: %T: %v", fn, err))
	}
	name := g.Name()
	if _, ok := registered[name]; ok {
		panic(name + " is already registered")
	}
	registered[name] = fn
}

// Get returns the registered Generator with the given name, if it exists.
func Get(name string) (GeneratorFn, error) {
	g, ok := registered[name]
	if !ok {
		return nil, fmt.Errorf("unknown generator: %s", name)
	}
	return g, nil
}

// Setup creates the given tables and fills them with initial data via batched
// INSERTs.
//
// The size of the loaded data is returned in bytes, suitable for use with
// SetBytes of benchmarks. This size is defined as the sum of lengths of the
// string representations of the sql (e.g. `1` the int is 1 and `'x'` the
// string is three).
//
// A non-positive batchSize falls back to 1000 rows per INSERT.
//
// TODO(dan): Is there something better we could be doing here for the size of
// the loaded data?
func Setup(db *gosql.DB, tables []Table, batchSize int) (int64, error) {
	if batchSize <= 0 {
		batchSize = 1000
	}
	// Reused across batches and tables to avoid re-allocating the statement
	// buffer on every INSERT.
	var insertStmtBuf bytes.Buffer

	var size int64
	for _, table := range tables {
		createStmt := fmt.Sprintf(`CREATE TABLE %s %s`, table.Name, table.Schema)
		if _, err := db.Exec(createStmt); err != nil {
			return 0, err
		}

		for rowIdx := 0; rowIdx < table.InitialRowCount; {
			insertStmtBuf.Reset()
			fmt.Fprintf(&insertStmtBuf, `INSERT INTO %s VALUES `, table.Name)

			// Pack up to batchSize rows into a single INSERT statement.
			batchIdx := 0
			for ; batchIdx < batchSize && rowIdx < table.InitialRowCount; batchIdx++ {
				if batchIdx != 0 {
					insertStmtBuf.WriteString(`,`)
				}
				insertStmtBuf.WriteString(`(`)
				for i, datum := range table.InitialRowFn(rowIdx) {
					size += int64(len(datum))
					if i != 0 {
						insertStmtBuf.WriteString(`,`)
					}
					insertStmtBuf.WriteString(datum)
				}
				insertStmtBuf.WriteString(`)`)
				rowIdx++
			}
			if batchIdx > 0 {
				insertStmt := insertStmtBuf.String()
				if _, err := db.Exec(insertStmt); err != nil {
					return 0, err
				}
			}
		}
	}
	return size, nil
}
See the License for the specific language governing +// permissions and limitations under the License. + +package workload_test + +import ( + "context" + "fmt" + "testing" + + "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" + "github.com/cockroachdb/cockroach/pkg/testutils/workload" + "github.com/cockroachdb/cockroach/pkg/testutils/workload/bank" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" +) + +func TestGet(t *testing.T) { + defer leaktest.AfterTest(t)() + if _, err := workload.Get(`bank`); err != nil { + t.Errorf(`expected success got: %+v`, err) + } + if _, err := workload.Get(`nope`); !testutils.IsError(err, `unknown generator`) { + t.Errorf(`expected "unknown generator" error got: %+v`, err) + } +} + +func TestSetup(t *testing.T) { + defer leaktest.AfterTest(t)() + + tests := []struct { + rows int + batchSize int + }{ + {10, 1}, + {10, 9}, + {10, 10}, + {10, 100}, + } + + ctx := context.Background() + s, db, _ := serverutils.StartServer(t, base.TestServerArgs{}) + defer s.Stopper().Stop(ctx) + + for _, test := range tests { + t.Run(fmt.Sprintf("rows=%d/batch=%d", test.rows, test.batchSize), func(t *testing.T) { + sqlDB := sqlutils.MakeSQLRunner(db) + sqlDB.Exec(t, `DROP DATABASE IF EXISTS test`) + sqlDB.Exec(t, `CREATE DATABASE test`) + sqlDB.Exec(t, `USE test`) + + gen := bank.FromRows(test.rows) + tables := gen.Tables() + if _, err := workload.Setup(sqlDB.DB, tables, test.batchSize); err != nil { + t.Fatalf("%+v", err) + } + + for _, table := range tables { + var c int + sqlDB.QueryRow(t, fmt.Sprintf(`SELECT COUNT(*) FROM %s`, table.Name)).Scan(&c) + if c != table.InitialRowCount { + t.Errorf(`%s: got %d rows expected %d`, table.Name, c, table.InitialRowCount) + } + } + }) + } +}