Skip to content

Commit

Permalink
workload: move sampledataccl abstraction and add query workloads
Browse files Browse the repository at this point in the history
The sampledataccl package was started to centralize logic for testdata
generation in BACKUP/RESTORE, as well as provide comparable benchmarks
between operations running at the sql and kv layers. It has proven
useful and is ready to graduate out of ccl.

Before sampledataccl, many ccl/... tests rolled their own test data in
one of various forms: sql INSERTS, csv, []string, []engine.MVCCKeyValue,
ssts, RocksDB batch reprs. Insidiously, the kv testdata generation was
different from test to test and recreated just enough of our sql key
encoding to get that test to pass. The sampledataccl.Bank abstraction
was built to provide exactly the same schema in all of these forms,
converting between them using actual production code, so nothing rotted.

This had the wonderful side benefit of making the throughput measurement
of benchmarks at the sql and kv layers comparable, which helped
considerably during BACKUP/RESTORE performance tuning work.

This problem is not exclusive to ccl; many places in the codebase do
this recreation of just enough of our key encoding to get a particular
test to pass, which is often subtly incorrect and requires boilerplate
that detracts from what the test is doing.

This commit moves the abstraction out of ccl so it can start to be used
more widely. This opportunity is also taken to add support for more than
one table as well as the ability to run query workloads (hence the new
workload package name).

Upcoming commits will add tooling to run these workloads against
clusters (think of the loadgen/kv tool), with significantly less effort
than is currently required to add a new loadgen.

We'll also be able to have a single tool for making large test fixtures
and to keep this tool tested so it doesn't rot. This has traditionally
been done ad hoc and was a huge obstruction in BACKUP/RESTORE production
testing.

Still TODO: teasing the final ccl deps out of the remaining
sampledataccl features.

Release note: None
  • Loading branch information
danhhz committed Dec 29, 2017
1 parent 283bcdf commit c1a574c
Show file tree
Hide file tree
Showing 13 changed files with 622 additions and 313 deletions.
15 changes: 12 additions & 3 deletions pkg/ccl/sqlccl/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/testutils/jobutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/testutils/workload"
"github.com/cockroachdb/cockroach/pkg/testutils/workload/bank"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
Expand Down Expand Up @@ -92,12 +94,19 @@ func backupRestoreTestSetupWithParams(
if numAccounts == 0 {
splits = 0
}
bankData := sampledataccl.Bank(numAccounts, payloadSize, splits)
bankData := bank.FromConfig(numAccounts, payloadSize, splits)

sqlDB = sqlutils.MakeSQLRunner(tc.Conns[0])
if err := sampledataccl.Setup(sqlDB.DB, bankData); err != nil {
sqlDB.Exec(t, `CREATE DATABASE data`)
sqlDB.Exec(t, `USE data`)
const insertBatchSize = 1000
if _, err := workload.Setup(sqlDB.DB, bankData.Tables(), insertBatchSize); err != nil {
t.Fatalf("%+v", err)
}
if err := bank.Split(sqlDB.DB, bankData); err != nil {
// This occasionally flakes, so ignore errors.
t.Logf("failed to split: %+v", err)
}

if err := tc.WaitForFullReplication(); err != nil {
t.Fatal(err)
Expand Down Expand Up @@ -1788,7 +1797,7 @@ func TestAsOfSystemTimeOnRestoredData(t *testing.T) {
sqlDB.Exec(t, `DROP TABLE data.bank`)

const numAccounts = 10
bankData := sampledataccl.BankRows(numAccounts)
bankData := bank.FromRows(numAccounts).Tables()[0]
if _, err := sampledataccl.ToBackup(t, bankData, filepath.Join(dir, "foo")); err != nil {
t.Fatalf("%+v", err)
}
Expand Down
20 changes: 9 additions & 11 deletions pkg/ccl/sqlccl/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,17 @@ import (

"github.com/cockroachdb/cockroach/pkg/ccl/sqlccl"
"github.com/cockroachdb/cockroach/pkg/ccl/utilccl/sampledataccl"
"github.com/cockroachdb/cockroach/pkg/testutils/workload/bank"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

func bankBuf(numAccounts int) *bytes.Buffer {
bankData := sampledataccl.BankRows(numAccounts)
bankData := bank.FromRows(numAccounts).Tables()[0]
var buf bytes.Buffer
fmt.Fprintf(&buf, "CREATE TABLE %s %s;\n", bankData.Name(), bankData.Schema())
for {
row, ok := bankData.NextRow()
if !ok {
break
}
fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name(), strings.Join(row, `,`))
fmt.Fprintf(&buf, "CREATE TABLE %s %s;\n", bankData.Name, bankData.Schema)
for rowIdx := 0; rowIdx < bankData.InitialRowCount; rowIdx++ {
row := bankData.InitialRowFn(rowIdx)
fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name, strings.Join(row, `,`))
}
return &buf
}
Expand All @@ -44,7 +42,7 @@ func BenchmarkClusterBackup(b *testing.B) {
defer cleanupFn()
sqlDB.Exec(b, `DROP TABLE data.bank`)

bankData := sampledataccl.BankRows(b.N)
bankData := bank.FromRows(b.N).Tables()[0]
loadDir := filepath.Join(dir, "load")
if _, err := sampledataccl.ToBackup(b, bankData, loadDir); err != nil {
b.Fatalf("%+v", err)
Expand Down Expand Up @@ -77,7 +75,7 @@ func BenchmarkClusterRestore(b *testing.B) {
defer cleanup()
sqlDB.Exec(b, `DROP TABLE data.bank`)

bankData := sampledataccl.BankRows(b.N)
bankData := bank.FromRows(b.N).Tables()[0]
backup, err := sampledataccl.ToBackup(b, bankData, filepath.Join(dir, "foo"))
if err != nil {
b.Fatalf("%+v", err)
Expand Down Expand Up @@ -146,7 +144,7 @@ func BenchmarkClusterEmptyIncrementalBackup(b *testing.B) {
restoreDir := filepath.Join(localFoo, "restore")
fullDir := filepath.Join(localFoo, "full")

bankData := sampledataccl.BankRows(numStatements)
bankData := bank.FromRows(numStatements).Tables()[0]
_, err := sampledataccl.ToBackup(b, bankData, restoreDir)
if err != nil {
b.Fatalf("%+v", err)
Expand Down
20 changes: 7 additions & 13 deletions pkg/ccl/sqlccl/load_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ import (
"testing"

"github.com/cockroachdb/cockroach/pkg/ccl/sqlccl"
"github.com/cockroachdb/cockroach/pkg/ccl/utilccl/sampledataccl"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/workload/bank"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
)
Expand Down Expand Up @@ -47,21 +47,15 @@ func TestImportOutOfOrder(t *testing.T) {
ctx, _, sqlDB, dir, cleanupFn := backupRestoreTestSetup(t, singleNode, 0, initNone)
defer cleanupFn()

bankData := sampledataccl.Bank(2, 0, 0)
row1, ok := bankData.NextRow()
if !ok {
t.Fatalf("expected 2 rows")
}
row2, ok := bankData.NextRow()
if !ok {
t.Fatalf("expected 2 rows")
}
bankData := bank.FromRows(2).Tables()[0]
row1 := bankData.InitialRowFn(0)
row2 := bankData.InitialRowFn(1)

var buf bytes.Buffer
fmt.Fprintf(&buf, "CREATE TABLE %s %s;\n", bankData.Name(), bankData.Schema())
fmt.Fprintf(&buf, "CREATE TABLE %s %s;\n", bankData.Name, bankData.Schema)
// Intentionally write the rows out of order.
fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name(), strings.Join(row2, `,`))
fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name(), strings.Join(row1, `,`))
fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name, strings.Join(row2, `,`))
fmt.Fprintf(&buf, "INSERT INTO %s VALUES (%s);\n", bankData.Name, strings.Join(row1, `,`))

ts := hlc.Timestamp{WallTime: hlc.UnixNano()}
_, err := sqlccl.Load(ctx, sqlDB.DB, &buf, "data", localFoo, ts, 0, dir)
Expand Down
7 changes: 4 additions & 3 deletions pkg/ccl/storageccl/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/storage/engine"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/testutils/workload/bank"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
)

Expand All @@ -34,7 +35,7 @@ func BenchmarkAddSSTable(b *testing.B) {

for _, numEntries := range []int{100, 1000, 10000, 300000} {
b.Run(fmt.Sprintf("numEntries=%d", numEntries), func(b *testing.B) {
bankData := sampledataccl.BankRows(numEntries)
bankData := bank.FromRows(numEntries).Tables()[0]
backupDir := filepath.Join(tempDir, strconv.Itoa(numEntries))
backup, err := sampledataccl.ToBackup(b, bankData, backupDir)
if err != nil {
Expand Down Expand Up @@ -92,7 +93,7 @@ func BenchmarkWriteBatch(b *testing.B) {

for _, numEntries := range []int{100, 1000, 10000} {
b.Run(fmt.Sprintf("numEntries=%d", numEntries), func(b *testing.B) {
bankData := sampledataccl.BankRows(numEntries)
bankData := bank.FromRows(numEntries).Tables()[0]
backupDir := filepath.Join(tempDir, strconv.Itoa(numEntries))
backup, err := sampledataccl.ToBackup(b, bankData, backupDir)
if err != nil {
Expand Down Expand Up @@ -143,7 +144,7 @@ func BenchmarkImport(b *testing.B) {

for _, numEntries := range []int{1, 100, 10000, 300000} {
b.Run(fmt.Sprintf("numEntries=%d", numEntries), func(b *testing.B) {
bankData := sampledataccl.BankRows(numEntries)
bankData := bank.FromRows(numEntries).Tables()[0]
subdir := strconv.Itoa(numEntries)
backupDir := filepath.Join(tempDir, subdir)
backup, err := sampledataccl.ToBackup(b, bankData, backupDir)
Expand Down
Loading

0 comments on commit c1a574c

Please sign in to comment.