Skip to content

Commit

Permalink
storageccl: fix and reenable BenchmarkTimeBoundIterate
Browse files Browse the repository at this point in the history
This benchmark creates a `mvcc_data` directory in the current working
dir (i.e. `pkg/ccl/storageccl/engineccl`) and reuses it if exists.
Normally this would not be permitted in CI; but `mvcc_data` is in
`.gitignore` so it doesn't trip the "assert workspace clean" CI step.

The leftover `mvcc_data` can be from an older version, which can cause
mysterious CI failures.

This fix changes to generating the data in a temporary directory and
cleans it up afterwards. The generation only takes a few seconds
(significantly less than what it takes to run all benchmarks).

Release note: None
Epic: none
  • Loading branch information
RaduBerinde committed Feb 8, 2023
1 parent 7ee61fb commit e68c5d6
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 39 deletions.
3 changes: 0 additions & 3 deletions pkg/ccl/storageccl/engineccl/.gitignore

This file was deleted.

78 changes: 42 additions & 36 deletions pkg/ccl/storageccl/engineccl/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,18 @@ import (
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/errors/oserror"
)

// loadTestData writes numKeys keys in numBatches separate batches. Keys are
const numKeys = 100000
const numBatches = 100
const batchTimeSpan = 10
const valueBytes = 512

// generateTestData writes numKeys keys in numBatches separate batches. Keys are
// written in order. Every key in a given batch has the same MVCC timestamp;
// batch timestamps start at batchTimeSpan and increase in intervals of
// batchTimeSpan.
Expand All @@ -40,30 +43,15 @@ import (
// at t0, and one for C that only contains keys at t1. Conversely, writing A, C
// at t0 and B at t1 would create just one SST that contained A,B,C (due to an
// immediate compaction).
//
// The creation of the database is time consuming, so the caller can choose
// whether to use a temporary or permanent location.
func loadTestData(
dir string, numKeys, numBatches, batchTimeSpan, valueBytes int,
) (storage.Engine, error) {
func generateTestData(dir string) error {
ctx := context.Background()

exists := true
if _, err := os.Stat(dir); oserror.IsNotExist(err) {
exists = false
}

eng, err := storage.Open(
ctx,
storage.Filesystem(dir),
cluster.MakeTestingClusterSettings())
if err != nil {
return nil, err
}

if exists {
testutils.ReadAllFiles(filepath.Join(dir, "*"))
return eng, nil
return err
}

log.Infof(context.Background(), "creating test data: %s", dir)
Expand All @@ -88,11 +76,11 @@ func loadTestData(
if i > 0 {
log.Infof(ctx, "committing (%d/~%d)", i/scaled, numBatches)
if err := batch.Commit(false /* sync */); err != nil {
return nil, err
return err
}
batch.Close()
if err := eng.Flush(); err != nil {
return nil, err
return err
}
}
batch = eng.NewBatch()
Expand All @@ -102,17 +90,32 @@ func loadTestData(
value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueBytes))
value.InitChecksum(key)
if err := storage.MVCCPut(ctx, batch, nil, key, timestamp, hlc.ClockTimestamp{}, value, nil); err != nil {
return nil, err
return err
}
}
if err := batch.Commit(false /* sync */); err != nil {
return nil, err
return err
}
batch.Close()
if err := eng.Flush(); err != nil {
return nil, err
return err
}
eng.Close()
return nil
}

func loadTestData(dir string) (storage.Engine, error) {
ctx := context.Background()
eng, err := storage.Open(
ctx,
storage.Filesystem(dir),
cluster.MakeTestingClusterSettings(),
storage.MustExist,
)
if err != nil {
return nil, err
}
testutils.ReadAllFiles(filepath.Join(dir, "*"))
return eng, nil
}

Expand All @@ -121,17 +124,11 @@ func loadTestData(
// of the SSTs contain keys in the range [startTime, endTime].
func runIterate(
b *testing.B,
dir string,
loadFactor float32,
makeIterator func(storage.Engine, hlc.Timestamp, hlc.Timestamp) storage.MVCCIterator,
) {
const numKeys = 100000
const numBatches = 100
const batchTimeSpan = 10
const valueBytes = 512

// Store the database in this directory so we don't have to regenerate it on
// each benchmark run.
eng, err := loadTestData("mvcc_data", numKeys, numBatches, batchTimeSpan, valueBytes)
eng, err := loadTestData(dir)
if err != nil {
b.Fatal(err)
}
Expand Down Expand Up @@ -167,17 +164,26 @@ func runIterate(
}

func BenchmarkTimeBoundIterate(b *testing.B) {
skip.WithIssue(b, 95530, "bump minBinary to 22.2. Skip 22.2 mixed-version tests for future cleanup")
// Generate test data.
dir, err := os.MkdirTemp("", "mvcc_data")
if err != nil {
b.Fatal(err)
}
defer os.RemoveAll(dir)

if err := generateTestData(dir); err != nil {
b.Fatal(err)
}

for _, loadFactor := range []float32{1.0, 0.5, 0.1, 0.05, 0.0} {
b.Run(fmt.Sprintf("LoadFactor=%.2f", loadFactor), func(b *testing.B) {
b.Run("NormalIterator", func(b *testing.B) {
runIterate(b, loadFactor, func(e storage.Engine, _, _ hlc.Timestamp) storage.MVCCIterator {
runIterate(b, dir, loadFactor, func(e storage.Engine, _, _ hlc.Timestamp) storage.MVCCIterator {
return e.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: roachpb.KeyMax})
})
})
b.Run("TimeBoundIterator", func(b *testing.B) {
runIterate(b, loadFactor, func(e storage.Engine, startTime, endTime hlc.Timestamp) storage.MVCCIterator {
runIterate(b, dir, loadFactor, func(e storage.Engine, startTime, endTime hlc.Timestamp) storage.MVCCIterator {
return e.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{
MinTimestampHint: startTime,
MaxTimestampHint: endTime,
Expand Down

0 comments on commit e68c5d6

Please sign in to comment.