Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

storageccl: fix and reenable BenchmarkTimeBoundIterate #96771

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions pkg/ccl/storageccl/engineccl/.gitignore

This file was deleted.

78 changes: 42 additions & 36 deletions pkg/ccl/storageccl/engineccl/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,18 @@ import (
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/errors/oserror"
)

// loadTestData writes numKeys keys in numBatches separate batches. Keys are
const numKeys = 100000
const numBatches = 100
const batchTimeSpan = 10
const valueBytes = 512

// generateTestData writes numKeys keys in numBatches separate batches. Keys are
// written in order. Every key in a given batch has the same MVCC timestamp;
// batch timestamps start at batchTimeSpan and increase in intervals of
// batchTimeSpan.
Expand All @@ -40,30 +43,15 @@ import (
// at t0, and one for C that only contains keys at t1. Conversely, writing A, C
// at t0 and B at t1 would create just one SST that contained A,B,C (due to an
// immediate compaction).
//
// The creation of the database is time consuming, so the caller can choose
// whether to use a temporary or permanent location.
func loadTestData(
dir string, numKeys, numBatches, batchTimeSpan, valueBytes int,
) (storage.Engine, error) {
func generateTestData(dir string) error {
ctx := context.Background()

exists := true
if _, err := os.Stat(dir); oserror.IsNotExist(err) {
exists = false
}

eng, err := storage.Open(
ctx,
storage.Filesystem(dir),
cluster.MakeTestingClusterSettings())
if err != nil {
return nil, err
}

if exists {
testutils.ReadAllFiles(filepath.Join(dir, "*"))
return eng, nil
return err
}

log.Infof(context.Background(), "creating test data: %s", dir)
Expand All @@ -88,11 +76,11 @@ func loadTestData(
if i > 0 {
log.Infof(ctx, "committing (%d/~%d)", i/scaled, numBatches)
if err := batch.Commit(false /* sync */); err != nil {
return nil, err
return err
}
batch.Close()
if err := eng.Flush(); err != nil {
return nil, err
return err
}
}
batch = eng.NewBatch()
Expand All @@ -102,17 +90,32 @@ func loadTestData(
value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueBytes))
value.InitChecksum(key)
if err := storage.MVCCPut(ctx, batch, nil, key, timestamp, hlc.ClockTimestamp{}, value, nil); err != nil {
return nil, err
return err
}
}
if err := batch.Commit(false /* sync */); err != nil {
return nil, err
return err
}
batch.Close()
if err := eng.Flush(); err != nil {
return nil, err
return err
}
eng.Close()
return nil
}

func loadTestData(dir string) (storage.Engine, error) {
ctx := context.Background()
eng, err := storage.Open(
ctx,
storage.Filesystem(dir),
cluster.MakeTestingClusterSettings(),
storage.MustExist,
)
if err != nil {
return nil, err
}
testutils.ReadAllFiles(filepath.Join(dir, "*"))
return eng, nil
}

Expand All @@ -121,17 +124,11 @@ func loadTestData(
// of the SSTs contain keys in the range [startTime, endTime].
func runIterate(
b *testing.B,
dir string,
loadFactor float32,
makeIterator func(storage.Engine, hlc.Timestamp, hlc.Timestamp) storage.MVCCIterator,
) {
const numKeys = 100000
const numBatches = 100
const batchTimeSpan = 10
const valueBytes = 512

// Store the database in this directory so we don't have to regenerate it on
// each benchmark run.
eng, err := loadTestData("mvcc_data", numKeys, numBatches, batchTimeSpan, valueBytes)
eng, err := loadTestData(dir)
if err != nil {
b.Fatal(err)
}
Expand Down Expand Up @@ -167,17 +164,26 @@ func runIterate(
}

func BenchmarkTimeBoundIterate(b *testing.B) {
skip.WithIssue(b, 95530, "bump minBinary to 22.2. Skip 22.2 mixed-version tests for future cleanup")
// Generate test data.
dir, err := os.MkdirTemp("", "mvcc_data")
if err != nil {
b.Fatal(err)
}
defer os.RemoveAll(dir)

if err := generateTestData(dir); err != nil {
b.Fatal(err)
}

for _, loadFactor := range []float32{1.0, 0.5, 0.1, 0.05, 0.0} {
b.Run(fmt.Sprintf("LoadFactor=%.2f", loadFactor), func(b *testing.B) {
b.Run("NormalIterator", func(b *testing.B) {
runIterate(b, loadFactor, func(e storage.Engine, _, _ hlc.Timestamp) storage.MVCCIterator {
runIterate(b, dir, loadFactor, func(e storage.Engine, _, _ hlc.Timestamp) storage.MVCCIterator {
return e.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: roachpb.KeyMax})
})
})
b.Run("TimeBoundIterator", func(b *testing.B) {
runIterate(b, loadFactor, func(e storage.Engine, startTime, endTime hlc.Timestamp) storage.MVCCIterator {
runIterate(b, dir, loadFactor, func(e storage.Engine, startTime, endTime hlc.Timestamp) storage.MVCCIterator {
return e.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{
MinTimestampHint: startTime,
MaxTimestampHint: endTime,
Expand Down