From 204ad978bf11be5d199323d5e68e7d7742c527b7 Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Tue, 14 Feb 2023 10:40:20 -0800 Subject: [PATCH] storage: include version in benchmark fixture directory Some storage-related benchmarks leave fixtures around in `.gitignore`d directories. There is potential for fixtures from old versions to cause failures. We are seeing some CI failures that would be explained by this (we're not sure yet if it's possible). In any case, it is better to include the version in the fixture name to avid this (which can be a problem even locally). The logging around the fixture location is also improved. Informs #97061 Release note: None Epic: none --- pkg/ccl/storageccl/engineccl/.gitignore | 4 ++++ pkg/ccl/storageccl/engineccl/BUILD.bazel | 1 + pkg/ccl/storageccl/engineccl/bench_test.go | 14 ++++++++++++-- .../batcheval/cmd_refresh_range_bench_test.go | 17 ++++++++++++++--- pkg/kv/kvserver/rangefeed/BUILD.bazel | 1 + .../rangefeed/catchup_scan_bench_test.go | 13 ++++++++++--- pkg/storage/bench_data_test.go | 16 +++++++++++++--- pkg/storage/open.go | 8 -------- 8 files changed, 55 insertions(+), 19 deletions(-) diff --git a/pkg/ccl/storageccl/engineccl/.gitignore b/pkg/ccl/storageccl/engineccl/.gitignore index 2b05ecacc733..77e451b2ff7a 100644 --- a/pkg/ccl/storageccl/engineccl/.gitignore +++ b/pkg/ccl/storageccl/engineccl/.gitignore @@ -1,3 +1,7 @@ # Do not add environment-specific entries here (see the top-level .gitignore # for reasoning and alternatives). + +# Old benchmark data. mvcc_data +# New benchmark data. +mvcc_data_* diff --git a/pkg/ccl/storageccl/engineccl/BUILD.bazel b/pkg/ccl/storageccl/engineccl/BUILD.bazel index bbaed697b3f8..831b2dbee1f2 100644 --- a/pkg/ccl/storageccl/engineccl/BUILD.bazel +++ b/pkg/ccl/storageccl/engineccl/BUILD.bazel @@ -43,6 +43,7 @@ go_test( "//pkg/base", "//pkg/ccl/baseccl", "//pkg/ccl/storageccl/engineccl/enginepbccl", + "//pkg/clusterversion", "//pkg/keys", "//pkg/roachpb", "//pkg/settings/cluster", diff --git a/pkg/ccl/storageccl/engineccl/bench_test.go b/pkg/ccl/storageccl/engineccl/bench_test.go index 08cde2494f20..3bbd0b656c15 100644 --- a/pkg/ccl/storageccl/engineccl/bench_test.go +++ b/pkg/ccl/storageccl/engineccl/bench_test.go @@ -16,6 +16,7 @@ import ( "path/filepath" "testing" + "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" @@ -43,10 +44,13 @@ import ( // The creation of the database is time consuming, so the caller can choose // whether to use a temporary or permanent location. func loadTestData( - dir string, numKeys, numBatches, batchTimeSpan, valueBytes int, + dirPrefix string, numKeys, numBatches, batchTimeSpan, valueBytes int, ) (storage.Engine, error) { ctx := context.Background() + verStr := fmt.Sprintf("v%s", clusterversion.TestingBinaryVersion.String()) + dir := fmt.Sprintf("%s_v%s_%d_%d_%d_%d", dirPrefix, verStr, numKeys, numBatches, batchTimeSpan, valueBytes) + exists := true if _, err := os.Stat(dir); oserror.IsNotExist(err) { exists = false @@ -60,12 +64,18 @@ func loadTestData( return nil, err } + absPath, err := filepath.Abs(dir) + if err != nil { + absPath = dir + } + if exists { + log.Infof(context.Background(), "using existing test data: %s", absPath) testutils.ReadAllFiles(filepath.Join(dir, "*")) return eng, nil } - log.Infof(context.Background(), "creating test data: %s", dir) + log.Infof(context.Background(), "creating test data: %s", absPath) // Generate the same data every time. rng := rand.New(rand.NewSource(1449168817)) diff --git a/pkg/kv/kvserver/batcheval/cmd_refresh_range_bench_test.go b/pkg/kv/kvserver/batcheval/cmd_refresh_range_bench_test.go index e77ca18dd056..a3350b564586 100644 --- a/pkg/kv/kvserver/batcheval/cmd_refresh_range_bench_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_refresh_range_bench_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" @@ -210,6 +211,9 @@ func setupMVCCPebble(b testing.TB, dir string, lBaseMaxBytes int64, readOnly boo func setupData( ctx context.Context, b *testing.B, emk engineMaker, opts benchDataOptions, ) (storage.Engine, string) { + // Include the current version in the fixture name, or we may inadvertently + // run against a left-over fixture that is no longer supported. + verStr := fmt.Sprintf("v%s", clusterversion.TestingBinaryVersion.String()) orderStr := "linear" if opts.randomKeyOrder { orderStr = "random" @@ -218,8 +222,9 @@ func setupData( if opts.readOnlyEngine { readOnlyStr = "_readonly" } - loc := fmt.Sprintf("refresh_range_bench_data_%s%s_%d_%d_%d", - orderStr, readOnlyStr, opts.numKeys, opts.valueBytes, opts.lBaseMaxBytes) + loc := fmt.Sprintf("refresh_range_bench_data_%s_%s%s_%d_%d_%d", + verStr, orderStr, readOnlyStr, opts.numKeys, opts.valueBytes, opts.lBaseMaxBytes) + exists := true if _, err := os.Stat(loc); oserror.IsNotExist(err) { exists = false @@ -227,13 +232,19 @@ func setupData( b.Fatal(err) } + absPath, err := filepath.Abs(loc) + if err != nil { + absPath = loc + } + if exists { + log.Infof(ctx, "using existing refresh range benchmark data: %s", absPath) testutils.ReadAllFiles(filepath.Join(loc, "*")) return emk(b, loc, opts.lBaseMaxBytes, opts.readOnlyEngine), loc } eng := emk(b, loc, opts.lBaseMaxBytes, false) - log.Infof(ctx, "creating refresh range benchmark data: %s", loc) + log.Infof(ctx, "creating refresh range benchmark data: %s", absPath) // Generate the same data every time. rng := rand.New(rand.NewSource(1449168817)) diff --git a/pkg/kv/kvserver/rangefeed/BUILD.bazel b/pkg/kv/kvserver/rangefeed/BUILD.bazel index 8c6a9a265804..9c566a82403c 100644 --- a/pkg/kv/kvserver/rangefeed/BUILD.bazel +++ b/pkg/kv/kvserver/rangefeed/BUILD.bazel @@ -55,6 +55,7 @@ go_test( embed = [":rangefeed"], deps = [ "//pkg/base", + "//pkg/clusterversion", "//pkg/keys", "//pkg/roachpb", "//pkg/settings/cluster", diff --git a/pkg/kv/kvserver/rangefeed/catchup_scan_bench_test.go b/pkg/kv/kvserver/rangefeed/catchup_scan_bench_test.go index f22ba5c3bc32..f1be451858b9 100644 --- a/pkg/kv/kvserver/rangefeed/catchup_scan_bench_test.go +++ b/pkg/kv/kvserver/rangefeed/catchup_scan_bench_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" @@ -229,6 +230,7 @@ func setupMVCCPebble(b testing.TB, dir string, lBaseMaxBytes int64, readOnly boo func setupData( ctx context.Context, b *testing.B, emk engineMaker, opts benchDataOptions, ) (storage.Engine, string) { + verStr := fmt.Sprintf("v%s", clusterversion.TestingBinaryVersion.String()) orderStr := "linear" if opts.randomKeyOrder { orderStr = "random" @@ -237,8 +239,8 @@ func setupData( if opts.readOnlyEngine { readOnlyStr = "_readonly" } - loc := fmt.Sprintf("rangefeed_bench_data_%s%s_%d_%d_%d_%d", - orderStr, readOnlyStr, opts.numKeys, opts.valueBytes, opts.lBaseMaxBytes, opts.numRangeKeys) + loc := fmt.Sprintf("rangefeed_bench_data_%s_%s%s_%d_%d_%d_%d", + verStr, orderStr, readOnlyStr, opts.numKeys, opts.valueBytes, opts.lBaseMaxBytes, opts.numRangeKeys) exists := true if _, err := os.Stat(loc); oserror.IsNotExist(err) { exists = false @@ -246,13 +248,18 @@ func setupData( b.Fatal(err) } + absPath, err := filepath.Abs(loc) + if err != nil { + absPath = loc + } if exists { + log.Infof(ctx, "using existing refresh range benchmark data: %s", absPath) testutils.ReadAllFiles(filepath.Join(loc, "*")) return emk(b, loc, opts.lBaseMaxBytes, opts.readOnlyEngine), loc } eng := emk(b, loc, opts.lBaseMaxBytes, false) - log.Infof(ctx, "creating rangefeed benchmark data: %s", loc) + log.Infof(ctx, "creating rangefeed benchmark data: %s", absPath) // Generate the same data every time. rng := rand.New(rand.NewSource(1449168817)) diff --git a/pkg/storage/bench_data_test.go b/pkg/storage/bench_data_test.go index 57bb9c586c94..5a4f8a7aa412 100644 --- a/pkg/storage/bench_data_test.go +++ b/pkg/storage/bench_data_test.go @@ -26,6 +26,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/errors/oserror" + "github.com/cockroachdb/pebble" "github.com/cockroachdb/pebble/vfs" "github.com/stretchr/testify/require" ) @@ -38,7 +39,7 @@ type initialState interface { Base() initialState // Key returns a unique sequence of strings that uniquely identifies the - // represented initial condtions. Key is used as the cache key for reusing + // represented initial conditions. Key is used as the cache key for reusing // databases computed by previous runs, so all configuration must be fully // represented in Key's return value. Key() []string @@ -58,6 +59,14 @@ type engineWithLocation struct { Location } +// TODO(jackson): Tie this to the mapping in SetMinVersion. +var latestReleaseFormatMajorVersion = pebble.FormatPrePebblev1Marked // v22.2 + +var latestReleaseFormatMajorVersionOpt ConfigOption = func(cfg *engineConfig) error { + cfg.PebbleConfig.Opts.FormatMajorVersion = latestReleaseFormatMajorVersion + return nil +} + // getInitialStateEngine constructs an Engine with an initial database // state necessary for a benchmark. The initial states are cached on the // filesystem to avoid expensive reconstruction when possible. The return value @@ -89,7 +98,7 @@ func getInitialStateEngine( opts := append([]ConfigOption{ MustExist, - LatestReleaseFormatMajorVersion, + latestReleaseFormatMajorVersionOpt, }, initial.ConfigOptions()...) if !inMemory { @@ -149,7 +158,7 @@ func buildInitialState( e.Close() buildFS = e.Location.fs } else { - opts := append([]ConfigOption{LatestReleaseFormatMajorVersion}, initial.ConfigOptions()...) + opts := append([]ConfigOption{latestReleaseFormatMajorVersionOpt}, initial.ConfigOptions()...) // Regardless of whether the initial conditions specify an in-memory engine // or not, we build the conditions using an in-memory engine for @@ -240,6 +249,7 @@ var _ initialState = mvccBenchData{} func (d mvccBenchData) Key() []string { key := []string{ "mvcc", + fmt.Sprintf("fmtver_%d", latestReleaseFormatMajorVersion), fmt.Sprintf("numKeys_%d", d.numKeys), fmt.Sprintf("numVersions_%d", d.numVersions), fmt.Sprintf("valueBytes_%d", d.valueBytes), diff --git a/pkg/storage/open.go b/pkg/storage/open.go index 92285225bfa5..871daf61fb1e 100644 --- a/pkg/storage/open.go +++ b/pkg/storage/open.go @@ -167,14 +167,6 @@ func Hook(hookFunc func(*base.StorageConfig) error) ConfigOption { } } -// LatestReleaseFormatMajorVersion opens the database already upgraded to the -// latest release's format major version. -var LatestReleaseFormatMajorVersion ConfigOption = func(cfg *engineConfig) error { - // TODO(jackson): Tie the below to the mapping in SetMinVersion. - cfg.PebbleConfig.Opts.FormatMajorVersion = pebble.FormatPrePebblev1Marked // v22.2 - return nil -} - // If enables the given option if enable is true. func If(enable bool, opt ConfigOption) ConfigOption { if enable {