From d411861ce0f026c2aa730e4eabbb9cab197dc4e7 Mon Sep 17 00:00:00 2001 From: Michael Butler Date: Fri, 22 Jul 2022 14:00:55 -0400 Subject: [PATCH] bulk: replace old SSTIterators with PebbleSSTIterator in bulk stack This is purely a refactor. The only prod refactor is in the SSTBatcher, where it calls ComputeStatsForRange. This should not have a significant perf hit as the iterator only surfaces point keys and the bulk of sst_batcher compute time is spent in doing ingestion. Currently the pebbleIterator is slightly slower than the old iterator, but optimizations are coming soon (#83051). Release note: none --- pkg/ccl/backupccl/BUILD.bazel | 1 + pkg/ccl/backupccl/restore_data_processor_test.go | 9 +++++++-- pkg/kv/bulk/sst_batcher.go | 12 +++++++++++- pkg/kv/bulk/sst_batcher_test.go | 7 ++++++- pkg/kv/kvserver/batcheval/cmd_export_test.go | 15 ++++++++++++--- 5 files changed, 37 insertions(+), 7 deletions(-) diff --git a/pkg/ccl/backupccl/BUILD.bazel b/pkg/ccl/backupccl/BUILD.bazel index 0573ae6408a0..c2a5683277b2 100644 --- a/pkg/ccl/backupccl/BUILD.bazel +++ b/pkg/ccl/backupccl/BUILD.bazel @@ -273,6 +273,7 @@ go_test( "@com_github_cockroachdb_errors//:errors", "@com_github_cockroachdb_errors//oserror", "@com_github_cockroachdb_logtags//:logtags", + "@com_github_cockroachdb_pebble//sstable", "@com_github_cockroachdb_pebble//vfs", "@com_github_gogo_protobuf//proto", "@com_github_gogo_protobuf//types", diff --git a/pkg/ccl/backupccl/restore_data_processor_test.go b/pkg/ccl/backupccl/restore_data_processor_test.go index 60ec4771a462..ee75a7ef603d 100644 --- a/pkg/ccl/backupccl/restore_data_processor_test.go +++ b/pkg/ccl/backupccl/restore_data_processor_test.go @@ -46,6 +46,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/limit" "github.com/cockroachdb/cockroach/pkg/util/mon" "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/cockroachdb/pebble/sstable" "github.com/cockroachdb/pebble/vfs" "github.com/stretchr/testify/require" ) @@ -66,8 +67,12 
@@ func slurpSSTablesLatestKey( if err != nil { t.Fatal(err) } - - sst, err := storage.NewSSTIterator(file) + iterOpts := storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsOnly, + LowerBound: keys.LocalMax, + UpperBound: keys.MaxKey, + } + sst, err := storage.NewPebbleSSTIterator([]sstable.ReadableFile{file}, iterOpts) if err != nil { t.Fatal(err) } diff --git a/pkg/kv/bulk/sst_batcher.go b/pkg/kv/bulk/sst_batcher.go index 7933b657dd5a..5bd31c8c78c5 100644 --- a/pkg/kv/bulk/sst_batcher.go +++ b/pkg/kv/bulk/sst_batcher.go @@ -92,6 +92,8 @@ func MakeAndRegisterConcurrencyLimiter(sv *settings.Values) limit.ConcurrentRequ // bytes, etc. If configured with a non-nil, populated range cache, it will use // it to attempt to flush SSTs before they cross range boundaries to minimize // expensive on-split retries. +// +// Note: the SSTBatcher currently cannot bulk add range keys. type SSTBatcher struct { name string db *kv.DB @@ -644,7 +646,15 @@ func (b *SSTBatcher) addSSTable( updatesLastRange bool, ) error { sendStart := timeutil.Now() - iter, err := storage.NewMemSSTIterator(sstBytes, true) + + // Currently, the SSTBatcher cannot ingest range keys, so it is safe to + // ComputeStatsForRange with an iterator that only surfaces point keys. 
+ iterOpts := storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsOnly, + LowerBound: start, + UpperBound: end, + } + iter, err := storage.NewPebbleMemSSTIterator(sstBytes, true, iterOpts) if err != nil { return err } diff --git a/pkg/kv/bulk/sst_batcher_test.go b/pkg/kv/bulk/sst_batcher_test.go index c6b73bd289de..f26d9dcd81d1 100644 --- a/pkg/kv/bulk/sst_batcher_test.go +++ b/pkg/kv/bulk/sst_batcher_test.go @@ -72,7 +72,12 @@ func TestDuplicateHandling(t *testing.T) { require.NoError(t, err.GoError()) keyCount := 0 for _, file := range resp.(*roachpb.ExportResponse).Files { - it, err := storage.NewMemSSTIterator(file.SST, false /* verify */) + iterOpts := storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsOnly, + LowerBound: keys.LocalMax, + UpperBound: keys.MaxKey, + } + it, err := storage.NewPebbleMemSSTIterator(file.SST, false /* verify */, iterOpts) require.NoError(t, err) defer it.Close() for it.SeekGE(storage.NilKey); ; it.Next() { diff --git a/pkg/kv/kvserver/batcheval/cmd_export_test.go b/pkg/kv/kvserver/batcheval/cmd_export_test.go index e4f498564b2c..32364cfdbfa6 100644 --- a/pkg/kv/kvserver/batcheval/cmd_export_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_export_test.go @@ -81,8 +81,12 @@ func TestExportCmd(t *testing.T) { var kvs []storage.MVCCKeyValue for _, file := range res.(*roachpb.ExportResponse).Files { paths = append(paths, file.Path) - - sst, err := storage.NewMemSSTIterator(file.SST, false) + iterOpts := storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsOnly, + LowerBound: keys.LocalMax, + UpperBound: keys.MaxKey, + } + sst, err := storage.NewPebbleMemSSTIterator(file.SST, true, iterOpts) if err != nil { t.Fatalf("%+v", err) } @@ -505,7 +509,12 @@ func loadSST(t *testing.T, data []byte, start, end roachpb.Key) []storage.MVCCKe return nil } - sst, err := storage.NewMemSSTIterator(data, false) + iterOpts := storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsOnly, + LowerBound: start, + UpperBound: end, + } + sst, err 
:= storage.NewPebbleMemSSTIterator(data, true, iterOpts) if err != nil { t.Fatal(err) }