From 935708231ac7ea29794028c17d43f21b2c00d60d Mon Sep 17 00:00:00 2001 From: Aaditya Sondhi <20070511+aadityasondhi@users.noreply.github.com> Date: Tue, 26 Mar 2024 14:43:42 -0400 Subject: [PATCH] storage,ingest: enable ingestion optimization using flushableIngest In https://github.com/cockroachdb/pebble/pull/3398, we introduced an optimization in pebble where we could use flushableIngests with excises for ingesting SSTs with RangeDels and RangeKeyDels. This patch turns this optimization on using `sstContainsExciseTombstone`. Informs https://github.com/cockroachdb/pebble/issues/3335. Release note: None --- pkg/kv/kvserver/replica_raftstorage.go | 30 ++++++++++++++--------- pkg/kv/kvserver/store_snapshot.go | 33 +++++++++++++++----------- pkg/storage/engine.go | 19 +++++++++++---- pkg/storage/pebble.go | 5 ++-- 4 files changed, 55 insertions(+), 32 deletions(-) diff --git a/pkg/kv/kvserver/replica_raftstorage.go b/pkg/kv/kvserver/replica_raftstorage.go index a48f06fd7df6..c4a43b07feca 100644 --- a/pkg/kv/kvserver/replica_raftstorage.go +++ b/pkg/kv/kvserver/replica_raftstorage.go @@ -380,14 +380,15 @@ type IncomingSnapshot struct { // Size of the key-value pairs. DataSize int64 // Size of the ssts containing these key-value pairs. - SSTSize int64 - SharedSize int64 - placeholder *ReplicaPlaceholder - raftAppliedIndex kvpb.RaftIndex // logging only - msgAppRespCh chan raftpb.Message // receives MsgAppResp if/when snap is applied - sharedSSTs []pebble.SharedSSTMeta - externalSSTs []pebble.ExternalFile - doExcise bool + SSTSize int64 + SharedSize int64 + placeholder *ReplicaPlaceholder + raftAppliedIndex kvpb.RaftIndex // logging only + msgAppRespCh chan raftpb.Message // receives MsgAppResp if/when snap is applied + sharedSSTs []pebble.SharedSSTMeta + externalSSTs []pebble.ExternalFile + doExcise bool + includesRangeDelForLastSpan bool // clearedSpans represents the key spans in the existing store that will be // cleared by doing the Ingest*. This is tracked so that we can convert the // ssts into a WriteBatch if the total size of the ssts is small. @@ -673,9 +674,16 @@ func (r *Replica) applySnapshot( // https://github.com/cockroachdb/cockroach/issues/93251 if inSnap.doExcise { exciseSpan := desc.KeySpan().AsRawSpanWithNoLocals() - if ingestStats, err = - r.store.TODOEngine().IngestAndExciseFiles(ctx, inSnap.SSTStorageScratch.SSTs(), inSnap.sharedSSTs, inSnap.externalSSTs, exciseSpan); err != nil { - return errors.Wrapf(err, "while ingesting %s and excising %s-%s", inSnap.SSTStorageScratch.SSTs(), exciseSpan.Key, exciseSpan.EndKey) + if ingestStats, err = r.store.TODOEngine().IngestAndExciseFiles( + ctx, + inSnap.SSTStorageScratch.SSTs(), + inSnap.sharedSSTs, + inSnap.externalSSTs, + exciseSpan, + inSnap.includesRangeDelForLastSpan, + ); err != nil { + return errors.Wrapf(err, "while ingesting %s and excising %s-%s", + inSnap.SSTStorageScratch.SSTs(), exciseSpan.Key, exciseSpan.EndKey) } } else { if inSnap.SSTSize > snapshotIngestAsWriteThreshold.Get(&r.ClusterSettings().SV) { diff --git a/pkg/kv/kvserver/store_snapshot.go b/pkg/kv/kvserver/store_snapshot.go index e51310780774..520e2486981c 100644 --- a/pkg/kv/kvserver/store_snapshot.go +++ b/pkg/kv/kvserver/store_snapshot.go @@ -513,7 +513,11 @@ func (kvSS *kvBatchSnapshotStrategy) Receive( return noSnap, errors.AssertionFailedf("last span in multiSSTWriter did not equal the user key span: %s", keyRanges[len(keyRanges)-1].String()) } } - msstw, err := newMultiSSTWriter(ctx, kvSS.st, kvSS.scratch, keyRanges, kvSS.sstChunkSize, doExcise) + + // TODO(aaditya): Remove once we support flushableIngests for shared and + // external files in the engine. + skipRangeDelForLastSpan := doExcise && (header.SharedReplicate || header.ExternalReplicate) + msstw, err := newMultiSSTWriter(ctx, kvSS.st, kvSS.scratch, keyRanges, kvSS.sstChunkSize, skipRangeDelForLastSpan) if err != nil { return noSnap, err } @@ -691,19 +695,20 @@ func (kvSS *kvBatchSnapshotStrategy) Receive( } inSnap := IncomingSnapshot{ - SnapUUID: snapUUID, - SSTStorageScratch: kvSS.scratch, - FromReplica: header.RaftMessageRequest.FromReplica, - Desc: header.State.Desc, - DataSize: dataSize, - SSTSize: sstSize, - SharedSize: sharedSize, - raftAppliedIndex: header.State.RaftAppliedIndex, - msgAppRespCh: make(chan raftpb.Message, 1), - sharedSSTs: sharedSSTs, - externalSSTs: externalSSTs, - doExcise: doExcise, - clearedSpans: keyRanges, + SnapUUID: snapUUID, + SSTStorageScratch: kvSS.scratch, + FromReplica: header.RaftMessageRequest.FromReplica, + Desc: header.State.Desc, + DataSize: dataSize, + SSTSize: sstSize, + SharedSize: sharedSize, + raftAppliedIndex: header.State.RaftAppliedIndex, + msgAppRespCh: make(chan raftpb.Message, 1), + sharedSSTs: sharedSSTs, + externalSSTs: externalSSTs, + doExcise: doExcise, + includesRangeDelForLastSpan: !skipRangeDelForLastSpan, + clearedSpans: keyRanges, } timingTag.stop("totalTime") diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index f55a03bde6eb..dec7cde59537 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -1033,11 +1033,22 @@ type Engine interface { // additionally returns ingestion stats. IngestLocalFilesWithStats( ctx context.Context, paths []string) (pebble.IngestOperationStats, error) - // IngestAndExciseFiles is a variant of IngestLocalFilesWithStats - // that excises an ExciseSpan, and ingests either local or shared sstables or - // both. + // IngestAndExciseFiles is a variant of IngestLocalFilesWithStats that excises + // an ExciseSpan, and ingests either local or shared sstables or both. It also + // takes the flag sstsContainExciseTombstone to signal that the exciseSpan + // contains RANGEDELs and RANGEKEYDELs. + // + // NB: It is the caller's responsibility to ensure if + // sstsContainExciseTombstone is set to true, the ingestion sstables must + // contain a tombstone for the exciseSpan. IngestAndExciseFiles( - ctx context.Context, paths []string, shared []pebble.SharedSSTMeta, external []pebble.ExternalFile, exciseSpan roachpb.Span) (pebble.IngestOperationStats, error) + ctx context.Context, + paths []string, + shared []pebble.SharedSSTMeta, + external []pebble.ExternalFile, + exciseSpan roachpb.Span, + sstsContainExciseTombstone bool, + ) (pebble.IngestOperationStats, error) // IngestExternalFiles is a variant of IngestLocalFiles that takes external // files. These files can be referred to by multiple stores, but are not // modified or deleted by the Engine doing the ingestion. diff --git a/pkg/storage/pebble.go b/pkg/storage/pebble.go index 954f1d058205..ff04b01c3129 100644 --- a/pkg/storage/pebble.go +++ b/pkg/storage/pebble.go @@ -2182,14 +2182,13 @@ func (p *Pebble) IngestAndExciseFiles( shared []pebble.SharedSSTMeta, external []pebble.ExternalFile, exciseSpan roachpb.Span, + sstsContainExciseTombstone bool, ) (pebble.IngestOperationStats, error) { rawSpan := pebble.KeyRange{ Start: EngineKey{Key: exciseSpan.Key}.Encode(), End: EngineKey{Key: exciseSpan.EndKey}.Encode(), } - // TODO(aaditya): Enable sstsContainExciseTombstone once bugs introduced in - // https://github.com/cockroachdb/pebble/pull/3398 are sorted out. - return p.db.IngestAndExcise(paths, shared, external, rawSpan, false /* sstsContainExciseTombstone */) + return p.db.IngestAndExcise(paths, shared, external, rawSpan, sstsContainExciseTombstone) } // IngestExternalFiles implements the Engine interface.