Skip to content

Commit

Permalink
scan_internal: return err if skip-shared breaks snapshot consistency
Browse files Browse the repository at this point in the history
Previously, we'd unintentionally expose newer keys in shared sstables
through a snapshot if a compaction pushed keys newer than the snapshot
down into a shared level. We can identify this case and return
`ErrInvalidSkipSharedIteration`, forcing the caller to resort to
non-skip-shared ways to replicate keys.
  • Loading branch information
itsbilal committed Jun 27, 2023
1 parent 09f6dc1 commit 9642416
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 7 deletions.
10 changes: 6 additions & 4 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -1162,10 +1162,12 @@ func finishInitializingIter(ctx context.Context, buf *iterAlloc) *Iterator {
// their metadatas truncated to [lower, upper) and passed into visitSharedFile.
// ErrInvalidSkipSharedIteration is returned if visitSharedFile is not nil and an
// sstable in L5 or L6 is found that is not in shared storage according to
// provider.IsShared. Examples of when this could happen could be if Pebble
// started writing sstables before a creator ID was set (as creator IDs are
// necessary to enable shared storage) resulting in some lower level SSTs being
// on non-shared storage. Skip-shared iteration is invalid in those cases.
// provider.IsShared, or an sstable in those levels contains a newer key than the
// snapshot sequence number (only applicable for snapshot.ScanInternal). Examples
// of when this could happen could be if Pebble started writing sstables before a
// creator ID was set (as creator IDs are necessary to enable shared storage)
// resulting in some lower level SSTs being on non-shared storage. Skip-shared
// iteration is invalid in those cases.
func (d *DB) ScanInternal(
ctx context.Context,
lower, upper []byte,
Expand Down
11 changes: 8 additions & 3 deletions scan_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ const (
// ErrInvalidSkipSharedIteration is returned by ScanInternal if it was called
// with a shared file visitor function, and a file in a shareable level (i.e.
// level >= sharedLevelsStart) was found to not be in shared storage according
// to objstorage.Provider.
var ErrInvalidSkipSharedIteration = errors.New("pebble: cannot use skip-shared iteration due to non-shared files in lower levels")
// to objstorage.Provider, or not shareable for another reason such as for
// containing keys newer than the snapshot sequence number.
var ErrInvalidSkipSharedIteration = errors.New("pebble: cannot use skip-shared iteration due to non-shareable files in lower levels")

// SharedSSTMeta represents an sstable on shared storage that can be ingested
// by another pebble instance. This struct must contain all fields that are
Expand Down Expand Up @@ -889,6 +890,7 @@ func scanInternalImpl(
cmp := iter.comparer.Compare
db := iter.readState.db
provider := db.objProvider
seqNum := iter.seqNum
if visitSharedFile != nil {
if provider == nil {
panic("expected non-nil Provider in skip-shared iteration mode")
Expand All @@ -903,7 +905,10 @@ func scanInternalImpl(
return err
}
if !objMeta.IsShared() {
return errors.Wrapf(ErrInvalidSkipSharedIteration, "when processing file %s", objMeta.DiskFileNum)
return errors.Wrapf(ErrInvalidSkipSharedIteration, "file %s is not shared", objMeta.DiskFileNum)
}
if f.LargestSeqNum > seqNum {
return errors.Wrapf(ErrInvalidSkipSharedIteration, "file %s contains keys newer than snapshot", objMeta.DiskFileNum)
}
var sst *SharedSSTMeta
var skip bool
Expand Down
18 changes: 18 additions & 0 deletions testdata/scan_internal
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,24 @@ scan-internal snapshot=foo
a-c:{(#10,RANGEKEYSET,@5,boop)}
c-e:{(#11,RANGEKEYSET,@5,beep)}

# Force keys newer than the snapshot into a lower level, then try skip-shared
# iteration through it. This should return an error as it would expose keys
# newer than the snapshot in the shared sstable.

compact a-z
----
6:
000008:[a#10,RANGEKEYSET-e#13,SET]

lsm
----
6:
000008:[a#10,RANGEKEYSET-e#13,SET]

scan-internal lower=a upper=z skip-shared snapshot=foo
----
file 000008 contains keys newer than snapshot: pebble: cannot use skip-shared iteration due to non-shareable files in lower levels

# Range keys and range dels are truncated to [lower,upper).

scan-internal lower=bb upper=dd
Expand Down

0 comments on commit 9642416

Please sign in to comment.