Skip to content

Commit

Permalink
db: add format major version for flushable ingests
Browse files Browse the repository at this point in the history
Introduce a new format major version that gates uses of flushable ingests and
the corresponding backwards-incompatible changes to the write-ahead log.

Close cockroachdb#2292.
  • Loading branch information
jbowens committed Feb 27, 2023
1 parent 02d779f commit 74f40ed
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 32 deletions.
21 changes: 18 additions & 3 deletions format_major_version.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,22 @@ const (
// TableFormatPebblev2.
FormatSSTableValueBlocks

// FormatFlushableIngest is a format major version that enables lazy
// addition of ingested sstables into the LSM structure. When an ingest
// overlaps with a memtable, a record of the ingest is written to the WAL
// without waiting for a flush. Subsequent reads treat the ingested files as
// a level above the overlapping memtable. Once the memtable is flushed, the
// ingested files are moved into the lowest possible levels.
//
// This feature is behind a format major version because it required
// breaking changes to the WAL format.
FormatFlushableIngest

// FormatNewest always contains the most recent format major version.
// NB: When adding new versions, the MaxTableFormat method should also be
// updated to return the maximum allowable version for the new
// FormatMajorVersion.
FormatNewest FormatMajorVersion = FormatSSTableValueBlocks
FormatNewest FormatMajorVersion = FormatFlushableIngest
)

// MaxTableFormat returns the maximum sstable.TableFormat that can be used at
Expand All @@ -151,7 +162,7 @@ func (v FormatMajorVersion) MaxTableFormat() sstable.TableFormat {
case FormatRangeKeys, FormatMinTableFormatPebblev1, FormatPrePebblev1Marked,
FormatPrePebblev1MarkedCompacted:
return sstable.TableFormatPebblev2
case FormatSSTableValueBlocks:
case FormatSSTableValueBlocks, FormatFlushableIngest:
return sstable.TableFormatPebblev3
default:
panic(fmt.Sprintf("pebble: unsupported format major version: %s", v))
Expand All @@ -168,7 +179,8 @@ func (v FormatMajorVersion) MinTableFormat() sstable.TableFormat {
FormatRangeKeys:
return sstable.TableFormatLevelDB
case FormatMinTableFormatPebblev1, FormatPrePebblev1Marked,
FormatPrePebblev1MarkedCompacted, FormatSSTableValueBlocks:
FormatPrePebblev1MarkedCompacted, FormatSSTableValueBlocks,
FormatFlushableIngest:
return sstable.TableFormatPebblev1
default:
panic(fmt.Sprintf("pebble: unsupported format major version: %s", v))
Expand Down Expand Up @@ -294,6 +306,9 @@ var formatMajorVersionMigrations = map[FormatMajorVersion]func(*DB) error{
FormatSSTableValueBlocks: func(d *DB) error {
return d.finalizeFormatVersUpgrade(FormatSSTableValueBlocks)
},
FormatFlushableIngest: func(d *DB) error {
return d.finalizeFormatVersUpgrade(FormatFlushableIngest)
},
}

const formatVersionMarkerName = `format-version`
Expand Down
3 changes: 3 additions & 0 deletions format_major_version_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ func TestRatchetFormat(t *testing.T) {
require.Equal(t, FormatPrePebblev1MarkedCompacted, d.FormatMajorVersion())
require.NoError(t, d.RatchetFormatMajorVersion(FormatSSTableValueBlocks))
require.Equal(t, FormatSSTableValueBlocks, d.FormatMajorVersion())
require.NoError(t, d.RatchetFormatMajorVersion(FormatFlushableIngest))
require.Equal(t, FormatFlushableIngest, d.FormatMajorVersion())

require.NoError(t, d.Close())

Expand Down Expand Up @@ -219,6 +221,7 @@ func TestFormatMajorVersions_TableFormat(t *testing.T) {
FormatPrePebblev1Marked: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2},
FormatPrePebblev1MarkedCompacted: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2},
FormatSSTableValueBlocks: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3},
FormatFlushableIngest: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3},
}

// Valid versions.
Expand Down
3 changes: 2 additions & 1 deletion ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -845,7 +845,8 @@ func (d *DB) ingest(
m := d.mu.mem.queue[i]
if ingestMemtableOverlaps(d.cmp, m, meta) {
if (len(d.mu.mem.queue) > d.opts.MemTableStopWritesThreshold-1) ||
!d.opts.Experimental.IngestSSTablesAsFlushable {
!d.opts.Experimental.IngestSSTablesAsFlushable ||
d.mu.formatVers.vers < FormatFlushableIngest {
mem = m
if mem.flushable == d.mu.mem.mutable {
err = d.makeRoomForWrite(nil)
Expand Down
2 changes: 1 addition & 1 deletion open_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ func TestNewDBFilenames(t *testing.T) {
"LOCK",
"MANIFEST-000001",
"OPTIONS-000003",
"marker.format-version.000011.012",
"marker.format-version.000012.013",
"marker.manifest.000001.MANIFEST-000001",
},
}
Expand Down
8 changes: 4 additions & 4 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -629,11 +629,11 @@ type Options struct {

// IngestSSTablesAsFlushable is used to determine if ingested sstables
// should be ingested as a flushable. By default this is false, but it
// is true for all metamorphic tests.
// is true for all metamorphic tests. Only effective if the format major
// version is also at least `FormatFlushableIngest`.
//
// TODO(bananabrick): Remove this field and enable by default once
// https://github.com/cockroachdb/pebble/issues/2292 and
// https://github.com/cockroachdb/pebble/issues/2266 are closed.
// TODO(bananabrick,jackson): Remove this field and unconditionally
// ingest as flushable.
IngestSSTablesAsFlushable bool

// SharedStorage is a second FS-like storage medium that can be shared
Expand Down
30 changes: 17 additions & 13 deletions testdata/checkpoint
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ create: db/marker.format-version.000011.012
close: db/marker.format-version.000011.012
remove: db/marker.format-version.000010.011
sync: db
create: db/marker.format-version.000012.013
close: db/marker.format-version.000012.013
remove: db/marker.format-version.000011.012
sync: db
create: db/temporary.000003.dbtmp
sync: db/temporary.000003.dbtmp
close: db/temporary.000003.dbtmp
Expand Down Expand Up @@ -130,9 +134,9 @@ close:
open-dir: checkpoints/checkpoint1
link: db/OPTIONS-000003 -> checkpoints/checkpoint1/OPTIONS-000003
open-dir: checkpoints/checkpoint1
create: checkpoints/checkpoint1/marker.format-version.000001.012
sync-data: checkpoints/checkpoint1/marker.format-version.000001.012
close: checkpoints/checkpoint1/marker.format-version.000001.012
create: checkpoints/checkpoint1/marker.format-version.000001.013
sync-data: checkpoints/checkpoint1/marker.format-version.000001.013
close: checkpoints/checkpoint1/marker.format-version.000001.013
sync: checkpoints/checkpoint1
close: checkpoints/checkpoint1
link: db/000005.sst -> checkpoints/checkpoint1/000005.sst
Expand Down Expand Up @@ -166,9 +170,9 @@ close: checkpoints
open-dir: checkpoints/checkpoint2
link: db/OPTIONS-000003 -> checkpoints/checkpoint2/OPTIONS-000003
open-dir: checkpoints/checkpoint2
create: checkpoints/checkpoint2/marker.format-version.000001.012
sync-data: checkpoints/checkpoint2/marker.format-version.000001.012
close: checkpoints/checkpoint2/marker.format-version.000001.012
create: checkpoints/checkpoint2/marker.format-version.000001.013
sync-data: checkpoints/checkpoint2/marker.format-version.000001.013
close: checkpoints/checkpoint2/marker.format-version.000001.013
sync: checkpoints/checkpoint2
close: checkpoints/checkpoint2
link: db/000007.sst -> checkpoints/checkpoint2/000007.sst
Expand Down Expand Up @@ -197,9 +201,9 @@ close: checkpoints
open-dir: checkpoints/checkpoint3
link: db/OPTIONS-000003 -> checkpoints/checkpoint3/OPTIONS-000003
open-dir: checkpoints/checkpoint3
create: checkpoints/checkpoint3/marker.format-version.000001.012
sync-data: checkpoints/checkpoint3/marker.format-version.000001.012
close: checkpoints/checkpoint3/marker.format-version.000001.012
create: checkpoints/checkpoint3/marker.format-version.000001.013
sync-data: checkpoints/checkpoint3/marker.format-version.000001.013
close: checkpoints/checkpoint3/marker.format-version.000001.013
sync: checkpoints/checkpoint3
close: checkpoints/checkpoint3
link: db/000005.sst -> checkpoints/checkpoint3/000005.sst
Expand Down Expand Up @@ -253,7 +257,7 @@ CURRENT
LOCK
MANIFEST-000001
OPTIONS-000003
marker.format-version.000011.012
marker.format-version.000012.013
marker.manifest.000001.MANIFEST-000001

list checkpoints/checkpoint1
Expand All @@ -263,7 +267,7 @@ list checkpoints/checkpoint1
000007.sst
MANIFEST-000001
OPTIONS-000003
marker.format-version.000001.012
marker.format-version.000001.013
marker.manifest.000001.MANIFEST-000001

open checkpoints/checkpoint1 readonly
Expand Down Expand Up @@ -304,7 +308,7 @@ list checkpoints/checkpoint2
000007.sst
MANIFEST-000001
OPTIONS-000003
marker.format-version.000001.012
marker.format-version.000001.013
marker.manifest.000001.MANIFEST-000001

open checkpoints/checkpoint2 readonly
Expand Down Expand Up @@ -332,7 +336,7 @@ list checkpoints/checkpoint3
000007.sst
MANIFEST-000001
OPTIONS-000003
marker.format-version.000001.012
marker.format-version.000001.013
marker.manifest.000001.MANIFEST-000001

open checkpoints/checkpoint3 readonly
Expand Down
11 changes: 8 additions & 3 deletions testdata/event_listener
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ close: db/marker.format-version.000011.012
remove: db/marker.format-version.000010.011
sync: db
upgraded to format version: 012
create: db/marker.format-version.000012.013
close: db/marker.format-version.000012.013
remove: db/marker.format-version.000011.012
sync: db
upgraded to format version: 013
create: db/temporary.000003.dbtmp
sync: db/temporary.000003.dbtmp
close: db/temporary.000003.dbtmp
Expand Down Expand Up @@ -251,9 +256,9 @@ close:
open-dir: checkpoint
link: db/OPTIONS-000003 -> checkpoint/OPTIONS-000003
open-dir: checkpoint
create: checkpoint/marker.format-version.000001.012
sync-data: checkpoint/marker.format-version.000001.012
close: checkpoint/marker.format-version.000001.012
create: checkpoint/marker.format-version.000001.013
sync-data: checkpoint/marker.format-version.000001.013
close: checkpoint/marker.format-version.000001.013
sync: checkpoint
close: checkpoint
link: db/000013.sst -> checkpoint/000013.sst
Expand Down
14 changes: 7 additions & 7 deletions testdata/flushable_ingest
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ LOCK
MANIFEST-000001
OPTIONS-000003
ext
marker.format-version.000011.012
marker.format-version.000012.013
marker.manifest.000001.MANIFEST-000001

# Test basic WAL replay
Expand All @@ -83,7 +83,7 @@ LOCK
MANIFEST-000001
OPTIONS-000003
ext
marker.format-version.000011.012
marker.format-version.000012.013
marker.manifest.000001.MANIFEST-000001

open
Expand Down Expand Up @@ -392,7 +392,7 @@ LOCK
MANIFEST-000001
OPTIONS-000003
ext
marker.format-version.000011.012
marker.format-version.000012.013
marker.manifest.000001.MANIFEST-000001

close
Expand All @@ -413,7 +413,7 @@ LOCK
MANIFEST-000001
OPTIONS-000003
ext
marker.format-version.000011.012
marker.format-version.000012.013
marker.manifest.000001.MANIFEST-000001

open
Expand Down Expand Up @@ -446,7 +446,7 @@ MANIFEST-000001
MANIFEST-000012
OPTIONS-000013
ext
marker.format-version.000011.012
marker.format-version.000012.013
marker.manifest.000002.MANIFEST-000012

# Make sure that the new mutable memtable can accept writes.
Expand Down Expand Up @@ -590,7 +590,7 @@ LOCK
MANIFEST-000001
OPTIONS-000003
ext
marker.format-version.000011.012
marker.format-version.000012.013
marker.manifest.000001.MANIFEST-000001

close
Expand All @@ -610,7 +610,7 @@ MANIFEST-000001
OPTIONS-000003
ext
ext1
marker.format-version.000011.012
marker.format-version.000012.013
marker.manifest.000001.MANIFEST-000001

ignoreSyncs false
Expand Down

0 comments on commit 74f40ed

Please sign in to comment.