From c58ca6257b4995be942cfccb008d05a28fb1a120 Mon Sep 17 00:00:00 2001 From: Nick Travers Date: Tue, 28 Feb 2023 15:20:27 -0800 Subject: [PATCH] db: reorder existing format major version for pre-Pebblev1 tables Currently, the `FormatPrePebblev1MarkedCompacted` (enum value `11`) is used as a synchronization point for the DB, ensuring that all pre-Pebblev1 tables have been rewritten. This format major version is implicitly tied to a Cockroach internal cluster version that ships in the 23.1 release. Not only is an implicit Pebble FMV upgrade confusing, it is unnecessarily blocking. Rather than blocking the finalization of 23.1, the decision was made to instead alter Cockroach to ensure that all tables had been rewritten before ratcheting the Pebble FMV. Rename the existing FMV for the blocking re-write. The version was renamed to include `Unused`, along with a comment detailing the reasoning. Move the existing FMV, `FormatPrePebblev1MarkedCompacted`, to be the latest in the sequence of FMVs, in a section dedicated to 23.2 FMVs. Touches cockroachdb/cockroach#96819. --- format_major_version.go | 46 ++++++++++++------- format_major_version_test.go | 39 ++++++++-------- open_test.go | 2 +- testdata/checkpoint | 30 ++++++------ testdata/event_listener | 11 +++-- testdata/flushable_ingest | 14 +++--- .../format_major_version_pebblev1_migration | 6 +-- 7 files changed, 87 insertions(+), 61 deletions(-) diff --git a/format_major_version.go b/format_major_version.go index 8fcb5d68fa..fbd69556bf 100644 --- a/format_major_version.go +++ b/format_major_version.go @@ -110,12 +110,12 @@ const ( // 23.1 versions. - // FormatPrePebblev1MarkedCompacted is a format major version that - // guarantees that all sstables explicitly marked for compaction in the - // manifest have been compacted. Ratcheting to this format version will block - // (without holding mutexes) until all necessary compactions for files marked - // for compaction are complete. 
- FormatPrePebblev1MarkedCompacted + // FormatUnusedPrePebblev1MarkedCompacted is an unused format major version. + // This format major version was originally intended to ship in the 23.1 + // release. It was later decided that this should be deferred until a + // subsequent release. The original ordering is preserved so as not to + // introduce breaking changes in Cockroach. + FormatUnusedPrePebblev1MarkedCompacted // FormatSSTableValueBlocks is a format major version that adds support for // storing values in value blocks in the sstable. Value block support is not @@ -142,6 +142,15 @@ const ( // breaking changes to the WAL format. FormatFlushableIngest + // 23.2 versions. + + // FormatPrePebblev1MarkedCompacted is a format major version that guarantees + // that all sstables explicitly marked for compaction in the manifest (see + // FormatPrePebblev1Marked) have been compacted. Ratcheting to this format + // version will block (without holding mutexes) until all necessary + // compactions for files marked for compaction are complete. + FormatPrePebblev1MarkedCompacted + // FormatNewest always contains the most recent format major version. 
FormatNewest FormatMajorVersion = iota - 1 ) @@ -157,9 +166,10 @@ func (v FormatMajorVersion) MaxTableFormat() sstable.TableFormat { FormatSplitUserKeysMarkedCompacted: return sstable.TableFormatPebblev1 case FormatRangeKeys, FormatMinTableFormatPebblev1, FormatPrePebblev1Marked, - FormatPrePebblev1MarkedCompacted: + FormatUnusedPrePebblev1MarkedCompacted: return sstable.TableFormatPebblev2 - case FormatSSTableValueBlocks, FormatFlushableIngest: + case FormatSSTableValueBlocks, FormatFlushableIngest, + FormatPrePebblev1MarkedCompacted: return sstable.TableFormatPebblev3 default: panic(fmt.Sprintf("pebble: unsupported format major version: %s", v)) @@ -176,8 +186,8 @@ func (v FormatMajorVersion) MinTableFormat() sstable.TableFormat { FormatRangeKeys: return sstable.TableFormatLevelDB case FormatMinTableFormatPebblev1, FormatPrePebblev1Marked, - FormatPrePebblev1MarkedCompacted, FormatSSTableValueBlocks, - FormatFlushableIngest: + FormatUnusedPrePebblev1MarkedCompacted, FormatSSTableValueBlocks, + FormatFlushableIngest, FormatPrePebblev1MarkedCompacted: return sstable.TableFormatPebblev1 default: panic(fmt.Sprintf("pebble: unsupported format major version: %s", v)) @@ -291,6 +301,16 @@ var formatMajorVersionMigrations = map[FormatMajorVersion]func(*DB) error{ } return d.finalizeFormatVersUpgrade(FormatPrePebblev1Marked) }, + FormatUnusedPrePebblev1MarkedCompacted: func(d *DB) error { + // Intentional no-op. + return d.finalizeFormatVersUpgrade(FormatUnusedPrePebblev1MarkedCompacted) + }, + FormatSSTableValueBlocks: func(d *DB) error { + return d.finalizeFormatVersUpgrade(FormatSSTableValueBlocks) + }, + FormatFlushableIngest: func(d *DB) error { + return d.finalizeFormatVersUpgrade(FormatFlushableIngest) + }, FormatPrePebblev1MarkedCompacted: func(d *DB) error { // Before finalizing the format major version, rewrite any sstables // still marked for compaction. 
Note all format major versions @@ -300,12 +320,6 @@ var formatMajorVersionMigrations = map[FormatMajorVersion]func(*DB) error{ } return d.finalizeFormatVersUpgrade(FormatPrePebblev1MarkedCompacted) }, - FormatSSTableValueBlocks: func(d *DB) error { - return d.finalizeFormatVersUpgrade(FormatSSTableValueBlocks) - }, - FormatFlushableIngest: func(d *DB) error { - return d.finalizeFormatVersUpgrade(FormatFlushableIngest) - }, } const formatVersionMarkerName = `format-version` diff --git a/format_major_version_test.go b/format_major_version_test.go index b862b984bf..0e1078c1b8 100644 --- a/format_major_version_test.go +++ b/format_major_version_test.go @@ -54,12 +54,14 @@ func TestRatchetFormat(t *testing.T) { require.Equal(t, FormatMinTableFormatPebblev1, d.FormatMajorVersion()) require.NoError(t, d.RatchetFormatMajorVersion(FormatPrePebblev1Marked)) require.Equal(t, FormatPrePebblev1Marked, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatPrePebblev1MarkedCompacted)) - require.Equal(t, FormatPrePebblev1MarkedCompacted, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatUnusedPrePebblev1MarkedCompacted)) + require.Equal(t, FormatUnusedPrePebblev1MarkedCompacted, d.FormatMajorVersion()) require.NoError(t, d.RatchetFormatMajorVersion(FormatSSTableValueBlocks)) require.Equal(t, FormatSSTableValueBlocks, d.FormatMajorVersion()) require.NoError(t, d.RatchetFormatMajorVersion(FormatFlushableIngest)) require.Equal(t, FormatFlushableIngest, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatPrePebblev1MarkedCompacted)) + require.Equal(t, FormatPrePebblev1MarkedCompacted, d.FormatMajorVersion()) require.NoError(t, d.Close()) @@ -208,20 +210,21 @@ func TestFormatMajorVersions_TableFormat(t *testing.T) { // fixture is intentionally verbose. 
m := map[FormatMajorVersion][2]sstable.TableFormat{ - FormatDefault: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, - FormatMostCompatible: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, - formatVersionedManifestMarker: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, - FormatVersioned: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, - FormatSetWithDelete: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, - FormatBlockPropertyCollector: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, - FormatSplitUserKeysMarked: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, - FormatSplitUserKeysMarkedCompacted: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, - FormatRangeKeys: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev2}, - FormatMinTableFormatPebblev1: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, - FormatPrePebblev1Marked: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, - FormatPrePebblev1MarkedCompacted: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, - FormatSSTableValueBlocks: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, - FormatFlushableIngest: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, + FormatDefault: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, + FormatMostCompatible: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, + formatVersionedManifestMarker: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, + FormatVersioned: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, + FormatSetWithDelete: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, + FormatBlockPropertyCollector: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, + FormatSplitUserKeysMarked: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, + FormatSplitUserKeysMarkedCompacted: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, + FormatRangeKeys: 
{sstable.TableFormatLevelDB, sstable.TableFormatPebblev2}, + FormatMinTableFormatPebblev1: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, + FormatPrePebblev1Marked: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, + FormatUnusedPrePebblev1MarkedCompacted: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, + FormatSSTableValueBlocks: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, + FormatFlushableIngest: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, + FormatPrePebblev1MarkedCompacted: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, } // Valid versions. @@ -562,9 +565,9 @@ func TestPebblev1MigrationConcurrencyRace(t *testing.T) { require.NoError(t, d.Flush()) }() - opts.FormatMajorVersion = FormatPrePebblev1MarkedCompacted + opts.FormatMajorVersion = FormatUnusedPrePebblev1MarkedCompacted d, err := Open("", opts) require.NoError(t, err) - require.NoError(t, d.RatchetFormatMajorVersion(FormatPrePebblev1MarkedCompacted)) + require.NoError(t, d.RatchetFormatMajorVersion(FormatUnusedPrePebblev1MarkedCompacted)) require.NoError(t, d.Close()) } diff --git a/open_test.go b/open_test.go index 64cb46adba..77c2f2431b 100644 --- a/open_test.go +++ b/open_test.go @@ -161,7 +161,7 @@ func TestNewDBFilenames(t *testing.T) { "LOCK", "MANIFEST-000001", "OPTIONS-000003", - "marker.format-version.000012.013", + "marker.format-version.000013.014", "marker.manifest.000001.MANIFEST-000001", }, } diff --git a/testdata/checkpoint b/testdata/checkpoint index 2872d98f8a..ead1c79df8 100644 --- a/testdata/checkpoint +++ b/testdata/checkpoint @@ -72,6 +72,10 @@ create: db/marker.format-version.000012.013 close: db/marker.format-version.000012.013 remove: db/marker.format-version.000011.012 sync: db +create: db/marker.format-version.000013.014 +close: db/marker.format-version.000013.014 +remove: db/marker.format-version.000012.013 +sync: db create: db/temporary.000003.dbtmp sync: db/temporary.000003.dbtmp close: 
db/temporary.000003.dbtmp @@ -134,9 +138,9 @@ close: open-dir: checkpoints/checkpoint1 link: db/OPTIONS-000003 -> checkpoints/checkpoint1/OPTIONS-000003 open-dir: checkpoints/checkpoint1 -create: checkpoints/checkpoint1/marker.format-version.000001.013 -sync-data: checkpoints/checkpoint1/marker.format-version.000001.013 -close: checkpoints/checkpoint1/marker.format-version.000001.013 +create: checkpoints/checkpoint1/marker.format-version.000001.014 +sync-data: checkpoints/checkpoint1/marker.format-version.000001.014 +close: checkpoints/checkpoint1/marker.format-version.000001.014 sync: checkpoints/checkpoint1 close: checkpoints/checkpoint1 link: db/000005.sst -> checkpoints/checkpoint1/000005.sst @@ -170,9 +174,9 @@ close: checkpoints open-dir: checkpoints/checkpoint2 link: db/OPTIONS-000003 -> checkpoints/checkpoint2/OPTIONS-000003 open-dir: checkpoints/checkpoint2 -create: checkpoints/checkpoint2/marker.format-version.000001.013 -sync-data: checkpoints/checkpoint2/marker.format-version.000001.013 -close: checkpoints/checkpoint2/marker.format-version.000001.013 +create: checkpoints/checkpoint2/marker.format-version.000001.014 +sync-data: checkpoints/checkpoint2/marker.format-version.000001.014 +close: checkpoints/checkpoint2/marker.format-version.000001.014 sync: checkpoints/checkpoint2 close: checkpoints/checkpoint2 link: db/000007.sst -> checkpoints/checkpoint2/000007.sst @@ -201,9 +205,9 @@ close: checkpoints open-dir: checkpoints/checkpoint3 link: db/OPTIONS-000003 -> checkpoints/checkpoint3/OPTIONS-000003 open-dir: checkpoints/checkpoint3 -create: checkpoints/checkpoint3/marker.format-version.000001.013 -sync-data: checkpoints/checkpoint3/marker.format-version.000001.013 -close: checkpoints/checkpoint3/marker.format-version.000001.013 +create: checkpoints/checkpoint3/marker.format-version.000001.014 +sync-data: checkpoints/checkpoint3/marker.format-version.000001.014 +close: checkpoints/checkpoint3/marker.format-version.000001.014 sync: 
checkpoints/checkpoint3 close: checkpoints/checkpoint3 link: db/000005.sst -> checkpoints/checkpoint3/000005.sst @@ -257,7 +261,7 @@ CURRENT LOCK MANIFEST-000001 OPTIONS-000003 -marker.format-version.000012.013 +marker.format-version.000013.014 marker.manifest.000001.MANIFEST-000001 list checkpoints/checkpoint1 @@ -267,7 +271,7 @@ list checkpoints/checkpoint1 000007.sst MANIFEST-000001 OPTIONS-000003 -marker.format-version.000001.013 +marker.format-version.000001.014 marker.manifest.000001.MANIFEST-000001 open checkpoints/checkpoint1 readonly @@ -308,7 +312,7 @@ list checkpoints/checkpoint2 000007.sst MANIFEST-000001 OPTIONS-000003 -marker.format-version.000001.013 +marker.format-version.000001.014 marker.manifest.000001.MANIFEST-000001 open checkpoints/checkpoint2 readonly @@ -336,7 +340,7 @@ list checkpoints/checkpoint3 000007.sst MANIFEST-000001 OPTIONS-000003 -marker.format-version.000001.013 +marker.format-version.000001.014 marker.manifest.000001.MANIFEST-000001 open checkpoints/checkpoint3 readonly diff --git a/testdata/event_listener b/testdata/event_listener index 7ed11522e4..2badc481b1 100644 --- a/testdata/event_listener +++ b/testdata/event_listener @@ -88,6 +88,11 @@ close: db/marker.format-version.000012.013 remove: db/marker.format-version.000011.012 sync: db upgraded to format version: 013 +create: db/marker.format-version.000013.014 +close: db/marker.format-version.000013.014 +remove: db/marker.format-version.000012.013 +sync: db +upgraded to format version: 014 create: db/temporary.000003.dbtmp sync: db/temporary.000003.dbtmp close: db/temporary.000003.dbtmp @@ -256,9 +261,9 @@ close: open-dir: checkpoint link: db/OPTIONS-000003 -> checkpoint/OPTIONS-000003 open-dir: checkpoint -create: checkpoint/marker.format-version.000001.013 -sync-data: checkpoint/marker.format-version.000001.013 -close: checkpoint/marker.format-version.000001.013 +create: checkpoint/marker.format-version.000001.014 +sync-data: checkpoint/marker.format-version.000001.014 
+close: checkpoint/marker.format-version.000001.014 sync: checkpoint close: checkpoint link: db/000013.sst -> checkpoint/000013.sst diff --git a/testdata/flushable_ingest b/testdata/flushable_ingest index 41ce3eef0c..f30613bf0e 100644 --- a/testdata/flushable_ingest +++ b/testdata/flushable_ingest @@ -61,7 +61,7 @@ LOCK MANIFEST-000001 OPTIONS-000003 ext -marker.format-version.000012.013 +marker.format-version.000013.014 marker.manifest.000001.MANIFEST-000001 # Test basic WAL replay @@ -83,7 +83,7 @@ LOCK MANIFEST-000001 OPTIONS-000003 ext -marker.format-version.000012.013 +marker.format-version.000013.014 marker.manifest.000001.MANIFEST-000001 open @@ -392,7 +392,7 @@ LOCK MANIFEST-000001 OPTIONS-000003 ext -marker.format-version.000012.013 +marker.format-version.000013.014 marker.manifest.000001.MANIFEST-000001 close @@ -413,7 +413,7 @@ LOCK MANIFEST-000001 OPTIONS-000003 ext -marker.format-version.000012.013 +marker.format-version.000013.014 marker.manifest.000001.MANIFEST-000001 open @@ -446,7 +446,7 @@ MANIFEST-000001 MANIFEST-000012 OPTIONS-000013 ext -marker.format-version.000012.013 +marker.format-version.000013.014 marker.manifest.000002.MANIFEST-000012 # Make sure that the new mutable memtable can accept writes. 
@@ -590,7 +590,7 @@ LOCK MANIFEST-000001 OPTIONS-000003 ext -marker.format-version.000012.013 +marker.format-version.000013.014 marker.manifest.000001.MANIFEST-000001 close @@ -610,7 +610,7 @@ MANIFEST-000001 OPTIONS-000003 ext ext1 -marker.format-version.000012.013 +marker.format-version.000013.014 marker.manifest.000001.MANIFEST-000001 ignoreSyncs false diff --git a/testdata/format_major_version_pebblev1_migration b/testdata/format_major_version_pebblev1_migration index 5f698196a0..5efe962ea1 100644 --- a/testdata/format_major_version_pebblev1_migration +++ b/testdata/format_major_version_pebblev1_migration @@ -125,12 +125,12 @@ marked-file-count disable-automatic-compactions false ---- -ratchet-format-major-version 011 +ratchet-format-major-version 014 ---- format-major-version ---- -011 +014 min-table-format ---- @@ -138,7 +138,7 @@ min-table-format max-table-format ---- -(Pebble,v2) +(Pebble,v3) marked-file-count ----