From bbf7dc407d624cc25d2d62860895c78355af733b Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Mon, 18 Dec 2023 09:01:34 -0800 Subject: [PATCH] db: deprecate older format versions This change deprecates format versions below `FormatFlushableIngest` (the 23.1 version) and sstable formats below `TableFormatPebblev1`. As part of this change, we remove code that deals with split keys; further simplifications will be made separately (e.g. removing atomic unit logic, simplifying the truncation iterator). We also remove all code related to the `CURRENT` file. The plan is to merge this after we tag a `v1.0.0` release, which will for the time being be the recommended version series for users other than CockroachDB. Informs #3064 --- README.md | 76 ++-- batch.go | 6 - checkpoint.go | 7 +- compaction.go | 21 +- compaction_iter.go | 7 +- compaction_iter_test.go | 7 +- compaction_test.go | 52 +-- data_test.go | 39 -- db.go | 20 +- db_test.go | 2 +- error_test.go | 34 +- external_iterator_test.go | 12 +- filenames.go | 34 +- format_major_version.go | 321 ++++---------- format_major_version_test.go | 413 ++---------------- get_iter_test.go | 87 +--- ingest.go | 5 +- ingest_test.go | 79 ++-- internal/base/filenames.go | 5 - internal/base/filenames_test.go | 10 +- internal/keyspan/level_iter_test.go | 4 +- internal/manifest/l0_sublevels_test.go | 4 +- .../manifest/testdata/version_check_ordering | 7 - internal/manifest/version.go | 67 +-- internal/manifest/version_edit.go | 10 +- internal/manifest/version_edit_test.go | 2 +- internal/manifest/version_test.go | 6 +- iterator_histories_test.go | 2 +- iterator_test.go | 14 +- level_iter_test.go | 4 +- metamorphic/options.go | 18 +- open.go | 65 ++- open_test.go | 60 ++- options.go | 10 +- options_test.go | 11 +- range_del_test.go | 17 +- replay/replay.go | 3 +- replay/replay_test.go | 6 +- replay/testdata/corpus/findManifestStart | 9 +- replay/testdata/corpus/high_read_amp | 8 +- replay/testdata/corpus/simple | 8 +- 
replay/testdata/replay | 11 +- replay/testdata/replay_paced | 9 +- scan_internal_test.go | 2 +- snapshot_test.go | 4 +- sstable/data_test.go | 5 - sstable/format.go | 32 +- sstable/format_test.go | 27 +- sstable/options.go | 6 +- sstable/random_test.go | 4 +- sstable/reader_test.go | 8 +- sstable/table.go | 34 +- sstable/table_test.go | 16 +- sstable/testdata/writer | 13 +- sstable/testdata/writer_v3 | 19 +- sstable/writer_test.go | 2 +- table_cache_test.go | 2 +- table_stats_test.go | 2 +- testdata/checkpoint | 86 +--- testdata/cleaner | 51 +-- testdata/compaction_check_ordering | 2 +- testdata/compaction_read_triggered | 4 +- testdata/event_listener | 178 +++----- testdata/external_iterator | 4 +- testdata/flushable_ingest | 21 +- .../format_major_version_pebblev1_migration | 170 ------- ...mat_major_version_split_user_key_migration | 148 ------- testdata/ingest_load | 16 +- testdata/iter_histories/range_keys_simple | 20 - testdata/range_del | 196 +-------- testdata/rocksdb-ingest-only/000003.log | 0 testdata/rocksdb-ingest-only/000006.sst | Bin 1187 -> 0 bytes testdata/rocksdb-ingest-only/CURRENT | 1 - testdata/rocksdb-ingest-only/IDENTITY | 1 - testdata/rocksdb-ingest-only/LOCK | 0 testdata/rocksdb-ingest-only/MANIFEST-000001 | Bin 13 -> 0 bytes testdata/rocksdb-ingest-only/MANIFEST-000007 | Bin 135 -> 0 bytes testdata/rocksdb-ingest-only/OPTIONS-000005 | 158 ------- tool/db.go | 1 - tool/manifest.go | 4 +- version_set.go | 115 +---- 81 files changed, 577 insertions(+), 2367 deletions(-) delete mode 100644 testdata/format_major_version_pebblev1_migration delete mode 100644 testdata/format_major_version_split_user_key_migration delete mode 100644 testdata/rocksdb-ingest-only/000003.log delete mode 100644 testdata/rocksdb-ingest-only/000006.sst delete mode 100644 testdata/rocksdb-ingest-only/CURRENT delete mode 100644 testdata/rocksdb-ingest-only/IDENTITY delete mode 100644 testdata/rocksdb-ingest-only/LOCK delete mode 100644 
testdata/rocksdb-ingest-only/MANIFEST-000001 delete mode 100644 testdata/rocksdb-ingest-only/MANIFEST-000007 delete mode 100644 testdata/rocksdb-ingest-only/OPTIONS-000005 diff --git a/README.md b/README.md index c09e45d802..683e06ece8 100644 --- a/README.md +++ b/README.md @@ -86,17 +86,22 @@ differences. ## RocksDB Compatibility -Pebble strives for forward compatibility with RocksDB 6.2.1 (the latest -version of RocksDB used by CockroachDB). Forward compatibility means -that a DB generated by RocksDB can be used by Pebble. Currently, Pebble -provides bidirectional compatibility with RocksDB (a Pebble generated DB -can be used by RocksDB) when using its FormatMostCompatible format. New -functionality that is backwards incompatible is gated behind new format -major versions. In general, Pebble only provides compatibility with the -subset of functionality and configuration used by CockroachDB. The scope -of RocksDB functionality and configuration is too large to adequately -test and document all the incompatibilities. The list below contains -known incompatibilities. +Pebble strives for forward compatibility with RocksDB 6.2.1 (the latest version +of RocksDB used by CockroachDB). Forward compatibility means that a DB generated +by RocksDB 6.2.1 can be upgraded for use by Pebble. Pebble versions in the `v1` +series may open DBs generated by RocksDB 6.2.1. Since its introduction, Pebble +has adopted various backwards-incompatible format changes that are gated behind +new 'format major versions'. The Pebble `master` branch does not support opening +DBs generated by RocksDB. DBs generated by RocksDB may only be used with recent +versions of Pebble after migrating them through format major version upgrades +using previous versions of Pebble. See the section on format major +versions below. + +Even the RocksDB-compatible versions of Pebble only provide compatibility with +the subset of functionality and configuration used by CockroachDB. 
The scope of +RocksDB functionality and configuration is too large to adequately test and +document all the incompatibilities. The list below contains known +incompatibilities. * Pebble's use of WAL recycling is only compatible with RocksDB's `kTolerateCorruptedTailRecords` WAL recovery mode. Older versions of @@ -119,9 +124,14 @@ known incompatibilities. Over time Pebble has introduced new physical file formats. Backwards incompatible changes are made through the introduction of 'format major -versions'. By default, when Pebble opens a database, it defaults to -`FormatMostCompatible`. This version is bi-directionally compatible with RocksDB -6.2.1 (with the caveats described above). +versions'. By default, when Pebble opens a database, it defaults to the lowest +supported version. In `v1`, this is `FormatMostCompatible`, which is +bi-directionally compatible with RocksDB 6.2.1 (with the caveats described +above). + +Databases created by RocksDB or Pebble versions `v1` and earlier must be upgraded +to a compatible format major version before running newer Pebble versions. Newer +Pebble versions will refuse to open databases in no longer supported formats. To opt into new formats, a user may set `FormatMajorVersion` on the [`Options`](https://pkg.go.dev/github.com/cockroachdb/pebble#Options) @@ -132,24 +142,25 @@ upgrade the format major version at runtime using Format major version upgrades are permanent; There is no option to return to an earlier format. 
-The table below outlines the history of format major versions: - -| Name | Value | Migration | -|------------------------------------|-------|------------| -| FormatMostCompatible | 1 | No | -| FormatVersioned | 3 | No | -| FormatSetWithDelete | 4 | No | -| FormatBlockPropertyCollector | 5 | No | -| FormatSplitUserKeysMarked | 6 | Background | -| FormatSplitUserKeysMarkedCompacted | 7 | Blocking | -| FormatRangeKeys | 8 | No | -| FormatMinTableFormatPebblev1 | 9 | No | -| FormatPrePebblev1Marked | 10 | Background | -| FormatSSTableValueBlocks | 12 | No | -| FormatFlushableIngest | 13 | No | -| FormatPrePebblev1MarkedCompacted | 14 | Blocking | -| FormatDeleteSizedAndObsolete | 15 | No | -| FormatVirtualSSTables | 16 | No | +The table below outlines the history of format major versions, along with what +range of Pebble versions support that format. + +| Name | Value | Migration | Pebble support | +|------------------------------------|-------|------------|----------------| +| FormatMostCompatible | 1 | No | v1 | +| FormatVersioned | 3 | No | v1 | +| FormatSetWithDelete | 4 | No | v1 | +| FormatBlockPropertyCollector | 5 | No | v1 | +| FormatSplitUserKeysMarked | 6 | Background | v1 | +| FormatSplitUserKeysMarkedCompacted | 7 | Blocking | v1 | +| FormatRangeKeys | 8 | No | v1 | +| FormatMinTableFormatPebblev1 | 9 | No | v1 | +| FormatPrePebblev1Marked | 10 | Background | v1 | +| FormatSSTableValueBlocks | 12 | No | v1 | +| FormatFlushableIngest | 13 | No | v1, master | +| FormatPrePebblev1MarkedCompacted | 14 | Blocking | v1, master | +| FormatDeleteSizedAndObsolete | 15 | No | v1, master | +| FormatVirtualSSTables | 16 | No | v1, master | Upgrading to a format major version with 'Background' in the migration column may trigger background activity to rewrite physical file @@ -172,7 +183,6 @@ versions for CockroachDB releases. 
| 22.2 | FormatMostCompatible | FormatPrePebblev1Marked | | 23.1 | FormatSplitUserKeysMarkedCompacted | FormatFlushableIngest | | 23.2 | FormatSplitUserKeysMarkedCompacted | FormatVirtualSSTables | -| 24.1 plan | FormatSSTableValueBlocks | | ## Pedigree diff --git a/batch.go b/batch.go index c695f8d2aa..ad419f147e 100644 --- a/batch.go +++ b/batch.go @@ -485,9 +485,6 @@ func (b *Batch) refreshMemTableSize() error { } b.memTableSize += memTableEntrySize(len(key), len(value)) } - if b.countRangeKeys > 0 && b.minimumFormatMajorVersion < FormatRangeKeys { - b.minimumFormatMajorVersion = FormatRangeKeys - } return nil } @@ -968,9 +965,6 @@ func (b *Batch) rangeKeySetDeferred(startLen, internalValueLen int) *DeferredBat func (b *Batch) incrementRangeKeysCount() { b.countRangeKeys++ - if b.minimumFormatMajorVersion < FormatRangeKeys { - b.minimumFormatMajorVersion = FormatRangeKeys - } if b.index != nil { b.rangeKeys = nil b.rangeKeysSeqNum = 0 diff --git a/checkpoint.go b/checkpoint.go index f321c01ec3..00ee6c5659 100644 --- a/checkpoint.go +++ b/checkpoint.go @@ -411,17 +411,12 @@ func (d *DB) writeCheckpointManifest( return err } - // Recent format versions use an atomic marker for setting the - // active manifest. Older versions use the CURRENT file. The - // setCurrentFunc function will return a closure that will - // take the appropriate action for the database's format - // version. 
var manifestMarker *atomicfs.Marker manifestMarker, _, err := atomicfs.LocateMarker(fs, destDirPath, manifestMarkerName) if err != nil { return err } - if err := setCurrentFunc(formatVers, manifestMarker, fs, destDirPath, destDir)(manifestFileNum); err != nil { + if err := manifestMarker.Move(base.MakeFilename(fileTypeManifest, manifestFileNum)); err != nil { return err } return manifestMarker.Close() diff --git a/compaction.go b/compaction.go index 9b10b45335..c62a2a83f6 100644 --- a/compaction.go +++ b/compaction.go @@ -1277,22 +1277,16 @@ func (c *compaction) newInputIter( newIters tableNewIters, newRangeKeyIter keyspan.TableNewSpanIter, snapshots []uint64, ) (_ internalIterator, retErr error) { // Validate the ordering of compaction input files for defense in depth. - // TODO(jackson): Some of the CheckOrdering calls may be adapted to pass - // ProhibitSplitUserKeys if we thread the active format major version in. Or - // if we remove support for earlier FMVs, we can remove the parameter - // altogether. if len(c.flushing) == 0 { if c.startLevel.level >= 0 { err := manifest.CheckOrdering(c.cmp, c.formatKey, - manifest.Level(c.startLevel.level), c.startLevel.files.Iter(), - manifest.AllowSplitUserKeys) + manifest.Level(c.startLevel.level), c.startLevel.files.Iter()) if err != nil { return nil, err } } err := manifest.CheckOrdering(c.cmp, c.formatKey, - manifest.Level(c.outputLevel.level), c.outputLevel.files.Iter(), - manifest.AllowSplitUserKeys) + manifest.Level(c.outputLevel.level), c.outputLevel.files.Iter()) if err != nil { return nil, err } @@ -1302,9 +1296,7 @@ func (c *compaction) newInputIter( } for _, info := range c.startLevel.l0SublevelInfo { err := manifest.CheckOrdering(c.cmp, c.formatKey, - info.sublevel, info.Iter(), - // NB: L0 sublevels have never allowed split user keys. 
- manifest.ProhibitSplitUserKeys) + info.sublevel, info.Iter()) if err != nil { return nil, err } @@ -1316,8 +1308,7 @@ func (c *compaction) newInputIter( } interLevel := c.extraLevels[0] err := manifest.CheckOrdering(c.cmp, c.formatKey, - manifest.Level(interLevel.level), interLevel.files.Iter(), - manifest.AllowSplitUserKeys) + manifest.Level(interLevel.level), interLevel.files.Iter()) if err != nil { return nil, err } @@ -3173,10 +3164,6 @@ func (d *DB) runCompaction( } writerOpts := d.opts.MakeWriterOptions(c.outputLevel.level, tableFormat) - if formatVers < FormatBlockPropertyCollector { - // Cannot yet write block properties. - writerOpts.BlockPropertyCollectors = nil - } // prevPointKey is a sstable.WriterOption that provides access to // the last point key written to a writer's sstable. When a new diff --git a/compaction_iter.go b/compaction_iter.go index 299dbfc983..c86173e711 100644 --- a/compaction_iter.go +++ b/compaction_iter.go @@ -753,12 +753,9 @@ func (i *compactionIter) setNext() { i.valid = true i.maybeZeroSeqnum(i.curSnapshotIdx) - // There are two cases where we can early return and skip the remaining + // If this key is already a SETWITHDEL we can early return and skip the remaining // records in the stripe: - // - If the DB does not SETWITHDEL. - // - If this key is already a SETWITHDEL. - if i.formatVersion < FormatSetWithDelete || - i.iterKey.Kind() == InternalKeyKindSetWithDelete { + if i.iterKey.Kind() == InternalKeyKindSetWithDelete { i.skip = true return } diff --git a/compaction_iter_test.go b/compaction_iter_test.go index 98243e1fdc..932abde16e 100644 --- a/compaction_iter_test.go +++ b/compaction_iter_test.go @@ -90,9 +90,6 @@ func TestCompactionIter(t *testing.T) { // The input to the data-driven test is dependent on the format major // version we are testing against. 
fileFunc := func(formatVersion FormatMajorVersion) string { - if formatVersion < FormatSetWithDelete { - return "testdata/compaction_iter" - } if formatVersion < FormatDeleteSizedAndObsolete { return "testdata/compaction_iter_set_with_del" } @@ -330,9 +327,7 @@ func TestCompactionIter(t *testing.T) { // Rather than testing against all format version, we test against the // significant boundaries. formatVersions := []FormatMajorVersion{ - FormatMostCompatible, - FormatSetWithDelete - 1, - FormatSetWithDelete, + FormatMinSupported, internalFormatNewest, } for _, formatVersion := range formatVersions { diff --git a/compaction_test.go b/compaction_test.go index ea1437a4b9..d8b9a45b84 100644 --- a/compaction_test.go +++ b/compaction_test.go @@ -1480,63 +1480,41 @@ func TestManualCompaction(t *testing.T) { testCases := []struct { testData string - minVersion FormatMajorVersion - maxVersion FormatMajorVersion // inclusive + minVersion FormatMajorVersion // inclusive, FormatMinSupported if unspecified. + maxVersion FormatMajorVersion // inclusive, internalFormatNewest if unspecified. verbose bool }{ { - testData: "testdata/manual_compaction", - minVersion: FormatMostCompatible, - maxVersion: FormatSetWithDelete - 1, + testData: "testdata/singledel_manual_compaction_set_with_del", }, { - testData: "testdata/manual_compaction_set_with_del", - minVersion: FormatBlockPropertyCollector, - // This test exercises split user keys. 
- maxVersion: FormatSplitUserKeysMarkedCompacted - 1, - }, - { - testData: "testdata/singledel_manual_compaction", - minVersion: FormatMostCompatible, - maxVersion: FormatSetWithDelete - 1, - }, - { - testData: "testdata/singledel_manual_compaction_set_with_del", - minVersion: FormatSetWithDelete, - maxVersion: internalFormatNewest, - }, - { - testData: "testdata/manual_compaction_range_keys", - minVersion: FormatRangeKeys, - maxVersion: internalFormatNewest, - verbose: true, - }, - { - testData: "testdata/manual_compaction_file_boundaries", - minVersion: FormatBlockPropertyCollector, - // This test exercises split user keys. - maxVersion: FormatSplitUserKeysMarkedCompacted - 1, + testData: "testdata/manual_compaction_range_keys", + verbose: true, }, { testData: "testdata/manual_compaction_file_boundaries_delsized", minVersion: FormatDeleteSizedAndObsolete, - maxVersion: internalFormatNewest, }, { testData: "testdata/manual_compaction_set_with_del_sstable_Pebblev4", minVersion: FormatDeleteSizedAndObsolete, - maxVersion: internalFormatNewest, }, { - testData: "testdata/manual_compaction_multilevel", - minVersion: FormatMostCompatible, - maxVersion: internalFormatNewest, + testData: "testdata/manual_compaction_multilevel", }, } for _, tc := range testCases { t.Run(tc.testData, func(t *testing.T) { - runTest(t, tc.testData, tc.minVersion, tc.maxVersion, tc.verbose) + minVersion, maxVersion := tc.minVersion, tc.maxVersion + if minVersion == 0 { + minVersion = FormatMinSupported + } + if maxVersion == 0 { + maxVersion = internalFormatNewest + } + + runTest(t, tc.testData, minVersion, maxVersion, tc.verbose) }) } } diff --git a/data_test.go b/data_test.go index 4172868660..f75637e45d 100644 --- a/data_test.go +++ b/data_test.go @@ -504,10 +504,6 @@ func runBuildRemoteCmd(td *datadriven.TestData, d *DB, storage remote.Storage) e switch cmdArg.Key { case "format": switch cmdArg.Vals[0] { - case "leveldb": - tableFormat = sstable.TableFormatLevelDB - case "rocksdbv2": - 
tableFormat = sstable.TableFormatRocksDBv2 case "pebblev1": tableFormat = sstable.TableFormatPebblev1 case "pebblev2": @@ -594,10 +590,6 @@ func runBuildCmd(td *datadriven.TestData, d *DB, fs vfs.FS) error { switch cmdArg.Key { case "format": switch cmdArg.Vals[0] { - case "leveldb": - tableFormat = sstable.TableFormatLevelDB - case "rocksdbv2": - tableFormat = sstable.TableFormatRocksDBv2 case "pebblev1": tableFormat = sstable.TableFormatPebblev1 case "pebblev2": @@ -1304,37 +1296,6 @@ func runIngestExternalCmd(td *datadriven.TestData, d *DB, locator string) error return nil } -func runForceIngestCmd(td *datadriven.TestData, d *DB) error { - var paths []string - var level int - for _, arg := range td.CmdArgs { - switch arg.Key { - case "paths": - paths = append(paths, arg.Vals...) - case "level": - var err error - level, err = strconv.Atoi(arg.Vals[0]) - if err != nil { - return err - } - } - } - _, err := d.ingest(paths, func( - tableNewIters, - keyspan.TableNewSpanIter, - IterOptions, - *Comparer, - *version, - int, - map[*compaction]struct{}, - *fileMetadata, - bool, - ) (int, *fileMetadata, error) { - return level, nil, nil - }, nil /* shared */, KeyRange{}, nil /* external */) - return err -} - func runLSMCmd(td *datadriven.TestData, d *DB) string { d.mu.Lock() defer d.mu.Unlock() diff --git a/db.go b/db.go index ab00321c53..0aca4f1445 100644 --- a/db.go +++ b/db.go @@ -829,13 +829,11 @@ func (d *DB) applyInternal(batch *Batch, opts *WriteOptions, noSyncWait bool) er return errors.New("pebble: WAL disabled") } - if batch.minimumFormatMajorVersion != FormatMostCompatible { - if fmv := d.FormatMajorVersion(); fmv < batch.minimumFormatMajorVersion { - panic(fmt.Sprintf( - "pebble: batch requires at least format major version %d (current: %d)", - batch.minimumFormatMajorVersion, fmv, - )) - } + if fmv := d.FormatMajorVersion(); fmv < batch.minimumFormatMajorVersion { + panic(fmt.Sprintf( + "pebble: batch requires at least format major version %d (current: %d)", + 
batch.minimumFormatMajorVersion, fmv, + )) } if batch.countRangeKeys > 0 { @@ -1036,14 +1034,6 @@ func (d *DB) newIter( panic(err) } seqNum := internalOpts.snapshot.seqNum - if o.rangeKeys() { - if d.FormatMajorVersion() < FormatRangeKeys { - panic(fmt.Sprintf( - "pebble: range keys require at least format major version %d (current: %d)", - FormatRangeKeys, d.FormatMajorVersion(), - )) - } - } if o != nil && o.RangeKeyMasking.Suffix != nil && o.KeyTypes != IterKeyTypePointsAndRanges { panic("pebble: range key masking requires IterKeyTypePointsAndRanges") } diff --git a/db_test.go b/db_test.go index 631753df92..66a4e39949 100644 --- a/db_test.go +++ b/db_test.go @@ -1435,7 +1435,7 @@ func TestTracing(t *testing.T) { _, closer, err := d.Get([]byte("hello")) require.NoError(t, err) closer.Close() - readerInitTraceString := "reading 37 bytes took 5ms\nreading 628 bytes took 5ms\n" + readerInitTraceString := "reading 37 bytes took 5ms\nreading 491 bytes took 5ms\n" iterTraceString := "reading 27 bytes took 5ms\nreading 29 bytes took 5ms\n" require.Equal(t, readerInitTraceString+iterTraceString, tracer.buf.String()) diff --git a/error_test.go b/error_test.go index 82af4a49a6..b73f48cf2a 100644 --- a/error_test.go +++ b/error_test.go @@ -142,14 +142,17 @@ func TestErrors(t *testing.T) { t.Logf("success %d\n", i) break } - errorCounts[err.Error()]++ + errMsg := err.Error() + if !strings.Contains(errMsg, "injected error") { + t.Fatalf("unexpected errors: %v", err) + } + errorCounts[errMsg]++ } expectedErrors := []string{ "fatal: MANIFEST flush failed: injected error", "fatal: MANIFEST sync failed: injected error", "fatal: MANIFEST set current failed: injected error", - "fatal: MANIFEST dirsync failed: injected error", } for _, expected := range expectedErrors { if errorCounts[expected] == 0 { @@ -193,21 +196,12 @@ func TestRequireReadError(t *testing.T) { require.NoError(t, d.DeleteRange(key1, key2, nil)) require.NoError(t, d.Set(key1, value, nil)) require.NoError(t, 
d.Flush()) - if formatVersion < FormatSetWithDelete { - expectLSM(` -0.0: - 000007:[a1#13,SET-a2#inf,RANGEDEL] -6: - 000005:[a1#10,SET-a2#11,SET] -`, d, t) - } else { - expectLSM(` + expectLSM(` 0.0: 000007:[a1#13,SETWITHDEL-a2#inf,RANGEDEL] 6: 000005:[a1#10,SET-a2#11,SET] `, d, t) - } // Now perform foreground ops with error injection enabled. ii.Store(index) @@ -246,7 +240,7 @@ func TestRequireReadError(t *testing.T) { return nil } - versions := []FormatMajorVersion{FormatMostCompatible, FormatSetWithDelete} + versions := []FormatMajorVersion{FormatMinSupported, internalFormatNewest} for _, version := range versions { t.Run(fmt.Sprintf("version-%s", version), func(t *testing.T) { for i := int32(0); ; i++ { @@ -296,22 +290,12 @@ func TestCorruptReadError(t *testing.T) { require.NoError(t, d.DeleteRange(key1, key2, nil)) require.NoError(t, d.Set(key1, value, nil)) require.NoError(t, d.Flush()) - if formatVersion < FormatSetWithDelete { - expectLSM(` -0.0: - 000007:[a1#13,SET-a2#inf,RANGEDEL] -6: - 000005:[a1#10,SET-a2#11,SET] -`, d, t) - - } else { - expectLSM(` + expectLSM(` 0.0: 000007:[a1#13,SETWITHDEL-a2#inf,RANGEDEL] 6: 000005:[a1#10,SET-a2#11,SET] `, d, t) - } // Now perform foreground ops with corruption injection enabled. 
fs.index.Store(index) @@ -349,7 +333,7 @@ func TestCorruptReadError(t *testing.T) { } return nil } - versions := []FormatMajorVersion{FormatMostCompatible, FormatSetWithDelete} + versions := []FormatMajorVersion{FormatMinSupported, internalFormatNewest} for _, version := range versions { t.Run(fmt.Sprintf("version-%s", version), func(t *testing.T) { for i := int32(0); ; i++ { diff --git a/external_iterator_test.go b/external_iterator_test.go index 77afd4dcc7..75b7acf6b9 100644 --- a/external_iterator_test.go +++ b/external_iterator_test.go @@ -27,10 +27,10 @@ import ( func TestExternalIterator(t *testing.T) { mem := vfs.NewMem() o := &Options{ - FS: mem, - Comparer: testkeys.Comparer, - FormatMajorVersion: FormatRangeKeys, + FS: mem, + Comparer: testkeys.Comparer, } + o.testingRandomized(t) o.EnsureDefaults() d, err := Open("", o) require.NoError(t, err) @@ -80,10 +80,10 @@ func TestExternalIterator(t *testing.T) { func TestSimpleLevelIter(t *testing.T) { mem := vfs.NewMem() o := &Options{ - FS: mem, - Comparer: testkeys.Comparer, - FormatMajorVersion: FormatRangeKeys, + FS: mem, + Comparer: testkeys.Comparer, } + o.testingRandomized(t) o.EnsureDefaults() d, err := Open("", o) require.NoError(t, err) diff --git a/filenames.go b/filenames.go index 07d74c87d3..86e7ed1d37 100644 --- a/filenames.go +++ b/filenames.go @@ -4,12 +4,7 @@ package pebble -import ( - "fmt" - - "github.com/cockroachdb/pebble/internal/base" - "github.com/cockroachdb/pebble/vfs" -) +import "github.com/cockroachdb/pebble/internal/base" type fileType = base.FileType @@ -21,34 +16,7 @@ const ( fileTypeLock = base.FileTypeLock fileTypeTable = base.FileTypeTable fileTypeManifest = base.FileTypeManifest - fileTypeCurrent = base.FileTypeCurrent fileTypeOptions = base.FileTypeOptions fileTypeTemp = base.FileTypeTemp fileTypeOldTemp = base.FileTypeOldTemp ) - -// setCurrentFile sets the CURRENT file to point to the manifest with -// provided file number. 
-// -// NB: This is a low-level routine and typically not what you want to -// use. Newer versions of Pebble running newer format major versions do -// not use the CURRENT file. See setCurrentFunc in version_set.go. -func setCurrentFile(dirname string, fs vfs.FS, fileNum base.DiskFileNum) error { - newFilename := base.MakeFilepath(fs, dirname, fileTypeCurrent, fileNum) - oldFilename := base.MakeFilepath(fs, dirname, fileTypeTemp, fileNum) - fs.Remove(oldFilename) - f, err := fs.Create(oldFilename) - if err != nil { - return err - } - if _, err := fmt.Fprintf(f, "MANIFEST-%s\n", fileNum); err != nil { - return err - } - if err := f.Sync(); err != nil { - return err - } - if err := f.Close(); err != nil { - return err - } - return fs.Rename(oldFilename, newFilename) -} diff --git a/format_major_version.go b/format_major_version.go index 89be1610df..e65301ec4a 100644 --- a/format_major_version.go +++ b/format_major_version.go @@ -9,7 +9,6 @@ import ( "strconv" "github.com/cockroachdb/errors" - "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/manifest" "github.com/cockroachdb/pebble/sstable" "github.com/cockroachdb/pebble/vfs" @@ -43,15 +42,18 @@ func (v FormatMajorVersion) String() string { } const ( + // FormatDefault leaves the format version unspecified. When used to create a + // new store, Pebble will choose the earliest format version it supports. + FormatDefault FormatMajorVersion = iota + // 21.2 versions. - // FormatDefault leaves the format version unspecified. The - // FormatDefault constant may be ratcheted upwards over time. - FormatDefault FormatMajorVersion = iota // FormatMostCompatible maintains the most backwards compatibility, // maintaining bi-directional compatibility with RocksDB 6.2.1 in // the particular configuration described in the Pebble README. - FormatMostCompatible + // Deprecated. 
+ _ // FormatMostCompatible + // formatVersionedManifestMarker is the first // backwards-incompatible change made to Pebble, introducing the // format-version marker file for handling backwards-incompatible @@ -63,28 +65,36 @@ const ( // format major version. Clients should use FormatVersioned which // also ensures earlier versions of Pebble fail to open a database // written in a future format major version. - formatVersionedManifestMarker + // Deprecated. + _ // formatVersionedManifestMarker + // FormatVersioned is a new format major version that replaces the // old `CURRENT` file with a new 'marker' file scheme. Previous // Pebble versions will be unable to open the database unless // they're aware of format versions. - FormatVersioned + // Deprecated. + _ // FormatVersioned + // FormatSetWithDelete is a format major version that introduces a new key // kind, base.InternalKeyKindSetWithDelete. Previous Pebble versions will be // unable to open this database. - FormatSetWithDelete + // Deprecated. + _ // FormatSetWithDelete // 22.1 versions. // FormatBlockPropertyCollector is a format major version that introduces // BlockPropertyCollectors. - FormatBlockPropertyCollector + // Deprecated. + _ // FormatBlockPropertyCollector + // FormatSplitUserKeysMarked is a format major version that guarantees that // all files that share user keys with neighbors are marked for compaction // in the manifest. Ratcheting to FormatSplitUserKeysMarked will block // (without holding mutexes) until the scan of the LSM is complete and the // manifest has been rotated. - FormatSplitUserKeysMarked + // Deprecated. + _ // FormatSplitUserKeysMarked // 22.2 versions. @@ -95,21 +105,28 @@ const ( // across multiple files within a level L1+. Ratcheting to this format version // will block (without holding mutexes) until all necessary compactions for // files marked for compaction are complete. - FormatSplitUserKeysMarkedCompacted + // Deprecated. 
+ _ // FormatSplitUserKeysMarkedCompacted + // FormatRangeKeys is a format major version that introduces range keys. - FormatRangeKeys + // Deprecated. + _ // FormatRangeKeys + // FormatMinTableFormatPebblev1 is a format major version that guarantees that // tables created by or ingested into the DB at or above this format major // version will have a table format version of at least Pebblev1 (Block // Properties). - FormatMinTableFormatPebblev1 + // Deprecated. + _ // FormatMinTableFormatPebblev1 + // FormatPrePebblev1Marked is a format major version that guarantees that all // sstables with a table format version pre-Pebblev1 (i.e. those that are // guaranteed to not contain block properties) are marked for compaction in // the manifest. Ratcheting to FormatPrePebblev1Marked will block (without // holding mutexes) until the scan of the LSM is complete and the manifest has // been rotated. - FormatPrePebblev1Marked + // Deprecated. + _ // FormatPrePebblev1Marked // 23.1 versions. @@ -118,21 +135,13 @@ const ( // release. It was later decided that this should be deferred until a // subsequent release. The original ordering is preserved so as not to // introduce breaking changes in Cockroach. - formatUnusedPrePebblev1MarkedCompacted + _ // formatUnusedPrePebblev1MarkedCompacted // FormatSSTableValueBlocks is a format major version that adds support for // storing values in value blocks in the sstable. Value block support is not // necessarily enabled when writing sstables, when running with this format // major version. - // - // WARNING: In development, so no production code should upgrade to this - // format, since a DB with this format major version will not actually - // interoperate correctly with another DB with the same format major - // version. This format major version is introduced so that tests can start - // being executed up to this version. 
Note that these tests succeed despite - // the incomplete support since they do not enable value blocks and use - // TableFormatPebblev2. - FormatSSTableValueBlocks + _ // FormatSSTableValueBlocks // FormatFlushableIngest is a format major version that enables lazy // addition of ingested sstables into the LSM structure. When an ingest @@ -169,29 +178,36 @@ const ( // a format major version. FormatVirtualSSTables - // internalFormatNewest holds the newest format major version, including - // experimental ones excluded from the exported FormatNewest constant until - // they've stabilized. Used in tests. - internalFormatNewest FormatMajorVersion = iota - 1 + // -- Add new versions here -- + + // FormatNewest is the most recent format major version. + FormatNewest FormatMajorVersion = iota - 1 + + // Experimental versions, which are excluded by FormatNewest (but can be used + // in tests) can be defined here. + + // -- Add experimental versions here -- - // FormatNewest always contains the most recent format major version. - FormatNewest FormatMajorVersion = internalFormatNewest + // internalFormatNewest is the most recent, possibly experimental format major + // version. + internalFormatNewest FormatMajorVersion = iota - 2 ) +// FormatMinSupported is the minimum format version that is supported by this +// Pebble version. +const FormatMinSupported = FormatFlushableIngest + +// IsSupported returns true if the version is supported by the current Pebble +// version. +func (v FormatMajorVersion) IsSupported() bool { + return v == FormatDefault || (v >= FormatMinSupported && v <= internalFormatNewest) +} + // MaxTableFormat returns the maximum sstable.TableFormat that can be used at // this FormatMajorVersion. 
func (v FormatMajorVersion) MaxTableFormat() sstable.TableFormat { switch v { - case FormatDefault, FormatMostCompatible, formatVersionedManifestMarker, - FormatVersioned, FormatSetWithDelete: - return sstable.TableFormatRocksDBv2 - case FormatBlockPropertyCollector, FormatSplitUserKeysMarked, - FormatSplitUserKeysMarkedCompacted: - return sstable.TableFormatPebblev1 - case FormatRangeKeys, FormatMinTableFormatPebblev1, FormatPrePebblev1Marked, - formatUnusedPrePebblev1MarkedCompacted: - return sstable.TableFormatPebblev2 - case FormatSSTableValueBlocks, FormatFlushableIngest, FormatPrePebblev1MarkedCompacted: + case FormatDefault, FormatFlushableIngest, FormatPrePebblev1MarkedCompacted: return sstable.TableFormatPebblev3 case FormatDeleteSizedAndObsolete, FormatVirtualSSTables: return sstable.TableFormatPebblev4 @@ -204,14 +220,7 @@ func (v FormatMajorVersion) MaxTableFormat() sstable.TableFormat { // this FormatMajorVersion. func (v FormatMajorVersion) MinTableFormat() sstable.TableFormat { switch v { - case FormatDefault, FormatMostCompatible, formatVersionedManifestMarker, - FormatVersioned, FormatSetWithDelete, FormatBlockPropertyCollector, - FormatSplitUserKeysMarked, FormatSplitUserKeysMarkedCompacted, - FormatRangeKeys: - return sstable.TableFormatLevelDB - case FormatMinTableFormatPebblev1, FormatPrePebblev1Marked, - formatUnusedPrePebblev1MarkedCompacted, FormatSSTableValueBlocks, - FormatFlushableIngest, FormatPrePebblev1MarkedCompacted, + case FormatDefault, FormatFlushableIngest, FormatPrePebblev1MarkedCompacted, FormatDeleteSizedAndObsolete, FormatVirtualSSTables: return sstable.TableFormatPebblev1 default: @@ -219,18 +228,6 @@ func (v FormatMajorVersion) MinTableFormat() sstable.TableFormat { } } -// orderingInvariants returns an enum encoding the set of invariants that must -// hold within the receiver format major version. 
Invariants only get stricter -// as the format major version advances, so it is okay to retrieve the -// invariants from the current format major version and by the time the -// invariants are enforced, the format major version has advanced. -func (v FormatMajorVersion) orderingInvariants() manifest.OrderingInvariants { - if v < FormatSplitUserKeysMarkedCompacted { - return manifest.AllowSplitUserKeys - } - return manifest.ProhibitSplitUserKeys -} - // formatMajorVersionMigrations defines the migrations from one format // major version to the next. Each migration is defined as a closure // which will be invoked on the database before the new format major @@ -242,112 +239,7 @@ func (v FormatMajorVersion) orderingInvariants() manifest.OrderingInvariants { // panic if a migration returns a nil error but fails to finalize the // new format major version. var formatMajorVersionMigrations = map[FormatMajorVersion]func(*DB) error{ - FormatMostCompatible: func(d *DB) error { return nil }, - formatVersionedManifestMarker: func(d *DB) error { - // formatVersionedManifestMarker introduces the use of a marker - // file for pointing to the current MANIFEST file. - - // Lock the manifest. - d.mu.versions.logLock() - defer d.mu.versions.logUnlock() - - // Construct the filename of the currently active manifest and - // move the manifest marker to that filename. The marker is - // guaranteed to exist, because we unconditionally locate it - // during Open. - manifestFileNum := d.mu.versions.manifestFileNum - filename := base.MakeFilename(fileTypeManifest, manifestFileNum) - if err := d.mu.versions.manifestMarker.Move(filename); err != nil { - return errors.Wrap(err, "moving manifest marker") - } - - // Now that we have a manifest marker file in place and pointing - // to the current MANIFEST, finalize the upgrade. If we fail for - // some reason, a retry of this migration is guaranteed to again - // move the manifest marker file to the latest manifest. 
If - // we're unable to finalize the upgrade, a subsequent call to - // Open will ignore the manifest marker. - if err := d.finalizeFormatVersUpgrade(formatVersionedManifestMarker); err != nil { - return err - } - - // We've finalized the upgrade. All subsequent Open calls will - // ignore the CURRENT file and instead read the manifest marker. - // Before we unlock the manifest, we need to update versionSet - // to use the manifest marker on future rotations. - d.mu.versions.setCurrent = setCurrentFuncMarker( - d.mu.versions.manifestMarker, - d.mu.versions.fs, - d.mu.versions.dirname) - return nil - }, - // The FormatVersioned version is split into two, each with their - // own migration to ensure the post-migration cleanup happens even - // if there's a crash immediately after finalizing the version. Once - // a new format major version is finalized, its migration will never - // run again. Post-migration cleanup like the one in the migration - // below must be performed in a separate migration or every time the - // database opens. - FormatVersioned: func(d *DB) error { - // Replace the `CURRENT` file with one that points to the - // nonexistent `MANIFEST-000000` file. If an earlier Pebble - // version that does not know about format major versions - // attempts to open the database, it will error avoiding - // accidental corruption. - if err := setCurrentFile(d.mu.versions.dirname, d.mu.versions.fs, base.FileNum(0).DiskFileNum()); err != nil { - return err - } - return d.finalizeFormatVersUpgrade(FormatVersioned) - }, - // As SetWithDelete is a new key kind, there is nothing to migrate. We can - // simply finalize the format version and we're done. 
- FormatSetWithDelete: func(d *DB) error { - return d.finalizeFormatVersUpgrade(FormatSetWithDelete) - }, - FormatBlockPropertyCollector: func(d *DB) error { - return d.finalizeFormatVersUpgrade(FormatBlockPropertyCollector) - }, - FormatSplitUserKeysMarked: func(d *DB) error { - // Mark any unmarked files with split-user keys. Note all format major - // versions migrations are invoked with DB.mu locked. - if err := d.markFilesLocked(markFilesWithSplitUserKeys(d.opts.Comparer.Equal)); err != nil { - return err - } - return d.finalizeFormatVersUpgrade(FormatSplitUserKeysMarked) - }, - FormatSplitUserKeysMarkedCompacted: func(d *DB) error { - // Before finalizing the format major version, rewrite any sstables - // still marked for compaction. Note all format major versions - // migrations are invoked with DB.mu locked. - if err := d.compactMarkedFilesLocked(); err != nil { - return err - } - return d.finalizeFormatVersUpgrade(FormatSplitUserKeysMarkedCompacted) - }, - FormatRangeKeys: func(d *DB) error { - return d.finalizeFormatVersUpgrade(FormatRangeKeys) - }, - FormatMinTableFormatPebblev1: func(d *DB) error { - return d.finalizeFormatVersUpgrade(FormatMinTableFormatPebblev1) - }, - FormatPrePebblev1Marked: func(d *DB) error { - // Mark any unmarked files that contain only table properties. Note all - // format major versions migrations are invoked with DB.mu locked. - if err := d.markFilesLocked(markFilesPrePebblev1(d.tableCache)); err != nil { - return err - } - return d.finalizeFormatVersUpgrade(FormatPrePebblev1Marked) - }, - formatUnusedPrePebblev1MarkedCompacted: func(d *DB) error { - // Intentional no-op. 
- return d.finalizeFormatVersUpgrade(formatUnusedPrePebblev1MarkedCompacted) - }, - FormatSSTableValueBlocks: func(d *DB) error { - return d.finalizeFormatVersUpgrade(FormatSSTableValueBlocks) - }, - FormatFlushableIngest: func(d *DB) error { - return d.finalizeFormatVersUpgrade(FormatFlushableIngest) - }, + FormatFlushableIngest: func(d *DB) error { return nil }, FormatPrePebblev1MarkedCompacted: func(d *DB) error { // Before finalizing the format major version, rewrite any sstables // still marked for compaction. Note all format major versions @@ -367,6 +259,12 @@ var formatMajorVersionMigrations = map[FormatMajorVersion]func(*DB) error{ const formatVersionMarkerName = `format-version` +// lookupFormatMajorVersion retrieves the format version from the format version +// marker file. +// +// If such a file does not exist, returns FormatDefault. Note that this case is +// only acceptable if we are creating a new store (we no longer support +// FormatMostCompatible which is the only one with no version marker file). 
func lookupFormatMajorVersion( fs vfs.FS, dirname string, ) (FormatMajorVersion, *atomicfs.Marker, error) { @@ -375,7 +273,7 @@ func lookupFormatMajorVersion( return 0, nil, err } if versString == "" { - return FormatMostCompatible, m, nil + return FormatDefault, m, nil } v, err := strconv.ParseUint(versString, 10, 64) if err != nil { @@ -386,7 +284,10 @@ func lookupFormatMajorVersion( return 0, nil, errors.Newf("pebble: default format major version should not persisted", vers) } if vers > internalFormatNewest { - return 0, nil, errors.Newf("pebble: database %q written in format major version %d", dirname, vers) + return 0, nil, errors.Newf("pebble: database %q written in unknown format major version %d", dirname, vers) + } + if vers < FormatMinSupported { + return 0, nil, errors.Newf("pebble: database %q written in format major version %d which is no longer supported", dirname, vers) } return vers, m, nil } @@ -456,11 +357,7 @@ func (d *DB) ratchetFormatMajorVersionLocked(formatVers FormatMajorVersion) erro // // See formatMajorVersionMigrations. func (d *DB) finalizeFormatVersUpgrade(formatVers FormatMajorVersion) error { - // We use the marker to encode the active format version in the - // marker filename. Unlike other uses of the atomic marker, there is - // no file with the filename `formatVers.String()` on the - // filesystem. - if err := d.mu.formatVers.marker.Move(formatVers.String()); err != nil { + if err := d.writeFormatVersionMarker(formatVers); err != nil { return err } d.mu.formatVers.vers.Store(uint64(formatVers)) @@ -468,6 +365,14 @@ func (d *DB) finalizeFormatVersUpgrade(formatVers FormatMajorVersion) error { return nil } +func (d *DB) writeFormatVersionMarker(formatVers FormatMajorVersion) error { + // We use the marker to encode the active format version in the + // marker filename. Unlike other uses of the atomic marker, there is + // no file with the filename `formatVers.String()` on the + // filesystem. 
+ return d.mu.formatVers.marker.Move(formatVers.String()) +} + // compactMarkedFilesLocked performs a migration that schedules rewrite // compactions to compact away any sstables marked for compaction. // compactMarkedFilesLocked is run while ratcheting the database's format major @@ -523,73 +428,11 @@ func (d *DB) compactMarkedFilesLocked() error { // level. type findFilesFunc func(v *version) (found bool, files [numLevels][]*fileMetadata, _ error) -// markFilesWithSplitUserKeys scans the LSM's levels 1 through 6 for adjacent -// files that contain the same user key. Such arrangements of files were -// permitted in RocksDB and in Pebble up to SHA a860bbad. -var markFilesWithSplitUserKeys = func(equal Equal) findFilesFunc { - return func(v *version) (found bool, files [numLevels][]*fileMetadata, _ error) { - // Files with split user keys are expected to be rare and performing key - // comparisons for every file within the LSM is expensive, so drop the - // database lock while scanning the file metadata. - for l := numLevels - 1; l > 0; l-- { - iter := v.Levels[l].Iter() - var prevFile *fileMetadata - var prevUserKey []byte - for f := iter.First(); f != nil; f = iter.Next() { - if prevUserKey != nil && equal(prevUserKey, f.Smallest.UserKey) { - // NB: We may append a file twice, once as prevFile and once - // as f. That's okay, and handled below. - files[l] = append(files[l], prevFile, f) - found = true - } - if f.Largest.IsExclusiveSentinel() { - prevUserKey = nil - prevFile = nil - } else { - prevUserKey = f.Largest.UserKey - prevFile = f - } - } - } - return - } -} - -// markFilesPrePebblev1 scans the LSM for files that do not support block -// properties (i.e. a table format version pre-Pebblev1). 
-var markFilesPrePebblev1 = func(tc *tableCacheContainer) findFilesFunc { - return func(v *version) (found bool, files [numLevels][]*fileMetadata, err error) { - for l := numLevels - 1; l > 0; l-- { - iter := v.Levels[l].Iter() - for f := iter.First(); f != nil; f = iter.Next() { - if f.Virtual { - // Any physical sstable which has been virtualized must - // have already undergone this migration, and we don't - // need to worry about the virtual sstable themselves. - panic("pebble: unexpected virtual sstable during migration") - } - err = tc.withReader( - f.PhysicalMeta(), func(r *sstable.Reader) error { - tf, err := r.TableFormat() - if err != nil { - return err - } - if tf < sstable.TableFormatPebblev1 { - found = true - files[l] = append(files[l], f) - } - return nil - }) - if err != nil { - return - } - } - } - return - } -} +// This method is not used currently, but it will be useful the next time we need +// to mark files for compaction. +var _ = (*DB)(nil).markFilesLocked -// markFilesLock durably marks the files that match the given findFilesFunc for +// markFilesLocked durably marks the files that match the given findFilesFunc for // compaction. func (d *DB) markFilesLocked(findFn findFilesFunc) error { jobID := d.mu.nextJobID diff --git a/format_major_version_test.go b/format_major_version_test.go index bbca42b5a4..be5519b4b1 100644 --- a/format_major_version_test.go +++ b/format_major_version_test.go @@ -5,25 +5,33 @@ package pebble import ( - "bytes" "fmt" - "strconv" - "sync" "testing" - "time" - "github.com/cockroachdb/datadriven" - "github.com/cockroachdb/pebble/bloom" - "github.com/cockroachdb/pebble/internal/base" - "github.com/cockroachdb/pebble/internal/testkeys" "github.com/cockroachdb/pebble/sstable" "github.com/cockroachdb/pebble/vfs" "github.com/cockroachdb/pebble/vfs/atomicfs" "github.com/stretchr/testify/require" ) +// TestFormatMajorVersionStableValues checks that we don't accidentally change the +// numbers of format versions.
+func TestFormatMajorVersionStableValues(t *testing.T) { + require.Equal(t, FormatDefault, FormatMajorVersion(0)) + + require.Equal(t, FormatFlushableIngest, FormatMajorVersion(13)) + require.Equal(t, FormatPrePebblev1MarkedCompacted, FormatMajorVersion(14)) + require.Equal(t, FormatDeleteSizedAndObsolete, FormatMajorVersion(15)) + require.Equal(t, FormatVirtualSSTables, FormatMajorVersion(16)) + + // When we add a new version, we should add a check for the new version in + // addition to updating these expected values. + require.Equal(t, FormatNewest, FormatMajorVersion(16)) + require.Equal(t, internalFormatNewest, FormatMajorVersion(16)) +} + func TestFormatMajorVersion_MigrationDefined(t *testing.T) { - for v := FormatMostCompatible; v <= FormatNewest; v++ { + for v := FormatMinSupported; v <= FormatNewest; v++ { if _, ok := formatMajorVersionMigrations[v]; !ok { t.Errorf("format major version %d has no migration defined", v) } @@ -35,30 +43,6 @@ func TestRatchetFormat(t *testing.T) { d, err := Open("", (&Options{FS: fs}).WithFSDefaults()) require.NoError(t, err) require.NoError(t, d.Set([]byte("foo"), []byte("bar"), Sync)) - require.Equal(t, FormatMostCompatible, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatVersioned)) - require.Equal(t, FormatVersioned, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatVersioned)) - require.Equal(t, FormatVersioned, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatSetWithDelete)) - require.Equal(t, FormatSetWithDelete, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatBlockPropertyCollector)) - require.Equal(t, FormatBlockPropertyCollector, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatSplitUserKeysMarked)) - require.Equal(t, FormatSplitUserKeysMarked, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatSplitUserKeysMarkedCompacted)) - 
require.Equal(t, FormatSplitUserKeysMarkedCompacted, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatRangeKeys)) - require.Equal(t, FormatRangeKeys, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatMinTableFormatPebblev1)) - require.Equal(t, FormatMinTableFormatPebblev1, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatPrePebblev1Marked)) - require.Equal(t, FormatPrePebblev1Marked, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(formatUnusedPrePebblev1MarkedCompacted)) - require.Equal(t, formatUnusedPrePebblev1MarkedCompacted, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatSSTableValueBlocks)) - require.Equal(t, FormatSSTableValueBlocks, d.FormatMajorVersion()) - require.NoError(t, d.RatchetFormatMajorVersion(FormatFlushableIngest)) require.Equal(t, FormatFlushableIngest, d.FormatMajorVersion()) require.NoError(t, d.RatchetFormatMajorVersion(FormatPrePebblev1MarkedCompacted)) require.Equal(t, FormatPrePebblev1MarkedCompacted, d.FormatMajorVersion()) @@ -84,10 +68,10 @@ func TestRatchetFormat(t *testing.T) { _, err = Open("", (&Options{ FS: fs, - FormatMajorVersion: FormatVersioned, + FormatMajorVersion: FormatMinSupported, }).WithFSDefaults()) require.Error(t, err) - require.EqualError(t, err, `pebble: database "" written in format major version 999999`) + require.EqualError(t, err, `pebble: database "" written in unknown format major version 999999`) } func testBasicDB(d *DB) error { @@ -113,7 +97,7 @@ func testBasicDB(d *DB) error { } func TestFormatMajorVersions(t *testing.T) { - for vers := FormatMostCompatible; vers <= FormatNewest; vers++ { + for vers := FormatMinSupported; vers <= FormatNewest; vers++ { t.Run(fmt.Sprintf("vers=%03d", vers), func(t *testing.T) { fs := vfs.NewStrictMem() opts := (&Options{ @@ -214,27 +198,15 @@ func TestFormatMajorVersions_TableFormat(t *testing.T) { // fixture is intentionally 
verbose. m := map[FormatMajorVersion][2]sstable.TableFormat{ - FormatDefault: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, - FormatMostCompatible: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, - formatVersionedManifestMarker: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, - FormatVersioned: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, - FormatSetWithDelete: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, - FormatBlockPropertyCollector: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, - FormatSplitUserKeysMarked: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, - FormatSplitUserKeysMarkedCompacted: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, - FormatRangeKeys: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev2}, - FormatMinTableFormatPebblev1: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, - FormatPrePebblev1Marked: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, - formatUnusedPrePebblev1MarkedCompacted: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, - FormatSSTableValueBlocks: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, - FormatFlushableIngest: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, - FormatPrePebblev1MarkedCompacted: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, - FormatDeleteSizedAndObsolete: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev4}, - FormatVirtualSSTables: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev4}, + FormatDefault: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, + FormatFlushableIngest: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, + FormatPrePebblev1MarkedCompacted: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, + FormatDeleteSizedAndObsolete: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev4}, + FormatVirtualSSTables: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev4}, } // 
Valid versions. - for fmv := FormatMostCompatible; fmv <= internalFormatNewest; fmv++ { + for fmv := FormatMinSupported; fmv <= internalFormatNewest; fmv++ { got := [2]sstable.TableFormat{fmv.MinTableFormat(), fmv.MaxTableFormat()} require.Equalf(t, m[fmv], got, "got %s; want %s", got, m[fmv]) require.True(t, got[0] <= got[1] /* min <= max */) @@ -245,336 +217,3 @@ func TestFormatMajorVersions_TableFormat(t *testing.T) { require.Panics(t, func() { _ = fmv.MaxTableFormat() }) require.Panics(t, func() { _ = fmv.MinTableFormat() }) } - -func TestSplitUserKeyMigration(t *testing.T) { - var d *DB - var opts *Options - var fs vfs.FS - var buf bytes.Buffer - defer func() { - if d != nil { - require.NoError(t, d.Close()) - } - }() - - datadriven.RunTest(t, "testdata/format_major_version_split_user_key_migration", - func(t *testing.T, td *datadriven.TestData) string { - switch td.Cmd { - case "define": - if d != nil { - if err := d.Close(); err != nil { - return err.Error() - } - buf.Reset() - } - opts = (&Options{ - FormatMajorVersion: FormatBlockPropertyCollector, - EventListener: &EventListener{ - CompactionEnd: func(info CompactionInfo) { - // Fix the job ID and durations for determinism. 
- info.JobID = 100 - info.Duration = time.Second - info.TotalDuration = 2 * time.Second - fmt.Fprintln(&buf, info) - }, - }, - DisableAutomaticCompactions: true, - }).WithFSDefaults() - var err error - if d, err = runDBDefineCmd(td, opts); err != nil { - return err.Error() - } - - fs = d.opts.FS - d.mu.Lock() - defer d.mu.Unlock() - return d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) - case "reopen": - if d != nil { - if err := d.Close(); err != nil { - return err.Error() - } - buf.Reset() - } - opts.FS = fs - opts.DisableAutomaticCompactions = true - var err error - d, err = Open("", opts) - if err != nil { - return err.Error() - } - return "OK" - case "build": - if err := runBuildCmd(td, d, fs); err != nil { - return err.Error() - } - return "" - case "force-ingest": - if err := runForceIngestCmd(td, d); err != nil { - return err.Error() - } - d.mu.Lock() - defer d.mu.Unlock() - return d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) - case "format-major-version": - return d.FormatMajorVersion().String() - case "ratchet-format-major-version": - v, err := strconv.Atoi(td.CmdArgs[0].String()) - if err != nil { - return err.Error() - } - if err := d.RatchetFormatMajorVersion(FormatMajorVersion(v)); err != nil { - return err.Error() - } - return buf.String() - case "lsm": - return runLSMCmd(td, d) - case "marked-file-count": - m := d.Metrics() - return fmt.Sprintf("%d files marked for compaction", m.Compact.MarkedFiles) - case "disable-automatic-compactions": - d.mu.Lock() - defer d.mu.Unlock() - switch v := td.CmdArgs[0].String(); v { - case "true": - d.opts.DisableAutomaticCompactions = true - case "false": - d.opts.DisableAutomaticCompactions = false - default: - return fmt.Sprintf("unknown value %q", v) - } - return "" - default: - return fmt.Sprintf("unrecognized command %q", td.Cmd) - } - }) -} - -func TestPebblev1Migration(t *testing.T) { - var d *DB - defer func() { - if d != nil { - require.NoError(t, d.Close()) - } - 
}() - - datadriven.RunTest(t, "testdata/format_major_version_pebblev1_migration", - func(t *testing.T, td *datadriven.TestData) string { - switch cmd := td.Cmd; cmd { - case "open": - var version int - var err error - for _, cmdArg := range td.CmdArgs { - switch cmd := cmdArg.Key; cmd { - case "version": - version, err = strconv.Atoi(cmdArg.Vals[0]) - if err != nil { - return err.Error() - } - default: - return fmt.Sprintf("unknown argument: %s", cmd) - } - } - opts := (&Options{ - FS: vfs.NewMem(), - FormatMajorVersion: FormatMajorVersion(version), - }).WithFSDefaults() - d, err = Open("", opts) - if err != nil { - return err.Error() - } - return "" - - case "format-major-version": - return d.FormatMajorVersion().String() - - case "min-table-format": - return d.FormatMajorVersion().MinTableFormat().String() - - case "max-table-format": - return d.FormatMajorVersion().MaxTableFormat().String() - - case "disable-automatic-compactions": - d.mu.Lock() - defer d.mu.Unlock() - switch v := td.CmdArgs[0].String(); v { - case "true": - d.opts.DisableAutomaticCompactions = true - case "false": - d.opts.DisableAutomaticCompactions = false - default: - return fmt.Sprintf("unknown value %q", v) - } - return "" - - case "batch": - b := d.NewIndexedBatch() - if err := runBatchDefineCmd(td, b); err != nil { - return err.Error() - } - if err := b.Commit(nil); err != nil { - return err.Error() - } - return "" - - case "flush": - if err := d.Flush(); err != nil { - return err.Error() - } - return "" - - case "ingest": - if err := runBuildCmd(td, d, d.opts.FS); err != nil { - return err.Error() - } - // Only the first arg is a filename. 
- td.CmdArgs = td.CmdArgs[:1] - if err := runIngestCmd(td, d, d.opts.FS); err != nil { - return err.Error() - } - return "" - - case "lsm": - return runLSMCmd(td, d) - - case "tally-table-formats": - d.mu.Lock() - defer d.mu.Unlock() - v := d.mu.versions.currentVersion() - tally := make([]int, sstable.TableFormatMax+1) - for _, l := range v.Levels { - iter := l.Iter() - for m := iter.First(); m != nil; m = iter.Next() { - err := d.tableCache.withReader(m.PhysicalMeta(), - func(r *sstable.Reader) error { - f, err := r.TableFormat() - if err != nil { - return err - } - tally[f]++ - return nil - }) - if err != nil { - return err.Error() - } - } - } - var b bytes.Buffer - for i := 1; i <= int(sstable.TableFormatMax); i++ { - _, _ = fmt.Fprintf(&b, "%s: %d\n", sstable.TableFormat(i), tally[i]) - } - return b.String() - - case "ratchet-format-major-version": - v, err := strconv.Atoi(td.CmdArgs[0].String()) - if err != nil { - return err.Error() - } - if err = d.RatchetFormatMajorVersion(FormatMajorVersion(v)); err != nil { - return err.Error() - } - return "" - - case "marked-file-count": - m := d.Metrics() - return fmt.Sprintf("%d files marked for compaction", m.Compact.MarkedFiles) - - default: - return fmt.Sprintf("unknown command: %s", cmd) - } - }, - ) -} - -// TestPebblev1MigrationRace exercises the race between a PrePebbleV1Marked -// format major version upgrade that needs to open sstables to read their table -// format, and concurrent compactions that may delete the same files from the -// LSM. -// -// Regression test for #2019. -func TestPebblev1MigrationRace(t *testing.T) { - // Use a smaller table cache size to slow down the PrePebbleV1Marked - // migration, ensuring each table read needs to re-open the file. 
- cache := NewCache(4 << 20) - defer cache.Unref() - tableCache := NewTableCache(cache, 1, 5) - defer tableCache.Unref() - d, err := Open("", (&Options{ - Cache: cache, - FS: vfs.NewMem(), - FormatMajorVersion: FormatMajorVersion(FormatPrePebblev1Marked - 1), - TableCache: tableCache, - Levels: []LevelOptions{{TargetFileSize: 1}}, - }).WithFSDefaults()) - require.NoError(t, err) - defer d.Close() - - ks := testkeys.Alpha(3).EveryN(10) - var key [3]byte - for i := int64(0); i < ks.Count(); i++ { - n := testkeys.WriteKey(key[:], ks, i) - require.NoError(t, d.Set(key[:n], key[:n], nil)) - require.NoError(t, d.Flush()) - } - - // Asynchronously write and flush range deletes that will cause compactions - // to delete the existing sstables. These deletes will race with the format - // major version upgrade's migration will attempt to delete the files. - var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - for i := ks.Count() - 1; i > 0; i -= 50 { - endKey := testkeys.Key(ks, i) - startIndex := i - 50 - if startIndex < 0 { - startIndex = 0 - } - startKey := testkeys.Key(ks, startIndex) - - require.NoError(t, d.DeleteRange(startKey, endKey, nil)) - _, err := d.AsyncFlush() - require.NoError(t, err) - } - }() - require.NoError(t, d.RatchetFormatMajorVersion(FormatPrePebblev1Marked)) - wg.Wait() -} - -// Regression test for #2044, where multiple concurrent compactions can lead -// to an indefinite wait on the compaction goroutine in compactMarkedFilesLocked. 
-func TestPebblev1MigrationConcurrencyRace(t *testing.T) { - opts := (&Options{ - Comparer: testkeys.Comparer, - FS: vfs.NewMem(), - FormatMajorVersion: FormatSplitUserKeysMarked, - Levels: []LevelOptions{{FilterPolicy: bloom.FilterPolicy(10)}}, - MaxConcurrentCompactions: func() int { - return 4 - }, - }).WithFSDefaults() - func() { - d, err := Open("", opts) - require.NoError(t, err) - defer func() { - require.NoError(t, d.Close()) - }() - - ks := testkeys.Alpha(3).EveryN(10) - var key [3]byte - for i := int64(0); i < ks.Count(); i++ { - n := testkeys.WriteKey(key[:], ks, i) - require.NoError(t, d.Set(key[:n], key[:n], nil)) - if i%100 == 0 { - require.NoError(t, d.Flush()) - } - } - require.NoError(t, d.Flush()) - }() - - opts.FormatMajorVersion = formatUnusedPrePebblev1MarkedCompacted - d, err := Open("", opts) - require.NoError(t, err) - require.NoError(t, d.RatchetFormatMajorVersion(formatUnusedPrePebblev1MarkedCompacted)) - require.NoError(t, d.Close()) -} diff --git a/get_iter_test.go b/get_iter_test.go index ab6e67e0a9..244c337823 100644 --- a/get_iter_test.go +++ b/get_iter_test.go @@ -163,91 +163,6 @@ func TestGetIter(t *testing.T) { }, }, - { - description: "quad-4: four level-4 tables", - tables: []testTable{ - { - level: 4, - fileNum: 11, - data: []string{ - "aardvark.SET.101 a1", - "alpaca__.SET.201 a2", - "anteater.SET.301 a3", - }, - }, - { - level: 4, - fileNum: 22, - data: []string{ - "baboon__.SET.102 b1", - "baboon__.DEL.202 ", - "baboon__.SET.302 b3", - "bear____.SET.402 b4", - "bear____.DEL.502 ", - "buffalo_.SET.602 b6", - }, - }, - { - level: 4, - fileNum: 33, - data: []string{ - "buffalo_.SET.103 B1", - }, - }, - { - level: 4, - fileNum: 44, - data: []string{ - "chipmunk.SET.104 c1", - "chipmunk.SET.204 c2", - }, - }, - }, - queries: []string{ - "a_______.SEPARATOR.999 ErrNotFound", - "aardvark.SEPARATOR.999 a1", - "aardvark.SEPARATOR.102 a1", - "aardvark.SEPARATOR.101 a1", - "aardvark.SEPARATOR.100 ErrNotFound", - "alpaca__.SEPARATOR.999 
a2", - "alpaca__.SEPARATOR.200 ErrNotFound", - "anteater.SEPARATOR.999 a3", - "anteater.SEPARATOR.302 a3", - "anteater.SEPARATOR.301 a3", - "anteater.SEPARATOR.300 ErrNotFound", - "anteater.SEPARATOR.000 ErrNotFound", - "b_______.SEPARATOR.999 ErrNotFound", - "baboon__.SEPARATOR.999 b3", - "baboon__.SEPARATOR.302 b3", - "baboon__.SEPARATOR.301 ErrNotFound", - "baboon__.SEPARATOR.202 ErrNotFound", - "baboon__.SEPARATOR.201 b1", - "baboon__.SEPARATOR.102 b1", - "baboon__.SEPARATOR.101 ErrNotFound", - "bear____.SEPARATOR.999 ErrNotFound", - "bear____.SEPARATOR.500 b4", - "bear____.SEPARATOR.000 ErrNotFound", - "buffalo_.SEPARATOR.999 b6", - "buffalo_.SEPARATOR.603 b6", - "buffalo_.SEPARATOR.602 b6", - "buffalo_.SEPARATOR.601 B1", - "buffalo_.SEPARATOR.104 B1", - "buffalo_.SEPARATOR.103 B1", - "buffalo_.SEPARATOR.102 ErrNotFound", - "buffalo_.SEPARATOR.000 ErrNotFound", - "c_______.SEPARATOR.999 ErrNotFound", - "chipmunk.SEPARATOR.999 c2", - "chipmunk.SEPARATOR.205 c2", - "chipmunk.SEPARATOR.204 c2", - "chipmunk.SEPARATOR.203 c1", - "chipmunk.SEPARATOR.105 c1", - "chipmunk.SEPARATOR.104 c1", - "chipmunk.SEPARATOR.103 ErrNotFound", - "chipmunk.SEPARATOR.000 ErrNotFound", - "d_______.SEPARATOR.999 ErrNotFound", - }, - }, - { description: "complex: many tables at many levels", tables: []testTable{ @@ -515,7 +430,7 @@ func TestGetIter(t *testing.T) { files[tt.level] = append(files[tt.level], meta) } v := manifest.NewVersion(cmp, base.DefaultFormatter, 10<<20, files) - err := v.CheckOrdering(cmp, base.DefaultFormatter, manifest.AllowSplitUserKeys) + err := v.CheckOrdering(cmp, base.DefaultFormatter) if tc.badOrdering && err == nil { t.Errorf("desc=%q: want bad ordering, got nil error", desc) continue diff --git a/ingest.go b/ingest.go index 2ef8f43296..f6726ab01d 100644 --- a/ingest.go +++ b/ingest.go @@ -118,9 +118,12 @@ func ingestSynthesizeShared( // NB: We create new internal keys and pass them into ExternalRangeKeyBounds // so that we can sub a zero sequence number 
into the bounds. We can set // the sequence number to anything here; it'll be reset in ingestUpdateSeqNum - // anyway. However we do need to use the same sequence number across all + // anyway. However, we do need to use the same sequence number across all // bound keys at this step so that we end up with bounds that are consistent // across point/range keys. + // Note that the kind of the smallest key might change because of the seqnum + // rewriting. For example, the sstable could start with a.SET.2 and + // a.RANGEDEL.1 (with smallest key being a.SET.2) but after rewriting the seqnum we have `a.RANGEDEL.1`a.SET.100 smallestRangeKey := base.MakeInternalKey(sm.SmallestRangeKey.UserKey, 0, sm.SmallestRangeKey.Kind()) largestRangeKey := base.MakeExclusiveSentinelKey(sm.LargestRangeKey.Kind(), sm.LargestRangeKey.UserKey) meta.ExtendRangeKeyBounds(opts.Comparer.Compare, smallestRangeKey, largestRangeKey) diff --git a/ingest_test.go b/ingest_test.go index e23cdb39ad..af716240b7 100644 --- a/ingest_test.go +++ b/ingest_test.go @@ -2539,54 +2539,47 @@ func TestConcurrentIngestCompact(t *testing.T) { func TestIngestFlushQueuedMemTable(t *testing.T) { // Verify that ingestion forces a flush of a queued memtable. - // Test with a format major version prior to FormatFlushableIngest and one - // after. Both should result in the same statistic calculations. - for _, fmv := range []FormatMajorVersion{FormatFlushableIngest - 1, internalFormatNewest} { - func(fmv FormatMajorVersion) { - mem := vfs.NewMem() - d, err := Open("", &Options{ - FS: mem, - FormatMajorVersion: fmv, - }) - require.NoError(t, err) + mem := vfs.NewMem() + o := &Options{FS: mem} + o.testingRandomized(t) + d, err := Open("", o) + require.NoError(t, err) - // Add the key "a" to the memtable, then fill up the memtable with the key - // "b". The ingested sstable will only overlap with the queued memtable. 
- require.NoError(t, d.Set([]byte("a"), nil, nil)) - for { - require.NoError(t, d.Set([]byte("b"), nil, nil)) - d.mu.Lock() - done := len(d.mu.mem.queue) == 2 - d.mu.Unlock() - if done { - break - } - } + // Add the key "a" to the memtable, then fill up the memtable with the key + // "b". The ingested sstable will only overlap with the queued memtable. + require.NoError(t, d.Set([]byte("a"), nil, nil)) + for { + require.NoError(t, d.Set([]byte("b"), nil, nil)) + d.mu.Lock() + done := len(d.mu.mem.queue) == 2 + d.mu.Unlock() + if done { + break + } + } - ingest := func(keys ...string) { - t.Helper() - f, err := mem.Create("ext") - require.NoError(t, err) + ingest := func(keys ...string) { + t.Helper() + f, err := mem.Create("ext") + require.NoError(t, err) - w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ - TableFormat: fmv.MinTableFormat(), - }) - for _, k := range keys { - require.NoError(t, w.Set([]byte(k), nil)) - } - require.NoError(t, w.Close()) - stats, err := d.IngestWithStats([]string{"ext"}) - require.NoError(t, err) - require.Equal(t, stats.ApproxIngestedIntoL0Bytes, stats.Bytes) - require.Equal(t, stats.MemtableOverlappingFiles, 1) - require.Less(t, uint64(0), stats.Bytes) - } + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: o.FormatMajorVersion.MinTableFormat(), + }) + for _, k := range keys { + require.NoError(t, w.Set([]byte(k), nil)) + } + require.NoError(t, w.Close()) + stats, err := d.IngestWithStats([]string{"ext"}) + require.NoError(t, err) + require.Equal(t, stats.ApproxIngestedIntoL0Bytes, stats.Bytes) + require.Equal(t, stats.MemtableOverlappingFiles, 1) + require.Less(t, uint64(0), stats.Bytes) + } - ingest("a") + ingest("a") - require.NoError(t, d.Close()) - }(fmv) - } + require.NoError(t, d.Close()) } func TestIngestStats(t *testing.T) { diff --git a/internal/base/filenames.go b/internal/base/filenames.go index 06098ab639..c4420ffb76 100644 --- 
a/internal/base/filenames.go +++ b/internal/base/filenames.go @@ -62,7 +62,6 @@ const ( FileTypeLock FileTypeTable FileTypeManifest - FileTypeCurrent FileTypeOptions FileTypeOldTemp FileTypeTemp @@ -79,8 +78,6 @@ func MakeFilename(fileType FileType, dfn DiskFileNum) string { return fmt.Sprintf("%s.sst", dfn) case FileTypeManifest: return fmt.Sprintf("MANIFEST-%s", dfn) - case FileTypeCurrent: - return "CURRENT" case FileTypeOptions: return fmt.Sprintf("OPTIONS-%s", dfn) case FileTypeOldTemp: @@ -100,8 +97,6 @@ func MakeFilepath(fs vfs.FS, dirname string, fileType FileType, dfn DiskFileNum) func ParseFilename(fs vfs.FS, filename string) (fileType FileType, dfn DiskFileNum, ok bool) { filename = fs.PathBase(filename) switch { - case filename == "CURRENT": - return FileTypeCurrent, 0, true case filename == "LOCK": return FileTypeLock, 0, true case strings.HasPrefix(filename, "MANIFEST-"): diff --git a/internal/base/filenames_test.go b/internal/base/filenames_test.go index 07b74302f6..f51292ce67 100644 --- a/internal/base/filenames_test.go +++ b/internal/base/filenames_test.go @@ -24,8 +24,7 @@ func TestParseFilename(t *testing.T) { "abcdef.log": false, "000001ldb": false, "000001.sst": true, - "CURRENT": true, - "CURRaNT": false, + "CURRENT": false, "LOCK": true, "xLOCK": false, "x.LOCK": false, @@ -55,9 +54,8 @@ func TestParseFilename(t *testing.T) { func TestFilenameRoundTrip(t *testing.T) { testCases := map[FileType]bool{ - // CURRENT and LOCK files aren't numbered. - FileTypeCurrent: false, - FileTypeLock: false, + // LOCK files aren't numbered. + FileTypeLock: false, // The remaining file types are numbered. 
FileTypeLog: true, FileTypeManifest: true, @@ -104,7 +102,7 @@ func TestMustExist(t *testing.T) { MustExist(fs, filename, &buf, err) require.Equal(t, `000000.sst: file does not exist -directory contains 9 files, 2 unknown, 1 tables, 1 logs, 2 manifests`, buf.buf.String()) +directory contains 9 files, 3 unknown, 1 tables, 1 logs, 2 manifests`, buf.buf.String()) } func TestRedactFileNum(t *testing.T) { diff --git a/internal/keyspan/level_iter_test.go b/internal/keyspan/level_iter_test.go index 6e3039677a..d9897dc28b 100644 --- a/internal/keyspan/level_iter_test.go +++ b/internal/keyspan/level_iter_test.go @@ -309,7 +309,7 @@ func TestLevelIterEquivalence(t *testing.T) { amap[metas[i].FileNum] = metas[i] } b.Added[6] = amap - v, err := b.Apply(nil, base.DefaultComparer.Compare, base.DefaultFormatter, 0, 0, nil, manifest.ProhibitSplitUserKeys) + v, err := b.Apply(nil, base.DefaultComparer.Compare, base.DefaultFormatter, 0, 0, nil) require.NoError(t, err) levelIter.Init( SpanIterOptions{}, base.DefaultComparer.Compare, tableNewIters, @@ -448,7 +448,7 @@ func TestLevelIter(t *testing.T) { amap[metas[i].FileNum] = metas[i] } b.Added[6] = amap - v, err := b.Apply(nil, base.DefaultComparer.Compare, base.DefaultFormatter, 0, 0, nil, manifest.ProhibitSplitUserKeys) + v, err := b.Apply(nil, base.DefaultComparer.Compare, base.DefaultFormatter, 0, 0, nil) require.NoError(t, err) iter = NewLevelIter( SpanIterOptions{}, base.DefaultComparer.Compare, diff --git a/internal/manifest/l0_sublevels_test.go b/internal/manifest/l0_sublevels_test.go index 8cedb87358..2b007fca9e 100644 --- a/internal/manifest/l0_sublevels_test.go +++ b/internal/manifest/l0_sublevels_test.go @@ -51,7 +51,7 @@ func readManifest(filename string) (*Version, error) { if err := bve.Accumulate(&ve); err != nil { return nil, err } - if v, err = bve.Apply(v, base.DefaultComparer.Compare, base.DefaultFormatter, 10<<20, 32000, nil, ProhibitSplitUserKeys); err != nil { + if v, err = bve.Apply(v, 
base.DefaultComparer.Compare, base.DefaultFormatter, 10<<20, 32000, nil); err != nil { return nil, err } } @@ -448,7 +448,7 @@ func TestL0Sublevels(t *testing.T) { for sublevel, files := range sublevels.levelFiles { slice := NewLevelSliceSpecificOrder(files) err := CheckOrdering(base.DefaultComparer.Compare, base.DefaultFormatter, - L0Sublevel(sublevel), slice.Iter(), ProhibitSplitUserKeys) + L0Sublevel(sublevel), slice.Iter()) if err != nil { return err.Error() } diff --git a/internal/manifest/testdata/version_check_ordering b/internal/manifest/testdata/version_check_ordering index 9f8f710056..58fd4cef22 100644 --- a/internal/manifest/testdata/version_check_ordering +++ b/internal/manifest/testdata/version_check_ordering @@ -179,13 +179,6 @@ L1 files 000001 and 000002 have overlapping ranges: [a#1,SET-b#2,SET] vs [b#1,SE 000001:[a#1,SET-b#2,SET] seqnums:[0-0] points:[a#1,SET-b#2,SET] 000002:[b#1,SET-d#4,SET] seqnums:[0-0] points:[b#1,SET-d#4,SET] -check-ordering allow-split-user-keys -1: - 000001:[a#1,SET-b#2,SET] - 000002:[b#1,SET-d#4,SET] ----- -OK - check-ordering 1: 000001:[a#1,SET-b#2,SET] diff --git a/internal/manifest/version.go b/internal/manifest/version.go index d20240af97..2b874331c1 100644 --- a/internal/manifest/version.go +++ b/internal/manifest/version.go @@ -1343,20 +1343,16 @@ func (v *Version) Overlaps( // CheckOrdering checks that the files are consistent with respect to // increasing file numbers (for level 0 files) and increasing and non- // overlapping internal key ranges (for level non-0 files). -func (v *Version) CheckOrdering( - cmp Compare, format base.FormatKey, order OrderingInvariants, -) error { +func (v *Version) CheckOrdering(cmp Compare, format base.FormatKey) error { for sublevel := len(v.L0SublevelFiles) - 1; sublevel >= 0; sublevel-- { sublevelIter := v.L0SublevelFiles[sublevel].Iter() - // Sublevels have NEVER allowed split user keys, so we can pass - // ProhibitSplitUserKeys. 
- if err := CheckOrdering(cmp, format, L0Sublevel(sublevel), sublevelIter, ProhibitSplitUserKeys); err != nil { + if err := CheckOrdering(cmp, format, L0Sublevel(sublevel), sublevelIter); err != nil { return base.CorruptionErrorf("%s\n%s", err, v.DebugString(format)) } } for level, lm := range v.Levels { - if err := CheckOrdering(cmp, format, Level(level), lm.Iter(), order); err != nil { + if err := CheckOrdering(cmp, format, Level(level), lm.Iter()); err != nil { return base.CorruptionErrorf("%s\n%s", err, v.DebugString(format)) } } @@ -1425,34 +1421,10 @@ func (l *VersionList) Remove(v *Version) { v.list = nil // avoid memory leaks } -// OrderingInvariants dictates the file ordering invariants active. -type OrderingInvariants int8 - -const ( - // ProhibitSplitUserKeys indicates that adjacent files within a level cannot - // contain the same user key. - ProhibitSplitUserKeys OrderingInvariants = iota - // AllowSplitUserKeys indicates that adjacent files within a level may - // contain the same user key. This is only allowed by historical format - // major versions. - // - // TODO(jackson): Remove. - AllowSplitUserKeys -) - // CheckOrdering checks that the files are consistent with respect to // seqnums (for level 0 files -- see detailed comment below) and increasing and non- // overlapping internal key ranges (for non-level 0 files). -// -// The ordering field may be passed AllowSplitUserKeys to allow adjacent files that are both -// inclusive of the same user key. Pebble no longer creates version edits -// installing such files, and Pebble databases with sufficiently high format -// major version should no longer have any such files within their LSM. -// TODO(jackson): Remove AllowSplitUserKeys when we remove support for the -// earlier format major versions. 
-func CheckOrdering( - cmp Compare, format base.FormatKey, level Level, files LevelIterator, ordering OrderingInvariants, -) error { +func CheckOrdering(cmp Compare, format base.FormatKey, level Level, files LevelIterator) error { // The invariants to check for L0 sublevels are the same as the ones to // check for all other levels. However, if L0 is not organized into // sublevels, or if all L0 files are being passed in, we do the legacy L0 @@ -1531,28 +1503,15 @@ func CheckOrdering( f.Smallest.Pretty(format), f.Largest.Pretty(format)) } - // What's considered "overlapping" is dependent on the format - // major version. If ordering=ProhibitSplitUserKeys, then both - // files cannot contain keys with the same user keys. If the - // bounds have the same user key, the previous file's boundary - // must have a Trailer indicating that it's exclusive. - switch ordering { - case AllowSplitUserKeys: - if base.InternalCompare(cmp, prev.Largest, f.Smallest) >= 0 { - return base.CorruptionErrorf("%s files %s and %s have overlapping ranges: [%s-%s] vs [%s-%s]", - errors.Safe(level), errors.Safe(prev.FileNum), errors.Safe(f.FileNum), - prev.Smallest.Pretty(format), prev.Largest.Pretty(format), - f.Smallest.Pretty(format), f.Largest.Pretty(format)) - } - case ProhibitSplitUserKeys: - if v := cmp(prev.Largest.UserKey, f.Smallest.UserKey); v > 0 || (v == 0 && !prev.Largest.IsExclusiveSentinel()) { - return base.CorruptionErrorf("%s files %s and %s have overlapping ranges: [%s-%s] vs [%s-%s]", - errors.Safe(level), errors.Safe(prev.FileNum), errors.Safe(f.FileNum), - prev.Smallest.Pretty(format), prev.Largest.Pretty(format), - f.Smallest.Pretty(format), f.Largest.Pretty(format)) - } - default: - panic("unreachable") + // In all supported format major versions, split user keys are + // prohibited, so both files cannot contain keys with the same user + // keys. If the bounds have the same user key, the previous file's + // boundary must have a Trailer indicating that it's exclusive. 
+ if v := cmp(prev.Largest.UserKey, f.Smallest.UserKey); v > 0 || (v == 0 && !prev.Largest.IsExclusiveSentinel()) { + return base.CorruptionErrorf("%s files %s and %s have overlapping ranges: [%s-%s] vs [%s-%s]", + errors.Safe(level), errors.Safe(prev.FileNum), errors.Safe(f.FileNum), + prev.Smallest.Pretty(format), prev.Largest.Pretty(format), + f.Smallest.Pretty(format), f.Largest.Pretty(format)) } } } diff --git a/internal/manifest/version_edit.go b/internal/manifest/version_edit.go index 08b3555e35..956a56451b 100644 --- a/internal/manifest/version_edit.go +++ b/internal/manifest/version_edit.go @@ -855,7 +855,6 @@ func AccumulateIncompleteAndApplySingleVE( backingStateMap map[base.DiskFileNum]*FileBacking, addBackingFunc func(*FileBacking), removeBackingFunc func(base.DiskFileNum), - orderingInvariants OrderingInvariants, ) (_ *Version, zombies map[base.DiskFileNum]uint64, _ error) { if len(ve.RemovedBackingTables) != 0 { panic("pebble: invalid incomplete version edit") @@ -866,9 +865,7 @@ func AccumulateIncompleteAndApplySingleVE( return nil, nil, err } zombies = make(map[base.DiskFileNum]uint64) - v, err := b.Apply( - curr, cmp, formatKey, flushSplitBytes, readCompactionRate, zombies, orderingInvariants, - ) + v, err := b.Apply(curr, cmp, formatKey, flushSplitBytes, readCompactionRate, zombies) if err != nil { return nil, nil, err } @@ -908,7 +905,6 @@ func (b *BulkVersionEdit) Apply( flushSplitBytes int64, readCompactionRate int64, zombies map[base.DiskFileNum]uint64, - orderingInvariants OrderingInvariants, ) (*Version, error) { addZombie := func(state *FileBacking) { if zombies != nil { @@ -1092,7 +1088,7 @@ func (b *BulkVersionEdit) Apply( } else if err := v.InitL0Sublevels(cmp, formatKey, flushSplitBytes); err != nil { return nil, errors.Wrap(err, "pebble: internal error") } - if err := CheckOrdering(cmp, formatKey, Level(0), v.Levels[level].Iter(), orderingInvariants); err != nil { + if err := CheckOrdering(cmp, formatKey, Level(0), 
v.Levels[level].Iter()); err != nil { return nil, errors.Wrap(err, "pebble: internal error") } continue @@ -1113,7 +1109,7 @@ func (b *BulkVersionEdit) Apply( end.Prev() } }) - if err := CheckOrdering(cmp, formatKey, Level(level), check.Iter(), orderingInvariants); err != nil { + if err := CheckOrdering(cmp, formatKey, Level(level), check.Iter()); err != nil { return nil, errors.Wrap(err, "pebble: internal error") } } diff --git a/internal/manifest/version_edit_test.go b/internal/manifest/version_edit_test.go index 6d091538bb..387b4c8a40 100644 --- a/internal/manifest/version_edit_test.go +++ b/internal/manifest/version_edit_test.go @@ -521,7 +521,7 @@ func TestVersionEditApply(t *testing.T) { } } zombies := make(map[base.DiskFileNum]uint64) - newv, err := bve.Apply(v, base.DefaultComparer.Compare, base.DefaultFormatter, 10<<20, 32000, zombies, ProhibitSplitUserKeys) + newv, err := bve.Apply(v, base.DefaultComparer.Compare, base.DefaultFormatter, 10<<20, 32000, zombies) if err != nil { return err.Error() } diff --git a/internal/manifest/version_test.go b/internal/manifest/version_test.go index abde613afc..eb743adad8 100644 --- a/internal/manifest/version_test.go +++ b/internal/manifest/version_test.go @@ -290,10 +290,6 @@ func TestCheckOrdering(t *testing.T) { func(t *testing.T, d *datadriven.TestData) string { switch d.Cmd { case "check-ordering": - orderingInvariants := ProhibitSplitUserKeys - if d.HasArg("allow-split-user-keys") { - orderingInvariants = AllowSplitUserKeys - } v, err := ParseVersionDebug(cmp, fmtKey, 10<<20, d.Input) if err != nil { return err.Error() @@ -304,7 +300,7 @@ func TestCheckOrdering(t *testing.T) { m.SmallestSeqNum = m.Smallest.SeqNum() m.LargestSeqNum = m.Largest.SeqNum() }) - if err = v.CheckOrdering(cmp, base.DefaultFormatter, orderingInvariants); err != nil { + if err = v.CheckOrdering(cmp, base.DefaultFormatter); err != nil { return err.Error() } return "OK" diff --git a/iterator_histories_test.go b/iterator_histories_test.go 
index 792b2faf2f..b6d40e0ac0 100644 --- a/iterator_histories_test.go +++ b/iterator_histories_test.go @@ -45,7 +45,7 @@ func TestIterHistories(t *testing.T) { opts = &Options{ FS: vfs.NewMem(), Comparer: testkeys.Comparer, - FormatMajorVersion: FormatRangeKeys, + FormatMajorVersion: FormatMinSupported, BlockPropertyCollectors: []func() BlockPropertyCollector{ sstable.NewTestKeysBlockPropertyCollector, }, diff --git a/iterator_test.go b/iterator_test.go index 0be563dc71..3fc5c5814a 100644 --- a/iterator_test.go +++ b/iterator_test.go @@ -1751,11 +1751,11 @@ func iterOptionsString(o *IterOptions) string { func newTestkeysDatabase(t *testing.T, ks testkeys.Keyspace, rng *rand.Rand) *DB { dbOpts := &Options{ - Comparer: testkeys.Comparer, - FS: vfs.NewMem(), - FormatMajorVersion: FormatRangeKeys, - Logger: panicLogger{}, + Comparer: testkeys.Comparer, + FS: vfs.NewMem(), + Logger: panicLogger{}, } + dbOpts.testingRandomized(t) d, err := Open("", dbOpts) require.NoError(t, err) @@ -1802,10 +1802,10 @@ func newTestkeysDatabase(t *testing.T, ks testkeys.Keyspace, rng *rand.Rand) *DB func newPointTestkeysDatabase(t *testing.T, ks testkeys.Keyspace) *DB { dbOpts := &Options{ - Comparer: testkeys.Comparer, - FS: vfs.NewMem(), - FormatMajorVersion: FormatRangeKeys, + Comparer: testkeys.Comparer, + FS: vfs.NewMem(), } + dbOpts.testingRandomized(t) d, err := Open("", dbOpts) require.NoError(t, err) diff --git a/level_iter_test.go b/level_iter_test.go index 63d448e68b..eba201517a 100644 --- a/level_iter_test.go +++ b/level_iter_test.go @@ -193,12 +193,10 @@ func (lt *levelIterTest) runBuild(d *datadriven.TestData) string { return err.Error() } - tableFormat := sstable.TableFormatRocksDBv2 + tableFormat := sstable.TableFormatMinSupported for _, arg := range d.CmdArgs { if arg.Key == "format" { switch arg.Vals[0] { - case "rocksdbv2": - tableFormat = sstable.TableFormatRocksDBv2 case "pebblev2": tableFormat = sstable.TableFormatPebblev2 } diff --git a/metamorphic/options.go 
b/metamorphic/options.go index 6e07cb2fa4..e62b842609 100644 --- a/metamorphic/options.go +++ b/metamorphic/options.go @@ -26,16 +26,13 @@ import ( ) const ( - // The metamorphic test exercises range keys, so we cannot use an older - // FormatMajorVersion than pebble.FormatRangeKeys. - minimumFormatMajorVersion = pebble.FormatRangeKeys + minimumFormatMajorVersion = pebble.FormatMinSupported // The format major version to use in the default options configurations. We - // default to the last format major version of Cockroach 22.2 so we exercise - // the runtime version ratcheting that a cluster upgrading to 23.1 would - // experience. The randomized options may still use format major versions - // that are less than defaultFormatMajorVersion but are at least - // minimumFormatMajorVersion. - defaultFormatMajorVersion = pebble.FormatPrePebblev1Marked + // default to the minimum supported format so we exercise the runtime version + // ratcheting that a cluster upgrading would experience. The randomized + // options may still use format major versions that are less than + // defaultFormatMajorVersion but are at least minimumFormatMajorVersion. + defaultFormatMajorVersion = pebble.FormatMinSupported // newestFormatMajorVersionToTest is the most recent format major version // the metamorphic tests should use. 
This may be greater than // pebble.FormatNewest when some format major versions are marked as @@ -598,8 +595,7 @@ func randomOptions( if testOpts.disableBlockPropertyCollector { testOpts.Opts.BlockPropertyCollectors = nil } - testOpts.enableValueBlocks = opts.FormatMajorVersion >= pebble.FormatSSTableValueBlocks && - rng.Intn(2) != 0 + testOpts.enableValueBlocks = rng.Intn(2) != 0 if testOpts.enableValueBlocks { testOpts.Opts.Experimental.EnableValueBlocks = func() bool { return true } } diff --git a/open.go b/open.go index d0b5e189bd..3edad4cd87 100644 --- a/open.go +++ b/open.go @@ -71,7 +71,7 @@ func TableCacheSize(maxOpenFiles int) int { } // Open opens a DB whose files live in the given directory. -func Open(dirname string, opts *Options) (db *DB, _ error) { +func Open(dirname string, opts *Options) (db *DB, err error) { // Make a copy of the options so that we don't mutate the passed in options. opts = opts.Clone() opts = opts.EnsureDefaults() @@ -136,8 +136,28 @@ func Open(dirname string, opts *Options) (db *DB, _ error) { } }() + noFormatVersionMarker := formatVersion == FormatDefault + if noFormatVersionMarker { + // There is no format version marker file. There are three cases: + // - we are trying to open an existing store that was created at + // FormatMostCompatible (the only one without a version marker file); + // - we are creating a new store; + // - we are retrying a failed creation. + // + // To error in the first case, we set ErrorIfNotPristine. + opts.ErrorIfNotPristine = true + formatVersion = FormatMinSupported + defer func() { + if err != nil && errors.Is(err, ErrDBNotPristine) { + // We must be trying to open an existing store at FormatMostCompatible. + // Replace the not-pristine error with one that names the unsupported format major version. + err = errors.Newf("pebble: database %q written in format major version 1 which is no longer supported", dirname) + } + }() + } + // Find the currently active manifest, if there is one. 
- manifestMarker, manifestFileNum, manifestExists, err := findCurrentManifest(formatVersion, opts.FS, dirname) + manifestMarker, manifestFileNum, manifestExists, err := findCurrentManifest(opts.FS, dirname) if err != nil { return nil, errors.Wrapf(err, "pebble: database %q", dirname) } @@ -252,8 +272,6 @@ func Open(dirname string, opts *Options) (db *DB, _ error) { jobID := d.mu.nextJobID d.mu.nextJobID++ - setCurrent := setCurrentFunc(d.FormatMajorVersion(), manifestMarker, opts.FS, dirname, d.dataDir) - if !manifestExists { // DB does not exist. if d.opts.ErrorIfNotExists || d.opts.ReadOnly { @@ -261,7 +279,7 @@ func Open(dirname string, opts *Options) (db *DB, _ error) { } // Create the DB. - if err := d.mu.versions.create(jobID, dirname, opts, manifestMarker, setCurrent, d.FormatMajorVersion, &d.mu.Mutex); err != nil { + if err := d.mu.versions.create(jobID, dirname, opts, manifestMarker, d.FormatMajorVersion, &d.mu.Mutex); err != nil { return nil, err } } else { @@ -269,7 +287,7 @@ func Open(dirname string, opts *Options) (db *DB, _ error) { return nil, errors.Wrapf(ErrDBAlreadyExists, "dirname=%q", dirname) } // Load the version set. - if err := d.mu.versions.load(dirname, opts, manifestFileNum, manifestMarker, setCurrent, d.FormatMajorVersion, &d.mu.Mutex); err != nil { + if err := d.mu.versions.load(dirname, opts, manifestFileNum, manifestMarker, d.FormatMajorVersion, &d.mu.Mutex); err != nil { return nil, err } if opts.ErrorIfNotPristine { @@ -485,20 +503,27 @@ func Open(dirname string, opts *Options) (db *DB, _ error) { } d.updateReadStateLocked(d.opts.DebugCheck) - // If the Options specify a format major version higher than the - // loaded database's, upgrade it. If this is a new database, this - // code path also performs an initial upgrade from the starting - // implicit MostCompatible version. - // - // We ratchet the version this far into Open so that migrations have a read - // state available. 
- if !d.opts.ReadOnly && opts.FormatMajorVersion > d.FormatMajorVersion() { - if err := d.ratchetFormatMajorVersionLocked(opts.FormatMajorVersion); err != nil { - return nil, err + if !d.opts.ReadOnly { + // If the Options specify a format major version higher than the + // loaded database's, upgrade it. If this is a new database, this + // code path also performs an initial upgrade from the starting + // implicit MinSupported version. + // + // We ratchet the version this far into Open so that migrations have a read + // state available. Note that this also results in creating/updating the + // format version marker file. + if opts.FormatMajorVersion > d.FormatMajorVersion() { + if err := d.ratchetFormatMajorVersionLocked(opts.FormatMajorVersion); err != nil { + return nil, err + } + } else if noFormatVersionMarker { + // We are creating a new store at MinSupported. Create the format version + // marker file. + if err := d.writeFormatVersionMarker(d.FormatMajorVersion()); err != nil { + return nil, err + } } - } - if !d.opts.ReadOnly { // Write the current options to disk. d.optionsFileNum = d.mu.versions.getNextDiskFileNum() tmpPath := base.MakeFilepath(opts.FS, dirname, fileTypeTemp, d.optionsFileNum) @@ -994,7 +1019,7 @@ func (d *DB) replayWAL( flushMem() // mem is nil here. - if !d.opts.ReadOnly { + if !d.opts.ReadOnly && batchesReplayed > 0 { err = updateVE() if err != nil { return nil, 0, err @@ -1044,7 +1069,7 @@ func Peek(dirname string, fs vfs.FS) (*DBDesc, error) { } // Find the currently active manifest, if there is one. 
- manifestMarker, manifestFileNum, exists, err := findCurrentManifest(vers, fs, dirname) + manifestMarker, manifestFileNum, exists, err := findCurrentManifest(fs, dirname) if err != nil { return nil, err } diff --git a/open_test.go b/open_test.go index fae3237aac..b7bb16deb9 100644 --- a/open_test.go +++ b/open_test.go @@ -28,6 +28,7 @@ import ( "github.com/cockroachdb/pebble/objstorage" "github.com/cockroachdb/pebble/objstorage/objstorageprovider" "github.com/cockroachdb/pebble/objstorage/remote" + "github.com/cockroachdb/pebble/sstable" "github.com/cockroachdb/pebble/vfs" "github.com/cockroachdb/pebble/vfs/atomicfs" "github.com/cockroachdb/pebble/vfs/errorfs" @@ -187,20 +188,12 @@ func TestOpenAlreadyLocked(t *testing.T) { func TestNewDBFilenames(t *testing.T) { versions := map[FormatMajorVersion][]string{ - FormatMostCompatible: { - "000002.log", - "CURRENT", - "LOCK", - "MANIFEST-000001", - "OPTIONS-000003", - }, internalFormatNewest: { "000002.log", - "CURRENT", "LOCK", "MANIFEST-000001", "OPTIONS-000003", - "marker.format-version.000015.016", + "marker.format-version.000003.016", "marker.manifest.000001.MANIFEST-000001", }, } @@ -1115,42 +1108,45 @@ func TestGetVersion(t *testing.T) { require.Equal(t, "rocksdb v6.2.1", version) } -func TestRocksDBNoFlushManifest(t *testing.T) { +// TestOpenNeverFlushed verifies that we can open a database that had an +// ingestion but no other operations. +func TestOpenNeverFlushed(t *testing.T) { mem := vfs.NewMem() - // Have the comparer and merger names match what's in the testdata - // directory. 
- comparer := *DefaultComparer - merger := *DefaultMerger - comparer.Name = "cockroach_comparator" - merger.Name = "cockroach_merge_operator" - opts := &Options{ - FS: mem, - Comparer: &comparer, - Merger: &merger, + + sstFile, err := mem.Create("to-ingest.sst") + require.NoError(t, err) + + writerOpts := sstable.WriterOptions{} + w := sstable.NewWriter(objstorageprovider.NewFileWritable(sstFile), writerOpts) + for _, key := range []string{"a", "b", "c", "d"} { + require.NoError(t, w.Set([]byte(key), []byte("val-"+key))) } + require.NoError(t, w.Close()) - // rocksdb-ingest-only is a RocksDB-generated db directory that has not had - // a single flush yet, only ingestion operations. The manifest contains - // a next-log-num but no log-num entry. Ensure that pebble can read these - // directories without an issue. - _, err := vfs.Clone(vfs.Default, mem, "testdata/rocksdb-ingest-only", "testdata") + opts := &Options{ + FS: mem, + } + db, err := Open("", opts) require.NoError(t, err) + require.NoError(t, db.Ingest([]string{"to-ingest.sst"})) + require.NoError(t, db.Close()) - db, err := Open("testdata", opts) + db, err = Open("", opts) require.NoError(t, err) - defer db.Close() - val, closer, err := db.Get([]byte("ajulxeiombjiyw\x00\x00\x00\x00\x00\x00\x00\x01\x12\x09")) + val, closer, err := db.Get([]byte("b")) require.NoError(t, err) - require.NotEmpty(t, val) + require.Equal(t, "val-b", string(val)) require.NoError(t, closer.Close()) + + require.NoError(t, db.Close()) } func TestOpen_ErrorIfUnknownFormatVersion(t *testing.T) { fs := vfs.NewMem() d, err := Open("", &Options{ FS: fs, - FormatMajorVersion: FormatVersioned, + FormatMajorVersion: FormatMinSupported, }) require.NoError(t, err) require.NoError(t, d.Close()) @@ -1163,10 +1159,10 @@ func TestOpen_ErrorIfUnknownFormatVersion(t *testing.T) { _, err = Open("", &Options{ FS: fs, - FormatMajorVersion: FormatVersioned, + FormatMajorVersion: FormatMinSupported, }) require.Error(t, err) - require.EqualError(t, err, 
`pebble: database "" written in format major version 999999`) + require.EqualError(t, err, `pebble: database "" written in unknown format major version 999999`) } // ensureFilesClosed updates the provided Options to wrap the filesystem. It diff --git a/options.go b/options.go index 92d98eb08c..3451375d44 100644 --- a/options.go +++ b/options.go @@ -1122,7 +1122,7 @@ func (o *Options) EnsureDefaults() *Options { } if o.FormatMajorVersion == FormatDefault { - o.FormatMajorVersion = FormatMostCompatible + o.FormatMajorVersion = FormatMinSupported } if o.FS == nil { @@ -1489,7 +1489,7 @@ func (o *Options) Parse(s string, hooks *ParseHooks) error { var v uint64 v, err = strconv.ParseUint(value, 10, 64) if vers := FormatMajorVersion(v); vers > internalFormatNewest || vers == FormatDefault { - err = errors.Newf("unknown format major version %d", o.FormatMajorVersion) + err = errors.Newf("unsupported format major version %d", o.FormatMajorVersion) } if err == nil { o.FormatMajorVersion = FormatMajorVersion(v) @@ -1740,9 +1740,9 @@ func (o *Options) Validate() error { fmt.Fprintf(&buf, "MemTableStopWritesThreshold (%d) must be >= 2\n", o.MemTableStopWritesThreshold) } - if o.FormatMajorVersion > internalFormatNewest { - fmt.Fprintf(&buf, "FormatMajorVersion (%d) must be <= %d\n", - o.FormatMajorVersion, internalFormatNewest) + if o.FormatMajorVersion < FormatMinSupported || o.FormatMajorVersion > internalFormatNewest { + fmt.Fprintf(&buf, "FormatMajorVersion (%d) must be between %d and %d\n", + o.FormatMajorVersion, FormatMinSupported, internalFormatNewest) } if o.TableCache != nil && o.Cache != o.TableCache.cache { fmt.Fprintf(&buf, "underlying cache in the TableCache and the Cache dont match\n") diff --git a/options_test.go b/options_test.go index f04f6a790b..13763c8787 100644 --- a/options_test.go +++ b/options_test.go @@ -28,8 +28,9 @@ func (o *Options) testingRandomized(t testing.TB) *Options { } if o.FormatMajorVersion == FormatDefault { // Pick a random format major 
version from the range - // [MostCompatible, FormatNewest]. - o.FormatMajorVersion = FormatMajorVersion(rand.Intn(int(internalFormatNewest)) + 1) + // [FormatMinSupported, FormatNewest]. + n := rand.Intn(int(internalFormatNewest - FormatMinSupported + 1)) + o.FormatMajorVersion = FormatMinSupported + FormatMajorVersion(n) t.Logf("Running %s with format major version %s", t.Name(), o.FormatMajorVersion.String()) } return o @@ -82,7 +83,7 @@ func TestOptionsString(t *testing.T) { flush_delay_delete_range=0s flush_delay_range_key=0s flush_split_bytes=4194304 - format_major_version=1 + format_major_version=13 l0_compaction_concurrency=10 l0_compaction_file_threshold=500 l0_compaction_threshold=4 @@ -122,9 +123,7 @@ func TestOptionsString(t *testing.T) { var opts *Options opts = opts.EnsureDefaults() - if v := opts.String(); expected != v { - t.Fatalf("expected\n%s\nbut found\n%s", expected, v) - } + require.Equal(t, expected, opts.String()) } func TestOptionsCheck(t *testing.T) { diff --git a/range_del_test.go b/range_del_test.go index 75a9ad2ee6..07251ab33a 100644 --- a/range_del_test.go +++ b/range_del_test.go @@ -352,17 +352,7 @@ func TestRangeDelCompactionTruncation(t *testing.T) { // tables in L2. Lastly, the L2 table containing "c" will be compacted // creating the L3 table. require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false)) - if formatVersion < FormatSetWithDelete { - expectLSM(` -1: - 000008:[a#12,RANGEDEL-b#inf,RANGEDEL] -2: - 000012:[b#13,SET-c#inf,RANGEDEL] -3: - 000013:[c#14,SET-d#inf,RANGEDEL] -`) - } else { - expectLSM(` + expectLSM(` 1: 000008:[a#12,RANGEDEL-b#inf,RANGEDEL] 2: @@ -370,7 +360,6 @@ func TestRangeDelCompactionTruncation(t *testing.T) { 3: 000013:[c#14,SET-d#inf,RANGEDEL] `) - } // The L1 table still contains a tombstone from [a,d) which will improperly // delete the newer version of "b" in L2. 
@@ -386,9 +375,7 @@ func TestRangeDelCompactionTruncation(t *testing.T) { } versions := []FormatMajorVersion{ - FormatMostCompatible, - FormatSetWithDelete - 1, - FormatSetWithDelete, + FormatMinSupported, FormatNewest, } for _, version := range versions { diff --git a/replay/replay.go b/replay/replay.go index d1d894f2ff..106c8f0534 100644 --- a/replay/replay.go +++ b/replay/replay.go @@ -718,8 +718,7 @@ func (r *Runner) prepareWorkloadSteps(ctx context.Context) error { r.Opts.Comparer.FormatKey, r.Opts.FlushSplitBytes, r.Opts.Experimental.ReadCompactionRate, - nil, /* zombies */ - manifest.ProhibitSplitUserKeys) + nil /* zombies */) bve = manifest.BulkVersionEdit{AddedByFileNum: bve.AddedByFileNum} return v, err } diff --git a/replay/replay_test.go b/replay/replay_test.go index 93bbdbef4b..f924edecef 100644 --- a/replay/replay_test.go +++ b/replay/replay_test.go @@ -91,7 +91,7 @@ func runReplayTest(t *testing.T, path string) { opts := &pebble.Options{ FS: fs, Comparer: testkeys.Comparer, - FormatMajorVersion: pebble.FormatRangeKeys, + FormatMajorVersion: pebble.FormatMinSupported, L0CompactionFileThreshold: 1, } setDefaultExperimentalOpts(opts) @@ -174,7 +174,7 @@ func TestLoadFlushedSSTableKeys(t *testing.T) { }, FS: vfs.NewMem(), Comparer: testkeys.Comparer, - FormatMajorVersion: pebble.FormatRangeKeys, + FormatMajorVersion: pebble.FormatMinSupported, } d, err := pebble.Open("", opts) require.NoError(t, err) @@ -282,7 +282,7 @@ func collectCorpus(t *testing.T, fs *vfs.MemFS, name string) { opts := &pebble.Options{ Comparer: testkeys.Comparer, DisableAutomaticCompactions: true, - FormatMajorVersion: pebble.FormatRangeKeys, + FormatMajorVersion: pebble.FormatMinSupported, FS: fs, MaxManifestFileSize: 96, } diff --git a/replay/testdata/corpus/findManifestStart b/replay/testdata/corpus/findManifestStart index a0652ebb1a..f5369f953d 100644 --- a/replay/testdata/corpus/findManifestStart +++ b/replay/testdata/corpus/findManifestStart @@ -38,11 +38,10 @@ list-files 
build ---- build: 000002.log - CURRENT LOCK MANIFEST-000001 OPTIONS-000003 - marker.format-version.000007.008 + marker.format-version.000001.013 marker.manifest.000001.MANIFEST-000001 commit @@ -60,11 +59,10 @@ build: 000002.log 000004.log 000005.sst - CURRENT LOCK MANIFEST-000001 OPTIONS-000003 - marker.format-version.000007.008 + marker.format-version.000001.013 marker.manifest.000001.MANIFEST-000001 @@ -79,12 +77,11 @@ list-files build build: 000005.sst 000006.log - CURRENT LOCK MANIFEST-000001 MANIFEST-000007 OPTIONS-000008 - marker.format-version.000007.008 + marker.format-version.000001.013 marker.manifest.000002.MANIFEST-000007 delete-all build/MANIFEST-000007 diff --git a/replay/testdata/corpus/high_read_amp b/replay/testdata/corpus/high_read_amp index d05b6f187e..800511b5ca 100644 --- a/replay/testdata/corpus/high_read_amp +++ b/replay/testdata/corpus/high_read_amp @@ -5,11 +5,10 @@ list-files build ---- build: 000002.log - CURRENT LOCK MANIFEST-000001 OPTIONS-000003 - marker.format-version.000007.008 + marker.format-version.000001.013 marker.manifest.000001.MANIFEST-000001 commit @@ -73,12 +72,11 @@ build: 000007.sst 000009.log 000010.sst - CURRENT LOCK MANIFEST-000008 MANIFEST-000011 OPTIONS-000003 - marker.format-version.000007.008 + marker.format-version.000001.013 marker.manifest.000003.MANIFEST-000011 start @@ -94,7 +92,7 @@ high_read_amp/checkpoint: 000010.sst MANIFEST-000011 OPTIONS-000003 - marker.format-version.000001.008 + marker.format-version.000001.013 marker.manifest.000001.MANIFEST-000011 commit diff --git a/replay/testdata/corpus/simple b/replay/testdata/corpus/simple index 8e61209286..1b758b5820 100644 --- a/replay/testdata/corpus/simple +++ b/replay/testdata/corpus/simple @@ -5,11 +5,10 @@ list-files build ---- build: 000002.log - CURRENT LOCK MANIFEST-000001 OPTIONS-000003 - marker.format-version.000007.008 + marker.format-version.000001.013 marker.manifest.000001.MANIFEST-000001 commit @@ -27,11 +26,10 @@ build: 000002.log 000004.log 
000005.sst - CURRENT LOCK MANIFEST-000001 OPTIONS-000003 - marker.format-version.000007.008 + marker.format-version.000001.013 marker.manifest.000001.MANIFEST-000001 start @@ -50,7 +48,7 @@ simple/checkpoint: 000005.sst MANIFEST-000001 OPTIONS-000003 - marker.format-version.000001.008 + marker.format-version.000001.013 marker.manifest.000001.MANIFEST-000001 commit diff --git a/replay/testdata/replay b/replay/testdata/replay index 91171604a5..277215f613 100644 --- a/replay/testdata/replay +++ b/replay/testdata/replay @@ -9,12 +9,11 @@ tree 658 000005.sst 49 000006.log 686 000007.sst - 16 CURRENT 0 LOCK 98 MANIFEST-000001 122 MANIFEST-000008 - 1241 OPTIONS-000003 - 0 marker.format-version.000007.008 + 1242 OPTIONS-000003 + 0 marker.format-version.000001.013 0 marker.manifest.000002.MANIFEST-000008 simple/ 686 000007.sst @@ -24,8 +23,8 @@ tree 25 000004.log 658 000005.sst 98 MANIFEST-000001 - 1241 OPTIONS-000003 - 0 marker.format-version.000001.008 + 1242 OPTIONS-000003 + 0 marker.format-version.000001.013 0 marker.manifest.000001.MANIFEST-000001 cat build/OPTIONS-000003 @@ -44,7 +43,7 @@ cat build/OPTIONS-000003 flush_delay_delete_range=0s flush_delay_range_key=0s flush_split_bytes=4194304 - format_major_version=8 + format_major_version=13 l0_compaction_concurrency=10 l0_compaction_file_threshold=500 l0_compaction_threshold=4 diff --git a/replay/testdata/replay_paced b/replay/testdata/replay_paced index ac981aac12..8e03de0ba5 100644 --- a/replay/testdata/replay_paced +++ b/replay/testdata/replay_paced @@ -11,12 +11,11 @@ tree 632 000010.sst 200 000012.log 686 000013.sst - 16 CURRENT 0 LOCK 122 MANIFEST-000008 205 MANIFEST-000011 - 1241 OPTIONS-000003 - 0 marker.format-version.000007.008 + 1242 OPTIONS-000003 + 0 marker.format-version.000001.013 0 marker.manifest.000003.MANIFEST-000011 high_read_amp/ 686 000013.sst @@ -27,8 +26,8 @@ tree 39 000009.log 632 000010.sst 157 MANIFEST-000011 - 1241 OPTIONS-000003 - 0 marker.format-version.000001.008 + 1242 OPTIONS-000003 + 
0 marker.format-version.000001.013 0 marker.manifest.000001.MANIFEST-000011 replay high_read_amp fixed 1 diff --git a/scan_internal_test.go b/scan_internal_test.go index 2c3acbc07e..47964e930d 100644 --- a/scan_internal_test.go +++ b/scan_internal_test.go @@ -46,7 +46,7 @@ func TestScanStatistics(t *testing.T) { FS: vfs.NewMem(), Logger: testLogger{t: t}, Comparer: testkeys.Comparer, - FormatMajorVersion: FormatRangeKeys, + FormatMajorVersion: FormatMinSupported, BlockPropertyCollectors: []func() BlockPropertyCollector{ sstable.NewTestKeysBlockPropertyCollector, }, diff --git a/snapshot_test.go b/snapshot_test.go index f0514c8d0b..b9d8f14fcb 100644 --- a/snapshot_test.go +++ b/snapshot_test.go @@ -62,9 +62,7 @@ func testSnapshotImpl(t *testing.T, newSnapshot func(d *DB) Reader) { defer close() randVersion := func() FormatMajorVersion { - minVersion := formatUnusedPrePebblev1MarkedCompacted - return FormatMajorVersion(int(minVersion) + rand.Intn( - int(internalFormatNewest)-int(minVersion)+1)) + return FormatMinSupported + FormatMajorVersion(rand.Intn(int(internalFormatNewest-FormatMinSupported)+1)) } datadriven.RunTest(t, "testdata/snapshot", func(t *testing.T, td *datadriven.TestData) string { switch td.Cmd { diff --git a/sstable/data_test.go b/sstable/data_test.go index 2b1926a355..43aa629afd 100644 --- a/sstable/data_test.go +++ b/sstable/data_test.go @@ -27,11 +27,6 @@ import ( func optsFromArgs(td *datadriven.TestData, writerOpts *WriterOptions) error { for _, arg := range td.CmdArgs { switch arg.Key { - case "leveldb": - if len(arg.Vals) != 0 { - return errors.Errorf("%s: arg %s expects 0 values", td.Cmd, arg.Key) - } - writerOpts.TableFormat = TableFormatLevelDB case "block-size": if len(arg.Vals) != 1 { return errors.Errorf("%s: arg %s expects 1 value", td.Cmd, arg.Key) diff --git a/sstable/format.go b/sstable/format.go index 82310a55ba..9cd9998217 100644 --- a/sstable/format.go +++ b/sstable/format.go @@ -19,15 +19,16 @@ type TableFormat uint32 // Pebble 
(i.e. the history is linear). const ( TableFormatUnspecified TableFormat = iota - TableFormatLevelDB - TableFormatRocksDBv2 - TableFormatPebblev1 // Block properties. - TableFormatPebblev2 // Range keys. - TableFormatPebblev3 // Value blocks. - TableFormatPebblev4 // DELSIZED tombstones. + _ // TableFormatLevelDB; deprecated. + _ // TableFormatRocksDBv2; deprecated. + TableFormatPebblev1 // Block properties. + TableFormatPebblev2 // Range keys. + TableFormatPebblev3 // Value blocks. + TableFormatPebblev4 // DELSIZED tombstones. NumTableFormats - TableFormatMax = NumTableFormats - 1 + TableFormatMax = NumTableFormats - 1 + TableFormatMinSupported = TableFormatPebblev1 ) // TableFormatPebblev4, in addition to DELSIZED, introduces the use of @@ -185,15 +186,6 @@ const ( // corresponding internal TableFormat. func ParseTableFormat(magic []byte, version uint32) (TableFormat, error) { switch string(magic) { - case levelDBMagic: - return TableFormatLevelDB, nil - case rocksDBMagic: - if version != rocksDBFormatVersion2 { - return TableFormatUnspecified, base.CorruptionErrorf( - "pebble/table: unsupported rocksdb format version %d", errors.Safe(version), - ) - } - return TableFormatRocksDBv2, nil case pebbleDBMagic: switch version { case 1: @@ -219,10 +211,6 @@ func ParseTableFormat(magic []byte, version uint32) (TableFormat, error) { // AsTuple returns the TableFormat's (Magic String, Version) tuple. func (f TableFormat) AsTuple() (string, uint32) { switch f { - case TableFormatLevelDB: - return levelDBMagic, 0 - case TableFormatRocksDBv2: - return rocksDBMagic, 2 case TableFormatPebblev1: return pebbleDBMagic, 1 case TableFormatPebblev2: @@ -239,10 +227,6 @@ func (f TableFormat) AsTuple() (string, uint32) { // String returns the TableFormat (Magic String,Version) tuple. 
func (f TableFormat) String() string { switch f { - case TableFormatLevelDB: - return "(LevelDB)" - case TableFormatRocksDBv2: - return "(RocksDB,v2)" case TableFormatPebblev1: return "(Pebble,v1)" case TableFormatPebblev2: diff --git a/sstable/format_test.go b/sstable/format_test.go index f5589c1239..61a3dd6ade 100644 --- a/sstable/format_test.go +++ b/sstable/format_test.go @@ -19,18 +19,6 @@ func TestTableFormat_RoundTrip(t *testing.T) { wantErr string }{ // Valid cases. - { - name: "LevelDB", - magic: levelDBMagic, - version: 0, - want: TableFormatLevelDB, - }, - { - name: "RocksDBv2", - magic: rocksDBMagic, - version: 2, - want: TableFormatRocksDBv2, - }, { name: "PebbleDBv1", magic: pebbleDBMagic, @@ -57,10 +45,9 @@ func TestTableFormat_RoundTrip(t *testing.T) { }, // Invalid cases. { - name: "Invalid RocksDB version", + name: "Deprecated RocksDB magic", magic: rocksDBMagic, - version: 1, - wantErr: "pebble/table: unsupported rocksdb format version 1", + wantErr: "pebble/table: invalid table (bad magic number: 0xf7cff485b741e288)", }, { name: "Invalid PebbleDB version", @@ -73,6 +60,16 @@ func TestTableFormat_RoundTrip(t *testing.T) { magic: "foo", wantErr: "pebble/table: invalid table (bad magic number: 0x666f6f)", }, + { + name: "LevelDB", + magic: levelDBMagic, + wantErr: "pebble/table: invalid table (bad magic number: 0x57fb808b247547db)", + }, + { + name: "RocksDBv2", + magic: rocksDBMagic, + wantErr: "pebble/table: invalid table (bad magic number: 0xf7cff485b741e288)", + }, } for _, tc := range tcs { diff --git a/sstable/options.go b/sstable/options.go index c5e1f7935f..7bf3d79907 100644 --- a/sstable/options.go +++ b/sstable/options.go @@ -228,9 +228,7 @@ type WriterOptions struct { MergerName string // TableFormat specifies the format version for writing sstables. The default - // is TableFormatRocksDBv2 which creates RocksDB compatible sstables. 
Use - // TableFormatLevelDB to create LevelDB compatible sstable which can be used - // by a wider range of tools and libraries. + // is TableFormatMinSupported. TableFormat TableFormat // IsStrictObsolete is only relevant for >= TableFormatPebblev4. See comment @@ -299,7 +297,7 @@ func (o WriterOptions) ensureDefaults() WriterOptions { // By default, if the table format is not specified, fall back to using the // most compatible format. if o.TableFormat == TableFormatUnspecified { - o.TableFormat = TableFormatRocksDBv2 + o.TableFormat = TableFormatMinSupported } return o } diff --git a/sstable/random_test.go b/sstable/random_test.go index 83e83caa3a..8a3e4cee90 100644 --- a/sstable/random_test.go +++ b/sstable/random_test.go @@ -274,8 +274,8 @@ func (cfg *randomTableConfig) readerOpts() ReaderOptions { func (cfg *randomTableConfig) randomize() { if cfg.wopts == nil { cfg.wopts = &WriterOptions{ - // Test all table formats in [TableFormatLevelDB, TableFormatMax]. - TableFormat: TableFormat(cfg.rng.Intn(int(TableFormatMax)) + 1), + // Test all table formats in [TableFormatMinSupported, TableFormatMax]. 
+ TableFormat: TableFormatMinSupported + TableFormat(cfg.rng.Intn(int(TableFormatMax-TableFormatMinSupported+1))), BlockRestartInterval: (1 << cfg.rng.Intn(6)), // {1, 2, 4, ..., 32} BlockSizeThreshold: min(int(100*cfg.rng.Float64()), 1), // 1-100% BlockSize: (1 << cfg.rng.Intn(18)), // {1, 2, 4, ..., 128 KiB} diff --git a/sstable/reader_test.go b/sstable/reader_test.go index 86b40b9513..ccc3a44e12 100644 --- a/sstable/reader_test.go +++ b/sstable/reader_test.go @@ -563,7 +563,7 @@ func forEveryTableFormat[I any]( t *testing.T, formatTable [NumTableFormats]I, runTest func(*testing.T, TableFormat, I), ) { t.Helper() - for tf := TableFormatUnspecified + 1; tf <= TableFormatMax; tf++ { + for tf := TableFormatMinSupported; tf <= TableFormatMax; tf++ { t.Run(tf.String(), func(t *testing.T) { runTest(t, tf, formatTable[tf]) }) @@ -574,8 +574,6 @@ func TestReaderStats(t *testing.T) { forEveryTableFormat[string](t, [NumTableFormats]string{ TableFormatUnspecified: "", - TableFormatLevelDB: "testdata/readerstats_LevelDB", - TableFormatRocksDBv2: "testdata/readerstats_LevelDB", TableFormatPebblev1: "testdata/readerstats_LevelDB", TableFormatPebblev2: "testdata/readerstats_LevelDB", TableFormatPebblev3: "testdata/readerstats_Pebblev3", @@ -606,8 +604,6 @@ func TestReaderWithBlockPropertyFilter(t *testing.T) { forEveryTableFormat[string](t, [NumTableFormats]string{ TableFormatUnspecified: "", // Block properties unsupported - TableFormatLevelDB: "", // Block properties unsupported - TableFormatRocksDBv2: "", // Block properties unsupported TableFormatPebblev1: "", // Block properties unsupported TableFormatPebblev2: "testdata/reader_bpf/Pebblev2", TableFormatPebblev3: "testdata/reader_bpf/Pebblev3", @@ -1389,7 +1385,7 @@ func TestReader_TableFormat(t *testing.T) { require.Equal(t, want, got) } - for tf := TableFormatLevelDB; tf <= TableFormatMax; tf++ { + for tf := TableFormatMinSupported; tf <= TableFormatMax; tf++ { t.Run(tf.String(), func(t *testing.T) { test(t, tf) }) diff 
--git a/sstable/table.go b/sstable/table.go index 2f36f3076e..8f3c139dad 100644 --- a/sstable/table.go +++ b/sstable/table.go @@ -206,9 +206,6 @@ const ( minFooterLen = levelDBFooterLen maxFooterLen = rocksDBFooterLen - levelDBFormatVersion = 0 - rocksDBFormatVersion2 = 2 - metaRangeKeyName = "pebble.range_key" metaValueIndexName = "pebble.value_index" metaPropertiesName = "rocksdb.properties" @@ -342,18 +339,7 @@ func readFooter(f objstorage.Readable) (footer, error) { } switch magic := buf[len(buf)-len(rocksDBMagic):]; string(magic) { - case levelDBMagic: - if len(buf) < levelDBFooterLen { - return footer, base.CorruptionErrorf( - "pebble/table: invalid table (footer too short): %d", errors.Safe(len(buf))) - } - footer.footerBH.Offset = uint64(off+int64(len(buf))) - levelDBFooterLen - buf = buf[len(buf)-levelDBFooterLen:] - footer.footerBH.Length = uint64(len(buf)) - footer.format = TableFormatLevelDB - footer.checksum = ChecksumTypeCRC32c - - case rocksDBMagic, pebbleDBMagic: + case pebbleDBMagic: // NOTE: The Pebble magic string implies the same footer format as that used // by the RocksDBv2 table format. 
if len(buf) < rocksDBFooterLen { @@ -404,14 +390,7 @@ func readFooter(f objstorage.Readable) (footer, error) { func (f footer) encode(buf []byte) []byte { switch magic, version := f.format.AsTuple(); magic { - case levelDBMagic: - buf = buf[:levelDBFooterLen] - clear(buf) - n := encodeBlockHandle(buf[0:], f.metaindexBH) - encodeBlockHandle(buf[n:], f.indexBH) - copy(buf[len(buf)-len(levelDBMagic):], levelDBMagic) - - case rocksDBMagic, pebbleDBMagic: + case pebbleDBMagic: buf = buf[:rocksDBFooterLen] clear(buf) switch f.checksum { @@ -440,12 +419,5 @@ func (f footer) encode(buf []byte) []byte { } func supportsTwoLevelIndex(format TableFormat) bool { - switch format { - case TableFormatLevelDB: - return false - case TableFormatRocksDBv2, TableFormatPebblev1, TableFormatPebblev2, TableFormatPebblev3, TableFormatPebblev4: - return true - default: - panic("sstable: unspecified table format version") - } + return format >= TableFormatMinSupported } diff --git a/sstable/table_test.go b/sstable/table_test.go index 7fc535f99b..d856ee3028 100644 --- a/sstable/table_test.go +++ b/sstable/table_test.go @@ -549,10 +549,10 @@ func TestMetaIndexEntriesSorted(t *testing.T) { func TestFooterRoundTrip(t *testing.T) { buf := make([]byte, 100+maxFooterLen) - for format := TableFormatLevelDB; format < TableFormatMax; format++ { + for format := TableFormatMinSupported; format < TableFormatMax; format++ { t.Run(fmt.Sprintf("format=%s", format), func(t *testing.T) { checksums := []ChecksumType{ChecksumTypeCRC32c} - if format != TableFormatLevelDB { + if format != TableFormatMinSupported { checksums = []ChecksumType{ChecksumTypeCRC32c, ChecksumTypeXXHash64} } for _, checksum := range checksums { @@ -603,14 +603,6 @@ func TestFooterRoundTrip(t *testing.T) { } func TestReadFooter(t *testing.T) { - encode := func(format TableFormat, checksum ChecksumType) string { - f := footer{ - format: format, - checksum: checksum, - } - return string(f.encode(make([]byte, maxFooterLen))) - } - testCases 
:= []struct { encoded string expected string @@ -618,10 +610,6 @@ func TestReadFooter(t *testing.T) { {strings.Repeat("a", minFooterLen-1), "file size is too small"}, {strings.Repeat("a", levelDBFooterLen), "bad magic number"}, {strings.Repeat("a", rocksDBFooterLen), "bad magic number"}, - {encode(TableFormatLevelDB, 0)[1:], "file size is too small"}, - {encode(TableFormatRocksDBv2, 0)[1:], "footer too short"}, - {encode(TableFormatRocksDBv2, ChecksumTypeNone), "unsupported checksum type"}, - {encode(TableFormatRocksDBv2, ChecksumTypeXXHash), "unsupported checksum type"}, } for _, c := range testCases { t.Run("", func(t *testing.T) { diff --git a/sstable/testdata/writer b/sstable/testdata/writer index 61d3a24fa3..a32478576e 100644 --- a/sstable/testdata/writer +++ b/sstable/testdata/writer @@ -343,11 +343,14 @@ layout 0 data (21) 26 data (21) 52 data (21) - 78 index (47) - 130 properties (678) - 813 meta-index (33) - 851 leveldb-footer (48) - 899 EOF + 78 index (22) + 105 index (22) + 132 index (22) + 159 top-index (50) + 214 properties (580) + 799 meta-index (33) + 837 footer (53) + 890 EOF # Range keys, if present, are shown in the layout. diff --git a/sstable/testdata/writer_v3 b/sstable/testdata/writer_v3 index a003c5fb96..558ed179ed 100644 --- a/sstable/testdata/writer_v3 +++ b/sstable/testdata/writer_v3 @@ -313,14 +313,17 @@ seqnums: [1-1] layout ---- - 0 data (21) - 26 data (21) - 52 data (21) - 78 index (47) - 130 properties (678) - 813 meta-index (33) - 851 leveldb-footer (48) - 899 EOF + 0 data (22) + 27 data (22) + 54 data (22) + 81 index (22) + 108 index (22) + 135 index (22) + 162 top-index (51) + 218 properties (580) + 803 meta-index (33) + 841 footer (53) + 894 EOF # Range keys, if present, are shown in the layout. 
diff --git a/sstable/writer_test.go b/sstable/writer_test.go index 20f9e90eec..5922badde2 100644 --- a/sstable/writer_test.go +++ b/sstable/writer_test.go @@ -822,7 +822,7 @@ func TestWriter_TableFormatCompatibility(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - for tf := TableFormatLevelDB; tf <= TableFormatMax; tf++ { + for tf := TableFormatMinSupported; tf <= TableFormatMax; tf++ { t.Run(tf.String(), func(t *testing.T) { fs := vfs.NewMem() f, err := fs.Create("sst") diff --git a/table_cache_test.go b/table_cache_test.go index 103d62a50d..c9181a65a6 100644 --- a/table_cache_test.go +++ b/table_cache_test.go @@ -1204,7 +1204,7 @@ func TestTableCacheNoSuchFileError(t *testing.T) { _, _, _ = d.Get([]byte("a")) require.NotZero(t, len(logger.fatalMsgs), "no fatal message emitted") require.Equal(t, 1, len(logger.fatalMsgs), "expected one fatal message; got: %v", logger.fatalMsgs) - require.Contains(t, logger.fatalMsgs[0], "directory contains 6 files, 0 unknown, 0 tables, 2 logs, 1 manifests") + require.Contains(t, logger.fatalMsgs[0], "directory contains 7 files, 2 unknown, 0 tables, 2 logs, 1 manifests") } func BenchmarkTableCacheHotPath(b *testing.B) { diff --git a/table_stats_test.go b/table_stats_test.go index 3abece9ebe..1f33a39ec3 100644 --- a/table_stats_test.go +++ b/table_stats_test.go @@ -34,7 +34,7 @@ func TestTableStats(t *testing.T) { } opts.DisableAutomaticCompactions = true opts.Comparer = testkeys.Comparer - opts.FormatMajorVersion = FormatRangeKeys + opts.FormatMajorVersion = FormatMinSupported d, err := Open("", opts) require.NoError(t, err) diff --git a/testdata/checkpoint b/testdata/checkpoint index 6ace513eb7..ccae4aa9fd 100644 --- a/testdata/checkpoint +++ b/testdata/checkpoint @@ -5,85 +5,25 @@ open-dir: db lock: db/LOCK open-dir: db open-dir: db -open: db/CURRENT create: db/MANIFEST-000001 sync: db/MANIFEST-000001 -remove: db/temporary.000001.dbtmp -create: db/temporary.000001.dbtmp -sync: 
db/temporary.000001.dbtmp -close: db/temporary.000001.dbtmp -rename: db/temporary.000001.dbtmp -> db/CURRENT +create: db/marker.manifest.000001.MANIFEST-000001 +close: db/marker.manifest.000001.MANIFEST-000001 sync: db open-dir: db sync: db/MANIFEST-000001 create: db/000002.log sync: db -create: db/marker.manifest.000001.MANIFEST-000001 -close: db/marker.manifest.000001.MANIFEST-000001 -sync: db -create: db/marker.format-version.000001.002 -close: db/marker.format-version.000001.002 -sync: db -remove: db/temporary.000000.dbtmp -create: db/temporary.000000.dbtmp -sync: db/temporary.000000.dbtmp -close: db/temporary.000000.dbtmp -rename: db/temporary.000000.dbtmp -> db/CURRENT -create: db/marker.format-version.000002.003 -close: db/marker.format-version.000002.003 -remove: db/marker.format-version.000001.002 -sync: db -create: db/marker.format-version.000003.004 -close: db/marker.format-version.000003.004 -remove: db/marker.format-version.000002.003 -sync: db -create: db/marker.format-version.000004.005 -close: db/marker.format-version.000004.005 -remove: db/marker.format-version.000003.004 -sync: db -create: db/marker.format-version.000005.006 -close: db/marker.format-version.000005.006 -remove: db/marker.format-version.000004.005 -sync: db -create: db/marker.format-version.000006.007 -close: db/marker.format-version.000006.007 -remove: db/marker.format-version.000005.006 -sync: db -create: db/marker.format-version.000007.008 -close: db/marker.format-version.000007.008 -remove: db/marker.format-version.000006.007 -sync: db -create: db/marker.format-version.000008.009 -close: db/marker.format-version.000008.009 -remove: db/marker.format-version.000007.008 -sync: db -create: db/marker.format-version.000009.010 -close: db/marker.format-version.000009.010 -remove: db/marker.format-version.000008.009 -sync: db -create: db/marker.format-version.000010.011 -close: db/marker.format-version.000010.011 -remove: db/marker.format-version.000009.010 -sync: db -create: 
db/marker.format-version.000011.012 -close: db/marker.format-version.000011.012 -remove: db/marker.format-version.000010.011 -sync: db -create: db/marker.format-version.000012.013 -close: db/marker.format-version.000012.013 -remove: db/marker.format-version.000011.012 -sync: db -create: db/marker.format-version.000013.014 -close: db/marker.format-version.000013.014 -remove: db/marker.format-version.000012.013 +create: db/marker.format-version.000001.014 +close: db/marker.format-version.000001.014 sync: db -create: db/marker.format-version.000014.015 -close: db/marker.format-version.000014.015 -remove: db/marker.format-version.000013.014 +create: db/marker.format-version.000002.015 +close: db/marker.format-version.000002.015 +remove: db/marker.format-version.000001.014 sync: db -create: db/marker.format-version.000015.016 -close: db/marker.format-version.000015.016 -remove: db/marker.format-version.000014.015 +create: db/marker.format-version.000003.016 +close: db/marker.format-version.000003.016 +remove: db/marker.format-version.000002.015 sync: db create: db/temporary.000003.dbtmp sync: db/temporary.000003.dbtmp @@ -311,11 +251,10 @@ list db 000006.log 000008.log 000010.sst -CURRENT LOCK MANIFEST-000001 OPTIONS-000003 -marker.format-version.000015.016 +marker.format-version.000003.016 marker.manifest.000001.MANIFEST-000001 list checkpoints/checkpoint1 @@ -627,11 +566,10 @@ list db 000010.sst 000011.sst 000014.sst -CURRENT LOCK MANIFEST-000001 OPTIONS-000003 -marker.format-version.000015.016 +marker.format-version.000003.016 marker.manifest.000001.MANIFEST-000001 diff --git a/testdata/cleaner b/testdata/cleaner index cd96e7d2fb..8b5f1ca094 100644 --- a/testdata/cleaner +++ b/testdata/cleaner @@ -8,19 +8,18 @@ open-dir: db_wal lock: db/LOCK open-dir: db open-dir: db -open: db/CURRENT create: db/MANIFEST-000001 sync: db/MANIFEST-000001 -remove: db/temporary.000001.dbtmp -create: db/temporary.000001.dbtmp -sync: db/temporary.000001.dbtmp -close: 
db/temporary.000001.dbtmp -rename: db/temporary.000001.dbtmp -> db/CURRENT +create: db/marker.manifest.000001.MANIFEST-000001 +close: db/marker.manifest.000001.MANIFEST-000001 sync: db open-dir: db sync: db/MANIFEST-000001 create: db_wal/000002.log sync: db_wal +create: db/marker.format-version.000001.013 +close: db/marker.format-version.000001.013 +sync: db create: db/temporary.000003.dbtmp sync: db/temporary.000003.dbtmp close: db/temporary.000003.dbtmp @@ -67,16 +66,16 @@ sync: db/MANIFEST-000001 mkdir-all: db_wal/archive 0755 rename: db_wal/000004.log -> db_wal/archive/000004.log open: db/000005.sst -read-at(744, 53): db/000005.sst -read-at(707, 37): db/000005.sst -read-at(79, 628): db/000005.sst +read-at(607, 53): db/000005.sst +read-at(570, 37): db/000005.sst +read-at(79, 491): db/000005.sst read-at(52, 27): db/000005.sst open: db/000005.sst close: db/000005.sst open: db/000007.sst -read-at(718, 53): db/000007.sst -read-at(681, 37): db/000007.sst -read-at(53, 628): db/000007.sst +read-at(581, 53): db/000007.sst +read-at(544, 37): db/000007.sst +read-at(53, 491): db/000007.sst read-at(26, 27): db/000007.sst open: db/000007.sst close: db/000007.sst @@ -101,11 +100,12 @@ rename: db/000007.sst -> db/archive/000007.sst list db ---- 000008.sst -CURRENT LOCK MANIFEST-000001 OPTIONS-000003 archive +marker.format-version.000001.013 +marker.manifest.000001.MANIFEST-000001 list db_wal ---- @@ -132,19 +132,18 @@ open-dir: db1_wal lock: db1/LOCK open-dir: db1 open-dir: db1 -open: db1/CURRENT create: db1/MANIFEST-000001 sync: db1/MANIFEST-000001 -remove: db1/temporary.000001.dbtmp -create: db1/temporary.000001.dbtmp -sync: db1/temporary.000001.dbtmp -close: db1/temporary.000001.dbtmp -rename: db1/temporary.000001.dbtmp -> db1/CURRENT +create: db1/marker.manifest.000001.MANIFEST-000001 +close: db1/marker.manifest.000001.MANIFEST-000001 sync: db1 open-dir: db1 sync: db1/MANIFEST-000001 create: db1_wal/000002.log sync: db1_wal +create: db1/marker.format-version.000001.013 
+close: db1/marker.format-version.000001.013 +sync: db1 create: db1/temporary.000003.dbtmp sync: db1/temporary.000003.dbtmp close: db1/temporary.000003.dbtmp @@ -202,9 +201,6 @@ open-dir: db1_wal lock: db1/LOCK open-dir: db1 open-dir: db1 -open: db1/CURRENT -read-at(0, 16): db1/CURRENT -close: db1/CURRENT open: db1/MANIFEST-000001 close: db1/MANIFEST-000001 open-dir: db1 @@ -214,11 +210,9 @@ open: db1_wal/000004.log close: db1_wal/000004.log create: db1/MANIFEST-000458 sync: db1/MANIFEST-000458 -remove: db1/temporary.000458.dbtmp -create: db1/temporary.000458.dbtmp -sync: db1/temporary.000458.dbtmp -close: db1/temporary.000458.dbtmp -rename: db1/temporary.000458.dbtmp -> db1/CURRENT +create: db1/marker.manifest.000002.MANIFEST-000458 +close: db1/marker.manifest.000002.MANIFEST-000458 +remove: db1/marker.manifest.000001.MANIFEST-000001 sync: db1 create: db1_wal/000457.log sync: db1_wal @@ -236,8 +230,9 @@ remove: db1/OPTIONS-000003 list db1 ---- 000005.sst -CURRENT LOCK MANIFEST-000001 MANIFEST-000458 OPTIONS-000459 +marker.format-version.000001.013 +marker.manifest.000002.MANIFEST-000458 diff --git a/testdata/compaction_check_ordering b/testdata/compaction_check_ordering index 07e3197dea..cb786169f5 100644 --- a/testdata/compaction_check_ordering +++ b/testdata/compaction_check_ordering @@ -78,7 +78,7 @@ L1 a.SET.1-b.SET.2 b.SET.1-d.SET.4 ---- -OK +L1 files 000001 and 000002 have overlapping ranges: [a#1,SET-b#2,SET] vs [b#1,SET-d#4,SET] check-ordering L1 diff --git a/testdata/compaction_read_triggered b/testdata/compaction_read_triggered index 5c3c79691b..51336f0d9e 100644 --- a/testdata/compaction_read_triggered +++ b/testdata/compaction_read_triggered @@ -20,7 +20,7 @@ show-read-compactions maybe-compact ---- -[JOB 100] compacted(read) L5 [000004] (784B) Score=0.00 + L6 [000005] (784B) Score=0.00 -> L6 [000006] (778B), in 1.0s (2.0s total), output rate 778B/s +[JOB 100] compacted(read) L5 [000004] (647B) Score=0.00 + L6 [000005] (647B) Score=0.00 -> L6 [000006] 
(641B), in 1.0s (2.0s total), output rate 641B/s show-read-compactions ---- @@ -81,7 +81,7 @@ show-read-compactions maybe-compact ---- -[JOB 100] compacted(read) L5 [000004] (784B) Score=0.00 + L6 [000005] (784B) Score=0.00 -> L6 [000006] (778B), in 1.0s (2.0s total), output rate 778B/s +[JOB 100] compacted(read) L5 [000004] (647B) Score=0.00 + L6 [000005] (647B) Score=0.00 -> L6 [000006] (641B), in 1.0s (2.0s total), output rate 641B/s show-read-compactions ---- diff --git a/testdata/event_listener b/testdata/event_listener index 2922840717..0402b77109 100644 --- a/testdata/event_listener +++ b/testdata/event_listener @@ -7,14 +7,10 @@ open-dir: wal lock: db/LOCK open-dir: db open-dir: db -open: db/CURRENT create: db/MANIFEST-000001 sync: db/MANIFEST-000001 -remove: db/temporary.000001.dbtmp -create: db/temporary.000001.dbtmp -sync: db/temporary.000001.dbtmp -close: db/temporary.000001.dbtmp -rename: db/temporary.000001.dbtmp -> db/CURRENT +create: db/marker.manifest.000001.MANIFEST-000001 +close: db/marker.manifest.000001.MANIFEST-000001 sync: db [JOB 1] MANIFEST created 000001 open-dir: db @@ -22,86 +18,18 @@ sync: db/MANIFEST-000001 create: wal/000002.log sync: wal [JOB 1] WAL created 000002 -create: db/marker.manifest.000001.MANIFEST-000001 -close: db/marker.manifest.000001.MANIFEST-000001 -sync: db -create: db/marker.format-version.000001.002 -close: db/marker.format-version.000001.002 -sync: db -upgraded to format version: 002 -remove: db/temporary.000000.dbtmp -create: db/temporary.000000.dbtmp -sync: db/temporary.000000.dbtmp -close: db/temporary.000000.dbtmp -rename: db/temporary.000000.dbtmp -> db/CURRENT -create: db/marker.format-version.000002.003 -close: db/marker.format-version.000002.003 -remove: db/marker.format-version.000001.002 -sync: db -upgraded to format version: 003 -create: db/marker.format-version.000003.004 -close: db/marker.format-version.000003.004 -remove: db/marker.format-version.000002.003 -sync: db -upgraded to format version: 004 
-create: db/marker.format-version.000004.005 -close: db/marker.format-version.000004.005 -remove: db/marker.format-version.000003.004 -sync: db -upgraded to format version: 005 -create: db/marker.format-version.000005.006 -close: db/marker.format-version.000005.006 -remove: db/marker.format-version.000004.005 -sync: db -upgraded to format version: 006 -create: db/marker.format-version.000006.007 -close: db/marker.format-version.000006.007 -remove: db/marker.format-version.000005.006 -sync: db -upgraded to format version: 007 -create: db/marker.format-version.000007.008 -close: db/marker.format-version.000007.008 -remove: db/marker.format-version.000006.007 -sync: db -upgraded to format version: 008 -create: db/marker.format-version.000008.009 -close: db/marker.format-version.000008.009 -remove: db/marker.format-version.000007.008 -sync: db -upgraded to format version: 009 -create: db/marker.format-version.000009.010 -close: db/marker.format-version.000009.010 -remove: db/marker.format-version.000008.009 -sync: db -upgraded to format version: 010 -create: db/marker.format-version.000010.011 -close: db/marker.format-version.000010.011 -remove: db/marker.format-version.000009.010 -sync: db -upgraded to format version: 011 -create: db/marker.format-version.000011.012 -close: db/marker.format-version.000011.012 -remove: db/marker.format-version.000010.011 -sync: db -upgraded to format version: 012 -create: db/marker.format-version.000012.013 -close: db/marker.format-version.000012.013 -remove: db/marker.format-version.000011.012 -sync: db -upgraded to format version: 013 -create: db/marker.format-version.000013.014 -close: db/marker.format-version.000013.014 -remove: db/marker.format-version.000012.013 +create: db/marker.format-version.000001.014 +close: db/marker.format-version.000001.014 sync: db upgraded to format version: 014 -create: db/marker.format-version.000014.015 -close: db/marker.format-version.000014.015 -remove: db/marker.format-version.000013.014 +create: 
db/marker.format-version.000002.015 +close: db/marker.format-version.000002.015 +remove: db/marker.format-version.000001.014 sync: db upgraded to format version: 015 -create: db/marker.format-version.000015.016 -close: db/marker.format-version.000015.016 -remove: db/marker.format-version.000014.015 +create: db/marker.format-version.000003.016 +close: db/marker.format-version.000003.016 +remove: db/marker.format-version.000002.015 sync: db upgraded to format version: 016 create: db/temporary.000003.dbtmp @@ -117,10 +45,10 @@ sync-data: wal/000002.log close: wal/000002.log create: wal/000004.log sync: wal -[JOB 4] WAL created 000004 -[JOB 5] flushing 1 memtable (100B) to L0 +[JOB 2] WAL created 000004 +[JOB 3] flushing 1 memtable (100B) to L0 create: db/000005.sst -[JOB 5] flushing: sstable created 000005 +[JOB 3] flushing: sstable created 000005 sync-data: db/000005.sst close: db/000005.sst sync: db @@ -131,8 +59,8 @@ create: db/marker.manifest.000002.MANIFEST-000006 close: db/marker.manifest.000002.MANIFEST-000006 remove: db/marker.manifest.000001.MANIFEST-000001 sync: db -[JOB 5] MANIFEST created 000006 -[JOB 5] flushed 1 memtable (100B) to L0 [000005] (662B), in 1.0s (2.0s total), output rate 662B/s +[JOB 3] MANIFEST created 000006 +[JOB 3] flushed 1 memtable (100B) to L0 [000005] (662B), in 1.0s (2.0s total), output rate 662B/s compact ---- @@ -141,10 +69,10 @@ sync-data: wal/000004.log close: wal/000004.log reuseForWrite: wal/000002.log -> wal/000007.log sync: wal -[JOB 6] WAL created 000007 (recycled 000002) -[JOB 7] flushing 1 memtable (100B) to L0 +[JOB 4] WAL created 000007 (recycled 000002) +[JOB 5] flushing 1 memtable (100B) to L0 create: db/000008.sst -[JOB 7] flushing: sstable created 000008 +[JOB 5] flushing: sstable created 000008 sync-data: db/000008.sst close: db/000008.sst sync: db @@ -155,11 +83,11 @@ create: db/marker.manifest.000003.MANIFEST-000009 close: db/marker.manifest.000003.MANIFEST-000009 remove: db/marker.manifest.000002.MANIFEST-000006 
sync: db -[JOB 7] MANIFEST created 000009 -[JOB 7] flushed 1 memtable (100B) to L0 [000008] (662B), in 1.0s (2.0s total), output rate 662B/s +[JOB 5] MANIFEST created 000009 +[JOB 5] flushed 1 memtable (100B) to L0 [000008] (662B), in 1.0s (2.0s total), output rate 662B/s remove: db/MANIFEST-000001 -[JOB 7] MANIFEST deleted 000001 -[JOB 8] compacting(default) L0 [000005 000008] (1.3KB) Score=0.00 + L6 [] (0B) Score=0.00; OverlappingRatio: Single 0.00, Multi 0.00 +[JOB 5] MANIFEST deleted 000001 +[JOB 6] compacting(default) L0 [000005 000008] (1.3KB) Score=0.00 + L6 [] (0B) Score=0.00; OverlappingRatio: Single 0.00, Multi 0.00 open: db/000005.sst read-at(609, 53): db/000005.sst read-at(572, 37): db/000005.sst @@ -181,7 +109,7 @@ read-at(0, 26): db/000008.sst close: db/000008.sst close: db/000005.sst create: db/000010.sst -[JOB 8] compacting: sstable created 000010 +[JOB 6] compacting: sstable created 000010 sync-data: db/000010.sst close: db/000010.sst sync: db @@ -192,16 +120,16 @@ create: db/marker.manifest.000004.MANIFEST-000011 close: db/marker.manifest.000004.MANIFEST-000011 remove: db/marker.manifest.000003.MANIFEST-000009 sync: db -[JOB 8] MANIFEST created 000011 -[JOB 8] compacted(default) L0 [000005 000008] (1.3KB) Score=0.00 + L6 [] (0B) Score=0.00 -> L6 [000010] (662B), in 1.0s (3.0s total), output rate 662B/s +[JOB 6] MANIFEST created 000011 +[JOB 6] compacted(default) L0 [000005 000008] (1.3KB) Score=0.00 + L6 [] (0B) Score=0.00 -> L6 [000010] (662B), in 1.0s (3.0s total), output rate 662B/s close: db/000005.sst close: db/000008.sst remove: db/000005.sst -[JOB 8] sstable deleted 000005 +[JOB 6] sstable deleted 000005 remove: db/000008.sst -[JOB 8] sstable deleted 000008 +[JOB 6] sstable deleted 000008 remove: db/MANIFEST-000006 -[JOB 8] MANIFEST deleted 000006 +[JOB 6] MANIFEST deleted 000006 disable-file-deletions ---- @@ -213,10 +141,10 @@ sync-data: wal/000007.log close: wal/000007.log reuseForWrite: wal/000004.log -> wal/000012.log sync: wal -[JOB 
9] WAL created 000012 (recycled 000004) -[JOB 10] flushing 1 memtable (100B) to L0 +[JOB 7] WAL created 000012 (recycled 000004) +[JOB 8] flushing 1 memtable (100B) to L0 create: db/000013.sst -[JOB 10] flushing: sstable created 000013 +[JOB 8] flushing: sstable created 000013 sync-data: db/000013.sst close: db/000013.sst sync: db @@ -227,13 +155,13 @@ create: db/marker.manifest.000005.MANIFEST-000014 close: db/marker.manifest.000005.MANIFEST-000014 remove: db/marker.manifest.000004.MANIFEST-000011 sync: db -[JOB 10] MANIFEST created 000014 -[JOB 10] flushed 1 memtable (100B) to L0 [000013] (662B), in 1.0s (2.0s total), output rate 662B/s +[JOB 8] MANIFEST created 000014 +[JOB 8] flushed 1 memtable (100B) to L0 [000013] (662B), in 1.0s (2.0s total), output rate 662B/s enable-file-deletions ---- remove: db/MANIFEST-000009 -[JOB 11] MANIFEST deleted 000009 +[JOB 9] MANIFEST deleted 000009 ingest ---- @@ -245,7 +173,7 @@ read-at(26, 27): ext/0 read-at(0, 26): ext/0 close: ext/0 link: ext/0 -> db/000015.sst -[JOB 12] ingesting: sstable created 000015 +[JOB 10] ingesting: sstable created 000015 sync: db open: db/000013.sst read-at(609, 53): db/000013.sst @@ -260,11 +188,11 @@ create: db/marker.manifest.000006.MANIFEST-000016 close: db/marker.manifest.000006.MANIFEST-000016 remove: db/marker.manifest.000005.MANIFEST-000014 sync: db -[JOB 12] MANIFEST created 000016 +[JOB 10] MANIFEST created 000016 remove: db/MANIFEST-000011 -[JOB 12] MANIFEST deleted 000011 +[JOB 10] MANIFEST deleted 000011 remove: ext/0 -[JOB 12] ingested L0:000015 (717B) +[JOB 10] ingested L0:000015 (717B) metrics ---- @@ -318,38 +246,38 @@ read-at(26, 27): ext/b read-at(0, 26): ext/b close: ext/b link: ext/a -> db/000017.sst -[JOB 13] ingesting: sstable created 000017 +[JOB 11] ingesting: sstable created 000017 link: ext/b -> db/000018.sst -[JOB 13] ingesting: sstable created 000018 +[JOB 11] ingesting: sstable created 000018 sync: db sync-data: wal/000012.log close: wal/000012.log reuseForWrite: 
wal/000007.log -> wal/000019.log sync: wal -[JOB 14] WAL created 000019 (recycled 000007) +[JOB 12] WAL created 000019 (recycled 000007) sync-data: wal/000019.log sync-data: wal/000019.log close: wal/000019.log create: wal/000020.log sync: wal -[JOB 15] WAL created 000020 +[JOB 13] WAL created 000020 remove: ext/a remove: ext/b -[JOB 13] ingested as flushable 000017 (717B), 000018 (717B) +[JOB 11] ingested as flushable 000017 (717B), 000018 (717B) sync-data: wal/000020.log close: wal/000020.log create: wal/000021.log sync: wal -[JOB 16] WAL created 000021 -[JOB 17] flushing 1 memtable (100B) to L0 +[JOB 14] WAL created 000021 +[JOB 15] flushing 1 memtable (100B) to L0 create: db/000022.sst -[JOB 17] flushing: sstable created 000022 +[JOB 15] flushing: sstable created 000022 sync-data: db/000022.sst close: db/000022.sst sync: db sync: db/MANIFEST-000016 -[JOB 17] flushed 1 memtable (100B) to L0 [000022] (662B), in 1.0s (2.0s total), output rate 662B/s -[JOB 18] flushing 2 ingested tables +[JOB 15] flushed 1 memtable (100B) to L0 [000022] (662B), in 1.0s (2.0s total), output rate 662B/s +[JOB 16] flushing 2 ingested tables create: db/MANIFEST-000023 close: db/MANIFEST-000016 sync: db/MANIFEST-000023 @@ -357,13 +285,13 @@ create: db/marker.manifest.000007.MANIFEST-000023 close: db/marker.manifest.000007.MANIFEST-000023 remove: db/marker.manifest.000006.MANIFEST-000016 sync: db -[JOB 18] MANIFEST created 000023 -[JOB 18] flushed 2 ingested flushables L0:000017 (717B) + L6:000018 (717B) in 1.0s (2.0s total), output rate 1.4KB/s +[JOB 16] MANIFEST created 000023 +[JOB 16] flushed 2 ingested flushables L0:000017 (717B) + L6:000018 (717B) in 1.0s (2.0s total), output rate 1.4KB/s remove: db/MANIFEST-000014 -[JOB 18] MANIFEST deleted 000014 -[JOB 19] flushing 1 memtable (100B) to L0 +[JOB 16] MANIFEST deleted 000014 +[JOB 17] flushing 1 memtable (100B) to L0 sync: db/MANIFEST-000023 -[JOB 19] flush error: pebble: empty table +[JOB 17] flush error: pebble: empty table 
metrics ---- diff --git a/testdata/external_iterator b/testdata/external_iterator index 589b950f24..e0e37bc080 100644 --- a/testdata/external_iterator +++ b/testdata/external_iterator @@ -254,7 +254,7 @@ aaaa@1: (aaaa@1, .) aaaaa@3: (aaaaa@3, .) aaaaa@1: (aaaaa@1, .) stats: (interface (dir, seek, step): (fwd, 5, 5), (rev, 0, 0)), (internal (dir, seek, step): (fwd, 5, 5), (rev, 0, 0)), -(internal-stats: (block-bytes: (total 475B, cached 0B, read-time 0s)), (points: (count 10, key-bytes 50B, value-bytes 50B, tombstoned 0))) +(internal-stats: (block-bytes: (total 547B, cached 0B, read-time 0s)), (points: (count 10, key-bytes 50B, value-bytes 35B, tombstoned 0)), (separated: (count 15, bytes 65B, fetched 25B))) # Note the inclusion of fwd-only. This iterator will use the TrySeekUsingNext # optimization and loads ~half the block-bytes as a result. @@ -283,4 +283,4 @@ aaaa@1: (aaaa@1, .) aaaaa@3: (aaaaa@3, .) aaaaa@1: (aaaaa@1, .) stats: (interface (dir, seek, step): (fwd, 5, 5), (rev, 0, 0)), (internal (dir, seek, step): (fwd, 5, 5), (rev, 0, 0)), -(internal-stats: (block-bytes: (total 281B, cached 0B, read-time 0s)), (points: (count 10, key-bytes 50B, value-bytes 50B, tombstoned 0))) +(internal-stats: (block-bytes: (total 336B, cached 0B, read-time 0s)), (points: (count 10, key-bytes 50B, value-bytes 35B, tombstoned 0)), (separated: (count 5, bytes 25B, fetched 25B))) diff --git a/testdata/flushable_ingest b/testdata/flushable_ingest index ea20491fcd..55c5b57ca4 100644 --- a/testdata/flushable_ingest +++ b/testdata/flushable_ingest @@ -56,12 +56,11 @@ ls 000006.sst 000007.log 000008.log -CURRENT LOCK MANIFEST-000001 OPTIONS-000003 ext -marker.format-version.000015.016 +marker.format-version.000003.016 marker.manifest.000001.MANIFEST-000001 # Test basic WAL replay @@ -78,12 +77,11 @@ ls 000006.sst 000007.log 000008.log -CURRENT LOCK MANIFEST-000001 OPTIONS-000003 ext -marker.format-version.000015.016 +marker.format-version.000003.016 
marker.manifest.000001.MANIFEST-000001 open @@ -387,12 +385,11 @@ ls 000006.sst 000007.log 000008.log -CURRENT LOCK MANIFEST-000001 OPTIONS-000003 ext -marker.format-version.000015.016 +marker.format-version.000003.016 marker.manifest.000001.MANIFEST-000001 close @@ -408,12 +405,11 @@ ls 000006.sst 000007.log 000008.log -CURRENT LOCK MANIFEST-000001 OPTIONS-000003 ext -marker.format-version.000015.016 +marker.format-version.000003.016 marker.manifest.000001.MANIFEST-000001 open @@ -440,13 +436,12 @@ ls 000009.sst 000010.sst 000011.log -CURRENT LOCK MANIFEST-000001 MANIFEST-000012 OPTIONS-000013 ext -marker.format-version.000015.016 +marker.format-version.000003.016 marker.manifest.000002.MANIFEST-000012 # Make sure that the new mutable memtable can accept writes. @@ -585,12 +580,11 @@ ls 000005.log 000006.log 000007.sst -CURRENT LOCK MANIFEST-000001 OPTIONS-000003 ext -marker.format-version.000015.016 +marker.format-version.000003.016 marker.manifest.000001.MANIFEST-000001 close @@ -604,13 +598,12 @@ resetToSynced 000004.sst 000005.log 000006.log -CURRENT LOCK MANIFEST-000001 OPTIONS-000003 ext ext1 -marker.format-version.000015.016 +marker.format-version.000003.016 marker.manifest.000001.MANIFEST-000001 ignoreSyncs false diff --git a/testdata/format_major_version_pebblev1_migration b/testdata/format_major_version_pebblev1_migration deleted file mode 100644 index c579a53050..0000000000 --- a/testdata/format_major_version_pebblev1_migration +++ /dev/null @@ -1,170 +0,0 @@ -# Open the DB at one version prior to the version that enforces Pebblev1 tables. -open version=8 ----- - -format-major-version ----- -008 - -# Confirm the allowable range of table formats. - -min-table-format ----- -(LevelDB) - -max-table-format ----- -(Pebble,v2) - -# Disable automatic compactions while we create the tables. - -disable-automatic-compactions true ----- - -# Create and flush a table. The table is created at the max format version for -# this DB version (i.e. Pebblev2). 
- -batch -set a a ----- - -flush ----- - -# Ingest an external table written at the max table format for the current -# version (i.e. Pebblev2). - -ingest a format=pebblev2 -set pebblev2 pebblev2 ----- - -# Ingest some external table written at earlier versions (i.e. Pebblev1, -# RocksDBv2, LevelDB). - -ingest b format=pebblev1 -set pebblev1 pebblev1 ----- - -ingest c format=rocksdbv2 -set rocksdbv2 rockdbv2 ----- - -ingest d format=leveldb -set leveldb leveldb ----- - -lsm ----- -0.0: - 000005:[a#10,SET-a#10,SET] -6: - 000009:[leveldb#14,SET-leveldb#14,SET] - 000007:[pebblev1#12,SET-pebblev1#12,SET] - 000006:[pebblev2#11,SET-pebblev2#11,SET] - 000008:[rocksdbv2#13,SET-rocksdbv2#13,SET] - -tally-table-formats ----- -(LevelDB): 1 -(RocksDB,v2): 1 -(Pebble,v1): 1 -(Pebble,v2): 2 -(Pebble,v3): 0 -(Pebble,v4): 0 - -# Upgrade the DB to FormatMinTableFormatPebblev1. - -ratchet-format-major-version 009 ----- - -format-major-version ----- -009 - -# The min table format version has been raised to Pebblev1. - -min-table-format ----- -(Pebble,v1) - -max-table-format ----- -(Pebble,v2) - -# Ingesting a table with a format prior to this version fails. - -ingest e format=rocksdbv2 -set rocksdbv2 rockdbv2 ----- -pebble: table format (RocksDB,v2) is not within range supported at DB format major version 9, ((Pebble,v1),(Pebble,v2)) - -# Upgrade the DB to FormatPrePebblev1Marked. The marked count increases to the -# count of tables at versions pre-Pebblev1 (i.e. two tables). - -ratchet-format-major-version 010 ----- - -format-major-version ----- -010 - -min-table-format ----- -(Pebble,v1) - -max-table-format ----- -(Pebble,v2) - -marked-file-count ----- -2 files marked for compaction - -# Upgrade the DB to FormatPrePebblev1MarkedCompacted. The marked count returns -# to zero. 
- -disable-automatic-compactions false ----- - -ratchet-format-major-version 014 ----- - -format-major-version ----- -014 - -min-table-format ----- -(Pebble,v1) - -max-table-format ----- -(Pebble,v3) - -marked-file-count ----- -0 files marked for compaction - -# The two tables with older table formats were rewritten with newer table format -# versions (note updated table numbers for the leveldb and rocksdb2 tables). - -lsm ----- -0.0: - 000005:[a#10,SET-a#10,SET] -6: - 000013:[leveldb#0,SET-leveldb#0,SET] - 000007:[pebblev1#12,SET-pebblev1#12,SET] - 000006:[pebblev2#11,SET-pebblev2#11,SET] - 000012:[rocksdbv2#0,SET-rocksdbv2#0,SET] - -# Confirm all tables are at least the minimum supported table format version. - -tally-table-formats ----- -(LevelDB): 0 -(RocksDB,v2): 0 -(Pebble,v1): 1 -(Pebble,v2): 4 -(Pebble,v3): 0 -(Pebble,v4): 0 diff --git a/testdata/format_major_version_split_user_key_migration b/testdata/format_major_version_split_user_key_migration deleted file mode 100644 index 735e7c0208..0000000000 --- a/testdata/format_major_version_split_user_key_migration +++ /dev/null @@ -1,148 +0,0 @@ -define -L1 -d.SET.110:d e.SET.140:e ----- -1: - 000004:[d#110,SET-e#140,SET] seqnums:[110-140] points:[d#110,SET-e#140,SET] - -reopen ----- -OK - -# The current public Pebble interface offers no way of constructing a multi-file -# atomic compaction unit, so use the force-ingest command to force an ingestion -# into L1. - -build cd -set c c -set d d ----- - -force-ingest paths=(cd) level=1 ----- -1: - 000008:[c#141,SET-d#141,SET] seqnums:[141-141] points:[c#141,SET-d#141,SET] - 000004:[d#110,SET-e#140,SET] seqnums:[110-140] points:[d#110,SET-e#140,SET] - -format-major-version ----- -005 - -marked-file-count ----- -0 files marked for compaction - -ratchet-format-major-version 006 ----- - -format-major-version ----- -006 - -# Upgrading to format major version 006 should've marked files for compaction. 
- -marked-file-count ----- -2 files marked for compaction - -reopen ----- -OK - -format-major-version ----- -006 - -# Ensure the files previously marked for compaction are still marked for -# compaction. - -marked-file-count ----- -2 files marked for compaction - -disable-automatic-compactions false ----- - -# Ratcheting to 007 should force compaction of any files still marked for -# compaction. - -ratchet-format-major-version 007 ----- -[JOB 100] compacted(rewrite) L1 [000008 000004] (1.3KB) Score=0.00 + L1 [] (0B) Score=0.00 -> L1 [000013] (649B), in 1.0s (2.0s total), output rate 649B/s - -format-major-version ----- -007 - -lsm ----- -1: - 000013:[c#0,SET-e#0,SET] - -# Reset to a new LSM. - -define -L1 -b.SET.0:b c.SET.5:c -L1 -m.SET.0:m l.SET.5:l -L1 -x.SET.0:x y.SET.5:y ----- -1: - 000004:[b#0,SET-c#5,SET] seqnums:[0-5] points:[b#0,SET-c#5,SET] - 000005:[l#5,SET-m#0,SET] seqnums:[0-5] points:[l#5,SET-m#0,SET] - 000006:[x#0,SET-y#5,SET] seqnums:[0-5] points:[x#0,SET-y#5,SET] - -build ab -set a a -set b b ----- - -build wx -set w w -set x x ----- - -force-ingest paths=(ab, wx) level=1 ----- -1: - 000007:[a#10,SET-b#10,SET] seqnums:[10-10] points:[a#10,SET-b#10,SET] - 000004:[b#0,SET-c#5,SET] seqnums:[0-5] points:[b#0,SET-c#5,SET] - 000005:[l#5,SET-m#0,SET] seqnums:[0-5] points:[l#5,SET-m#0,SET] - 000008:[w#11,SET-x#11,SET] seqnums:[11-11] points:[w#11,SET-x#11,SET] - 000006:[x#0,SET-y#5,SET] seqnums:[0-5] points:[x#0,SET-y#5,SET] - -format-major-version ----- -005 - -ratchet-format-major-version 006 ----- - -format-major-version ----- -006 - -marked-file-count ----- -4 files marked for compaction - -disable-automatic-compactions false ----- - -ratchet-format-major-version 007 ----- -[JOB 100] compacted(rewrite) L1 [000007 000004] (1.3KB) Score=0.00 + L1 [] (0B) Score=0.00 -> L1 [000010] (649B), in 1.0s (2.0s total), output rate 649B/s -[JOB 100] compacted(rewrite) L1 [000008 000006] (1.3KB) Score=0.00 + L1 [] (0B) Score=0.00 -> L1 [000011] (649B), in 1.0s (2.0s 
total), output rate 649B/s - -lsm ----- -1: - 000010:[a#0,SET-c#0,SET] - 000005:[l#5,SET-m#0,SET] - 000011:[w#0,SET-y#0,SET] - -format-major-version ----- -007 diff --git a/testdata/ingest_load b/testdata/ingest_load index 3e5d690c72..72b2a064b0 100644 --- a/testdata/ingest_load +++ b/testdata/ingest_load @@ -89,15 +89,15 @@ a.RANGEDEL.0:b ranges: #0,0-#0,0 # Loading tables at an unsupported table format results in an error. -# Write a table at version 7 (Pebble,v2) into a DB at version 6 (Pebble,v1). -load writer-version=8 db-version=7 +# Write a table at version 15 (Pebble,v4) into a DB at version 14 (Pebble,v3). +load writer-version=15 db-version=14 a.SET.1: ---- -pebble: table format (Pebble,v2) is not within range supported at DB format major version 7, ((LevelDB),(Pebble,v1)) +pebble: table format (Pebble,v4) is not within range supported at DB format major version 14, ((Pebble,v1),(Pebble,v3)) # Tables with range keys only. -load writer-version=10 db-version=10 +load writer-version=16 db-version=16 rangekey: a-z:{(#0,RANGEKEYSET,@1,foo)} ---- 1: a#0,21-z#72057594037927935,21 @@ -106,7 +106,7 @@ rangekey: a-z:{(#0,RANGEKEYSET,@1,foo)} # Tables with a mixture of point and range keys. 
-load writer-version=10 db-version=10 +load writer-version=16 db-version=16 a.SET.0: b.SET.0: c.SET.0: @@ -118,7 +118,7 @@ rangekey: y-z:{(#0,RANGEKEYSET,@3,baz)} points: a#0,1-c#0,1 ranges: w#0,21-z#72057594037927935,21 -load writer-version=10 db-version=10 +load writer-version=16 db-version=16 c.SET.0:d rangekey: a-z:{(#0,RANGEKEYSET,@1,foo)} ---- @@ -126,7 +126,7 @@ rangekey: a-z:{(#0,RANGEKEYSET,@1,foo)} points: c#0,1-c#0,1 ranges: a#0,21-z#72057594037927935,21 -load writer-version=10 db-version=10 +load writer-version=16 db-version=16 a.SET.0:z rangekey: c-d:{(#0,RANGEKEYSET,@1,foo)} ---- @@ -136,7 +136,7 @@ rangekey: c-d:{(#0,RANGEKEYSET,@1,foo)} # NB: range dels sort before range keys -load writer-version=10 db-version=10 +load writer-version=16 db-version=16 a.RANGEDEL.0:z rangekey: a-z:{(#0,RANGEKEYSET,@1,foo)} ---- diff --git a/testdata/iter_histories/range_keys_simple b/testdata/iter_histories/range_keys_simple index 6748821e70..e65712d5e7 100644 --- a/testdata/iter_histories/range_keys_simple +++ b/testdata/iter_histories/range_keys_simple @@ -414,26 +414,6 @@ z@10: (z@10, .) z@1: (z@1, .) za@100: (za@100, .) -# Applying range keys to a DB running with a version that doesn't support them -# results in an error. Range keys were added in version 7. -reset format-major-version=6 ----- - -batch commit -range-key-set a z @5 boop ----- -pebble: batch requires at least format major version 8 (current: 6) - -# Constructing iterator over range keys on a DB that doesn't support them -# results in an error. - -reset format-major-version=6 ----- - -combined-iter ----- -pebble: range keys require at least format major version 8 (current: 6) - # Test Prev-ing back over a synthetic range key marker. 
Synthetic range-key # markers (the keys interleaved at 'c' during a SeekGE(c) when there's a # straddling range key) are ephemeral, and Prev-ing back must move back the diff --git a/testdata/range_del b/testdata/range_del index 14102e07bb..53cca9d90e 100644 --- a/testdata/range_del +++ b/testdata/range_del @@ -629,166 +629,6 @@ b: (4, .) a: (4, .) . -# User-key that spans tables in a level. - -define -L1 - a.SET.12:3 -L1 - a.SET.11:2 -L1 - a.SET.10:1 ----- -mem: 1 -1: - 000004:[a#12,SET-a#12,SET] - 000005:[a#11,SET-a#11,SET] - 000006:[a#10,SET-a#10,SET] - -get seq=10 -a ----- -a: pebble: not found - -get seq=11 -a ----- -a:1 - -get seq=12 -a ----- -a:2 - -get seq=13 -a ----- -a:3 - -iter seq=11 -first -seek-ge a -seek-ge b -last -seek-lt a -seek-lt b ----- -a: (1, .) -a: (1, .) -. -a: (1, .) -. -a: (1, .) - -iter seq=12 -first -seek-ge a -seek-ge b -last -seek-lt a -seek-lt b ----- -a: (2, .) -a: (2, .) -. -a: (2, .) -. -a: (2, .) - -iter seq=13 -first -seek-ge a -seek-ge b -last -seek-lt a -seek-lt b ----- -a: (3, .) -a: (3, .) -. -a: (3, .) -. -a: (3, .) - -define -L1 - a.MERGE.12:3 -L1 - a.MERGE.11:2 -L1 - a.MERGE.10:1 ----- -mem: 1 -1: - 000004:[a#12,MERGE-a#12,MERGE] - 000005:[a#11,MERGE-a#11,MERGE] - 000006:[a#10,MERGE-a#10,MERGE] - -get seq=10 -a ----- -a: pebble: not found - -get seq=11 -a ----- -a:1 - -get seq=12 -a ----- -a:12 - -get seq=13 -a ----- -a:123 - -iter seq=11 -first -seek-ge a -seek-ge b -last -seek-lt a -seek-lt b ----- -a: (1, .) -a: (1, .) -. -a: (1, .) -. -a: (1, .) - -iter seq=12 -first -seek-ge a -seek-ge b -last -seek-lt a -seek-lt b ----- -a: (12, .) -a: (12, .) -. -a: (12, .) -. -a: (12, .) - -iter seq=13 -first -seek-ge a -seek-ge b -last -seek-lt a -seek-lt b ----- -a: (123, .) -a: (123, .) -. -a: (123, .) -. -a: (123, .) - # User-key spread across multiple levels. 
define @@ -1171,13 +1011,13 @@ mem: 1 compact a-e ---- 1: - 000007:[a#11,SET-c#inf,RANGEDEL] + 000007:[a#11,SETWITHDEL-c#inf,RANGEDEL] 000008:[c#12,SET-e#inf,RANGEDEL] compact d-e ---- 1: - 000007:[a#11,SET-c#inf,RANGEDEL] + 000007:[a#11,SETWITHDEL-c#inf,RANGEDEL] 2: 000008:[c#12,SET-e#inf,RANGEDEL] @@ -1209,7 +1049,7 @@ mem: 1 compact a-e ---- 1: - 000007:[a#11,SET-c#inf,RANGEDEL] + 000007:[a#11,SETWITHDEL-c#inf,RANGEDEL] 000008:[c#12,SET-e#inf,RANGEDEL] compact a-b @@ -1217,7 +1057,7 @@ compact a-b 1: 000008:[c#12,SET-e#inf,RANGEDEL] 2: - 000007:[a#11,SET-c#inf,RANGEDEL] + 000007:[a#11,SETWITHDEL-c#inf,RANGEDEL] iter seq=13 seek-lt d @@ -1257,7 +1097,7 @@ mem: 1 compact a-b ---- 1: - 000008:[a#11,SET-c#inf,RANGEDEL] + 000008:[a#11,SETWITHDEL-c#inf,RANGEDEL] 000009:[c#12,SET-d#inf,RANGEDEL] 000010:[d#10,RANGEDEL-e#inf,RANGEDEL] 2: @@ -1266,7 +1106,7 @@ compact a-b compact d-e ---- 1: - 000008:[a#11,SET-c#inf,RANGEDEL] + 000008:[a#11,SETWITHDEL-c#inf,RANGEDEL] 000009:[c#12,SET-d#inf,RANGEDEL] 3: 000011:[d#10,RANGEDEL-e#inf,RANGEDEL] @@ -1281,7 +1121,7 @@ compact a-b L1 1: 000009:[c#12,SET-d#inf,RANGEDEL] 2: - 000008:[a#11,SET-c#inf,RANGEDEL] + 000008:[a#11,SETWITHDEL-c#inf,RANGEDEL] 3: 000011:[d#10,RANGEDEL-e#inf,RANGEDEL] @@ -1320,7 +1160,7 @@ compact a-b 0.0: 000007:[f#13,SET-f#13,SET] 1: - 000009:[a#11,SET-c#inf,RANGEDEL] + 000009:[a#11,SETWITHDEL-c#inf,RANGEDEL] 000010:[c#12,SET-d#inf,RANGEDEL] 000011:[d#10,RANGEDEL-e#inf,RANGEDEL] 2: @@ -1331,7 +1171,7 @@ compact d-e 0.0: 000007:[f#13,SET-f#13,SET] 1: - 000009:[a#11,SET-c#inf,RANGEDEL] + 000009:[a#11,SETWITHDEL-c#inf,RANGEDEL] 000010:[c#12,SET-d#inf,RANGEDEL] 3: 000012:[d#10,RANGEDEL-e#inf,RANGEDEL] @@ -1344,7 +1184,7 @@ c:v compact f-f L0 ---- 1: - 000009:[a#11,SET-c#inf,RANGEDEL] + 000009:[a#11,SETWITHDEL-c#inf,RANGEDEL] 000010:[c#12,SET-d#inf,RANGEDEL] 000007:[f#13,SET-f#13,SET] 3: @@ -1353,8 +1193,8 @@ compact f-f L0 compact a-f L1 ---- 2: - 000013:[a#11,SET-c#inf,RANGEDEL] - 
000014:[c#12,SET-d#inf,RANGEDEL] + 000013:[a#11,SETWITHDEL-c#inf,RANGEDEL] + 000014:[c#12,SETWITHDEL-d#inf,RANGEDEL] 000015:[f#13,SET-f#13,SET] 3: 000012:[d#10,RANGEDEL-e#inf,RANGEDEL] @@ -1401,7 +1241,7 @@ num-entries: 1 num-deletions: 1 num-range-key-sets: 0 point-deletions-bytes-estimate: 0 -range-deletions-bytes-estimate: 836 +range-deletions-bytes-estimate: 699 wait-pending-table-stats 000004 @@ -1410,7 +1250,7 @@ num-entries: 1 num-deletions: 1 num-range-key-sets: 0 point-deletions-bytes-estimate: 0 -range-deletions-bytes-estimate: 1672 +range-deletions-bytes-estimate: 1398 wait-pending-table-stats 000005 @@ -1419,7 +1259,7 @@ num-entries: 2 num-deletions: 2 num-range-key-sets: 0 point-deletions-bytes-estimate: 0 -range-deletions-bytes-estimate: 1672 +range-deletions-bytes-estimate: 1398 # Range deletions with varying overlap. @@ -1522,7 +1362,7 @@ num-entries: 1 num-deletions: 1 num-range-key-sets: 0 point-deletions-bytes-estimate: 0 -range-deletions-bytes-estimate: 782 +range-deletions-bytes-estimate: 645 wait-pending-table-stats 000006 @@ -1531,7 +1371,7 @@ num-entries: 1 num-deletions: 1 num-range-key-sets: 0 point-deletions-bytes-estimate: 0 -range-deletions-bytes-estimate: 771 +range-deletions-bytes-estimate: 634 wait-pending-table-stats 000004 @@ -1540,4 +1380,4 @@ num-entries: 2 num-deletions: 2 num-range-key-sets: 0 point-deletions-bytes-estimate: 0 -range-deletions-bytes-estimate: 1553 +range-deletions-bytes-estimate: 1279 diff --git a/testdata/rocksdb-ingest-only/000003.log b/testdata/rocksdb-ingest-only/000003.log deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/testdata/rocksdb-ingest-only/000006.sst b/testdata/rocksdb-ingest-only/000006.sst deleted file mode 100644 index 1b98083a748c8433ef5b7a155c4a2357253cba22..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1187 zcmaJ=O>Z1U5Ut*9yuF+CcMQmiqCI#eP)N&3ER1AXGRF1+2^2vj4hXG!W@>la?&%(P 
z_w4RO2qgz3IKmAfenQTiIdeep6S(jPKsiG7?3!FiTAGildR<-hUiEM61bSSamwJZE z=E=FLrqE_xw$WzoP6o=1&;KFbd=cGs?86_Qy$d&Z>as)xFLF7H zAqsA^8(f*W0ei61bT+;4gi191h(vnKM39L_p-hG|5vl?se0z-*4u@(chDrx?MAE(O zO*S1{f?iw*I2Z;*)@b106gPz6->ca>3_utt+==nf7aa0}d9`vZwL8j6$ zNUDH7dLH|b4WgS+%DnKSiCtz9((W>cGM*%Y$`N?Gk;x#1SC8>2Yu{j~k>(+`>4D$o zI3og#)C2FsfapLm(l&`UUK+a~7q(l?3A)%12@HIBdrq7vtX64t#rttdbSiVKt=dm9(>waVYB(6LZcalF|?PNbu?CDkr((Do2wKBP+pWCU91vT<~eSyyv8O{mn5ba z8EWbYO=>fmZN9>CbR!hDKqpP+Ve?vxlrg2&MR0tTnKBnO4fSYv)@?B~!723DyQ_@$ zjPNey#9ASs4OVA#Z|Y%~)3|kWF)~D$lPP5vM&|f`;&;|hVr)4Hh0>UXT$Sx^$qH=|hfjd^~!)rbDI zA+^8I>E#MNWv8(5-_0qgGOC<-`I9|ciV5$~6E%mQzx-jJea|2M!d{Q&t5@9d{MWy} K`suwtzWx{1Yho1u diff --git a/testdata/rocksdb-ingest-only/CURRENT b/testdata/rocksdb-ingest-only/CURRENT deleted file mode 100644 index 875cf23355..0000000000 --- a/testdata/rocksdb-ingest-only/CURRENT +++ /dev/null @@ -1 +0,0 @@ -MANIFEST-000007 diff --git a/testdata/rocksdb-ingest-only/IDENTITY b/testdata/rocksdb-ingest-only/IDENTITY deleted file mode 100644 index b11f8d2ce1..0000000000 --- a/testdata/rocksdb-ingest-only/IDENTITY +++ /dev/null @@ -1 +0,0 @@ -160040b17ab41758-6d6a810cd585edc2 \ No newline at end of file diff --git a/testdata/rocksdb-ingest-only/LOCK b/testdata/rocksdb-ingest-only/LOCK deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/testdata/rocksdb-ingest-only/MANIFEST-000001 b/testdata/rocksdb-ingest-only/MANIFEST-000001 deleted file mode 100644 index d89025ae6eb479dde681d12e61bc276d185fa292..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13 UcmX?daDFr!10xdyGZPB~03Fi;k^lez diff --git a/testdata/rocksdb-ingest-only/MANIFEST-000007 b/testdata/rocksdb-ingest-only/MANIFEST-000007 deleted file mode 100644 index 4b500e23263c02e9880f569bf307c8a5547ede75..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 135 zcmbR2ea2mkfss)pIX^kOC_gbdBR)Alw;-`7u_V7}#`=h5Obm=n40~%jGuapzIT@Ha 
zSQwTu)h+dfaZ=dW7IP{jW|ihtq-N&lCS_$-mNP&AqYx(}lqpw{lbcvlT#%lXTU3fE H&A= - // formatVersionedManifestMarker indicating that the - // atomicfs.Marker is the source of truth on the current manifest. - if filename == "" { // The marker hasn't been set yet. This database doesn't exist. return marker, base.FileNum(0).DiskFileNum(), false, nil @@ -959,41 +895,6 @@ func findCurrentManifest( return marker, manifestNum, true, nil } -func readCurrentFile(fs vfs.FS, dirname string) (base.DiskFileNum, error) { - // Read the CURRENT file to find the current manifest file. - current, err := fs.Open(base.MakeFilepath(fs, dirname, fileTypeCurrent, base.FileNum(0).DiskFileNum())) - if err != nil { - return base.FileNum(0).DiskFileNum(), errors.Wrapf(err, "pebble: could not open CURRENT file for DB %q", dirname) - } - defer current.Close() - stat, err := current.Stat() - if err != nil { - return base.FileNum(0).DiskFileNum(), err - } - n := stat.Size() - if n == 0 { - return base.FileNum(0).DiskFileNum(), errors.Errorf("pebble: CURRENT file for DB %q is empty", dirname) - } - if n > 4096 { - return base.FileNum(0).DiskFileNum(), errors.Errorf("pebble: CURRENT file for DB %q is too large", dirname) - } - b := make([]byte, n) - _, err = current.ReadAt(b, 0) - if err != nil { - return base.FileNum(0).DiskFileNum(), err - } - if b[n-1] != '\n' { - return base.FileNum(0).DiskFileNum(), base.CorruptionErrorf("pebble: CURRENT file for DB %q is malformed", dirname) - } - b = bytes.TrimSpace(b) - - _, manifestFileNum, ok := base.ParseFilename(fs, string(b)) - if !ok { - return base.FileNum(0).DiskFileNum(), base.CorruptionErrorf("pebble: MANIFEST name %q is malformed", errors.Safe(b)) - } - return manifestFileNum, nil -} - func newFileMetrics(newFiles []manifest.NewFileEntry) map[int]*LevelMetrics { m := map[int]*LevelMetrics{} for _, nf := range newFiles {