diff --git a/db.go b/db.go index d3b8f90984..e6382df058 100644 --- a/db.go +++ b/db.go @@ -721,11 +721,14 @@ func (d *DB) Apply(batch *Batch, opts *WriteOptions) error { if d.opts.Experimental.RangeKeys == nil { panic("pebble: range keys require the Experimental.RangeKeys option") } - // TODO(jackson): Assert that all range key operands are suffixless. + if d.FormatMajorVersion() < FormatRangeKeys { + panic(fmt.Sprintf( + "pebble: range keys require at least format major version %d (current: %d)", + FormatRangeKeys, d.FormatMajorVersion(), + )) + } - // TODO(jackson): Once the format major version for range keys is - // introduced, error if the batch includes range keys but the active - // format major version doesn't enable them. + // TODO(jackson): Assert that all range key operands are suffixless. } if batch.db == nil { @@ -869,8 +872,16 @@ func (d *DB) newIterInternal(batch *Batch, s *Snapshot, o *IterOptions) *Iterato if err := d.closed.Load(); err != nil { panic(err) } - if o.rangeKeys() && d.opts.Experimental.RangeKeys == nil { - panic("pebble: range keys require the Experimental.RangeKeys option") + if o.rangeKeys() { + if d.opts.Experimental.RangeKeys == nil { + panic("pebble: range keys require the Experimental.RangeKeys option") + } + if d.FormatMajorVersion() < FormatRangeKeys { + panic(fmt.Sprintf( + "pebble: range keys require at least format major version %d (current: %d)", + FormatRangeKeys, d.FormatMajorVersion(), + )) + } } if o != nil && o.RangeKeyMasking.Suffix != nil && o.KeyTypes != IterKeyTypePointsAndRanges { panic("pebble: range key masking requires IterKeyTypePointsAndRanges") diff --git a/format_major_version.go b/format_major_version.go index cd68c2bf57..517074382d 100644 --- a/format_major_version.go +++ b/format_major_version.go @@ -70,11 +70,13 @@ const ( // FormatBlockPropertyCollector is a format major version that introduces // BlockPropertyCollectors. FormatBlockPropertyCollector + // FormatRangeKeys is a format major version that introduces range keys. + FormatRangeKeys // FormatNewest always contains the most recent format major version. // NB: When adding new versions, the MaxTableFormat method should also be // updated to return the maximum allowable version for the new // FormatMajorVersion. - FormatNewest FormatMajorVersion = FormatBlockPropertyCollector + FormatNewest FormatMajorVersion = FormatRangeKeys ) // MaxTableFormat returns the maximum sstable.TableFormat that can be used at @@ -86,6 +88,8 @@ func (v FormatMajorVersion) MaxTableFormat() sstable.TableFormat { return sstable.TableFormatRocksDBv2 case FormatBlockPropertyCollector: return sstable.TableFormatPebblev1 + case FormatRangeKeys: + return sstable.TableFormatPebblev2 default: panic(fmt.Sprintf("pebble: unsupported format major version: %s", v)) } @@ -167,6 +171,9 @@ var formatMajorVersionMigrations = map[FormatMajorVersion]func(*DB) error{ FormatBlockPropertyCollector: func(d *DB) error { return d.finalizeFormatVersUpgrade(FormatBlockPropertyCollector) }, + FormatRangeKeys: func(d *DB) error { + return d.finalizeFormatVersUpgrade(FormatRangeKeys) + }, } const formatVersionMarkerName = `format-version` diff --git a/format_major_version_test.go b/format_major_version_test.go index 34286751b1..761a8fe2e8 100644 --- a/format_major_version_test.go +++ b/format_major_version_test.go @@ -36,6 +36,8 @@ func TestRatchetFormat(t *testing.T) { require.Equal(t, FormatSetWithDelete, d.FormatMajorVersion()) require.NoError(t, d.RatchetFormatMajorVersion(FormatBlockPropertyCollector)) require.Equal(t, FormatBlockPropertyCollector, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatRangeKeys)) + require.Equal(t, FormatRangeKeys, d.FormatMajorVersion()) require.NoError(t, d.Close()) // If we Open the database again, leaving the default format, the @@ -189,6 +191,7 @@ func TestFormatMajorVersions_TableFormat(t *testing.T) { FormatVersioned: sstable.TableFormatRocksDBv2, FormatSetWithDelete: sstable.TableFormatRocksDBv2, FormatBlockPropertyCollector: sstable.TableFormatPebblev1, + FormatRangeKeys: sstable.TableFormatPebblev2, } // Valid versions. diff --git a/ingest.go b/ingest.go index 8f72f55341..6fe9f34193 100644 --- a/ingest.go +++ b/ingest.go @@ -45,7 +45,11 @@ func ingestValidateKey(opts *Options, key *InternalKey) error { } func ingestLoad1( - opts *Options, path string, cacheID uint64, fileNum FileNum, + opts *Options, + fmv FormatMajorVersion, + path string, + cacheID uint64, + fileNum FileNum, ) (*fileMetadata, error) { stat, err := opts.FS.Stat(path) if err != nil { @@ -64,6 +68,18 @@ func ingestLoad1( } defer r.Close() + // Avoid ingesting tables with format versions this DB doesn't support. + tf, err := r.TableFormat() + if err != nil { + return nil, err + } + if tf > fmv.MaxTableFormat() { + return nil, errors.Newf( + "pebble: table with format %s unsupported at DB format major version %d, %s", + tf, fmv, fmv.MaxTableFormat(), + ) + } + meta := &fileMetadata{} meta.FileNum = fileNum meta.Size = uint64(stat.Size()) @@ -154,12 +170,16 @@ func ingestLoad1( } func ingestLoad( - opts *Options, paths []string, cacheID uint64, pending []FileNum, + opts *Options, + fmv FormatMajorVersion, + paths []string, + cacheID uint64, + pending []FileNum, ) ([]*fileMetadata, []string, error) { meta := make([]*fileMetadata, 0, len(paths)) newPaths := make([]string, 0, len(paths)) for i := range paths { - m, err := ingestLoad1(opts, paths[i], cacheID, pending[i]) + m, err := ingestLoad1(opts, fmv, paths[i], cacheID, pending[i]) if err != nil { return nil, nil, err } @@ -525,7 +545,7 @@ func (d *DB) Ingest(paths []string) error { // Load the metadata for all of the files being ingested. This step detects // and elides empty sstables. - meta, paths, err := ingestLoad(d.opts, paths, d.cacheID, pendingOutputs) + meta, paths, err := ingestLoad(d.opts, d.FormatMajorVersion(), paths, d.cacheID, pendingOutputs) if err != nil { return err } diff --git a/ingest_test.go b/ingest_test.go index 34f2e54e1b..1e5b7b34bc 100644 --- a/ingest_test.go +++ b/ingest_test.go @@ -35,11 +35,28 @@ func TestIngestLoad(t *testing.T) { datadriven.RunTest(t, "testdata/ingest_load", func(td *datadriven.TestData) string { switch td.Cmd { case "load": + writerOpts := sstable.WriterOptions{} + var dbVersion FormatMajorVersion + for _, cmdArgs := range td.CmdArgs { + v, err := strconv.Atoi(cmdArgs.Vals[0]) + if err != nil { + return err.Error() + } + switch k := cmdArgs.Key; k { + case "writer-version": + fmv := FormatMajorVersion(v) + writerOpts.TableFormat = fmv.MaxTableFormat() + case "db-version": + dbVersion = FormatMajorVersion(v) + default: + return fmt.Sprintf("unknown cmd %s\n", k) + } + } f, err := mem.Create("ext") if err != nil { return err.Error() } - w := sstable.NewWriter(f, sstable.WriterOptions{}) + w := sstable.NewWriter(f, writerOpts) for _, data := range strings.Split(td.Input, "\n") { j := strings.Index(data, ":") if j < 0 { @@ -57,7 +74,7 @@ func TestIngestLoad(t *testing.T) { Comparer: DefaultComparer, FS: mem, } - meta, _, err := ingestLoad(opts, []string{"ext"}, 0, []FileNum{1}) + meta, _, err := ingestLoad(opts, dbVersion, []string{"ext"}, 0, []FileNum{1}) if err != nil { return err.Error() } @@ -141,7 +158,7 @@ func TestIngestLoadRand(t *testing.T) { Comparer: DefaultComparer, FS: mem, } - meta, _, err := ingestLoad(opts, paths, 0, pending) + meta, _, err := ingestLoad(opts, FormatNewest, paths, 0, pending) require.NoError(t, err) for _, m := range meta { @@ -162,7 +179,7 @@ func TestIngestLoadInvalid(t *testing.T) { Comparer: DefaultComparer, FS: mem, } - if _, _, err := ingestLoad(opts, []string{"invalid"}, 0, []FileNum{1}); err == nil { + if _, _, err := ingestLoad(opts, FormatNewest, []string{"invalid"}, 0, []FileNum{1}); err == nil { t.Fatalf("expected error, but found success") } } diff --git a/open_test.go b/open_test.go index 3ed53d89c6..f5bc45af80 100644 --- a/open_test.go +++ b/open_test.go @@ -101,7 +101,7 @@ func TestNewDBFilenames(t *testing.T) { "LOCK", "MANIFEST-000001", "OPTIONS-000003", - "marker.format-version.000004.005", + "marker.format-version.000005.006", "marker.manifest.000001.MANIFEST-000001", }, } diff --git a/range_keys_test.go b/range_keys_test.go index 885918908e..50f16cde3f 100644 --- a/range_keys_test.go +++ b/range_keys_test.go @@ -6,8 +6,10 @@ package pebble import ( "bytes" "fmt" + "strconv" "testing" + "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble/internal/datadriven" "github.com/cockroachdb/pebble/internal/testkeys" "github.com/cockroachdb/pebble/vfs" @@ -35,10 +37,24 @@ func TestRangeKeys(t *testing.T) { require.NoError(t, d.Close()) } opts := &Options{ - FS: vfs.NewMem(), - Comparer: testkeys.Comparer, + FS: vfs.NewMem(), + Comparer: testkeys.Comparer, + FormatMajorVersion: FormatRangeKeys, } opts.Experimental.RangeKeys = new(RangeKeysArena) + + for _, cmdArgs := range td.CmdArgs { + if cmdArgs.Key != "format-major-version" { + return fmt.Sprintf("unknown command %s\n", cmdArgs.Key) + } + v, err := strconv.Atoi(cmdArgs.Vals[0]) + if err != nil { + return err.Error() + } + // Override the DB version. + opts.FormatMajorVersion = FormatMajorVersion(v) + } + var err error d, err = Open("", opts) require.NoError(t, err) @@ -52,8 +68,19 @@ func TestRangeKeys(t *testing.T) { case "batch": b := d.NewBatch() require.NoError(t, runBatchDefineCmd(td, b)) + var err error + func() { + defer func() { + if r := recover(); r != nil { + err = errors.New(r.(string)) + } + }() + err = b.Commit(nil) + }() + if err != nil { + return err.Error() + } count := b.Count() - require.NoError(t, b.Commit(nil)) return fmt.Sprintf("wrote %d keys\n", count) case "indexed-batch": b = d.NewIndexedBatch() @@ -76,7 +103,19 @@ func TestRangeKeys(t *testing.T) { } o.RangeKeyMasking.Suffix = []byte(arg.Vals[0]) } - iter := newIter(o) + var iter *Iterator + var err error + func() { + defer func() { + if r := recover(); r != nil { + err = errors.New(r.(string)) + } + }() + iter = newIter(o) + }() + if err != nil { + return err.Error() + } return runIterCmd(td, iter, true /* close iter */) case "rangekey-iter": iter := newIter(&IterOptions{KeyTypes: IterKeyTypeRangesOnly}) diff --git a/sstable/reader.go b/sstable/reader.go index ee2dd9c976..c71b80146b 100644 --- a/sstable/reader.go +++ b/sstable/reader.go @@ -2066,6 +2066,7 @@ type Reader struct { mergerOK bool checksumType ChecksumType tableFilter *tableFilterReader + tableFormat TableFormat Properties Properties } @@ -2690,6 +2691,14 @@ func (r *Reader) EstimateDiskUsage(start, end []byte) (uint64, error) { return endBH.Offset + endBH.Length + blockTrailerLen - startBH.Offset, nil } +// TableFormat returns the format version for the table. +func (r *Reader) TableFormat() (TableFormat, error) { + if r.err != nil { + return TableFormatUnspecified, r.err + } + return r.tableFormat, nil +} + // ReadableFile describes subset of vfs.File required for reading SSTs. type ReadableFile interface { io.ReaderAt @@ -2735,6 +2744,7 @@ func NewReader(f ReadableFile, o ReaderOptions, extraOpts ...ReaderOption) (*Rea return nil, r.Close() } r.checksumType = footer.checksum + r.tableFormat = footer.format // Read the metaindex. if err := r.readMetaindex(footer.metaindexBH); err != nil { r.err = err diff --git a/sstable/reader_test.go b/sstable/reader_test.go index ac347907e8..c9d02726da 100644 --- a/sstable/reader_test.go +++ b/sstable/reader_test.go @@ -860,6 +860,35 @@ func TestValidateBlockChecksums(t *testing.T) { } } +func TestReader_TableFormat(t *testing.T) { + test := func(t *testing.T, want TableFormat) { + fs := vfs.NewMem() + f, err := fs.Create("test") + require.NoError(t, err) + + opts := WriterOptions{TableFormat: want} + w := NewWriter(f, opts) + err = w.Close() + require.NoError(t, err) + + f, err = fs.Open("test") + require.NoError(t, err) + r, err := NewReader(f, ReaderOptions{}) + require.NoError(t, err) + defer r.Close() + + got, err := r.TableFormat() + require.NoError(t, err) + require.Equal(t, want, got) + } + + for tf := TableFormatLevelDB; tf <= TableFormatMax; tf++ { + t.Run(tf.String(), func(t *testing.T) { + test(t, tf) + }) + } +} + func buildTestTable( t *testing.T, numEntries uint64, blockSize, indexBlockSize int, compression Compression, ) *Reader { diff --git a/testdata/checkpoint b/testdata/checkpoint index b75fddc366..f35759ef82 100644 --- a/testdata/checkpoint +++ b/testdata/checkpoint @@ -31,6 +31,9 @@ sync: db create: db/marker.format-version.000004.005 close: db/marker.format-version.000004.005 sync: db +create: db/marker.format-version.000005.006 +close: db/marker.format-version.000005.006 +sync: db sync: db/MANIFEST-000001 create: db/000002.log sync: db @@ -96,9 +99,9 @@ close: open-dir: checkpoints/checkpoint1 link: db/OPTIONS-000003 -> checkpoints/checkpoint1/OPTIONS-000003 open-dir: checkpoints/checkpoint1 -create: checkpoints/checkpoint1/marker.format-version.000001.005 -sync: checkpoints/checkpoint1/marker.format-version.000001.005 -close: checkpoints/checkpoint1/marker.format-version.000001.005 +create: checkpoints/checkpoint1/marker.format-version.000001.006 +sync: checkpoints/checkpoint1/marker.format-version.000001.006 +close: checkpoints/checkpoint1/marker.format-version.000001.006 sync: checkpoints/checkpoint1 close: checkpoints/checkpoint1 create: checkpoints/checkpoint1/MANIFEST-000001 @@ -153,7 +156,7 @@ CURRENT LOCK MANIFEST-000001 OPTIONS-000003 -marker.format-version.000004.005 +marker.format-version.000005.006 marker.manifest.000001.MANIFEST-000001 list checkpoints/checkpoint1 @@ -163,7 +166,7 @@ list checkpoints/checkpoint1 000007.sst MANIFEST-000001 OPTIONS-000003 -marker.format-version.000001.005 +marker.format-version.000001.006 marker.manifest.000001.MANIFEST-000001 open checkpoints/checkpoint1 readonly diff --git a/testdata/event_listener b/testdata/event_listener index 082a8cf5cb..67f3369b24 100644 --- a/testdata/event_listener +++ b/testdata/event_listener @@ -38,6 +38,10 @@ create: db/marker.format-version.000004.005 close: db/marker.format-version.000004.005 sync: db upgraded to format version: 005 +create: db/marker.format-version.000005.006 +close: db/marker.format-version.000005.006 +sync: db +upgraded to format version: 006 create: db/MANIFEST-000003 close: db/MANIFEST-000001 sync: db/MANIFEST-000003 @@ -183,7 +187,7 @@ compact 1 2.3 K 0 B 0 (size == estimated-debt, scor zmemtbl 0 0 B ztbl 0 0 B bcache 8 1.4 K 0.0% (score == hit-rate) - tcache 1 672 B 0.0% (score == hit-rate) + tcache 1 680 B 0.0% (score == hit-rate) snaps 0 - 0 (score == earliest seq num) titers 0 filter - - 0.0% (score == utility) @@ -205,9 +209,9 @@ close: open-dir: checkpoint link: db/OPTIONS-000004 -> checkpoint/OPTIONS-000004 open-dir: checkpoint -create: checkpoint/marker.format-version.000001.005 -sync: checkpoint/marker.format-version.000001.005 -close: checkpoint/marker.format-version.000001.005 +create: checkpoint/marker.format-version.000001.006 +sync: checkpoint/marker.format-version.000001.006 +close: checkpoint/marker.format-version.000001.006 sync: checkpoint close: checkpoint create: checkpoint/MANIFEST-000017 diff --git a/testdata/ingest b/testdata/ingest index 00c99b58d4..96f873771f 100644 --- a/testdata/ingest +++ b/testdata/ingest @@ -48,7 +48,7 @@ compact 0 0 B 0 B 0 (size == estimated-debt, scor zmemtbl 0 0 B ztbl 0 0 B bcache 8 1.5 K 42.9% (score == hit-rate) - tcache 1 672 B 50.0% (score == hit-rate) + tcache 1 680 B 50.0% (score == hit-rate) snaps 0 - 0 (score == earliest seq num) titers 0 filter - - 0.0% (score == utility) diff --git a/testdata/ingest_load b/testdata/ingest_load index 8488a2c0bf..7c8bce6cd0 100644 --- a/testdata/ingest_load +++ b/testdata/ingest_load @@ -67,3 +67,10 @@ b.SET.0: a.RANGEDEL.0:b ---- 1: a#0,15-b#0,1 + +# Loading tables at an unsupported table format results in an error. +# Write a table at version 6 (Pebble,v2) into a DB at version 5 (Pebble,v1). +load writer-version=6 db-version=5 +a.SET.1: +---- +pebble: table with format (Pebble,v2) unsupported at DB format major version 5, (Pebble,v1) diff --git a/testdata/metrics b/testdata/metrics index d670e8caab..b4f9891b8d 100644 --- a/testdata/metrics +++ b/testdata/metrics @@ -34,7 +34,7 @@ compact 0 0 B 0 B 0 (size == estimated-debt, scor zmemtbl 1 256 K ztbl 0 0 B bcache 4 698 B 0.0% (score == hit-rate) - tcache 1 672 B 0.0% (score == hit-rate) + tcache 1 680 B 0.0% (score == hit-rate) snaps 0 - 0 (score == earliest seq num) titers 1 filter - - 0.0% (score == utility) @@ -145,7 +145,7 @@ compact 1 0 B 0 B 0 (size == estimated-debt, scor zmemtbl 1 256 K ztbl 1 771 B bcache 4 698 B 33.3% (score == hit-rate) - tcache 1 672 B 50.0% (score == hit-rate) + tcache 1 680 B 50.0% (score == hit-rate) snaps 0 - 0 (score == earliest seq num) titers 1 filter - - 0.0% (score == utility) diff --git a/testdata/rangekeys b/testdata/rangekeys index 435b7f1290..e75afcb25c 100644 --- a/testdata/rangekeys +++ b/testdata/rangekeys @@ -498,3 +498,23 @@ prev-limit y x: valid (., [x-z) @5=boop) . x: valid (., [x-z) @5=boop) + +# Applying range keys to a DB running with a version that doesn't support them +# results in an error. Range keys were added in version 6. +reset format-major-version=5 +---- + +batch +range-key-set a z @5 boop +---- +pebble: range keys require at least format major version 6 (current: 5) + +# Constructing iterator over range keys on a DB that doesn't support them +# results in an error. + +reset format-major-version=5 +---- + +combined-iter +---- +pebble: range keys require at least format major version 6 (current: 5)