From 31c81a4ff565ae8cba7f8e161a0f8a84261608cb Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Mon, 16 Oct 2023 15:52:40 -0400 Subject: [PATCH] db: defer loading L0 range key blocks during iterator construction Previously, construction of an iterator over range keys that found sstables containing range keys within L0 performed I/O to load the range key blocks during iterator construction. This was less efficient: If the iterator ultimately didn't need to read the keyspace overlapping the sstables containing range keys, the block loads were unnecessary. More significantly, if the I/O failed during iterator construction, the resulting iterator was unusable. It would always error with the original error returned by the failed block load. This is a deviation from iterator error handling across the rest of the iterator stack, which allows an Iterator to be re-seeked to clear the current iterator error. Resolves #2994. --- data_test.go | 9 ++++++ iterator_histories_test.go | 28 ++++++++++++++-- range_keys.go | 58 ++++++++++++++++++++++++---------- testdata/iter_histories/errors | 40 +++++++++++++++++++++++ 4 files changed, 116 insertions(+), 19 deletions(-) diff --git a/data_test.go b/data_test.go index 2d7fe87a2b..19b240bc46 100644 --- a/data_test.go +++ b/data_test.go @@ -1453,6 +1453,15 @@ func runLSMCmd(td *datadriven.TestData, d *DB) string { func parseDBOptionsArgs(opts *Options, args []datadriven.CmdArg) error { for _, cmdArg := range args { switch cmdArg.Key { + case "auto-compactions": + switch cmdArg.Vals[0] { + case "off": + opts.DisableAutomaticCompactions = true + case "on": + opts.DisableAutomaticCompactions = false + default: + return errors.Errorf("Unrecognized %q arg value: %q", cmdArg.Key, cmdArg.Vals[0]) + } case "inject-errors": injs := make([]errorfs.Injector, len(cmdArg.Vals)) for i := 0; i < len(cmdArg.Vals); i++ { diff --git a/iterator_histories_test.go b/iterator_histories_test.go index 5f958ec2c9..792b2faf2f 100644 --- a/iterator_histories_test.go +++ b/iterator_histories_test.go @@ -32,6 +32,7 @@ func TestIterHistories(t *testing.T) { } var d *DB + var buf bytes.Buffer iters := map[string]*Iterator{} batches := map[string]*Batch{} newIter := func(name string, reader Reader, o *IterOptions) *Iterator { @@ -76,6 +77,7 @@ func TestIterHistories(t *testing.T) { defer cleanup() datadriven.RunTest(t, path, func(t *testing.T, td *datadriven.TestData) string { + buf.Reset() switch td.Cmd { case "define": var err error @@ -166,7 +168,6 @@ func TestIterHistories(t *testing.T) { return fmt.Sprintf("unknown reader %q", arg.Vals[0]) } } - var buf bytes.Buffer for _, l := range strings.Split(td.Input, "\n") { v, closer, err := reader.Get([]byte(l)) if err != nil { @@ -187,6 +188,30 @@ func TestIterHistories(t *testing.T) { return err.Error() } return "" + case "layout": + var verbose bool + var filename string + td.ScanArgs(t, "filename", &filename) + td.MaybeScanArgs(t, "verbose", &verbose) + f, err := opts.FS.Open(filename) + if err != nil { + return err.Error() + } + readable, err := sstable.NewSimpleReadable(f) + if err != nil { + return err.Error() + } + r, err := sstable.NewReader(readable, opts.MakeReaderOptions()) + if err != nil { + return err.Error() + } + defer r.Close() + l, err := r.Layout() + if err != nil { + return err.Error() + } + l.Describe(&buf, verbose, r, nil) + return buf.String() case "lsm": return runLSMCmd(td, d) case "metrics": @@ -328,7 +353,6 @@ func TestIterHistories(t *testing.T) { iter := newIter(name, d, &IterOptions{KeyTypes: IterKeyTypeRangesOnly}) return runIterCmd(td, iter, name == "" /* close iter */) case "scan-rangekeys": - var buf bytes.Buffer iter := newIter( pluckStringCmdArg(td, "name"), d, diff --git a/range_keys.go b/range_keys.go index b431863805..f10f1d2b33 100644 --- a/range_keys.go +++ b/range_keys.go @@ -55,25 +55,40 @@ func (i *Iterator) constructRangeKeyIter() { current = i.readState.current } // Next are the file levels: L0 sub-levels followed by lower levels. + + // Add file-specific iterators for L0 files containing range keys. We + // maintain a separate manifest.LevelMetadata for each level containing only + // files that contain range keys, however we don't compute a separate + // L0Sublevels data structure too. // - // Add file-specific iterators for L0 files containing range keys. This is less - // efficient than using levelIters for sublevels of L0 files containing - // range keys, but range keys are expected to be sparse anyway, reducing the - // cost benefit of maintaining a separate L0Sublevels instance for range key - // files and then using it here. - // - // NB: We iterate L0's files in reverse order. They're sorted by - // LargestSeqNum ascending, and we need to add them to the merging iterator - // in LargestSeqNum descending to preserve the merging iterator's invariants - // around Key Trailer order. - iter := current.RangeKeyLevels[0].Iter() - for f := iter.Last(); f != nil; f = iter.Prev() { - spanIter, err := i.newIterRangeKey(f, i.opts.SpanIterOptions()) - if err != nil { - i.rangeKey.iterConfig.AddLevel(&errorKeyspanIter{err: err}) - continue + // We first use L0's LevelMetadata to peek and see whether L0 contains any + // range keys at all. If it does, we create a range key level iterator per + // level that contains range keys using the information from L0Sublevels. + // Some sublevels may not contain any range keys, and we need to iterate + // through the fileMetadata to determine that. Since L0's file count should + // not significantly exceed ~1000 files (see L0CompactionFileThreshold), + // this should be okay. + if !current.RangeKeyLevels[0].Empty() { + // L0 contains at least 1 file containing range keys. + // Add level iterators for the L0 sublevels, iterating from newest to + // oldest. + for j := len(current.L0SublevelFiles) - 1; j >= 0; j-- { + iter := current.L0SublevelFiles[j].Iter() + if !containsAnyRangeKeys(iter) { + continue + } + + li := i.rangeKey.iterConfig.NewLevelIter() + li.Init( + i.opts.SpanIterOptions(), + i.cmp, + i.newIterRangeKey, + iter.Filter(manifest.KeyTypeRange), + manifest.L0Sublevel(j), + manifest.KeyTypeRange, + ) + i.rangeKey.iterConfig.AddLevel(li) } - i.rangeKey.iterConfig.AddLevel(spanIter) } // Add level iterators for the non-empty non-L0 levels. @@ -89,6 +104,15 @@ func (i *Iterator) constructRangeKeyIter() { } } +func containsAnyRangeKeys(iter manifest.LevelIterator) bool { + for f := iter.First(); f != nil; f = iter.Next() { + if f.HasRangeKeys { + return true + } + } + return false +} + // Range key masking // // Pebble iterators may be configured such that range keys with suffixes mask diff --git a/testdata/iter_histories/errors b/testdata/iter_histories/errors index bfca56376e..07668a9b28 100644 --- a/testdata/iter_histories/errors +++ b/testdata/iter_histories/errors @@ -57,3 +57,43 @@ b: (b, .) c: (c, .) d: (d, .) . + +# Regression test for #2994. +# +# Previously, an error while loading an L0 sstable's range key block could +# result in an iterator that would always return the same error. Now, the IO is +# deferred to the first seek. If a seek encounters an IO error, re-seeking the +# iterator should re-attempt the failed IO operation, potentially succeeding if +# the IO error was transient. + +define auto-compactions=off +L0 + a.SET.9:a + rangekey:c-d:{(#0,RANGEKEYSET,@1,foo)} + e@2.SET.2:e@2 +---- +0.0: + 000004:[a#9,SET-e@2#2,SET] + +layout filename=000004.sst +---- + 0 data (38) + 43 index (35) + 83 range-key (29) + 117 properties (645) + 767 meta-index (57) + 829 footer (53) + 882 EOF + +# Inject an error on the first `ReadAt` call on 000004.sst's range key block +# (which is at offset 83). + +reopen auto-compactions=off enable-table-stats=false inject-errors=((ErrInjected (And (PathMatch "000004.sst") (OpFileReadAt 83) (OnIndex 0)))) +---- + +combined-iter +first +first +---- +err=injected error +a: (a, .)