Skip to content

Commit

Permalink
db: defer loading L0 range key blocks during iterator construction
Browse files Browse the repository at this point in the history
Previously, construction of an iterator over range keys that found sstables
containing range keys within L0 performed I/O to load the range key blocks
during iterator construction. This was less efficient: If the iterator
ultimately didn't need to read the keyspace overlapping the sstables containing
range keys, the block loads were unnecessary.

More significantly, if the I/O failed during iterator construction, the
resulting iterator was unusable. It would always error with the original error
returned by the failed block load. This is a deviation from iterator error
handling across the rest of the iterator stack, which allows an Iterator to be
re-seeked to clear the current iterator error.

Resolves cockroachdb#2994.
  • Loading branch information
jbowens committed Oct 17, 2023
1 parent aaedec5 commit 31c81a4
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 19 deletions.
9 changes: 9 additions & 0 deletions data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1453,6 +1453,15 @@ func runLSMCmd(td *datadriven.TestData, d *DB) string {
func parseDBOptionsArgs(opts *Options, args []datadriven.CmdArg) error {
for _, cmdArg := range args {
switch cmdArg.Key {
case "auto-compactions":
switch cmdArg.Vals[0] {
case "off":
opts.DisableAutomaticCompactions = true
case "on":
opts.DisableAutomaticCompactions = false
default:
return errors.Errorf("Unrecognized %q arg value: %q", cmdArg.Key, cmdArg.Vals[0])
}
case "inject-errors":
injs := make([]errorfs.Injector, len(cmdArg.Vals))
for i := 0; i < len(cmdArg.Vals); i++ {
Expand Down
28 changes: 26 additions & 2 deletions iterator_histories_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func TestIterHistories(t *testing.T) {
}

var d *DB
var buf bytes.Buffer
iters := map[string]*Iterator{}
batches := map[string]*Batch{}
newIter := func(name string, reader Reader, o *IterOptions) *Iterator {
Expand Down Expand Up @@ -76,6 +77,7 @@ func TestIterHistories(t *testing.T) {
defer cleanup()

datadriven.RunTest(t, path, func(t *testing.T, td *datadriven.TestData) string {
buf.Reset()
switch td.Cmd {
case "define":
var err error
Expand Down Expand Up @@ -166,7 +168,6 @@ func TestIterHistories(t *testing.T) {
return fmt.Sprintf("unknown reader %q", arg.Vals[0])
}
}
var buf bytes.Buffer
for _, l := range strings.Split(td.Input, "\n") {
v, closer, err := reader.Get([]byte(l))
if err != nil {
Expand All @@ -187,6 +188,30 @@ func TestIterHistories(t *testing.T) {
return err.Error()
}
return ""
case "layout":
var verbose bool
var filename string
td.ScanArgs(t, "filename", &filename)
td.MaybeScanArgs(t, "verbose", &verbose)
f, err := opts.FS.Open(filename)
if err != nil {
return err.Error()
}
readable, err := sstable.NewSimpleReadable(f)
if err != nil {
return err.Error()
}
r, err := sstable.NewReader(readable, opts.MakeReaderOptions())
if err != nil {
return err.Error()
}
defer r.Close()
l, err := r.Layout()
if err != nil {
return err.Error()
}
l.Describe(&buf, verbose, r, nil)
return buf.String()
case "lsm":
return runLSMCmd(td, d)
case "metrics":
Expand Down Expand Up @@ -328,7 +353,6 @@ func TestIterHistories(t *testing.T) {
iter := newIter(name, d, &IterOptions{KeyTypes: IterKeyTypeRangesOnly})
return runIterCmd(td, iter, name == "" /* close iter */)
case "scan-rangekeys":
var buf bytes.Buffer
iter := newIter(
pluckStringCmdArg(td, "name"),
d,
Expand Down
58 changes: 41 additions & 17 deletions range_keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,25 +55,40 @@ func (i *Iterator) constructRangeKeyIter() {
current = i.readState.current
}
// Next are the file levels: L0 sub-levels followed by lower levels.

// Add file-specific iterators for L0 files containing range keys. We
// maintain a separate manifest.LevelMetadata for each level containing only
// files that contain range keys, however we don't compute a separate
// L0Sublevels data structure too.
//
// Add file-specific iterators for L0 files containing range keys. This is less
// efficient than using levelIters for sublevels of L0 files containing
// range keys, but range keys are expected to be sparse anyway, reducing the
// cost benefit of maintaining a separate L0Sublevels instance for range key
// files and then using it here.
//
// NB: We iterate L0's files in reverse order. They're sorted by
// LargestSeqNum ascending, and we need to add them to the merging iterator
// in LargestSeqNum descending to preserve the merging iterator's invariants
// around Key Trailer order.
iter := current.RangeKeyLevels[0].Iter()
for f := iter.Last(); f != nil; f = iter.Prev() {
spanIter, err := i.newIterRangeKey(f, i.opts.SpanIterOptions())
if err != nil {
i.rangeKey.iterConfig.AddLevel(&errorKeyspanIter{err: err})
continue
// We first use L0's LevelMetadata to peek and see whether L0 contains any
// range keys at all. If it does, we create a range key level iterator per
// level that contains range keys using the information from L0Sublevels.
// Some sublevels may not contain any range keys, and we need to iterate
// through the fileMetadata to determine that. Since L0's file count should
// not significantly exceed ~1000 files (see L0CompactionFileThreshold),
// this should be okay.
if !current.RangeKeyLevels[0].Empty() {
// L0 contains at least 1 file containing range keys.
// Add level iterators for the L0 sublevels, iterating from newest to
// oldest.
for j := len(current.L0SublevelFiles) - 1; j >= 0; j-- {
iter := current.L0SublevelFiles[j].Iter()
if !containsAnyRangeKeys(iter) {
continue
}

li := i.rangeKey.iterConfig.NewLevelIter()
li.Init(
i.opts.SpanIterOptions(),
i.cmp,
i.newIterRangeKey,
iter.Filter(manifest.KeyTypeRange),
manifest.L0Sublevel(j),
manifest.KeyTypeRange,
)
i.rangeKey.iterConfig.AddLevel(li)
}
i.rangeKey.iterConfig.AddLevel(spanIter)
}

// Add level iterators for the non-empty non-L0 levels.
Expand All @@ -89,6 +104,15 @@ func (i *Iterator) constructRangeKeyIter() {
}
}

func containsAnyRangeKeys(iter manifest.LevelIterator) bool {
for f := iter.First(); f != nil; f = iter.Next() {
if f.HasRangeKeys {
return true
}
}
return false
}

// Range key masking
//
// Pebble iterators may be configured such that range keys with suffixes mask
Expand Down
40 changes: 40 additions & 0 deletions testdata/iter_histories/errors
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,43 @@ b: (b, .)
c: (c, .)
d: (d, .)
.

# Regression test for #2994.
#
# Previously, an error while loading an L0 sstable's range key block could
# result in an iterator that would always return the same error. Now, the IO is
# deferred to the first seek. If a seek encounters an IO error, re-seeking the
# iterator should re-attempt the failed IO operation, potentially succeeding if
# the IO error was transient.

define auto-compactions=off
L0
a.SET.9:a
rangekey:c-d:{(#0,RANGEKEYSET,@1,foo)}
[email protected]:e@2
----
0.0:
000004:[a#9,SET-e@2#2,SET]

layout filename=000004.sst
----
0 data (38)
43 index (35)
83 range-key (29)
117 properties (645)
767 meta-index (57)
829 footer (53)
882 EOF

# Inject an error on the first `ReadAt` call on 000004.sst's range key block
# (which is at offset 83).

reopen auto-compactions=off enable-table-stats=false inject-errors=((ErrInjected (And (PathMatch "000004.sst") (OpFileReadAt 83) (OnIndex 0))))
----

combined-iter
first
first
----
err=injected error
a: (a, .)

0 comments on commit 31c81a4

Please sign in to comment.