Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

manifest: add range annotations #3759

Merged
merged 1 commit into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 110 additions & 2 deletions internal/manifest/annotator.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@

package manifest

import (
"sort"

"github.com/cockroachdb/pebble/internal/base"
)

// The Annotator type defined below is used by other packages to lazily
// compute a value over a B-Tree. Each node of the B-Tree stores one
// `annotation` per annotator, containing the result of the computation over
Expand All @@ -24,6 +30,10 @@ package manifest
// computed incrementally as edits are applied to a level.
type Annotator[T any] struct {
// Aggregator supplies the operations used to build annotation values. The
// range-annotation code in this file relies on its Zero, Accumulate and
// Merge methods.
Aggregator AnnotationAggregator[T]

// scratch is used to hold the aggregated annotation value when computing
// range annotations in order to avoid additional allocations.
// LevelRangeAnnotation re-zeroes scratch on every call over a non-empty
// level and returns it directly, so a previously returned pointer should
// not be assumed to survive the next range computation.
scratch *T
}

// An AnnotationAggregator defines how an annotation should be accumulated
Expand Down Expand Up @@ -116,6 +126,80 @@ func (a *Annotator[T]) nodeAnnotation(n *node) (_ *T, cacheOK bool) {
return t, annot.valid
}

// accumulateRangeAnnotation folds into a.scratch the annotation of every file
// in n's subtree that overlaps bounds.
//
// fullyWithinLowerBound and fullyWithinUpperBound tell the function that the
// caller already knows the whole subtree lies inside the respective bound.
// When both are set, the node's regular annotation is merged in and no
// per-item work is done.
func (a *Annotator[T]) accumulateRangeAnnotation(
	n *node,
	cmp base.Compare,
	bounds base.UserKeyBounds,
	fullyWithinLowerBound bool,
	fullyWithinUpperBound bool,
) {
	// Fast path: the entire subtree is in range, so the regular node
	// annotation covers exactly the files we want.
	if fullyWithinLowerBound && fullyWithinUpperBound {
		whole, _ := a.nodeAnnotation(n)
		a.scratch = a.Aggregator.Merge(whole, a.scratch)
		return
	}

	numItems := int(n.count)

	// Items in the half-open index range [first, limit) overlap the bounds.
	first := 0
	if !fullyWithinLowerBound {
		// Binary search for the first item whose largest key is at or past
		// the start of the bounds.
		reachesStart := func(i int) bool {
			return cmp(bounds.Start, n.items[i].Largest.UserKey) <= 0
		}
		first = sort.Search(numItems, reachesStart)
	}
	limit := numItems
	if !fullyWithinUpperBound {
		// Binary search for the first item whose smallest key is no longer
		// covered by the end of the bounds.
		startsPastEnd := func(i int) bool {
			return !bounds.End.IsUpperBoundFor(cmp, n.items[i].Smallest.UserKey)
		}
		limit = sort.Search(numItems, startsPastEnd)
	}

	// Fold in each overlapping item stored directly in this node.
	for idx := first; idx < limit; idx++ {
		a.scratch, _ = a.Aggregator.Accumulate(n.items[idx], a.scratch)
	}

	// Leaves have no children to descend into.
	if n.leaf {
		return
	}

	// Children in the closed index range [firstChild, lastChild] are visited.
	firstChild, lastChild := first, limit
	// When the bounds start at or after the smallest key of the item at
	// index first, the child to that item's left cannot contribute, so it is
	// skipped.
	if first < numItems && cmp(bounds.Start, n.items[first].Smallest.UserKey) >= 0 {
		firstChild++
	}
	// When the end of the bounds still covers the largest key of the item at
	// index limit, the child to that item's right is visited as well.
	if limit < numItems && bounds.End.IsUpperBoundFor(cmp, n.items[limit].Largest.UserKey) {
		lastChild++
	}

	for c := firstChild; c <= lastChild; c++ {
		// A child positioned to the right of the item at index first is
		// entirely within the lower bound; a child positioned to the left of
		// the item at index limit is entirely within the upper bound.
		withinLower := fullyWithinLowerBound || c > first
		withinUpper := fullyWithinUpperBound || c < limit
		a.accumulateRangeAnnotation(n.children[c], cmp, bounds, withinLower, withinUpper)
	}
}

// InvalidateAnnotation removes any existing cached annotations from this
// annotator from a node's subtree.
func (a *Annotator[T]) invalidateNodeAnnotation(n *node) {
Expand All @@ -142,8 +226,8 @@ func (a *Annotator[T]) LevelAnnotation(lm LevelMetadata) *T {
return v
}

// LevelAnnotation calculates the annotation defined by this Annotator for all
// files across the given levels. A pointer to the Annotator is used as the
// MultiLevelAnnotation calculates the annotation defined by this Annotator for
// all files across the given levels. A pointer to the Annotator is used as the
// key for pre-calculated values, so the same Annotator must be used to avoid
// duplicate computation. Annotation must not be called concurrently, and in
// practice this is achieved by requiring callers to hold DB.mu.
Expand All @@ -158,6 +242,22 @@ func (a *Annotator[T]) MultiLevelAnnotation(lms []LevelMetadata) *T {
return aggregated
}

// LevelRangeAnnotation calculates the annotation defined by this Annotator
// over the files of lm that overlap the key range described by bounds.
//
// For an empty level a freshly zeroed value is returned. Otherwise the result
// is accumulated into, and returned from, the Annotator's scratch value, which
// is re-zeroed at the start of every call.
//
// A pointer to the Annotator is used as the key for pre-calculated values, so
// the same Annotator must be used to avoid duplicate computation. Annotation
// must not be called concurrently, and in practice this is achieved by
// requiring callers to hold DB.mu.
func (a *Annotator[T]) LevelRangeAnnotation(lm LevelMetadata, bounds base.UserKeyBounds) *T {
	if !lm.Empty() {
		a.scratch = a.Aggregator.Zero(a.scratch)
		// Start at the root with neither bound known to contain the tree.
		root, cmp := lm.tree.root, lm.tree.cmp
		a.accumulateRangeAnnotation(root, cmp, bounds, false, false)
		return a.scratch
	}
	return a.Aggregator.Zero(nil)
}

// InvalidateAnnotation clears any cached annotations defined by Annotator. A
// pointer to the Annotator is used as the key for pre-calculated values, so
// the same Annotator must be used to clear the appropriate cached annotation.
Expand Down Expand Up @@ -206,6 +306,14 @@ func SumAnnotator(accumulate func(f *FileMetadata) (v uint64, cacheOK bool)) *An
}
}

// NumFilesAnnotator is an Annotator whose annotation value is the number of
// files covered by the annotation. Combined with range annotations, it gives
// an efficient way to count the files within a given key range.
var NumFilesAnnotator = SumAnnotator(func(_ *FileMetadata) (count uint64, cacheOK bool) {
	// Every file contributes exactly one to the sum.
	const perFile = 1
	return perFile, true
})

// PickFileAggregator implements the AnnotationAggregator interface. It defines
// an aggregator that picks a single file from a set of eligible files.
type PickFileAggregator struct {
Expand Down
158 changes: 127 additions & 31 deletions internal/manifest/annotator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,54 +5,47 @@
package manifest

import (
"math/rand"
"testing"

"github.com/cockroachdb/pebble/internal/base"
"github.com/stretchr/testify/require"
)

func makeTestLevelMetadata(count int) (LevelMetadata, []*FileMetadata) {
files := make([]*FileMetadata, count)
for i := 0; i < count; i++ {
files[i] = newItem(key(i))
// Creates a version with numFiles files in level 6.
func makeTestVersion(numFiles int) (*Version, []*FileMetadata) {
files := make([]*FileMetadata, numFiles)
for i := 0; i < numFiles; i++ {
// Each file spans 10 keys, e.g. [0->9], [10->19], etc.
files[i] = (&FileMetadata{}).ExtendPointKeyBounds(
base.DefaultComparer.Compare, key(i*10), key(i*10+9),
)
files[i].InitPhysicalBacking()
}

lm := MakeLevelMetadata(base.DefaultComparer.Compare, 6, files)
return lm, files
}
var levelFiles [7][]*FileMetadata
levelFiles[6] = files

// NumFilesAnnotator is an Annotator which computes an annotation value
// equal to the number of files included in the annotation.
var NumFilesAnnotator = SumAnnotator(func(f *FileMetadata) (uint64, bool) {
return 1, true
})
v := NewVersion(base.DefaultComparer, 0, levelFiles)
return v, files
}

func TestNumFilesAnnotator(t *testing.T) {
const count = 1000
lm, _ := makeTestLevelMetadata(0)
v, _ := makeTestVersion(0)

for i := 1; i <= count; i++ {
lm.tree.Insert(newItem(key(i)))
numFiles := *NumFilesAnnotator.LevelAnnotation(lm)
v.Levels[6].tree.Insert(newItem(key(i)))
numFiles := *NumFilesAnnotator.LevelAnnotation(v.Levels[6])
require.EqualValues(t, i, numFiles)
}

numFiles := *NumFilesAnnotator.LevelAnnotation(lm)
require.EqualValues(t, count, numFiles)

numFiles = *NumFilesAnnotator.LevelAnnotation(lm)
require.EqualValues(t, count, numFiles)

lm.tree.Delete(newItem(key(count / 2)))
numFiles = *NumFilesAnnotator.LevelAnnotation(lm)
require.EqualValues(t, count-1, numFiles)
}

func BenchmarkNumFilesAnnotator(b *testing.B) {
lm, _ := makeTestLevelMetadata(0)
v, _ := makeTestVersion(0)
for i := 1; i <= b.N; i++ {
lm.tree.Insert(newItem(key(i)))
numFiles := *NumFilesAnnotator.LevelAnnotation(lm)
v.Levels[6].tree.Insert(newItem(key(i)))
numFiles := *NumFilesAnnotator.LevelAnnotation(v.Levels[6])
require.EqualValues(b, uint64(i), numFiles)
}
}
Expand All @@ -70,12 +63,115 @@ func TestPickFileAggregator(t *testing.T) {
},
}

lm, files := makeTestLevelMetadata(1)
v, files := makeTestVersion(1)

for i := 1; i <= count; i++ {
lm.tree.Insert(newItem(key(i)))
pickedFile := a.LevelAnnotation(lm)
v.Levels[6].tree.Insert(newItem(key(i)))
pickedFile := a.LevelAnnotation(v.Levels[6])
// The picked file should always be the one with the smallest key.
require.Same(t, files[0], pickedFile)
}
}

// bounds builds user-key bounds running from key(i) to key(j). The end key is
// exclusive when exclusive is true.
func bounds(i int, j int, exclusive bool) base.UserKeyBounds {
	start, end := key(i).UserKey, key(j).UserKey
	return base.UserKeyBoundsEndExclusiveIf(start, end, exclusive)
}

// randomBounds draws two key indices in [0, count) from rng, orders them, and
// returns the bounds between them with a randomly chosen end exclusivity.
func randomBounds(rng *rand.Rand, count int) base.UserKeyBounds {
	lo, hi := rng.Intn(count), rng.Intn(count)
	if lo > hi {
		lo, hi = hi, lo
	}
	endExclusive := rng.Intn(2) == 0
	return bounds(lo, hi, endExclusive)
}

// requireMatchOverlaps asserts that, for level 6 of v, the file count reported
// by NumFilesAnnotator's range annotation over bounds equals the number of
// files returned by v.Overlaps for the same bounds.
func requireMatchOverlaps(t *testing.T, v *Version, bounds base.UserKeyBounds) {
	// Mark this function as a helper so a failure is attributed to the
	// calling test line rather than to this function.
	t.Helper()

	// All test files live in level 6 (see makeTestVersion).
	const level = 6

	overlaps := v.Overlaps(level, bounds)
	numFiles := *NumFilesAnnotator.LevelRangeAnnotation(v.Levels[level], bounds)
	require.EqualValues(t, overlaps.length, numFiles)
}

// TestNumFilesRangeAnnotationEmptyRanges checks range annotations against
// Version.Overlaps on a level that has had two runs of files removed, using
// bounds that fall inside, straddle, or sit next to the resulting gaps.
func TestNumFilesRangeAnnotationEmptyRanges(t *testing.T) {
	const count = 5_000
	v, files := makeTestVersion(count)

	// Punch two holes in the level by deleting the files that cover the key
	// ranges [0, 999] and [24_000, 25_999].
	for _, f := range files[:100] {
		v.Levels[6].tree.Delete(f)
	}
	for _, f := range files[2400:2600] {
		v.Levels[6].tree.Delete(f)
	}

	check := func(start, end int, exclusive bool) {
		requireMatchOverlaps(t, v, bounds(start, end, exclusive))
	}

	// Ranges that are completely empty.
	check(1, 999, false)
	check(0, 1000, true)
	check(50_000, 60_000, false)
	check(24_500, 25_500, false)
	check(24_000, 26_000, true)

	// Ranges that partially overlap an empty region.
	check(0, 1000, false)
	check(20, 1001, true)
	check(20, 1010, true)
	check(23_000, 27_000, true)
	check(25_000, 40_000, false)
	check(25_500, 26_001, true)

	// Ranges that span only a single table.
	check(45_000, 45_000, true)
	check(30_000, 30_001, true)
	check(23_000, 23_000, false)
}

// TestNumFilesRangeAnnotationRandomized compares range annotations against
// Version.Overlaps for many pseudo-random bounds generated from a fixed seed.
func TestNumFilesRangeAnnotationRandomized(t *testing.T) {
	const (
		count         = 10_000
		numIterations = 10_000
		// Each file spans 10 keys, so indices up to count*11 also produce
		// bounds that extend past the last file.
		keySpace = count * 11
	)

	v, _ := makeTestVersion(count)

	rng := rand.New(rand.NewSource(0))
	for remaining := numIterations; remaining > 0; remaining-- {
		requireMatchOverlaps(t, v, randomBounds(rng, keySpace))
	}
}

// BenchmarkNumFilesRangeAnnotation measures counting the files in a random key
// range on a level that is mutated between queries. The "annotator"
// sub-benchmark uses NumFilesAnnotator range annotations; the "overlaps"
// sub-benchmark performs the equivalent count by iterating Version.Overlaps.
func BenchmarkNumFilesRangeAnnotation(b *testing.B) {
	const (
		count    = 100_000
		level    = 6
		keySpace = count * 11
	)
	v, files := makeTestVersion(count)

	rng := rand.New(rand.NewSource(0))
	b.Run("annotator", func(b *testing.B) {
		for iter := 0; iter < b.N; iter++ {
			queryBounds := randomBounds(rng, keySpace)
			// Remove a random file for the duration of the query and put it
			// back afterwards, to verify that range annotations are still
			// fast despite small mutations.
			victim := files[rng.Intn(count)]
			v.Levels[level].tree.Delete(victim)

			NumFilesAnnotator.LevelRangeAnnotation(v.Levels[level], queryBounds)

			v.Levels[level].tree.Insert(victim)
		}
	})

	// Benchmark an equivalent aggregation using version.Overlaps as well, to
	// show the difference in performance.
	b.Run("overlaps", func(b *testing.B) {
		for iter := 0; iter < b.N; iter++ {
			queryBounds := randomBounds(rng, keySpace)
			victim := files[rng.Intn(count)]
			v.Levels[level].tree.Delete(victim)

			overlaps := v.Overlaps(level, queryBounds)
			it := overlaps.Iter()
			numFiles := 0
			for f := it.First(); f != nil; f = it.Next() {
				numFiles++
			}

			v.Levels[level].tree.Insert(victim)
		}
	})
}
Loading
Loading