Skip to content

Commit

Permalink
sstable: add range key support to BlockIntervalCollector
Browse files Browse the repository at this point in the history
Currently, the `sstable.BlockIntervalCollector` exists as a helper
struct to which an implementation of
`sstable.DataBlockIntervalCollector` is passed. The latter contains the
user-defined logic for converting a key and value into an associated
interval that can be maintained.

The `BlockIntervalCollector` struct handles the logic required to
implement `sstable.BlockPropertyCollector`, encoding the interval as a
property when the data block is completed, and maintaining index-block-
and table-level intervals.

Update `sstable.Writer` to pass through range keys to each block
property collector as they are added to the writer.

As range and point keys can be thought of as existing in separate
keyspaces within the same LSM, range keys only contribute to table level
intervals (unioned with the point key interval), rather than block- and
index-level intervals.

Another way to rationalize this decision is to consider that range keys
are contained in a single, dedicated block in the SSTable, while the
point keys can span multiple blocks and have an associated index entry
into which the properties for the specific block are encoded. Block
level filtering applies to point keys only, whereas table-level
filtering takes the union of the point and range key intervals for the
table.

One downside of taking the union of the range and point keys for the
table level interval is an increased rate of false positives when
filtering tables based on an interval, in cases where the range key
interval is wider than the point key interval.

This change alters the `NewBlockIntervalCollector` function to take in
two `DataBlockIntervalCollector`s, one for point keys and one for range keys. This is
required to track the intervals separately within the
`BlockIntervalCollector`. The caller has the flexibility of passing in
`nil` for one of the collectors (but not both, which panics), in which case
keys of that type will be ignored by the collector. This can be used,
for example, to construct collectors that apply exclusively to either
point or range keys.
  • Loading branch information
nicktrav committed Jan 20, 2022
1 parent 586718d commit ec6c216
Show file tree
Hide file tree
Showing 5 changed files with 497 additions and 48 deletions.
16 changes: 9 additions & 7 deletions iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1224,7 +1224,9 @@ func TestIteratorBlockIntervalFilter(t *testing.T) {
bpCollectors = append(bpCollectors, func() BlockPropertyCollector {
return sstable.NewBlockIntervalCollector(
fmt.Sprintf("%d", coll.id),
&testBlockIntervalCollector{numLength: 2, offsetFromEnd: coll.offset})
&testBlockIntervalCollector{numLength: 2, offsetFromEnd: coll.offset},
nil, /* range key collector */
)
})
}
opts := &Options{
Expand Down Expand Up @@ -1344,9 +1346,9 @@ func TestIteratorRandomizedBlockIntervalFilter(t *testing.T) {
FormatMajorVersion: FormatNewest,
BlockPropertyCollectors: []func() BlockPropertyCollector{
func() BlockPropertyCollector {
return sstable.NewBlockIntervalCollector("0", &testBlockIntervalCollector{
numLength: 2,
})
return sstable.NewBlockIntervalCollector(
"0", &testBlockIntervalCollector{numLength: 2}, nil, /* range key collector */
)
},
},
}
Expand Down Expand Up @@ -1657,9 +1659,9 @@ func BenchmarkBlockPropertyFilter(b *testing.B) {
FormatMajorVersion: FormatNewest,
BlockPropertyCollectors: []func() BlockPropertyCollector{
func() BlockPropertyCollector {
return sstable.NewBlockIntervalCollector("0", &testBlockIntervalCollector{
numLength: 3,
})
return sstable.NewBlockIntervalCollector(
"0", &testBlockIntervalCollector{numLength: 3}, nil, /* range key collector */
)
},
},
}
Expand Down
66 changes: 58 additions & 8 deletions sstable/block_property.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"sync"

"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/rangekey"
)

// Block properties are an optional user-facing feature that can be used to
Expand Down Expand Up @@ -101,9 +102,18 @@ type BlockPropertyFilter interface {
//
// Users must not expect this to preserve differences between empty sets --
// they will all get turned into the semantically equivalent [0,0).
//
// A BlockIntervalCollector that collects over point and range keys needs to
// have both the point and range DataBlockIntervalCollector specified, since
// point and range keys are fed to the BlockIntervalCollector in an interleaved
// fashion, independently of one another. This also implies that the
// DataBlockIntervalCollectors for point and range keys should be references to
// independent instances, rather than references to the same collector, as point
// and range keys are tracked independently.
type BlockIntervalCollector struct {
name string
dbic DataBlockIntervalCollector
name string
points DataBlockIntervalCollector
ranges DataBlockIntervalCollector

blockInterval interval
indexInterval interval
Expand All @@ -126,12 +136,31 @@ type DataBlockIntervalCollector interface {
FinishDataBlock() (lower uint64, upper uint64, err error)
}

// NewBlockIntervalCollector constructs a BlockIntervalCollector, with the
// given name and data block collector.
// NewBlockIntervalCollector constructs a BlockIntervalCollector with the given
// name. The BlockIntervalCollector makes use of the given point and range key
// DataBlockIntervalCollectors when encountering point and range keys,
// respectively.
//
// The caller may pass a nil DataBlockIntervalCollector for one of the point or
// range key collectors, in which case keys of those types will be ignored. This
// allows for flexible construction of BlockIntervalCollectors that operate on
// just point keys, just range keys, or both point and range keys.
//
// If both point and range keys are to be tracked, two independent collectors
// should be provided, rather than the same collector passed in twice (see the
// comment on BlockIntervalCollector for more detail).
func NewBlockIntervalCollector(
name string, blockAttributeCollector DataBlockIntervalCollector) *BlockIntervalCollector {
name string,
pointCollector, rangeCollector DataBlockIntervalCollector,
) *BlockIntervalCollector {
if pointCollector == nil && rangeCollector == nil {
panic("sstable: at least one interval collector must be provided")
}
return &BlockIntervalCollector{
name: name, dbic: blockAttributeCollector}
name: name,
points: pointCollector,
ranges: rangeCollector,
}
}

// Name implements the BlockPropertyCollector interface.
Expand All @@ -141,13 +170,23 @@ func (b *BlockIntervalCollector) Name() string {

// Add implements the BlockPropertyCollector interface.
func (b *BlockIntervalCollector) Add(key InternalKey, value []byte) error {
return b.dbic.Add(key, value)
if rangekey.IsRangeKey(key.Kind()) {
if b.ranges != nil {
return b.ranges.Add(key, value)
}
} else if b.points != nil {
return b.points.Add(key, value)
}
return nil
}

// FinishDataBlock implements the BlockPropertyCollector interface.
func (b *BlockIntervalCollector) FinishDataBlock(buf []byte) ([]byte, error) {
if b.points == nil {
return buf, nil
}
var err error
b.blockInterval.lower, b.blockInterval.upper, err = b.dbic.FinishDataBlock()
b.blockInterval.lower, b.blockInterval.upper, err = b.points.FinishDataBlock()
if err != nil {
return buf, err
}
Expand All @@ -172,6 +211,17 @@ func (b *BlockIntervalCollector) FinishIndexBlock(buf []byte) ([]byte, error) {

// FinishTable implements the BlockPropertyCollector interface.
//
// When a range key collector is configured, its interval is obtained via
// FinishDataBlock and unioned into the table interval before encoding. An
// error from the range key collector is returned together with buf as passed
// in. Otherwise the result of encoding the table interval with buf is
// returned.
func (b *BlockIntervalCollector) FinishTable(buf []byte) ([]byte, error) {
// If the collector is tracking range keys, the range key interval is union-ed
// with the point key interval for the table.
if b.ranges != nil {
var rangeInterval interval
var err error
// The range key collector reports its interval through the same
// FinishDataBlock method used by the point key collector.
rangeInterval.lower, rangeInterval.upper, err = b.ranges.FinishDataBlock()
if err != nil {
return buf, err
}
b.tableInterval.union(rangeInterval)
}
return b.tableInterval.encode(buf), nil
}

Expand Down
Loading

0 comments on commit ec6c216

Please sign in to comment.