Skip to content

Commit

Permalink
feedback: only count range del estimates stats when range dels present
Browse files Browse the repository at this point in the history
Fix a bug where range keys weren't calculated for tables containing
range dels.

Only compute deletion range estimates when range dels are present in the
deletion span.

Add an additional test case.

This will squash into the second commit.
  • Loading branch information
nicktrav committed Jun 11, 2022
1 parent 25c4af2 commit 4c656db
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 13 deletions.
48 changes: 35 additions & 13 deletions table_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ func (d *DB) loadTableStats(
return
}
}
if r.Properties.NumRangeDeletions > 0 {
if r.Properties.NumRangeDeletions > 0 || r.Properties.NumRangeKeyDels > 0 {
if compactionHints, err = d.loadTableRangeDelStats(r, v, level, meta, &stats); err != nil {
return
}
Expand Down Expand Up @@ -316,26 +316,48 @@ func (d *DB) loadTableRangeDelStats(
// estimateSizeBeneath which is costly, and improves the accuracy of our
// overall estimate.
for s := iter.First(); s != nil; s = iter.Next() {
// If the file is in the last level of the LSM, there is no
// data beneath it. The fact that there is still a range
// tombstone in a bottommost file suggests that an open
// snapshot kept the tombstone around. Estimate disk usage
// within the file itself.
start, end := s.Start, s.End
if level == numLevels-1 {
// We only need to consider deletion size estimates for tables that contain
// point keys.
var hasPoints bool
for _, k := range s.Keys {
if k.Kind() == base.InternalKeyKindRangeDelete {
hasPoints = true
break
}
}

// If the file is in the last level of the LSM, there is no data beneath
// it. The fact that there is still a range tombstone in a bottommost file
// suggests that an open snapshot kept the tombstone around. Estimate disk
// usage within the file itself.
// NOTE: If the span `s` wholly contains a table containing range keys,
// the returned size estimate will be slightly inflated by the range key
// block. However, in practice, range keys are expected to be rare, and
// the size of the range key block relative to the overall size of the
// table is expected to be small.
if hasPoints && level == numLevels-1 {
size, err := r.EstimateDiskUsage(start, end)
if err != nil {
return nil, err
}
stats.RangeDeletionsBytesEstimate += size

// As the file is in the bottommost level, there is no need to collect a
// deletion hint.
continue
}

// While the size estimates for point keys should only be updated if this
// span contains a range del, the sequence numbers are required for the
// hint. Unconditionally descend, but conditionally update the estimates.
estimate, hintSeqNum, err := d.estimateSizeBeneath(v, level, meta, start, end)
if err != nil {
return nil, err
}
stats.RangeDeletionsBytesEstimate += estimate
if hasPoints {
stats.RangeDeletionsBytesEstimate += estimate
}

// If any files were completely contained with the range,
// hintSeqNum is the smallest sequence number contained in any
Expand Down Expand Up @@ -449,11 +471,11 @@ func (d *DB) estimateSizeBeneath(
}

func maybeSetStatsFromProperties(meta *fileMetadata, props *sstable.Properties) bool {
// If a table has range deletions, we can't calculate the
// RangeDeletionsBytesEstimate statistic and can't populate table stats
// from just the properties. The table stats collector goroutine will
// populate the stats.
if props.NumRangeDeletions != 0 {
// If a table has range deletions or range key deletions, we can't calculate
// the RangeDeletionsBytesEstimate statistic and can't populate table stats
// from just the properties. The table stats collector goroutine will populate
// the stats.
if props.NumRangeDeletions != 0 || props.NumRangeKeyDels != 0 {
return false
}

Expand Down
50 changes: 50 additions & 0 deletions testdata/table_stats
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,53 @@ num-deletions: 1
num-range-keys: 0
point-deletions-bytes-estimate: 0
range-deletions-bytes-estimate: 991

# A hint for exclusively range key deletions that covers a table with point keys
# should not contain an estimate for point keys.
# TODO(travers): Once range keys in batches are flushed to tables, this testcase
# can be updated to use the batch / flush combination.

define
----

# A table with point keys.
batch
set b b
----

flush
----
0.0:
000005:[b#1,SET-b#1,SET]

# A table with a mixture of point and range keys.
ingest ext0
set c c
range-key-set d d @1 foo
----
0.0:
000005:[b#1,SET-b#1,SET]
6:
000006:[c#2,SET-c#2,SET]

# The table with the range key del, that spans the previous two tables.
ingest ext0
range-key-del a z
----
0.1:
000007:[a#3,RANGEKEYDEL-z#72057594037927935,RANGEKEYDEL]
0.0:
000005:[b#1,SET-b#1,SET]
6:
000006:[c#2,SET-c#2,SET]

# The hint on table 000007 does estimates zero size for range deleted point
# keys.
wait-pending-table-stats
000007
----
num-entries: 0
num-deletions: 0
num-range-keys: 1
point-deletions-bytes-estimate: 0
range-deletions-bytes-estimate: 0

0 comments on commit 4c656db

Please sign in to comment.