Skip to content

Commit

Permalink
db: consider scheduling compaction when adding point tombstone compen…
Browse files Browse the repository at this point in the history
…sation

Previously, when the asynchronous table stats collector calculated a positive
PointDeletionsBytesEstimate, it did not consider scheduling compactions unless
it also calculated a positive RangeDeletionsBytesEstimate. If the database was
otherwise quiet with few flushes and no in-progress compactions, this could
delay the scheduling of a compaction until the next flush or ingest, despite
levels having scores ≥ 1.0. This was illustrated with CockroachDB's point
tombstone roachtest, with at times no running compactions despite levels having
scores as high as 50 due to high volumes of data dropped by point tombstones.

Note, an issue still remains whereby L0 files that delete large amounts of data
do not trigger compactions out of L0 on their own. This is a consequence of
L0's different scoring heuristics which do not consider compensated size. See
  • Loading branch information
jbowens committed May 24, 2023
1 parent 1426e67 commit cc0aecd
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 12 deletions.
13 changes: 7 additions & 6 deletions compaction_picker.go
Original file line number Diff line number Diff line change
Expand Up @@ -575,15 +575,16 @@ type candidateLevelInfo struct {
file manifest.LevelFile
}

// fileCompensation returns the total tombstone compensation for f: the
// estimated number of bytes that compacting the file's point and range
// deletions may reclaim.
func fileCompensation(f *fileMetadata) uint64 {
	pointDels := uint64(f.Stats.PointDeletionsBytesEstimate)
	rangeDels := f.Stats.RangeDeletionsBytesEstimate
	return pointDels + rangeDels
}

// compensatedSize returns f's file size, inflated according to compaction
// priorities.
func compensatedSize(f *fileMetadata) uint64 {
	// Add in the estimate of disk space that may be reclaimed by compacting
	// the file's tombstones.
	return f.Size + fileCompensation(f)
}

// compensatedSizeAnnotator implements manifest.Annotator, annotating B-Tree
Expand Down
34 changes: 29 additions & 5 deletions compaction_picker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1314,6 +1314,18 @@ func TestCompactionPickerScores(t *testing.T) {
var buf bytes.Buffer
datadriven.RunTest(t, "testdata/compaction_picker_scores", func(td *datadriven.TestData) string {
switch td.Cmd {
case "define":
require.NoError(t, d.Close())

d, err = runDBDefineCmd(td, opts)
if err != nil {
return err.Error()
}
d.mu.Lock()
s := d.mu.versions.currentVersion().String()
d.mu.Unlock()
return s

case "disable-table-stats":
d.mu.Lock()
d.opts.private.disableTableStats = true
Expand All @@ -1327,19 +1339,31 @@ func TestCompactionPickerScores(t *testing.T) {
d.mu.Unlock()
return ""

case "define":
require.NoError(t, d.Close())

d, err = runDBDefineCmd(td, opts)
if err != nil {
case "ingest":
if err = runBuildCmd(td, d, d.opts.FS); err != nil {
return err.Error()
}
if err = runIngestCmd(td, d, d.opts.FS); err != nil {
return err.Error()
}
d.mu.Lock()
s := d.mu.versions.currentVersion().String()
d.mu.Unlock()
return s

case "lsm":
return runLSMCmd(td, d)

case "scores":
// Wait for any running compactions to complete before calculating
// scores. Otherwise, the output of this command is
// nondeterministic.
d.mu.Lock()
for d.mu.compact.compactingCount > 0 {
d.mu.compact.cond.Wait()
}
d.mu.Unlock()

buf.Reset()
fmt.Fprintf(&buf, "L Size Score\n")
for l, lm := range d.Metrics().Levels {
Expand Down
2 changes: 1 addition & 1 deletion table_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func (d *DB) collectTableStats() bool {
maybeCompact := false
for _, c := range collected {
c.fileMetadata.Stats = c.TableStats
maybeCompact = maybeCompact || c.TableStats.RangeDeletionsBytesEstimate > 0
maybeCompact = maybeCompact || fileCompensation(c.fileMetadata) > 0
c.fileMetadata.StatsMarkValid()
}
d.mu.tableStats.cond.Broadcast()
Expand Down
60 changes: 60 additions & 0 deletions testdata/compaction_picker_scores
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,63 @@ L3 0 B 0.0
L4 0 B 0.0
L5 785 B 0.0
L6 809 B -

# Run a similar test as above, but this time the table containing the DELs is
# ingested after the database is initialized, with table stats enabled and
# automatic compactions enabled. When the ingested sstable's stats are loaded,
# it should trigger an automatic compaction of the ingested sstable on account
# of the high point-deletions-bytes-estimate value.
#
# This a regression test for an issue where the table stats collector wouldn't
# attempt to schedule a compaction if a file only had compensation due to point
# deletions and not range deletions.

define lbase-max-bytes=65536 enable-table-stats=true auto-compactions=on
L6
a.SET.1:<rand-bytes=65536>
b.SET.1:<rand-bytes=65536>
c.SET.1:<rand-bytes=65536>
d.SET.1:<rand-bytes=65536>
e.SET.1:<rand-bytes=65536>
----
6:
000004:[a#1,SET-e#1,SET]

ingest ext1
del a:
del b:
del c:
del d:
del e:
----
5:
000005:[a:#1,DEL-e:#1,DEL]
6:
000004:[a#1,SET-e#1,SET]

wait-pending-table-stats
000005
----
num-entries: 5
num-deletions: 5
num-range-key-sets: 0
point-deletions-bytes-estimate: 1065
range-deletions-bytes-estimate: 0

scores
----
L Size Score
L0 0 B 0.0
L1 0 B 0.0
L2 0 B 0.0
L3 0 B 0.0
L4 0 B 0.0
L5 840 B 0.0
L6 809 B -

lsm
----
5:
000005:[a:#1,DEL-e:#1,DEL]
6:
000004:[a#1,SET-e#1,SET]

0 comments on commit cc0aecd

Please sign in to comment.