Skip to content

Commit

Permalink
db: smoother thresholds for the delete pacer
Browse files Browse the repository at this point in the history
The delete pacer has two heuristics that disable pacing altogether: one
triggers when the free space on disk drops below 16GB, and the other
when the ratio of obsolete to live bytes exceeds 20%.

Disabling pacing altogether can lead to a very sudden burst of large
deletions, causing large latency spikes. In particular the obsolete
bytes heuristic can be triggered when a large table is deleted.

In this change we make the transition smoother, using the idea that we
want to get back to the desired thresholds within a given timeframe
(10 seconds for the free bytes heuristic, 5 minutes for the obsolete
bytes heuristic). Instead of effectively increasing the pacing rate to
infinity, we increase it based on how many bytes are exceeding the
threshold.

Note that the previous status quo corresponds to setting infinitesimal
timeframes (e.g. 1ns).
  • Loading branch information
RaduBerinde committed Jul 13, 2023
1 parent 02413ad commit 7bb765e
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 37 deletions.
59 changes: 38 additions & 21 deletions pacer.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,16 @@ type deletionPacerInfo struct {
// mechanism helps mitigate that.
type deletionPacer struct {
// If there are less than freeSpaceThreshold bytes of free space on
// disk, do not pace deletions at all.
// disk, increase the pace of deletions such that we delete enough bytes to
// get back to the threshold within the freeSpaceTimeframe.
freeSpaceThreshold uint64
freeSpaceTimeframe time.Duration

// If the ratio of obsolete bytes to live bytes is greater than
// obsoleteBytesMaxRatio, do not pace deletions at all.
obsoleteBytesMaxRatio float64
// obsoleteBytesMaxRatio, increase the pace of deletions such that we delete
// enough bytes to get back to the threshold within the obsoleteBytesTimeframe.
obsoleteBytesMaxRatio float64
obsoleteBytesTimeframe time.Duration

mu struct {
sync.Mutex
Expand All @@ -57,8 +61,11 @@ func newDeletionPacer(
now time.Time, targetByteDeletionRate int64, getInfo func() deletionPacerInfo,
) *deletionPacer {
d := &deletionPacer{
freeSpaceThreshold: 16 << 30, // 16 GB
obsoleteBytesMaxRatio: 0.20,
freeSpaceThreshold: 16 << 30, // 16 GB
freeSpaceTimeframe: 10 * time.Second,

obsoleteBytesMaxRatio: 0.20,
obsoleteBytesTimeframe: 5 * time.Minute,

targetByteDeletionRate: targetByteDeletionRate,
getInfo: getInfo,
Expand Down Expand Up @@ -88,29 +95,39 @@ func (p *deletionPacer) PacingDelay(now time.Time, bytesToDelete uint64) (waitSe
return 0.0
}

baseRate := float64(p.targetByteDeletionRate)
// If recent deletion rate is more than our target, use that so that we don't
// fall behind.
historicRate := func() float64 {
p.mu.Lock()
defer p.mu.Unlock()
return float64(p.mu.history.Sum(now)) / deletePacerHistory.Seconds()
}()
if historicRate > baseRate {
baseRate = historicRate
}

// Apply heuristics to increase the deletion rate.
var extraRate float64
info := p.getInfo()
if info.freeBytes <= p.freeSpaceThreshold {
return 0.0
}
obsoleteBytesRatio := 1.0
if info.liveBytes > 0 {
obsoleteBytesRatio = float64(info.obsoleteBytes) / float64(info.liveBytes)
// Increase the rate so that we can free up enough bytes within the timeframe.
extraRate = float64(p.freeSpaceThreshold-info.freeBytes) / p.freeSpaceTimeframe.Seconds()
}
if obsoleteBytesRatio >= p.obsoleteBytesMaxRatio {
if info.liveBytes == 0 {
// We don't know the obsolete bytes ratio. Disable pacing altogether.
return 0.0
}

rate := p.targetByteDeletionRate

// See if recent deletion rate is more than our target; if so, use that as our
// target so that we don't fall behind.
p.mu.Lock()
defer p.mu.Unlock()
if historyRate := p.mu.history.Sum(now) / int64(deletePacerHistory/time.Second); rate < historyRate {
rate = historyRate
obsoleteBytesRatio := float64(info.obsoleteBytes) / float64(info.liveBytes)
if obsoleteBytesRatio >= p.obsoleteBytesMaxRatio {
// Increase the rate so that we can free up enough bytes within the timeframe.
r := (obsoleteBytesRatio - p.obsoleteBytesMaxRatio) * float64(info.liveBytes) / p.obsoleteBytesTimeframe.Seconds()
if extraRate < r {
extraRate = r
}
}

return float64(bytesToDelete) / float64(rate)
return float64(bytesToDelete) / (baseRate + extraRate)
}

// history is a helper used to keep track of the recent history of a set of
Expand Down
60 changes: 44 additions & 16 deletions pacer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package pebble

import (
"fmt"
"math"
"math/rand"
"sort"
"testing"
Expand All @@ -25,59 +26,86 @@ func TestDeletionPacer(t *testing.T) {
// second value is the deleted bytes. The time of pacing is the same as the
// last time in the history.
history [][2]int
// expected wait time for 100 MB.
// expected pacing rate in MB/s.
expected float64
}{
{
freeBytes: 160 * GB,
obsoleteBytes: 1 * MB,
liveBytes: 160 * MB,
expected: 1.0,
expected: 100.0,
},
// As freeBytes < free space threshold, there should be no throttling.
// As freeBytes is 2GB below the free space threshold, rate should be
// increased by 204.8MB/s.
{
freeBytes: 5 * GB,
freeBytes: 14 * GB,
obsoleteBytes: 1 * MB,
liveBytes: 100 * MB,
expected: 0.0,
expected: 304.8,
},
// As obsoleteBytesRatio > 0.20, there should be no throttling.
// As freeBytes is 10GB below the free space threshold, rate should be
// increased by 1GB/s.
{
freeBytes: 500 * GB,
obsoleteBytes: 50 * MB,
freeBytes: 6 * GB,
obsoleteBytes: 1 * MB,
liveBytes: 100 * MB,
expected: 0.0,
expected: 1124.0,
},
// obsoleteBytesRatio is 50%. We need to delete 30GB within 5 minutes.
{
freeBytes: 500 * GB,
obsoleteBytes: 50 * GB,
liveBytes: 100 * GB,
expected: 202.4,
},
// When obsolete ratio unknown, there should be no throttling.
{
freeBytes: 500 * GB,
obsoleteBytes: 0,
liveBytes: 0,
expected: 0.0,
expected: math.Inf(1),
},
// History shows 200MB/sec deletions on average over last 5 minutes, wait
// time should be half.
// History shows 200MB/sec deletions on average over last 5 minutes.
{
freeBytes: 160 * GB,
obsoleteBytes: 1 * MB,
liveBytes: 160 * MB,
history: [][2]int{{0, 5 * 60 * 200 * MB}},
expected: 0.5,
expected: 200.0,
},
// History shows 200MB/sec deletions on average over last 5 minutes and
// freeBytes is 10GB below the threshold.
{
freeBytes: 6 * GB,
obsoleteBytes: 1 * MB,
liveBytes: 160 * MB,
history: [][2]int{{0, 5 * 60 * 200 * MB}},
expected: 1224.0,
},
// History shows 200MB/sec deletions on average over last 5 minutes and
// obsoleteBytesRatio is 50%.
{
freeBytes: 500 * GB,
obsoleteBytes: 50 * GB,
liveBytes: 100 * GB,
history: [][2]int{{0, 5 * 60 * 200 * MB}},
expected: 302.4,
},
// History shows 1000MB/sec deletions on average over last 5 minutes.
{
freeBytes: 160 * GB,
obsoleteBytes: 1 * MB,
liveBytes: 160 * MB,
history: [][2]int{{0, 60 * 1000 * MB}, {3 * 60, 60 * 4 * 1000 * MB}, {4 * 60, 0}},
expected: 0.1,
expected: 1000.0,
},
// First entry in history is too old, it should be discarded.
{
freeBytes: 160 * GB,
obsoleteBytes: 1 * MB,
liveBytes: 160 * MB,
history: [][2]int{{0, 10 * 60 * 10000 * MB}, {3 * 60, 4 * 60 * 200 * MB}, {7 * 60, 1 * 60 * 200 * MB}},
expected: 0.5,
expected: 200.0,
},
}
for tcIdx, tc := range testCases {
Expand All @@ -96,7 +124,7 @@ func TestDeletionPacer(t *testing.T) {
last = start.Add(time.Second * time.Duration(h[0]))
pacer.ReportDeletion(last, uint64(h[1]))
}
result := pacer.PacingDelay(last, 100*MB)
result := 1.0 / pacer.PacingDelay(last, 1*MB)
require.InDelta(t, tc.expected, result, 1e-7)
})
}
Expand Down

0 comments on commit 7bb765e

Please sign in to comment.