kvserver: make GC intent scoring more aggressive
Users often experience a buildup of intents because the GC queue is not
aggressive enough in cleaning them up. Currently, the GC queue only
triggers based on intents if the average intent age is at least 10 days
and there is also MVCC garbage that can be cleaned up.

This patch makes the intent scoring more aggressive, triggering GC when
the average intent age exceeds 8 hours, regardless of other MVCC garbage.
The previous commit added a cooldown timer to prevent the GC queue from
spinning on a replica whose intents cannot be cleaned up (e.g. because
they belong to an in-progress long-running transaction).

Release note (ops change): trigger MVCC and intent garbage collection
when the average intent age exceeds 8 hours, down from 10 days.
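
To make the new thresholds concrete, here is a small self-contained sketch of
how the intent component of the GC score behaves under the new constants. It is
illustrative only: the real logic lives in makeGCQueueScoreImpl and also applies
the per-range fuzz factor, the MVCC key/value score, and the cooldown timer; the
intentScore helper below is a hypothetical simplification, not the actual
implementation.

package main

import (
	"fmt"
	"time"
)

const (
	// Values after this commit: an average intent age of 8 hours contributes
	// a score of 1, and an intent score above 1 is enough to queue on its own.
	intentAgeNormalization = 8 * time.Hour
	gcIntentScoreThreshold = 1
)

// intentScore approximates the intent component of the GC queue score as the
// average age of outstanding intents divided by the normalization constant.
func intentScore(totalIntentAge time.Duration, intentCount int64) float64 {
	if intentCount == 0 {
		return 0
	}
	avgAge := totalIntentAge / time.Duration(intentCount)
	return float64(avgAge) / float64(intentAgeNormalization)
}

func main() {
	// Under the old constants (24h normalization, threshold 10), intents had to
	// average roughly ten days of age, with MVCC garbage also present, before
	// GC would trigger. Now an average age just above 8 hours is sufficient.
	for _, hours := range []int64{1, 7, 9} {
		avgAge := time.Duration(hours) * time.Hour
		score := intentScore(avgAge*100, 100) // 100 intents, all of the same age
		fmt.Printf("avg intent age %2dh -> intent score %.2f, queue: %t\n",
			hours, score, score > gcIntentScoreThreshold)
	}
}

The resulting scores (roughly 0.12, 0.88, and 1.12) line up with the
expectations added to TestGCQueueMakeGCScoreRealistic below, modulo the
per-range fuzz factor.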
erikgrinaker committed Jun 1, 2021
1 parent 51bfbc6 commit 99da240
Showing 2 changed files with 26 additions and 26 deletions.
26 changes: 13 additions & 13 deletions pkg/kv/kvserver/gc_queue.go
@@ -45,12 +45,12 @@ const (
gcQueueIntentCooldownDuration = 2 * time.Hour
// intentAgeNormalization is the average age of outstanding intents
// which amount to a score of "1" added to total replica priority.
intentAgeNormalization = 24 * time.Hour // 1 day
intentAgeNormalization = 8 * time.Hour

// Thresholds used to decide whether to queue for GC based
// on keys and intents.
gcKeyScoreThreshold = 2
gcIntentScoreThreshold = 10
gcIntentScoreThreshold = 1

probablyLargeAbortSpanSysCountThreshold = 10000
largeAbortSpanBytesThreshold = 16 * (1 << 20) // 16mb
@@ -173,17 +173,13 @@ func (gcq *gcQueue) shouldQueue(
if !canGC {
return false, 0
}
// If performing a GC will not advance the GC threshold, there's no reason
// to GC again.
if newThreshold.Equal(oldThreshold) {
return false, 0
}
canAdvanceGCThreshold := !newThreshold.Equal(oldThreshold)
lastGC, err := repl.getQueueLastProcessed(ctx, gcq.name)
if err != nil {
log.VErrEventf(ctx, 2, "failed to fetch last processed time: %v", err)
return false, 0
}
r := makeGCQueueScore(ctx, repl, gcTimestamp, lastGC, *zone.GC)
r := makeGCQueueScore(ctx, repl, gcTimestamp, lastGC, *zone.GC, canAdvanceGCThreshold)
return r.ShouldQueue, r.FinalScore
}

@@ -193,6 +189,7 @@ func makeGCQueueScore(
now hlc.Timestamp,
lastGC hlc.Timestamp,
policy zonepb.GCPolicy,
canAdvanceGCThreshold bool,
) gcQueueScore {
repl.mu.Lock()
ms := *repl.mu.state.Stats
@@ -205,7 +202,8 @@
// Use desc.RangeID for fuzzing the final score, so that different ranges
// have slightly different priorities and even symmetrical workloads don't
// trigger GC at the same time.
r := makeGCQueueScoreImpl(ctx, int64(repl.RangeID), now, ms, policy, lastGC)
r := makeGCQueueScoreImpl(
ctx, int64(repl.RangeID), now, ms, policy, lastGC, canAdvanceGCThreshold)
return r
}

@@ -305,6 +303,7 @@ func makeGCQueueScoreImpl(
ms enginepb.MVCCStats,
policy zonepb.GCPolicy,
lastGC hlc.Timestamp,
canAdvanceGCThreshold bool,
) gcQueueScore {
ms.Forward(now.WallTime)
var r gcQueueScore
@@ -374,7 +373,7 @@
r.FinalScore = r.FuzzFactor * (valScore + r.IntentScore)

// First determine whether we should queue based on MVCC score alone.
r.ShouldQueue = r.FuzzFactor*valScore > gcKeyScoreThreshold
r.ShouldQueue = canAdvanceGCThreshold && r.FuzzFactor*valScore > gcKeyScoreThreshold

// Next, determine whether we should queue based on intent score. For
// intents, we also enforce a cooldown time since we may not actually
@@ -480,19 +479,20 @@ func (gcq *gcQueue) process(
// Consult the protected timestamp state to determine whether we can GC and
// the timestamp which can be used to calculate the score and updated GC
// threshold.
canGC, cacheTimestamp, gcTimestamp, _, newThreshold :=
canGC, cacheTimestamp, gcTimestamp, oldThreshold, newThreshold :=
repl.checkProtectedTimestampsForGC(ctx, *zone.GC)
if !canGC {
return false, nil
}
canAdvanceGCThreshold := !newThreshold.Equal(oldThreshold)
// We don't recheck ShouldQueue here, since the range may have been enqueued
// manually e.g. via the admin server.
lastGC, err := repl.getQueueLastProcessed(ctx, gcq.name)
if err != nil {
lastGC = hlc.Timestamp{}
log.VErrEventf(ctx, 2, "failed to fetch last processed time: %v", err)
}
r := makeGCQueueScore(ctx, repl, gcTimestamp, lastGC, *zone.GC)
r := makeGCQueueScore(ctx, repl, gcTimestamp, lastGC, *zone.GC, canAdvanceGCThreshold)
log.VEventf(ctx, 2, "processing replica %s with score %s", repl.String(), r)
// Synchronize the new GC threshold decision with concurrent
// AdminVerifyProtectedTimestamp requests.
@@ -543,7 +543,7 @@ func (gcq *gcQueue) process(

log.Eventf(ctx, "MVCC stats after GC: %+v", repl.GetMVCCStats())
log.Eventf(ctx, "GC score after GC: %s", makeGCQueueScore(
ctx, repl, repl.store.Clock().Now(), lastGC, *zone.GC))
ctx, repl, repl.store.Clock().Now(), lastGC, *zone.GC, canAdvanceGCThreshold))
updateStoreMetricsWithGCInfo(gcq.store.metrics, info)
return true, nil
}
26 changes: 13 additions & 13 deletions pkg/kv/kvserver/gc_queue_test.go
@@ -110,7 +110,8 @@ func TestGCQueueMakeGCScoreInvariantQuick(t *testing.T) {
}
now := initialNow.Add(timePassed.Nanoseconds(), 0)
r := makeGCQueueScoreImpl(
ctx, int64(seed), now, ms, zonepb.GCPolicy{TTLSeconds: ttlSec}, hlc.Timestamp{})
ctx, int64(seed), now, ms, zonepb.GCPolicy{TTLSeconds: ttlSec}, hlc.Timestamp{},
true /* canAdvanceGCThreshold */)
wouldHaveToDeleteSomething := gcBytes*int64(ttlSec) < ms.GCByteAge(now.WallTime)
result := !r.ShouldQueue || wouldHaveToDeleteSomething
if !result {
@@ -132,7 +133,7 @@ func TestGCQueueMakeGCScoreAnomalousStats(t *testing.T) {
LiveBytes: int64(liveBytes),
ValBytes: int64(valBytes),
KeyBytes: int64(keyBytes),
}, zonepb.GCPolicy{TTLSeconds: 60}, hlc.Timestamp{})
}, zonepb.GCPolicy{TTLSeconds: 60}, hlc.Timestamp{}, true /* canAdvanceGCThreshold */)
return r.DeadFraction >= 0 && r.DeadFraction <= 1
}, &quick.Config{MaxCount: 1000}); err != nil {
t.Fatal(err)
@@ -156,7 +157,7 @@ func TestGCQueueMakeGCScoreLargeAbortSpan(t *testing.T) {
context.Background(), seed,
hlc.Timestamp{WallTime: expiration + 1},
ms, zonepb.GCPolicy{TTLSeconds: 10000},
hlc.Timestamp{},
hlc.Timestamp{}, true, /* canAdvanceGCThreshold */
)
require.True(t, r.ShouldQueue)
require.NotZero(t, r.FinalScore)
@@ -171,7 +172,7 @@ func TestGCQueueMakeGCScoreLargeAbortSpan(t *testing.T) {
context.Background(), seed,
hlc.Timestamp{WallTime: expiration + 1},
ms, zonepb.GCPolicy{TTLSeconds: 10000},
hlc.Timestamp{},
hlc.Timestamp{}, true, /* canAdvanceGCThreshold */
)
require.True(t, r.ShouldQueue)
require.NotZero(t, r.FinalScore)
@@ -182,7 +183,7 @@ func TestGCQueueMakeGCScoreLargeAbortSpan(t *testing.T) {
r := makeGCQueueScoreImpl(context.Background(), seed,
hlc.Timestamp{WallTime: expiration},
ms, zonepb.GCPolicy{TTLSeconds: 10000},
hlc.Timestamp{WallTime: expiration - 100},
hlc.Timestamp{WallTime: expiration - 100}, true, /* canAdvanceGCThreshold */
)
require.False(t, r.ShouldQueue)
require.Zero(t, r.FinalScore)
@@ -221,7 +222,8 @@ func TestGCQueueMakeGCScoreIntentCooldown(t *testing.T) {
ms.ValBytes = 1e9
}

r := makeGCQueueScoreImpl(ctx, seed, now, ms, policy, tc.lastGC)
r := makeGCQueueScoreImpl(
ctx, seed, now, ms, policy, tc.lastGC, true /* canAdvanceGCThreshold */)
require.Equal(t, tc.expectGC, r.ShouldQueue)
})
}
@@ -342,7 +344,7 @@ func (cws *cachedWriteSimulator) shouldQueue(
ts := hlc.Timestamp{}.Add(ms.LastUpdateNanos+after.Nanoseconds(), 0)
r := makeGCQueueScoreImpl(context.Background(), 0 /* seed */, ts, ms, zonepb.GCPolicy{
TTLSeconds: int32(ttl.Seconds()),
}, hlc.Timestamp{})
}, hlc.Timestamp{}, true /* canAdvanceGCThreshold */)
if fmt.Sprintf("%.2f", r.FinalScore) != fmt.Sprintf("%.2f", prio) || b != r.ShouldQueue {
cws.t.Errorf("expected queued=%t (is %t), prio=%.2f, got %.2f: after=%s, ttl=%s:\nms: %+v\nscore: %s",
b, r.ShouldQueue, prio, r.FinalScore, after, ttl, ms, r)
@@ -447,15 +449,13 @@ func TestGCQueueMakeGCScoreRealistic(t *testing.T) {

// Write 1000 distinct 1kb intents at the initial timestamp. This means that
// the average intent age is just the time elapsed from now, and this is roughly
// normalized by one day at the time of writing. Note that the size of the writes
// normalized by 8 hours at the time of writing. Note that the size of the writes
// doesn't matter. In reality, the value-based GC score will often strike first.
cws.multiKey(100, valSize, txn, &ms)

cws.shouldQueue(false, 1.00, 24*time.Hour, irrelevantTTL, ms)
cws.shouldQueue(false, 1.99, 2*24*time.Hour, irrelevantTTL, ms)
cws.shouldQueue(false, 3.99, 4*24*time.Hour, irrelevantTTL, ms)
cws.shouldQueue(false, 6.98, 7*24*time.Hour, irrelevantTTL, ms)
cws.shouldQueue(true, 11.97, 12*24*time.Hour, irrelevantTTL, ms)
cws.shouldQueue(false, 0.12, 1*time.Hour, irrelevantTTL, ms)
cws.shouldQueue(false, 0.87, 7*time.Hour, irrelevantTTL, ms)
cws.shouldQueue(true, 1.12, 9*time.Hour, irrelevantTTL, ms)
}
}

