Skip to content

Commit

Permalink
roachpb,gc,mvcc: add UseClearRange option to GCRequest_GCKey
Browse files Browse the repository at this point in the history
This commit adds an optimization to massively reduce the overhead of
garbage collecting large numbers of versions. When running garbage
collection, we currently iterate through the store to send (Key, Timestamp)
pairs in a GCRequest to the store to be evaluated and applied. This can
be rather slow when deleting large numbers of keys, particularly due to the
need to paginate. The primary motivation for pagination is to ensure that
we do not create raft commands with deletions that are too large.

In practice, we find that for the tiny values in a sequence, we find that we
can GC around 1800 versions/s with #51184 and around 900 without it (though
note that in that PR the more versions exist, the worse the throughput will
be). This remains abysmally slow. I imagine that using larger batches could
be one approach to increase the throughput, but, as it stands, 256 KiB is
not a tiny raft command.

This instead turns to the ClearRange operation which can delete all of versions
of a key with the replication overhead of just two copies. This approach is
somewhat controversial because, as @petermattis puts it:

```
We need to be careful about this. Historically, adding many range tombstones
was very bad for performance. I think we resolved most (all?) of those issues,
but I'm still nervous about embracing using range tombstones below the level
of a Range.
```

Nevertheless, the results are enticing. Rather than pinning a core at full
utilization for minutes just to clear the versions written to a sequence over
the course of a bit more than an hour, we can clear that in ~2 seconds.

Release note (performance improvement): Improved performance of garbage
collection in the face of large numbers of versions.
  • Loading branch information
ajwerner committed Jul 13, 2020
1 parent 8e5423b commit c6b040a
Show file tree
Hide file tree
Showing 13 changed files with 828 additions and 659 deletions.
35 changes: 34 additions & 1 deletion c-deps/libroach/protos/roachpb/api.pb.cc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions c-deps/libroach/protos/roachpb/api.pb.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,6 @@
<tr><td><code>trace.debug.enable</code></td><td>boolean</td><td><code>false</code></td><td>if set, traces for recent requests can be seen in the /debug page</td></tr>
<tr><td><code>trace.lightstep.token</code></td><td>string</td><td><code></code></td><td>if set, traces go to Lightstep using this token</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>if set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'); ignored if trace.lightstep.token is set</td></tr>
<tr><td><code>version</code></td><td>custom validation</td><td><code>20.1-11</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
<tr><td><code>version</code></td><td>custom validation</td><td><code>20.1-12</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
</tbody>
</table>
7 changes: 2 additions & 5 deletions pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -573,14 +573,11 @@ func runDebugGCCmd(cmd *cobra.Command, args []string) error {
policy := zonepb.GCPolicy{TTLSeconds: int32(gcTTLInSeconds)}
now := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
thresh := gc.CalculateThreshold(now, policy)
info, err := gc.Run(
context.Background(),
&desc, snap,
now, thresh, policy,
info, err := gc.Run(context.Background(), &desc, snap, now, thresh, policy,
gc.NoopGCer{},
func(_ context.Context, _ []roachpb.Intent) error { return nil },
func(_ context.Context, _ *roachpb.Transaction, _ []roachpb.LockUpdate) error { return nil },
)
false /* canUseClearRange */)
if err != nil {
return err
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/clusterversion/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ const (
VersionNoOriginFKIndexes
VersionClientRangeInfosOnBatchResponse
VersionNodeMembershipStatus
VersionClearRangeForGC

// Add new versions here (step one of two).
)
Expand Down Expand Up @@ -539,6 +540,12 @@ var versionsSingleton = keyedVersions([]keyedVersion{
Key: VersionNodeMembershipStatus,
Version: roachpb.Version{Major: 20, Minor: 1, Unstable: 11},
},
{
// VersionClearRangeForGC gates the usage of UseClearRange in the
// GCRequest_GCKey proto. See comment on proto definition for more details.
Key: VersionClearRangeForGC,
Version: roachpb.Version{Major: 20, Minor: 1, Unstable: 12},
},

// Add new versions here (step two of two).

Expand Down
61 changes: 48 additions & 13 deletions pkg/kv/kvserver/gc/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ func Run(
gcer GCer,
cleanupIntentsFn CleanupIntentsFunc,
cleanupTxnIntentsAsyncFn CleanupTxnIntentsAsyncFunc,
canUseClearRange bool,
) (Info, error) {

txnExp := now.Add(-kvserverbase.TxnCleanupThreshold.Nanoseconds(), 0)
Expand All @@ -182,7 +183,8 @@ func Run(
// Maps from txn ID to txn and intent key slice.
txnMap := map[uuid.UUID]*roachpb.Transaction{}
intentKeyMap := map[uuid.UUID][]roachpb.Key{}
err := processReplicatedKeyRange(ctx, desc, snap, now, newThreshold, gcer, txnMap, intentKeyMap, &info)
err := processReplicatedKeyRange(ctx, desc, snap, now, newThreshold, gcer,
txnMap, intentKeyMap, &info, canUseClearRange)
if err != nil {
return Info{}, err
}
Expand Down Expand Up @@ -237,6 +239,7 @@ func processReplicatedKeyRange(
txnMap map[uuid.UUID]*roachpb.Transaction,
intentKeyMap map[uuid.UUID][]roachpb.Key,
info *Info,
canUseClearRange bool,
) error {
var alloc bufalloc.ByteAllocator
// Compute intent expiration (intent age at which we attempt to resolve).
Expand Down Expand Up @@ -284,11 +287,13 @@ func processReplicatedKeyRange(
// version for a key has been reached, if haveGarbageForThisKey, we'll add the
// current key to the batch with the gcTimestampForThisKey.
var (
batchGCKeys []roachpb.GCRequest_GCKey
batchGCKeysBytes int64
haveGarbageForThisKey bool
gcTimestampForThisKey hlc.Timestamp
sentBatchForThisKey bool
batchGCKeys []roachpb.GCRequest_GCKey
batchGCKeysBytes int64
haveGarbageForThisKey bool
gcTimestampForThisKey hlc.Timestamp
keyBytesForThisKey int64
sentBatchForThisKey bool
useClearRangeForThisKey bool
)
it := makeGCIterator(desc, snap)
defer it.close()
Expand All @@ -310,7 +315,12 @@ func processReplicatedKeyRange(
isNewest := s.curIsNewest()
if isGarbage(threshold, s.cur, s.next, isNewest) {
keyBytes := int64(s.cur.Key.EncodedSize())
batchGCKeysBytes += keyBytes
// If we have decided that we're going to use clear range for this key,
// we've already accounted for the overhead of those key bytes.
if !useClearRangeForThisKey {
batchGCKeysBytes += keyBytes
keyBytesForThisKey += keyBytes
}
haveGarbageForThisKey = true
gcTimestampForThisKey = s.cur.Key.Timestamp
info.AffectedVersionsKeyBytes += keyBytes
Expand All @@ -319,23 +329,48 @@ func processReplicatedKeyRange(
if affected := isNewest && (sentBatchForThisKey || haveGarbageForThisKey); affected {
info.NumKeysAffected++
}
shouldSendBatch := batchGCKeysBytes >= KeyVersionChunkBytes
if shouldSendBatch || isNewest && haveGarbageForThisKey {

atBatchSizeLimit := batchGCKeysBytes >= KeyVersionChunkBytes
if atBatchSizeLimit && !useClearRangeForThisKey {
// We choose to use clear range for a key if we'd fill up an entire batch
// with just that key.
//
// TODO(ajwerner): Perhaps we should ensure that there are actually a
// large number of versions utilizing all of these bytes and not a small
// number of versions of a very large key. What's the right minimum number
// of keys?
useClearRangeForThisKey = canUseClearRange && len(batchGCKeys) == 0
if useClearRangeForThisKey {
// Adjust the accounting for the size of this batch given that now
// we're going to deal with this key using clear range.
batchGCKeysBytes -= keyBytesForThisKey
batchGCKeysBytes += 2 * int64(s.cur.Key.EncodedSize())
keyBytesForThisKey = 0
}
}

if addKeyToBatch := (atBatchSizeLimit && !useClearRangeForThisKey) ||
(isNewest && haveGarbageForThisKey); addKeyToBatch {
alloc, s.cur.Key.Key = alloc.Copy(s.cur.Key.Key, 0)
batchGCKeys = append(batchGCKeys, roachpb.GCRequest_GCKey{
Key: s.cur.Key.Key,
Timestamp: gcTimestampForThisKey,
Key: s.cur.Key.Key,
Timestamp: gcTimestampForThisKey,
UseClearRange: useClearRangeForThisKey,
})
haveGarbageForThisKey = false
gcTimestampForThisKey = hlc.Timestamp{}
keyBytesForThisKey = 0
useClearRangeForThisKey = false

// Mark that we sent a batch for this key so we know that we had garbage
// even if it turns out that there's no more garbage for this key.
// We want to count a key as affected once even if we paginate the
// deletion of its versions.
sentBatchForThisKey = shouldSendBatch && !isNewest
sentBatchForThisKey = atBatchSizeLimit && !isNewest
}
if shouldSendBatch {

if shouldSendBatch := (atBatchSizeLimit && !useClearRangeForThisKey) ||
(isNewest && useClearRangeForThisKey); shouldSendBatch {
if err := gcer.GC(ctx, batchGCKeys); err != nil {
if errors.Is(err, ctx.Err()) {
return err
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/gc/gc_old_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ func runGCOld(
gcer GCer,
cleanupIntentsFn CleanupIntentsFunc,
cleanupTxnIntentsAsyncFn CleanupTxnIntentsAsyncFunc,
_ bool,
) (Info, error) {

iter := rditer.NewReplicaDataIterator(desc, snap,
Expand Down
14 changes: 7 additions & 7 deletions pkg/kv/kvserver/gc/gc_random_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,19 +96,17 @@ func TestRunNewVsOld(t *testing.T) {
oldGCer := makeFakeGCer()
policy := zonepb.GCPolicy{TTLSeconds: tc.ttl}
newThreshold := CalculateThreshold(tc.now, policy)
const useClearRange = false
gcInfoOld, err := runGCOld(ctx, tc.ds.desc(), snap, tc.now,
newThreshold, policy,
&oldGCer,
oldGCer.resolveIntents,
oldGCer.resolveIntentsAsync)
oldGCer.resolveIntentsAsync,
useClearRange)
require.NoError(t, err)

newGCer := makeFakeGCer()
gcInfoNew, err := Run(ctx, tc.ds.desc(), snap, tc.now,
newThreshold, policy,
&newGCer,
newGCer.resolveIntents,
newGCer.resolveIntentsAsync)
gcInfoNew, err := Run(ctx, tc.ds.desc(), snap, tc.now, newThreshold, policy, &newGCer, newGCer.resolveIntents, newGCer.resolveIntentsAsync, false)
require.NoError(t, err)

oldGCer.normalize()
Expand All @@ -131,6 +129,7 @@ func BenchmarkRun(b *testing.B) {
}
snap := eng.NewSnapshot()
policy := zonepb.GCPolicy{TTLSeconds: spec.ttl}
const useClearRange = false
return runGCFunc(ctx, spec.ds.desc(), snap, spec.now,
CalculateThreshold(spec.now, policy),
policy,
Expand All @@ -140,7 +139,8 @@ func BenchmarkRun(b *testing.B) {
},
func(ctx context.Context, txn *roachpb.Transaction, intents []roachpb.LockUpdate) error {
return nil
})
},
useClearRange)
}
makeTest := func(old bool, spec randomRunGCTestSpec) func(b *testing.B) {
return func(b *testing.B) {
Expand Down
Loading

0 comments on commit c6b040a

Please sign in to comment.