Skip to content

Commit

Permalink
Merge #67978 #67990
Browse files Browse the repository at this point in the history
67978: roachtest: don't run zfs/ycsb/* on AWS r=tbg a=tbg

This threw off the nightly roachtest AWS run.

Release note: None


67990: kvserver: add GC intent resolution timeout r=aliher1911,tbg a=erikgrinaker

This patch adds a 2-minute timeout for GC intent resolution batches, to
ensure progress even when encountering a stuck range (recall that intent
GC will attempt to remove intents on other ranges that belong to a txn
anchored on the current range).

Resolves #67296.

Release note: None

/cc @cockroachdb/kv 

Co-authored-by: Tobias Grieger <[email protected]>
Co-authored-by: Erik Grinaker <[email protected]>
  • Loading branch information
3 people committed Jul 26, 2021
3 parents 64c888f + 6efba5b + 3020d22 commit 3fd1428
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 7 deletions.
4 changes: 3 additions & 1 deletion build/teamcity-nightly-roachtest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ case "${CLOUD}" in
PARALLELISM=3
CPUQUOTA=384
if [ -z "${TESTS}" ]; then
TESTS="kv(0|95)|ycsb|tpcc/(headroom/n4cpu16)|tpccbench/(nodes=3/cpu=16)|scbench/randomload/(nodes=3/ops=2000/conc=1)|backup/(KMS/n3cpu4)"
# NB: anchor ycsb to beginning of line to avoid matching `zfs/ycsb/*` which
# isn't supported on AWS at time of writing.
TESTS="kv(0|95)|^ycsb|tpcc/(headroom/n4cpu16)|tpccbench/(nodes=3/cpu=16)|scbench/randomload/(nodes=3/ops=2000/conc=1)|backup/(KMS/n3cpu4)"
fi
;;
*)
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/gc/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ go_library(
"//pkg/storage",
"//pkg/storage/enginepb",
"//pkg/util/bufalloc",
"//pkg/util/contextutil",
"//pkg/util/hlc",
"//pkg/util/log",
"//pkg/util/protoutil",
Expand Down
21 changes: 15 additions & 6 deletions pkg/kv/kvserver/gc/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/bufalloc"
"github.com/cockroachdb/cockroach/pkg/util/contextutil"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
Expand Down Expand Up @@ -202,6 +203,8 @@ type RunOptions struct {
// process in one go. This number should be lower than intent resolver default to
// prevent further splitting in resolver.
MaxTxnsPerIntentCleanupBatch int64
// IntentCleanupBatchTimeout is the timeout for processing a batch of intents. 0 to disable.
IntentCleanupBatchTimeout time.Duration
}

// CleanupIntentsFunc synchronously resolves the supplied intents
Expand Down Expand Up @@ -252,6 +255,7 @@ func Run(
maxIntentsPerIntentCleanupBatch: options.MaxIntentsPerIntentCleanupBatch,
maxIntentKeyBytesPerIntentCleanupBatch: options.MaxIntentKeyBytesPerIntentCleanupBatch,
maxTxnsPerIntentCleanupBatch: options.MaxTxnsPerIntentCleanupBatch,
intentCleanupBatchTimeout: options.IntentCleanupBatchTimeout,
}, cleanupIntentsFn, &info)
if err != nil {
return Info{}, err
Expand Down Expand Up @@ -439,6 +443,7 @@ type intentBatcherOptions struct {
maxIntentsPerIntentCleanupBatch int64
maxIntentKeyBytesPerIntentCleanupBatch int64
maxTxnsPerIntentCleanupBatch int64
intentCleanupBatchTimeout time.Duration
}

// newIntentBatcher initializes an intentBatcher. Batcher will take ownership of
Expand Down Expand Up @@ -504,12 +509,16 @@ func (b *intentBatcher) maybeFlushPendingIntents(ctx context.Context) error {
return ctx.Err()
}

// FIXME(erikgrinaker): We should have a timeout for suboperations here now
// that the overall GC timeout has been increased, to make sure we'll make
// progress even with range unavailability. That's probably best done once
// batching is implemented, so we'll wait for that:
// https://github.com/cockroachdb/cockroach/pull/65847
err := b.cleanupIntentsFn(ctx, b.pendingIntents)
var err error
cleanupIntentsFn := func(ctx context.Context) error {
return b.cleanupIntentsFn(ctx, b.pendingIntents)
}
if b.options.intentCleanupBatchTimeout > 0 {
err = contextutil.RunWithTimeout(
ctx, "intent GC batch", b.options.intentCleanupBatchTimeout, cleanupIntentsFn)
} else {
err = cleanupIntentsFn(ctx)
}
if err == nil {
// IntentTxns and PushTxn will be equal here, since
// pushes to transactions whose record lies in this
Expand Down
7 changes: 7 additions & 0 deletions pkg/kv/kvserver/gc_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ const (
gcQueueTimerDuration = 1 * time.Second
// gcQueueTimeout is the timeout for a single GC run.
gcQueueTimeout = 10 * time.Minute
// gcQueueIntentBatchTimeout is the timeout for resolving a single batch of
// intents. It is used to ensure progress in the face of unavailable ranges
// (since intent resolution may touch other ranges), but can prevent progress
// for ranged intent resolution if it exceeds the timeout.
gcQueueIntentBatchTimeout = 2 * time.Minute

// gcQueueIntentCooldownDuration is the duration to wait between GC attempts
// of the same range when triggered solely by intents. This is to prevent
// continually spinning on intents that belong to active transactions, which
Expand Down Expand Up @@ -519,6 +525,7 @@ func (gcq *gcQueue) process(
MaxIntentsPerIntentCleanupBatch: maxIntentsPerCleanupBatch,
MaxIntentKeyBytesPerIntentCleanupBatch: maxIntentKeyBytesPerCleanupBatch,
MaxTxnsPerIntentCleanupBatch: intentresolver.MaxTxnsPerIntentCleanupBatch,
IntentCleanupBatchTimeout: gcQueueIntentBatchTimeout,
},
*zone.GC,
&replicaGCer{repl: repl},
Expand Down

0 comments on commit 3fd1428

Please sign in to comment.