Skip to content

Commit

Permalink
storage: limit the bandwidth used for snapshots
Browse files Browse the repository at this point in the history
Limit the bandwidth used for snapshots. Preemptive snapshots are
throttled to 2 MB/sec (COCKROACH_PREEMPTIVE_SNAPSHOT_RATE) and Raft
snapshots are throttled to 8 MB/sec (COCKROACH_RAFT_SNAPSHOT_RATE). The
effect of limiting the bandwidth is that a preemptive snapshot for a 64
MB range will take ~32s to send and a Raft snapshot will take ~8s. The
benefit is a much smaller impact on foreground traffic.

Fixes cockroachdb#10972
  • Loading branch information
petermattis committed Apr 10, 2017
1 parent b185d50 commit be834a3
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 5 deletions.
8 changes: 6 additions & 2 deletions glide.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 28 additions & 2 deletions pkg/storage/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"golang.org/x/net/context"
"golang.org/x/time/rate"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/build"
Expand Down Expand Up @@ -3234,6 +3235,11 @@ type SnapshotStorePool interface {
throttle(reason throttleReason, toStoreID roachpb.StoreID)
}

// Bandwidth limits for outgoing snapshots, expressed in bytes/sec. Each value
// is obtained from envutil.EnvOrDefaultBytes using the environment variable
// name and default shown in its declaration.
var (
	// preemptiveSnapshotRate is the limit used by sendSnapshot when the
	// snapshot header has CanDecline set (preemptive snapshots). The default
	// is 2 MB/sec.
	preemptiveSnapshotRate = envutil.EnvOrDefaultBytes(
		"COCKROACH_PREEMPTIVE_SNAPSHOT_RATE", 2<<20 /* 2 MB */)

	// raftSnapshotRate is the limit used by sendSnapshot for every other
	// snapshot (Raft snapshots). The default is 8 MB/sec.
	raftSnapshotRate = envutil.EnvOrDefaultBytes(
		"COCKROACH_RAFT_SNAPSHOT_RATE", 8<<20 /* 8 MB */)
)

// sendSnapshot sends an outgoing snapshot via a pre-opened GRPC stream.
func sendSnapshot(
ctx context.Context,
Expand Down Expand Up @@ -3281,12 +3287,26 @@ func sendSnapshot(
header.State.Desc.RangeID, resp.Status)
}

// The size of batches to send. This is the granularity of rate limiting.
const batchSize = 256 << 10 // 256 KB

// Convert the bytes/sec rate limit to batches/sec.
//
// TODO(peter): Using bytes/sec for rate limiting seems more natural but has
// practical difficulties. We either need to use a very large burst size,
// which seems to disable the rate limiting, or call WaitN in chunks smaller
// than the burst size, which caused excessive slowness in testing. It would
// be nice to figure this out, but the batches/sec rate limit works for now.
targetRate := rate.Limit(raftSnapshotRate) / batchSize
if header.CanDecline {
targetRate = rate.Limit(preemptiveSnapshotRate) / batchSize
}
limiter := rate.NewLimiter(targetRate, 1 /* burst size */)

// Determine the unreplicated key prefix so we can drop any
// unreplicated keys from the snapshot.
unreplicatedPrefix := keys.MakeRangeIDUnreplicatedPrefix(header.State.Desc.RangeID)
var alloc bufalloc.ByteAllocator
// TODO(jordan) make this configurable. For now, 1MB.
const batchSize = 1 << 20
n := 0
var b engine.Batch
for ; ; snap.Iter.Next() {
Expand Down Expand Up @@ -3315,6 +3335,9 @@ func sendSnapshot(
}

if len(b.Repr()) >= batchSize {
if err := limiter.WaitN(ctx, 1); err != nil {
return err
}
if err := sendBatch(stream, b); err != nil {
return err
}
Expand All @@ -3325,6 +3348,9 @@ func sendSnapshot(
}
}
if b != nil {
if err := limiter.WaitN(ctx, 1); err != nil {
return err
}
if err := sendBatch(stream, b); err != nil {
return err
}
Expand Down

0 comments on commit be834a3

Please sign in to comment.