Additional backoff strategies with cap + jitter aws#1336
lwoggardner committed Jan 26, 2017
1 parent 36ee64e commit 34d3a34
Showing 2 changed files with 99 additions and 2 deletions.
35 changes: 34 additions & 1 deletion aws-sdk-core/lib/aws-sdk-core/plugins/retry_errors.rb
@@ -9,11 +9,44 @@ module Plugins
# are retried. Generally, these are throttling errors, data
# checksum errors, networking errors, timeout errors and auth
# errors from expired credentials.
# @seahorse.client.option [Number] :retry_max_delay (20)
#   The maximum number of seconds to delay between retries for capped backoff strategies.
# @seahorse.client.option [Number] :retry_base_delay (0.3)
#   The base delay in seconds for the exponential backoff strategies - all are based on (2 ** attempt) * retry_base_delay.
class RetryErrors < Seahorse::Client::Plugin

# Sources
# http://docs.aws.amazon.com/general/latest/gr/api-retries.html
# https://www.awsarchitectureblog.com/2015/03/backoff.html
#
# Java SDK
# https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-core/src/main/java/com/amazonaws/retry/PredefinedBackoffStrategies.java
# 20s max delay. Equal Jitter for throttled events, full jitter for everything else, max retries defaults to 30!
#
# NodeJS SDK
# https://github.com/aws/aws-sdk-js/blob/d0aa9db29be01cd909eec4780dffb9d182cde5e4/lib/util.js#L822
# Default Full Jitter 3 retries, 100ms base, no cap
#

# Original uncapped, exponential backoff
EXPONENTIAL_BACKOFF = lambda { |c| Kernel.sleep(2 ** c.retries * c.config.retry_base_delay) }

# Exponential backoff capped to :max_delay
CAPPED_BACKOFF = lambda { |c| Kernel.sleep([c.config.retry_max_delay, (2 ** c.retries * c.config.retry_base_delay)].min) }

# Equal jitter: retain at least half of the capped backoff, plus a random amount up to the other half.
EQUAL_JITTER_BACKOFF = lambda { |c| delay = [c.config.retry_max_delay, (2 ** c.retries * c.config.retry_base_delay)].min / 2.0; Kernel.sleep(delay + Kernel.rand(0..delay)) }

# Full jitter: random delay between zero and the capped exponential backoff.
FULL_JITTER_BACKOFF = lambda { |c| Kernel.sleep(Kernel.rand(0..[c.config.retry_max_delay, (2 ** c.retries * c.config.retry_base_delay)].min)) }

DEFAULT_BACKOFF = EXPONENTIAL_BACKOFF # EQUAL_JITTER_BACKOFF might be a better default option

option(:retry_limit, 3)
option(:retry_max_delay, 20) # same as the Java SDK; caps exponential backoff after 6 retries
option(:retry_base_delay, 0.3)
option(:retry_backoff, DEFAULT_BACKOFF)
# replaces the previous hard-coded default:
# option(:retry_backoff, lambda { |c| Kernel.sleep(2 ** c.retries * 0.3) })

# @api private
class ErrorInspector
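
For orientation (not part of the diff): because these are declared as Seahorse client options, they should surface as per-client constructor options alongside the existing retry_limit. A minimal usage sketch under that assumption, using an EC2 client with stubbed responses so it runs without real credentials:

require 'aws-sdk-core'

# Sketch only -- assumes the new options are wired through the client
# constructor the same way retry_limit already is.
ec2 = Aws::EC2::Client.new(
  region: 'us-east-1',
  stub_responses: true,                # avoid real API calls / credentials
  retry_limit: 6,                      # up to 6 retries
  retry_base_delay: 0.3,               # delays grow as (2 ** retries) * 0.3
  retry_max_delay: 20,                 # cap applied by the capped/jitter strategies
  retry_backoff: Aws::Plugins::RetryErrors::EQUAL_JITTER_BACKOFF
)

Any of the other strategy constants (EXPONENTIAL_BACKOFF, CAPPED_BACKOFF, FULL_JITTER_BACKOFF), or a custom lambda that receives the request context, could be passed for retry_backoff instead.
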
66 changes: 65 additions & 1 deletion aws-sdk-core/spec/aws/plugins/retry_errors_spec.rb
@@ -11,6 +11,20 @@ module Plugins
expect(config.retry_limit).to eq(3)
end

it 'defaults config.retry_max_delay to 20' do
config = Seahorse::Client::Configuration.new
RetryErrors.new.add_options(config)
config = config.build!
expect(config.retry_max_delay).to eq(20)
end

it 'defaults config.retry_base_delay to 0.3' do
config = Seahorse::Client::Configuration.new
RetryErrors.new.add_options(config)
config = config.build!
expect(config.retry_base_delay).to eq(0.3)
end

describe 'ErrorInspector' do

def inspector(error, http_status_code = 404)
@@ -202,14 +216,64 @@ def handle(send_handler = nil, &block)
handle(send_handler)
end

# previously: it 'backs off exponentially between each retry attempt' do
it 'backs off according to custom retry_backoff proc' do
config.retry_backoff = lambda { |c| Kernel.sleep([0.4, 0.2, 1.7][c.retries]) }
expect(Kernel).to receive(:sleep).with(0.4).ordered
expect(Kernel).to receive(:sleep).with(0.2).ordered
expect(Kernel).to receive(:sleep).with(1.7).ordered
resp.error = EC2::Errors::RequestLimitExceeded.new(nil,nil)
handle { |context| resp }
end

it 'backs off exponentially between each retry attempt with EXPONENTIAL_BACKOFF strategy' do
config.retry_backoff = RetryErrors::EXPONENTIAL_BACKOFF
expect(Kernel).to receive(:sleep).with(0.3).ordered
expect(Kernel).to receive(:sleep).with(0.6).ordered
expect(Kernel).to receive(:sleep).with(1.2).ordered
resp.error = EC2::Errors::RequestLimitExceeded.new(nil,nil)
handle { |context| resp }
end

it 'caps the backoff delay with the CAPPED_BACKOFF strategy' do
config.retry_backoff = RetryErrors::CAPPED_BACKOFF
config.retry_max_delay = 4.0
config.retry_limit = 6
expect(Kernel).to receive(:sleep).with(0.3).ordered
expect(Kernel).to receive(:sleep).with(0.6).ordered
expect(Kernel).to receive(:sleep).with(1.2).ordered
expect(Kernel).to receive(:sleep).with(2.4).ordered
expect(Kernel).to receive(:sleep).with(4.0).ordered
expect(Kernel).to receive(:sleep).with(4.0).ordered
resp.error = EC2::Errors::RequestLimitExceeded.new(nil,nil)
handle { |context| resp }
end

it 'randomises the backoff delay with FULL_JITTER_BACKOFF strategy' do
config.retry_backoff = RetryErrors::FULL_JITTER_BACKOFF
config.retry_max_delay = 2.0
config.retry_limit = 4
# TODO: should we control Kernel.rand to get a proper repeatable test?
expect(Kernel).to receive(:sleep).with(be_between(0,0.3)).ordered
expect(Kernel).to receive(:sleep).with(be_between(0,0.6)).ordered
expect(Kernel).to receive(:sleep).with(be_between(0,1.2)).ordered
expect(Kernel).to receive(:sleep).with(be_between(0,2.0)).ordered
resp.error = EC2::Errors::RequestLimitExceeded.new(nil,nil)
handle { |context| resp }
end

it 'randomises the backoff delay with EQUAL_JITTER_BACKOFF strategy' do
config.retry_backoff = RetryErrors::EQUAL_JITTER_BACKOFF
config.retry_max_delay = 2.0
config.retry_limit = 4
# TODO: should we control Kernel.rand to get a proper repeatable test?
expect(Kernel).to receive(:sleep).with(be_between(0.15,0.3)).ordered
expect(Kernel).to receive(:sleep).with(be_between(0.3,0.6)).ordered
expect(Kernel).to receive(:sleep).with(be_between(0.6,1.2)).ordered
expect(Kernel).to receive(:sleep).with(be_between(1.0,2.0)).ordered
resp.error = EC2::Errors::RequestLimitExceeded.new(nil,nil)
handle { |context| resp }
end

it 'increments the retry count on the context' do
resp.error = EC2::Errors::RequestLimitExceeded.new(nil,nil)
handle { |context| resp }
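
As a side note, the be_between bounds asserted in the jitter specs follow directly from the capped exponential delay. A standalone sketch (illustrative only, not part of the commit) that re-derives them with the settings those specs use (retry_base_delay 0.3, retry_max_delay 2.0, four attempts):

base = 0.3
cap  = 2.0
(0..3).each do |retries|
  capped = [cap, (2**retries) * base].min
  puts format('retry %d: full_jitter 0..%.2fs, equal_jitter %.2f..%.2fs',
              retries, capped, capped / 2.0, capped)
end
# retry 0: full_jitter 0..0.30s, equal_jitter 0.15..0.30s
# retry 1: full_jitter 0..0.60s, equal_jitter 0.30..0.60s
# retry 2: full_jitter 0..1.20s, equal_jitter 0.60..1.20s
# retry 3: full_jitter 0..2.00s, equal_jitter 1.00..2.00s
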
