Retry on 5xx, error on 4xx #465
```diff
@@ -1,15 +1,37 @@
 use crate::policies::{Policy, PolicyResult, Request, Response};
 use crate::sleep::sleep;
-use crate::PipelineContext;
+use crate::{HttpError, PipelineContext};
 use chrono::{DateTime, Local};
+use http::StatusCode;
 use std::sync::Arc;
 use std::time::Duration;
 
+/// A retry policy.
+///
+/// All retry policies follow a similar pattern only differing in how
+/// they determine if the retry has expired and for how long they should
+/// sleep between retries.
 pub trait RetryPolicy {
+    /// Determine if no more retries should be performed.
+    ///
+    /// Must return true if no more retries should be attempted.
     fn is_expired(&self, first_retry_time: &mut Option<DateTime<Local>>, retry_count: u32) -> bool;
+    /// Determine how long before the next retry should be attempted.
    fn sleep_duration(&self, retry_count: u32) -> Duration;
 }
 
+/// The status codes where a retry should be attempted.
+///
+/// On all other 4xx and 5xx status codes no retry is attempted.
+const RETRY_STATUSES: &[StatusCode] = &[
+    StatusCode::REQUEST_TIMEOUT,
+    StatusCode::TOO_MANY_REQUESTS,
+    StatusCode::INTERNAL_SERVER_ERROR,
+    StatusCode::BAD_GATEWAY,
+    StatusCode::SERVICE_UNAVAILABLE,
+    StatusCode::GATEWAY_TIMEOUT,
+];
+
 #[async_trait::async_trait]
 impl<T, C> Policy<C> for T
 where
```
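A concrete policy only has to answer two questions: has the retry budget been exhausted, and how long to wait before the next attempt. Below is a minimal sketch of what an implementation of the new `RetryPolicy` trait could look like, assuming a simple count-based exponential backoff; the type name and field values are illustrative and are not part of this PR.

```rust
use chrono::{DateTime, Local};
use std::time::Duration;

/// Illustrative only: a count-based exponential backoff policy.
struct ExponentialRetryPolicy {
    max_retries: u32,
    initial_delay: Duration,
}

impl RetryPolicy for ExponentialRetryPolicy {
    fn is_expired(&self, first_retry_time: &mut Option<DateTime<Local>>, retry_count: u32) -> bool {
        // Remember when retrying started; a time-budget policy could compare
        // `Local::now()` against this value instead of counting attempts.
        first_retry_time.get_or_insert_with(Local::now);
        retry_count >= self.max_retries
    }

    fn sleep_duration(&self, retry_count: u32) -> Duration {
        // Double the delay on every attempt: initial, 2x initial, 4x initial, ...
        self.initial_delay * 2u32.saturating_pow(retry_count.saturating_sub(1))
    }
}
```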
The second hunk rewrites the retry loop in the blanket `Policy` implementation:

```diff
@@ -26,19 +48,50 @@ where
         let mut retry_count = 0;
 
         loop {
-            match next[0].send(ctx, request, &next[1..]).await {
-                Ok(response) => return Ok(response),
-                Err(error) => {
-                    log::error!("Error occurred when making request: {}", error);
-                    if self.is_expired(&mut first_retry_time, retry_count) {
-                        return Err(error);
-                    } else {
-                        retry_count += 1;
-
-                        sleep(self.sleep_duration(retry_count)).await;
-                    }
-                }
-            }
+            let error = match next[0].send(ctx, request, &next[1..]).await {
+                Ok(response) if (200..400).contains(&response.status().as_u16()) => {
+                    log::trace!(
+                        "Successful response. Request={:?} response={:?}",
+                        request,
+                        response
+                    );
+                    // Successful status code
+                    return Ok(response);
+                }
+                Ok(response) => {
+                    // Error status code
+                    let status = response.status();
+                    let body = response.into_body_string().await;
+                    let error = Box::new(HttpError::ErrorStatusCode { status, body });
+                    if !RETRY_STATUSES.contains(&status) {
+                        log::error!(
+                            "server returned error status which will not be retried: {}",
+                            status
+                        );
+                        // Server didn't return a status we retry on so return early
+                        return Err(error);
+                    }
+                    log::debug!(
+                        "server returned error status which requires retry: {}",
+                        status
+                    );
+                    error
+                }
+                Err(error) => {
+                    log::debug!(
+                        "error occurred when making request which will be retried: {}",
+                        error
+                    );
+                    error
+                }
+            };
+
+            if self.is_expired(&mut first_retry_time, retry_count) {
+                return Err(error);
+            }
+            retry_count += 1;
+
+            sleep(self.sleep_duration(retry_count)).await;
         }
     }
 }
```

**Review comment** (on the `log::debug!` in the `Err(error)` arm): Line 67 uses `log::error!`; should this line use it as well?

**Reply:** This will be retried (and hopefully succeed on retry), whereas line 67 will be returned to the user as an error. Therefore, line 67 represents an error that the user will definitely handle, while hopefully this line is just a transient state.
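Put together, every response now falls into one of three buckets: a 2xx/3xx status returns immediately, a status in `RETRY_STATUSES` (like any transport error) is retried until the policy expires, and every other 4xx/5xx is surfaced to the caller right away. A hypothetical standalone helper capturing that decision (names are illustrative, not code from the PR):

```rust
use http::StatusCode;

/// Illustrative only: the three outcomes the new retry loop distinguishes.
enum Outcome {
    /// 2xx/3xx: return the response to the caller immediately.
    Success,
    /// 408, 429, 500, 502, 503, 504: build an `HttpError` but retry.
    Retry,
    /// Any other 4xx/5xx: build an `HttpError` and return it right away.
    Fail,
}

fn classify(status: StatusCode, retry_statuses: &[StatusCode]) -> Outcome {
    if (200..400).contains(&status.as_u16()) {
        Outcome::Success
    } else if retry_statuses.contains(&status) {
        Outcome::Retry
    } else {
        Outcome::Fail
    }
}
```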
**Review comment:** The C# SDK retries `IOException`s, so we should retry here too (assuming we can distinguish errors raised by the HTTP policy from other errors).

**Reply:** Are you saying we should only retry IO-related errors? Right now any error is retried.

**Reply:** Yes, sorry, that was what I meant... 😔

**Reply:** OK, I can tackle that in another PR. This might require that we change the pipeline to not work with `Box<dyn Error>` but with `azure_core::Error` instead.
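As a purely hypothetical sketch of where that follow-up could go (the error enum below is an assumption for illustration, not the actual `azure_core::Error` API): with a structured error type in the pipeline instead of `Box<dyn Error>`, the retry loop could match on the error kind and treat only transport/IO failures as transient.

```rust
/// Illustrative only: a structured pipeline error the retry policy could inspect.
/// The real `azure_core::Error` may be shaped very differently.
enum PipelineError {
    /// Transport-level failure (connection reset, timeout, ...): transient, retry.
    Io(std::io::Error),
    /// The server answered with an error status: retry only for the listed statuses.
    Status { status: u16, body: String },
    /// Anything else (serialization, authentication, ...): do not retry.
    Other(String),
}

/// Mirrors the "retry IOExceptions" behavior discussed above.
fn is_transient(error: &PipelineError, retry_statuses: &[u16]) -> bool {
    match error {
        PipelineError::Io(_) => true,
        PipelineError::Status { status, .. } => retry_statuses.contains(status),
        PipelineError::Other(_) => false,
    }
}
```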