forked from vectordotdev/vector
-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add retries to GCS sink healthcheck #4
Open
alexander-jiang
wants to merge
2
commits into
data_infra_vector_stable
Choose a base branch
from
data/retry-gcs-sink-healthcheck
base: data_infra_vector_stable
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
diff --git a/src/gcp.rs b/src/gcp.rs | ||
index bfc486f92..148fa9dec 100644 | ||
index bfc486f92..baa8e143d 100644 | ||
Check failure Code scanning / check-spelling Unrecognized Spelling Error
bfc is not a recognized word. (unrecognized-spelling)
|
||
--- a/src/gcp.rs | ||
+++ b/src/gcp.rs | ||
@@ -16,7 +16,7 @@ use hyper::header::AUTHORIZATION; | ||
|
@@ -10,18 +10,18 @@ | |
+use tokio::sync::watch; | ||
use vector_lib::configurable::configurable_component; | ||
use vector_lib::sensitive_string::SensitiveString; | ||
|
||
@@ -25,6 +25,11 @@ use crate::{config::ProxyConfig, http::HttpClient, http::HttpError}; | ||
const SERVICE_ACCOUNT_TOKEN_URL: &str = | ||
"http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token"; | ||
|
||
+// See https://cloud.google.com/compute/docs/access/authenticate-workloads#applications | ||
+const METADATA_TOKEN_EXPIRY_MARGIN_SECS: u64 = 200; | ||
+ | ||
+const METADATA_TOKEN_ERROR_RETRY_SECS: u64 = 2; | ||
+ | ||
pub const PUBSUB_URL: &str = "https://pubsub.googleapis.com"; | ||
|
||
pub static PUBSUB_ADDRESS: Lazy<String> = Lazy::new(|| { | ||
@@ -194,19 +199,25 @@ impl GcpAuthenticator { | ||
async fn token_regenerator(self, sender: watch::Sender<()>) { | ||
|
@@ -55,28 +55,75 @@ | |
} | ||
} | ||
diff --git a/src/sinks/gcs_common/config.rs b/src/sinks/gcs_common/config.rs | ||
index 914d780c8..e59a4e8e4 100644 | ||
index 914d780c8..41a657ab8 100644 | ||
--- a/src/sinks/gcs_common/config.rs | ||
+++ b/src/sinks/gcs_common/config.rs | ||
@@ -6,7 +6,7 @@ use vector_lib::configurable::configurable_component; | ||
|
||
@@ -3,10 +3,11 @@ use http::{StatusCode, Uri}; | ||
use hyper::Body; | ||
use snafu::Snafu; | ||
use vector_lib::configurable::configurable_component; | ||
+use tokio::time::{interval, Duration}; | ||
|
||
use crate::{ | ||
gcp::{GcpAuthenticator, GcpError}, | ||
- http::HttpClient, | ||
+ http::{HttpClient, HttpError}, | ||
sinks::{ | ||
gcs_common::service::GcsResponse, | ||
util::retries::{RetryAction, RetryLogic}, | ||
@@ -141,7 +141,7 @@ pub struct GcsRetryLogic; | ||
|
||
@@ -111,14 +112,37 @@ pub fn build_healthcheck( | ||
) -> crate::Result<Healthcheck> { | ||
let healthcheck = async move { | ||
let uri = base_url.parse::<Uri>()?; | ||
- let mut request = http::Request::head(uri).body(Body::empty())?; | ||
- | ||
- auth.apply(&mut request); | ||
+ let mut num_retries = 0; | ||
+ let max_retries = 3; | ||
+ // retry the healthcheck at 5-second intervals (the first tick completes immediately) | ||
+ let mut interval = interval(Duration::from_secs(5)); | ||
+ let mut num_failures = 0; | ||
|
||
let not_found_error = GcsError::BucketNotFound { bucket }.into(); | ||
|
||
- let response = client.send(request).await?; | ||
- healthcheck_response(response, not_found_error) | ||
+ loop { | ||
+ interval.tick().await; | ||
+ let mut request = http::Request::head(uri.clone()).body(Body::empty())?; | ||
+ | ||
+ auth.apply(&mut request); | ||
+ | ||
+ let response = client.send(request).await?; | ||
+ num_retries += 1; | ||
+ if response.status().is_success() { | ||
+ // the healthcheck passes on the first success | ||
+ return healthcheck_response(response, not_found_error); | ||
+ } else { | ||
+ // log the unsuccessful healthcheck response to aid debugging | ||
+ warn!("healthcheck response was not successful! {:#?}", response); | ||
+ num_failures += 1; | ||
+ } | ||
+ | ||
+ if num_retries >= max_retries { | ||
+ info!("non-success healthcheck responses = {}", num_failures); | ||
+ info!("total healthcheck attempts = {}", num_retries); | ||
+ return healthcheck_response(response, not_found_error); | ||
+ } | ||
+ } | ||
}; | ||
|
||
Ok(healthcheck.boxed()) | ||
@@ -141,7 +165,7 @@ pub struct GcsRetryLogic; | ||
|
||
// This is a clone of HttpRetryLogic for the Body type, should get merged | ||
impl RetryLogic for GcsRetryLogic { | ||
- type Error = hyper::Error; | ||
+ type Error = HttpError; | ||
type Response = GcsResponse; | ||
|
||
fn is_retriable_error(&self, _error: &Self::Error) -> bool { | ||
@@ -159,7 +159,7 @@ impl RetryLogic for GcsRetryLogic { | ||
@@ -159,7 +183,7 @@ impl RetryLogic for GcsRetryLogic { | ||
} | ||
_ if status.is_server_error() => RetryAction::Retry(status.to_string().into()), | ||
_ if status.is_success() => RetryAction::Successful, | ||
|
@@ -91,7 +138,7 @@ | |
+++ b/src/sinks/util/http.rs | ||
@@ -470,6 +470,7 @@ impl RetryLogic for HttpRetryLogic { | ||
let status = response.status(); | ||
|
||
match status { | ||
+ StatusCode::UNAUTHORIZED => RetryAction::Retry("unauthorized".into()), | ||
StatusCode::TOO_MANY_REQUESTS => RetryAction::Retry("too many requests".into()), | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Check warning
Code scanning / check-spelling
Candidate Pattern Warning