Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow retries on Vault MP Config Source initialization #20343

Merged
merged 1 commit into from
Oct 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ private StartResult startContainer(DevServicesConfig devServicesConfig) {

boolean needToStart = !ConfigUtils.isPropertyPresent(URL_CONFIG_KEY);
if (!needToStart) {
log.debug("Not starting devservices for default Vault client as url have been provided");
log.debug("Not starting devservices for default Vault client as url has been provided");
return null;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package io.quarkus.vault.runtime;

import io.quarkus.vault.VaultException;

public class VaultIOException extends VaultException {

public VaultIOException() {
}

public VaultIOException(String message) {
super(message);
}

public VaultIOException(String message, Throwable cause) {
super(message, cause);
}

public VaultIOException(Throwable cause) {
super(cause);
}

public VaultIOException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
super(message, cause, enableSuppression, writableStackTrace);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
import static io.quarkus.vault.runtime.client.MutinyVertxClientFactory.createHttpClient;
import static java.util.Collections.emptyMap;

import java.net.ConnectException;
import java.net.MalformedURLException;
import java.net.URL;
import java.time.Duration;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletionException;
import java.util.function.Supplier;

import javax.annotation.PreDestroy;
Expand All @@ -24,8 +26,10 @@
import io.quarkus.runtime.TlsConfig;
import io.quarkus.vault.VaultException;
import io.quarkus.vault.runtime.VaultConfigHolder;
import io.quarkus.vault.runtime.VaultIOException;
import io.quarkus.vault.runtime.config.VaultBootstrapConfig;
import io.smallrye.mutiny.Uni;
import io.vertx.core.VertxException;
import io.vertx.core.http.HttpMethod;
import io.vertx.mutiny.core.Vertx;
import io.vertx.mutiny.core.buffer.Buffer;
Expand Down Expand Up @@ -172,6 +176,7 @@ private <T> T exec(HttpRequest<Buffer> request, Object body, Class<T> resultClas
try {
Uni<HttpResponse<Buffer>> uni = body == null ? request.send()
: request.sendBuffer(Buffer.buffer(requestBody(body)));

HttpResponse<Buffer> response = uni.await().atMost(getRequestTimeout());

if (response.statusCode() != expectedCode) {
Expand All @@ -183,8 +188,32 @@ private <T> T exec(HttpRequest<Buffer> request, Object body, Class<T> resultClas
} else {
return null;
}

} catch (JsonProcessingException e) {
throw new VaultException(e);

} catch (io.smallrye.mutiny.TimeoutException e) {
// happens if we reach the atMost condition - see UniAwait.atMost(Duration)
throw new VaultIOException(e);

} catch (VertxException e) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vsevel in quarkus-oidc we add a few retries in such cases, so please consider it, but it is not needed for this PR

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

initially I had the retry logic in the vault client itself. but I did not want to apply it to all calls. so I started passing args around to control when and when not to retry. to avoid changing to much the behavior (introducing lags instead of errors), I wanted to limit it to the very first time we fetch properties out of the vault, because it is in the critical path for startup. we will see if this needs to be added in other use cases.

if ("Connection was closed".equals(e.getMessage())) {
// happens if the connection gets closed (idle timeout, reset by peer, ...)
throw new VaultIOException(e);
} else {
throw e;
}

} catch (CompletionException e) {
if (e.getCause() instanceof ConnectException) {
// unable to establish connection
throw new VaultIOException(e);
} else if (e.getCause() instanceof java.util.concurrent.TimeoutException) {
// timeout on request - see HttpRequest.timeout(long)
throw new VaultIOException(e);
} else {
throw e;
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public class VaultBootstrapConfig {
public static final String DEFAULT_SECRET_CONFIG_CACHE_PERIOD = "10M";
public static final String KUBERNETES_CACERT = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt";
public static final String DEFAULT_CONNECT_TIMEOUT = "5S";
public static final String DEFAULT_READ_TIMEOUT = "1S";
public static final String DEFAULT_READ_TIMEOUT = "5S";
public static final String DEFAULT_TLS_USE_KUBERNETES_CACERT = "true";
public static final String DEFAULT_KUBERNETES_AUTH_MOUNT_PATH = "auth/kubernetes";

Expand Down Expand Up @@ -141,6 +141,12 @@ public class VaultBootstrapConfig {
@ConfigDocMapKey("prefix")
public Map<String, KvPathConfig> secretConfigKvPathPrefix;

/**
* Maximum number of attempts when fetching MP Config properties on the initial connection.
*/
@ConfigItem(defaultValue = "1")
public int mpConfigInitialAttempts;

/**
* Used to hide confidential infos, for logging in particular.
* Possible values are:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
import org.jboss.logging.Logger;

import io.quarkus.arc.Arc;
import io.quarkus.vault.VaultException;
import io.quarkus.vault.VaultKVSecretEngine;
import io.quarkus.vault.runtime.VaultIOException;
import io.smallrye.mutiny.infrastructure.Infrastructure;

public class VaultConfigSource implements ConfigSource {
Expand All @@ -24,6 +26,7 @@ public class VaultConfigSource implements ConfigSource {

private AtomicReference<VaultCacheEntry<Map<String, String>>> cache = new AtomicReference<>(null);
private VaultBootstrapConfig vaultBootstrapConfig;
private volatile boolean firstTime = true;

public VaultConfigSource(VaultBootstrapConfig vaultBootstrapConfig) {
this.vaultBootstrapConfig = vaultBootstrapConfig;
Expand Down Expand Up @@ -73,21 +76,51 @@ private Map<String, String> getSecretConfig() {

Map<String, String> properties = new HashMap<>();

try {
// default kv paths
vaultBootstrapConfig.secretConfigKvPath.ifPresent(strings -> fetchSecrets(strings, null, properties));

// prefixed kv paths
vaultBootstrapConfig.secretConfigKvPathPrefix.forEach((key, value) -> fetchSecrets(value.paths, key, properties));

log.debug("loaded " + properties.size() + " properties from vault");
} catch (RuntimeException e) {
return tryReturnLastKnownValue(e, cacheEntry);
if (firstTime) {
log.debug("fetch secrets first time with attempts = " + vaultBootstrapConfig.mpConfigInitialAttempts);
fetchSecretsFirstTime(properties);
firstTime = false;
} else {
try {
fetchSecrets(properties);
log.debug("refreshed " + properties.size() + " properties from vault");
} catch (RuntimeException e) {
return tryReturnLastKnownValue(e, cacheEntry);
}
}

cache.set(new VaultCacheEntry(properties));
return properties;
}

private void fetchSecretsFirstTime(Map<String, String> properties) {
VaultIOException last = null;
for (int i = 0; i < vaultBootstrapConfig.mpConfigInitialAttempts; i++) {
try {
if (i > 0) {
log.debug("retrying to fetch secrets");
}
fetchSecrets(properties);
log.debug("loaded " + properties.size() + " properties from vault");
return;
} catch (VaultIOException e) {
log.debug("attempt " + (i + 1) + " to fetch secrets from vault failed with: " + e);
last = e;
}
}
if (last == null) {
throw new VaultException("unexpected");
} else {
throw last;
}
}

private void fetchSecrets(Map<String, String> properties) {
// default kv paths
vaultBootstrapConfig.secretConfigKvPath.ifPresent(strings -> fetchSecrets(strings, null, properties));

// prefixed kv paths
vaultBootstrapConfig.secretConfigKvPathPrefix.forEach((key, value) -> fetchSecrets(value.paths, key, properties));
}

private void fetchSecrets(List<String> paths, String prefix, Map<String, String> properties) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package io.quarkus.vault;

import static io.quarkus.credentials.CredentialsProvider.PASSWORD_PROPERTY_NAME;

import org.eclipse.microprofile.config.inject.ConfigProperty;
import org.jboss.shrinkwrap.api.ShrinkWrap;
import org.jboss.shrinkwrap.api.spec.JavaArchive;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;

import io.quarkus.test.QuarkusUnitTest;

@Disabled // test is expected to fail on quarkus app startup
public class VaultUnableToConnectITCase {

// start the test with no vault listening on 4850
// the application startup should fail with the following debug logs 3 times:
// attempt ... to fetch secrets from vault failed with: ...
// retrying to fetch secrets
// ...
// you can also validate the retry on read timeout by starting a server that answers
// POSTs on http://localhost:4850/v1/auth/userpass/login/{user} and sleeps more than 10 secs
// again you are expected to see several attempts in the logs

@RegisterExtension
static final QuarkusUnitTest config = new QuarkusUnitTest()
.setArchiveProducer(() -> ShrinkWrap.create(JavaArchive.class)
.addAsResource("application-unable-to-connect.properties", "application.properties"));

@ConfigProperty(name = PASSWORD_PROPERTY_NAME)
String someSecret;

@Test
void test() {
// we will not reach that point
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
quarkus.vault.url=http://localhost:4850
quarkus.vault.authentication.userpass.username=bob
quarkus.vault.authentication.userpass.password=sinclair
quarkus.vault.secret-config-kv-path=config

quarkus.vault.read-timeout=10S
quarkus.vault.mp-config-initial-attempts=3

quarkus.log.category."io.quarkus.vault".level=DEBUG

#quarkus.log.level=DEBUG
quarkus.log.min-level=DEBUG
quarkus.log.console.level=DEBUG
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,5 @@ quarkus.vault.renew-grace-period=10

quarkus.log.category."io.quarkus.vault".level=DEBUG

# CI can sometimes be slow, there is no need to fail a test if Vault doesn't respond in 1 second which is the default
quarkus.vault.read-timeout=5S

#quarkus.log.level=DEBUG
#quarkus.log.console.level=DEBUG
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,5 @@ quarkus.vault.renew-grace-period=10

quarkus.log.category."io.quarkus.vault".level=DEBUG

# CI can sometimes be slow, there is no need to fail a test if Vault doesn't respond in 1 second which is the default
quarkus.vault.read-timeout=5S

#quarkus.log.level=DEBUG
#quarkus.log.console.level=DEBUG
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,5 @@ quarkus.vault.renew-grace-period=10

quarkus.log.category."io.quarkus.vault".level=DEBUG

# CI can sometimes be slow, there is no need to fail a test if Vault doesn't respond in 1 second which is the default
quarkus.vault.read-timeout=5S

#quarkus.log.level=DEBUG
#quarkus.log.console.level=DEBUG


Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,3 @@ quarkus.vault.enterprise.namespace=accounting
quarkus.vault.authentication.client-token=123
quarkus.vault.kv-secret-engine-version=1
quarkus.tls.trust-all=true
# CI can sometimes be slow, there is no need to fail a test if Vault doesn't respond in 1 second which is the default
quarkus.vault.read-timeout=5S
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,5 @@ quarkus.vault.renew-grace-period=10

quarkus.log.category."io.quarkus.vault".level=DEBUG

# CI can sometimes be slow, there is no need to fail a test if Vault doesn't respond in 1 second which is the default
quarkus.vault.read-timeout=5S

#quarkus.log.level=DEBUG
#quarkus.log.console.level=DEBUG
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,3 @@ quarkus.vault.secret-config-kv-path=multi/default1,multi/default2
quarkus.vault.secret-config-kv-path.singer=multi/singer1,multi/singer2

quarkus.tls.trust-all=true

# CI can sometimes be slow, there is no need to fail a test if Vault doesn't respond in 1 second which is the default
quarkus.vault.read-timeout=5S
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,3 @@ quarkus.vault.log-confidentiality-level=low
quarkus.vault.renew-grace-period=10

quarkus.log.category."io.quarkus.vault".level=DEBUG

# CI can sometimes be slow, there is no need to fail a test if Vault doesn't respond in 1 second which is the default
quarkus.vault.read-timeout=5S
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,3 @@ quarkus.vault.authentication.userpass.username=bob
quarkus.vault.authentication.userpass.password=sinclair

quarkus.vault.tls.skip-verify=true

# CI can sometimes be slow, there is no need to fail a test if Vault doesn't respond in 1 second which is the default
quarkus.vault.read-timeout=5S
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,5 @@ quarkus.vault.renew-grace-period=10

quarkus.log.category."io.quarkus.vault".level=DEBUG

# CI can sometimes be slow, there is no need to fail a test if Vault doesn't respond in 1 second which is the default
quarkus.vault.read-timeout=5S

#quarkus.log.level=DEBUG
#quarkus.log.console.level=DEBUG
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,5 @@ quarkus.vault.renew-grace-period=10

quarkus.log.category."io.quarkus.vault".level=DEBUG

# CI can sometimes be slow, there is no need to fail a test if Vault doesn't respond in 1 second which is the default
quarkus.vault.read-timeout=5S

#quarkus.log.level=DEBUG
#quarkus.log.console.level=DEBUG
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,5 @@ quarkus.vault.renew-grace-period=10

quarkus.log.category."io.quarkus.vault".level=DEBUG

# CI can sometimes be slow, there is no need to fail a test if Vault doesn't respond in 1 second which is the default
quarkus.vault.read-timeout=5S

#quarkus.log.level=DEBUG
#quarkus.log.console.level=DEBUG