-
Notifications
You must be signed in to change notification settings - Fork 15
allow hikari password changes at runtime #5308
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
import com.palantir.nexus.db.DBType; | ||
import com.palantir.nexus.db.pool.config.ConnectionConfig; | ||
import com.palantir.nexus.db.sql.ExceptionCheck; | ||
import com.zaxxer.hikari.HikariConfig; | ||
import com.zaxxer.hikari.HikariDataSource; | ||
import com.zaxxer.hikari.HikariPoolMXBean; | ||
import com.zaxxer.hikari.pool.HikariPool.PoolInitializationException; | ||
|
@@ -52,7 +53,8 @@ | |
public class HikariCPConnectionManager extends BaseConnectionManager { | ||
private static final Logger log = LoggerFactory.getLogger(HikariCPConnectionManager.class); | ||
|
||
private ConnectionConfig connConfig; | ||
private final ConnectionConfig connConfig; | ||
private final HikariConfig hikariConfig; | ||
|
||
private enum StateType { | ||
// Base state at construction. Nothing is set. | ||
|
@@ -83,6 +85,7 @@ private static class State { | |
|
||
public HikariCPConnectionManager(ConnectionConfig connConfig) { | ||
this.connConfig = Preconditions.checkNotNull(connConfig, "ConnectionConfig must not be null"); | ||
this.hikariConfig = connConfig.getHikariConfig(); | ||
} | ||
|
||
@Override | ||
|
@@ -293,17 +296,15 @@ private HikariDataSource getDataSourcePool() { | |
|
||
try { | ||
try { | ||
dataSourcePool = new HikariDataSource(connConfig.getHikariConfig()); | ||
dataSourcePool = new HikariDataSource(hikariConfig); | ||
} catch (IllegalArgumentException e) { | ||
// allow multiple pools on same JVM (they need unique names / endpoints) | ||
if (e.getMessage().contains("A metric named")) { | ||
String poolName = connConfig.getConnectionPoolName(); | ||
|
||
connConfig | ||
.getHikariConfig() | ||
.setPoolName( | ||
poolName + "-" + ThreadLocalRandom.current().nextInt()); | ||
dataSourcePool = new HikariDataSource(connConfig.getHikariConfig()); | ||
hikariConfig.setPoolName( | ||
poolName + "-" + ThreadLocalRandom.current().nextInt()); | ||
dataSourcePool = new HikariDataSource(hikariConfig); | ||
} else { | ||
throw e; | ||
} | ||
|
@@ -364,6 +365,50 @@ public DBType getDbType() { | |
return connConfig.getDbType(); | ||
} | ||
|
||
@Override | ||
public void setPassword(String newPassword) { | ||
Preconditions.checkNotNull(newPassword, "password cannot be null"); | ||
State localState = state; | ||
if (localState.type == StateType.ZERO) { | ||
synchronized (this) { | ||
// need to check again in case init just happened | ||
localState = state; | ||
if (localState.type == StateType.ZERO) { | ||
// the pool is not initialized, need to set password on the config object | ||
internalSetPassword(newPassword, hikariConfig); | ||
return; | ||
} | ||
} | ||
} | ||
if (localState.type == StateType.NORMAL) { | ||
// pool is initialized, need to set password directly on the pool | ||
internalSetPassword(newPassword, localState.dataSourcePool); | ||
Comment on lines
+383
to
+385
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not that familiar with this codepath, but what happens if the state changes after this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Equally, what happens (and can it happen?) if there is a race to change the password - is it possible that the username gets set to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't need to be synchronized because the only other possible state change is closing the pool. That is completely fine to happen concurrently because all this does is update a volatile field on the pool (it is fine to update that concurrently/after the pool is closed). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also it is completely fine for the username to be null while the password is not null (the password field would just be ignored in that case which means it will use the original configured password). This would only happen with a specific execution order of concurrent calls where one setPassword passes in the original password and another passes in a different password. So consider the following two concurrent calls where the static config has the password set as "originalPassword":
The possible outcomes of this are as follows for the username/password override fields:
Regardless of the outcome, the behavior is arguably correct (you can say one of the calls "wins" and that is what the end behavior is). There is also no point in time where any password would be used other than one of the passwords from the concurrent calls. In practice there should not be concurrent calls to this. The only caller should be a subscription on the witchcraft Runtime config object. Witchcraft uses a single task on a schedule using scheduleWithFixedDelay to check the runtime config and calls the subscribers on the same thread (so no Runtime config subscription calls will ever overlap). The only thing we need to really worry about is other threads that may call init() or close(), but that is handled correctly by the current code which checks the state. |
||
} else if (localState.type == StateType.CLOSED) { | ||
throw new SafeRuntimeException("Hikari pool is closed", localState.closeTrace); | ||
} | ||
} | ||
|
||
/** | ||
* Sets the password (and username) on the HikariConfig (or HikariDataSource pool) if it is necessary. | ||
* | ||
* <p>Note that when the password matches static config, performance is better when setting the username and | ||
* password to null so that hikari uses the original JDBC parameters and avoids making new copies for every | ||
* new connection. See {@link com.zaxxer.hikari.pool.PoolBase#newConnection}. Also see | ||
* {@link com.palantir.nexus.db.pool.config.OracleConnectionConfig#getHikariProperties} for the JDBC properties. | ||
* | ||
* <p>Hikari checks the username field for being null to determine if it needs to copy the JDBC parameters, so | ||
* the username *must* be set for it to respect the password change. | ||
*/ | ||
private void internalSetPassword(String newPassword, HikariConfig configOrPool) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As above, I'm a little concerned about concurrency (but I might be being paranoid here). |
||
if (newPassword.equals(connConfig.getDbPassword().unmasked())) { | ||
configOrPool.setUsername(null); | ||
configOrPool.setPassword(null); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks for the comment above, that explains this part well |
||
} else { | ||
configOrPool.setPassword(newPassword); | ||
configOrPool.setUsername(connConfig.getDbLogin()); | ||
} | ||
} | ||
|
||
private final class ConnectionAcquisitionProfiler { | ||
private final Stopwatch globalStopwatch; | ||
private final Stopwatch trialStopwatch; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,39 +19,28 @@ | |
import static org.assertj.core.api.Assertions.assertThat; | ||
import static org.assertj.core.api.Assertions.fail; | ||
|
||
import com.palantir.docker.compose.DockerComposeRule; | ||
import com.palantir.docker.compose.configuration.ShutdownStrategy; | ||
import com.palantir.docker.compose.connection.Container; | ||
import com.palantir.docker.compose.connection.DockerPort; | ||
import com.palantir.docker.compose.logging.LogDirectory; | ||
import com.google.common.base.Throwables; | ||
import com.palantir.atlasdb.keyvalue.dbkvs.DbkvsPostgresTestSuite; | ||
import com.palantir.logsafe.exceptions.SafeRuntimeException; | ||
import com.palantir.nexus.db.pool.config.ConnectionConfig; | ||
import com.palantir.nexus.db.pool.config.ImmutableMaskedValue; | ||
import com.palantir.nexus.db.pool.config.ImmutablePostgresConnectionConfig; | ||
import java.net.InetSocketAddress; | ||
import com.palantir.nexus.db.pool.config.PostgresConnectionConfig; | ||
import java.sql.Connection; | ||
import java.sql.ResultSet; | ||
import java.sql.SQLException; | ||
import java.sql.SQLTransientConnectionException; | ||
import java.sql.Statement; | ||
import java.time.Duration; | ||
import org.awaitility.Awaitility; | ||
import org.junit.After; | ||
import org.junit.Before; | ||
import org.junit.ClassRule; | ||
import org.junit.Rule; | ||
import org.junit.Test; | ||
import org.junit.rules.ExpectedException; | ||
|
||
public class HikariCpConnectionManagerTest { | ||
|
||
private static final int POSTGRES_PORT_NUMBER = 5432; | ||
|
||
@ClassRule | ||
public static final DockerComposeRule docker = DockerComposeRule.builder() | ||
.file("src/test/resources/docker-compose.yml") | ||
.waitingForService("postgres", Container::areAllPortsOpen) | ||
.saveLogsTo(LogDirectory.circleAwareLogDirectory(HikariCpConnectionManagerTest.class)) | ||
.shutdownStrategy(ShutdownStrategy.AGGRESSIVE_WITH_NETWORK_CLEANUP) | ||
.build(); | ||
|
||
private ConnectionManager manager; | ||
|
||
@Before | ||
|
@@ -141,6 +130,103 @@ public void testCloseIdempotent() throws SQLException { | |
manager.close(); // shouldn't throw | ||
} | ||
|
||
@Test | ||
public void testRuntimePasswordChange() throws SQLException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From looking at the base class and the tests, it seems rather difficult to write a test that confirms that attempts to concurrently edit the password don't cause an issue; still, if at all possible, it'd be quite interesting to see. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure that a test would be able to confirm correctness unless you somehow can control the execution order on different threads and iterate through all possible execution orders. I think it's fine to have undefined behavior as the only caller to this should be a witchcraft Runtime subscription which is guaranteed to never have multiple concurrent calls. If someone does end up having concurrent calls and ends up in an unexpected state, either the password is correct or not. If the password is not correct, all connections would fail and they'd probably need to restart the service (or have something which triggers calling setPassword again later). For this case I would argue the bug is in the service which made concurrent calls to the setPassword method. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, in sync here that trying to setPassword concurrently (as long as we're not doing it) shouldn't be allowed. Maybe we could stick a |
||
// create a new user to avoid messing up the main user for other tests | ||
String testUsername = "testpasswordchange"; | ||
String password1 = "password1"; | ||
String password2 = "password2"; | ||
String password3 = "password3"; | ||
|
||
// initial config intentionally has the wrong password for the test user | ||
PostgresConnectionConfig testConfig = createConnectionConfig(testUsername, password3, 0, 3); | ||
|
||
try (Connection conn = manager.getConnection()) { | ||
try (Statement statement = conn.createStatement()) { | ||
statement.execute(String.format("CREATE USER %s WITH PASSWORD '%s'", testUsername, password1)); | ||
} | ||
try (Statement statement = conn.createStatement()) { | ||
statement.execute(String.format( | ||
"GRANT ALL PRIVILEGES ON DATABASE %s TO %s", testConfig.getDbName(), testUsername)); | ||
} | ||
} | ||
|
||
ConnectionManager testManager = new HikariCPConnectionManager(testConfig); | ||
// fixing the password before init should work | ||
testManager.setPassword(password1); | ||
|
||
try (Connection conn = testManager.getConnection()) { | ||
try (Statement statement = conn.createStatement()) { | ||
statement.execute(String.format("ALTER USER %s WITH PASSWORD '%s'", testUsername, password2)); | ||
} | ||
// existing connection should still work | ||
checkConnection(conn); | ||
|
||
// trying to get a new connection should fail (times out with wrong password) | ||
assertPasswordWrong(testManager); | ||
|
||
// fix the password on the pool | ||
testManager.setPassword(password2); | ||
// original connection should still work | ||
checkConnection(conn); | ||
|
||
// new connection should also work | ||
try (Connection conn2 = testManager.getConnection()) { | ||
checkConnection(conn2); | ||
} | ||
|
||
// changing the pool password again and one connection should work (conn still in the pool) | ||
testManager.setPassword(password3); | ||
try (Connection conn2 = testManager.getConnection()) { | ||
checkConnection(conn2); | ||
|
||
// getting one more connection should see the password error | ||
assertPasswordWrong(testManager); | ||
|
||
// use existing conn to fix the password | ||
try (Statement statement = conn2.createStatement()) { | ||
statement.execute(String.format("ALTER USER %s WITH PASSWORD '%s'", testUsername, password3)); | ||
} | ||
|
||
// now a new connection should work | ||
try (Connection conn3 = testManager.getConnection()) { | ||
checkConnection(conn3); | ||
} | ||
} | ||
} finally { | ||
testManager.close(); | ||
} | ||
} | ||
|
||
private static void assertPasswordWrong(ConnectionManager testManager) { | ||
// This is needed because it appears that sometimes postgres password changes do not take effect immediately. | ||
// If the password change happens too late, we end up with a connection in the pool that we did not expect. | ||
// In that case we need to wait the configured time (1 second) for hikari to remove the idle connection from | ||
// the pool before we can detect that the password is wrong. Note that I cannot reproduce this case locally, | ||
// but it appears to almost always happen on circle. | ||
Awaitility.await("assertPasswordWrong") | ||
.atMost(Duration.ofSeconds(10)) | ||
.pollInterval(Duration.ofSeconds(2)) | ||
.pollDelay(Duration.ofMillis(100)) | ||
.until(() -> isPasswordWrong(testManager)); | ||
} | ||
|
||
private static boolean isPasswordWrong(ConnectionManager testManager) { | ||
try (Connection ignored = testManager.getConnection()) { | ||
return false; | ||
} catch (SQLException e) { | ||
// if "password authentication failed" is in the cause chain, the password is wrong | ||
// otherwise this is an unexpected exception | ||
for (Throwable t : Throwables.getCausalChain(e)) { | ||
String message = t.getMessage(); | ||
if (message != null && message.contains("password authentication failed")) { | ||
return true; | ||
} | ||
} | ||
throw new SafeRuntimeException("unexpected exception checking password on ConnectionManager", e); | ||
} | ||
} | ||
|
||
private static void checkConnection(Connection conn) throws SQLException { | ||
try (Statement statement = conn.createStatement()) { | ||
try (ResultSet result = statement.executeQuery("SELECT 123")) { | ||
|
@@ -152,16 +238,23 @@ private static void checkConnection(Connection conn) throws SQLException { | |
} | ||
|
||
private static ConnectionConfig createConnectionConfig(int maxConnections) { | ||
DockerPort port = docker.containers().container("postgres").port(POSTGRES_PORT_NUMBER); | ||
InetSocketAddress postgresAddress = new InetSocketAddress(port.getIp(), port.getExternalPort()); | ||
return createConnectionConfig("palantir", "palantir", maxConnections, maxConnections); | ||
} | ||
|
||
private static PostgresConnectionConfig createConnectionConfig( | ||
String username, String password, int minConnections, int maxConnections) { | ||
PostgresConnectionConfig suiteConfig = DbkvsPostgresTestSuite.getConnectionConfig(); | ||
return ImmutablePostgresConnectionConfig.builder() | ||
.dbName("atlas") | ||
.dbLogin("palantir") | ||
.dbPassword(ImmutableMaskedValue.of("palantir")) | ||
.host(postgresAddress.getHostName()) | ||
.port(postgresAddress.getPort()) | ||
.dbName(suiteConfig.getDbName()) | ||
.host(suiteConfig.getHost()) | ||
.port(suiteConfig.getPort()) | ||
.dbLogin(username) | ||
.dbPassword(ImmutableMaskedValue.of(password)) | ||
.minConnections(minConnections) | ||
.maxConnections(maxConnections) | ||
.checkoutTimeout(2000) | ||
// if this is set too high, testRuntimePasswordChange will be unreliable (and will take longer) | ||
.maxConnectionAge(1) | ||
.build(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
type: improvement | ||
improvement: | ||
description: |- | ||
The Hikari ConnectionManager now has a setPassword method which allows changing the password at runtime without needing to shutdown and restart services. A future change will add standard runtime configs for this (for now this is only usable by internal products that have their own custom runtime config). | ||
links: | ||
- https://github.com/palantir/atlasdb/pull/5308 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,4 +51,16 @@ public interface ConnectionManager { | |
void initUnchecked(); | ||
|
||
DBType getDbType(); | ||
|
||
/** | ||
* Change the password which will be used for new connections. Existing connections are not modified in any way. | ||
* | ||
* <p>Note that if the password is changed at runtime, there is a slight negative impact on pool performance | ||
* because Hikari will copy the JDBC parameters every time a new connection is made. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah let's say concurrent calls to this method = UB |
||
* | ||
* <p>Concurrent calls to this method should be avoided since the behavior for concurrent calls is not | ||
* deterministic. It is recommended to only call this method from a single runtime config subscription so this is | ||
* called when the runtime config changes. | ||
*/ | ||
void setPassword(String newPassword); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While I believe that this is fine (since the only
State
with type set toZERO
has all nulls, and so checking the type is equivalent to checking the whole object), the methodcheckAndGetDataSourcePool
performs a similar operation in the sense that it switches against the possible states that we can be in, and notably it compares the whole object vs just the type.To err on the side of caution, can we follow the structure used there? In other words:
state
againstlocalState
directlyThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought about that, but that is actually suboptimal (and would result in needing to write duplicate code). When you examine the state again (while being synchronized) if the state happens to be the normal state, you would actually want to have the same behavior as what is outside of the synchronized block). It would also mean that you potentially need to add a third volatile read instead of what is written here where it is guaranteed a max of two volatile memory reads.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd hope that the perf of this method isn't that critical (though now that you've written this more efficient impl it makes sense, of course)!