Skip to content

Commit

Permalink
Fix connection retries (#2247)
Browse files Browse the repository at this point in the history
  • Loading branch information
lilgreenbird authored Nov 7, 2023
1 parent 729520d commit 0cd01c8
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 196 deletions.
3 changes: 1 addition & 2 deletions src/main/java/com/microsoft/sqlserver/jdbc/IOBuffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -726,8 +726,7 @@ final InetSocketAddress open(String host, int port, int timeoutMillis, boolean u
setSocketOptions(tcpSocket, this);

// set SO_TIMEOUT
int socketTimeout = con.getSocketTimeoutMilliseconds();
tcpSocket.setSoTimeout(socketTimeout);
tcpSocket.setSoTimeout(con.getSocketTimeoutMilliseconds());

inputStream = tcpInputStream = new ProxyInputStream(tcpSocket.getInputStream());
outputStream = tcpOutputStream = tcpSocket.getOutputStream();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import java.lang.Thread.State;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;

Expand Down Expand Up @@ -164,7 +165,10 @@ int getLoginTimeoutSeconds() {

void reconnect(TDSCommand cmd) throws InterruptedException {
reconnectErrorReceived = null;
reconnectThread = new ReconnectThread(this.connection, cmd);
connectRetryCount = this.connection.getRetryCount();
if (connectRetryCount > 0) {
reconnectThread = new ReconnectThread(this.connection, cmd);
}
reconnectThread.start();
reconnectThread.join();
reconnectErrorReceived = reconnectThread.getException();
Expand Down Expand Up @@ -450,8 +454,9 @@ public void run() {
}

boolean keepRetrying = true;
long connectRetryInterval = TimeUnit.SECONDS.toMillis(con.getRetryInterval());

while ((connectRetryCount > 0) && (!stopRequested) && keepRetrying) {
while ((connectRetryCount >= 0) && (!stopRequested) && keepRetrying) {
if (loggerExternal.isLoggable(Level.FINER)) {
loggerExternal.finer("Running reconnect for command: " + command.toString() + " ; ConnectRetryCount = "
+ connectRetryCount);
Expand All @@ -468,7 +473,7 @@ public void run() {
} else {
try {
if (connectRetryCount > 1) {
Thread.sleep((long) (con.getRetryInterval()) * 1000);
Thread.sleep(connectRetryInterval);
}
} catch (InterruptedException ie) {
// re-interrupt thread
Expand All @@ -492,7 +497,7 @@ public void run() {
}
}

if ((connectRetryCount == 0) && (keepRetrying)) {
if ((connectRetryCount <= 0) && (keepRetrying)) {
eReceived = new SQLServerException(SQLServerException.getErrString("R_crClientAllRecoveryAttemptsFailed"),
eReceived);
}
Expand Down
271 changes: 105 additions & 166 deletions src/main/java/com/microsoft/sqlserver/jdbc/SQLServerConnection.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ static String generateStateCode(SQLServerConnection con, int errNum, Integer dat
* original error string.
*/
static String checkAndAppendClientConnId(String errMsg, SQLServerConnection conn) {
if (null != conn && conn.attachConnId()) {
if (null != conn && conn.isConnected()) {
UUID clientConnId = conn.getClientConIdInternal();
assert null != clientConnId;
StringBuilder sb = new StringBuilder(errMsg);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ public void testConnectCountInLoginAndCorrectRetryCount() {

int connectRetryCount = 3;
int connectRetryInterval = 1;
int longLoginTimeout = loginTimeOutInSeconds * 4; // 120 seconds
int longLoginTimeout = loginTimeOutInSeconds * 4;

try {
SQLServerDataSource ds = new SQLServerDataSource();
Expand All @@ -474,17 +474,53 @@ public void testConnectCountInLoginAndCorrectRetryCount() {
assertTrue(con == null, TestResource.getResource("R_shouldNotConnect"));
}
} catch (Exception e) {
assertTrue(e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")), e.getMessage());
long totalTime = System.currentTimeMillis() - timerStart;

assertTrue(e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")), e.getMessage());
int expectedMinimumTimeInMillis = (connectRetryCount * connectRetryInterval) * 1000; // 3 seconds

// Minimum time is 0 seconds per attempt and connectRetryInterval * connectRetryCount seconds of interval.
// Maximum is unknown, but is needs to be less than longLoginTimeout or else this is an issue.
assertTrue(totalTime > expectedMinimumTimeInMillis, TestResource.getResource("R_executionNotLong"));
assertTrue(totalTime < (longLoginTimeout * 1000L), TestResource.getResource("R_executionTooLong"));
assertTrue(totalTime > expectedMinimumTimeInMillis, TestResource.getResource("R_executionNotLong")
+ " totalTime: " + totalTime + " expectedTime: " + expectedMinimumTimeInMillis);
assertTrue( totalTime < (longLoginTimeout * 1000L), TestResource.getResource("R_executionTooLong")
+ "totalTime: " + totalTime + " expectedTime: " + expectedMinimumTimeInMillis);
}
}

/**
* Tests whether connectRetryCount and connectRetryInterval are properly respected in the login loop. As well, tests
* that connection is retried the proper number of times. This is for cases with zero retries.
*/
@Test
public void testConnectCountInLoginAndCorrectRetryCountWithZeroRetry() {
long timerStart = 0;

int connectRetryCount = 0;
int connectRetryInterval = 60;
int longLoginTimeout = loginTimeOutInSeconds * 3; // 90 seconds

try {
SQLServerDataSource ds = new SQLServerDataSource();
ds.setURL(connectionString);
ds.setLoginTimeout(longLoginTimeout);
ds.setConnectRetryCount(connectRetryCount);
ds.setConnectRetryInterval(connectRetryInterval);
ds.setDatabaseName(RandomUtil.getIdentifier("DataBase"));
timerStart = System.currentTimeMillis();

try (Connection con = ds.getConnection()) {
assertTrue(con == null, TestResource.getResource("R_shouldNotConnect"));
}
} catch (Exception e) {
assertTrue(e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")), e.getMessage());
long totalTime = System.currentTimeMillis() - timerStart;

// Maximum is unknown, but is needs to be less than longLoginTimeout or else this is an issue.
assertTrue(totalTime < (longLoginTimeout * 1000L), TestResource.getResource("R_executionTooLong"));
}
}

@Test
@Tag(Constants.xAzureSQLDW)
@Tag(Constants.xAzureSQLDB)
Expand Down Expand Up @@ -974,10 +1010,9 @@ public void testThreadInterruptedStatus() throws InterruptedException {
Runnable runnable = new Runnable() {
public void run() {
SQLServerDataSource ds = new SQLServerDataSource();

ds.setURL(connectionString);
ds.setServerName("invalidServerName" + UUID.randomUUID());
ds.setLoginTimeout(5);
ds.setDatabaseName("invalidDatabase" + UUID.randomUUID());
ds.setLoginTimeout(30);
try (Connection con = ds.getConnection()) {} catch (SQLException e) {}
}
};
Expand All @@ -992,7 +1027,8 @@ public void run() {
Thread.sleep(8000);
executor.shutdownNow();

assertTrue(status && future.isCancelled(), TestResource.getResource("R_threadInterruptNotSet"));
assertTrue(status && future.isCancelled(), TestResource.getResource("R_threadInterruptNotSet") + " status: "
+ status + " isCancelled: " + future.isCancelled());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public class TimeoutTest extends AbstractTest {
static String randomServer = RandomUtil.getIdentifier("Server");
static String waitForDelaySPName = RandomUtil.getIdentifier("waitForDelaySP");
static final int waitForDelaySeconds = 10;
static final int defaultTimeout = 15; // loginTimeout default value
static final int defaultTimeout = 60; // loginTimeout default value

@BeforeAll
public static void setupTests() throws Exception {
Expand All @@ -55,13 +55,13 @@ public void testDefaultLoginTimeout() {
try (Connection con = PrepUtil.getConnection("jdbc:sqlserver://" + randomServer + "connectRetryCount=0")) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, defaultTimeout);
Expand All @@ -77,13 +77,13 @@ public void testURLLoginTimeout() {
try (Connection con = PrepUtil.getConnection("jdbc:sqlserver://" + randomServer + ";logintimeout=" + timeout)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, timeout);
Expand All @@ -100,13 +100,13 @@ public void testDMLoginTimeoutApplied() {
try (Connection con = PrepUtil.getConnection("jdbc:sqlserver://" + randomServer)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, timeout);
Expand All @@ -124,6 +124,7 @@ public void testDMLoginTimeoutNotApplied() {
.getConnection("jdbc:sqlserver://" + randomServer + ";loginTimeout=" + timeout)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue(
(e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
Expand All @@ -132,7 +133,6 @@ public void testDMLoginTimeoutNotApplied() {
.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}
verifyTimeout(timerEnd - timerStart, timeout);
} finally {
Expand All @@ -152,13 +152,13 @@ public void testConnectRetryBadServer() {
.getConnection("jdbc:sqlserver://" + randomServer + ";loginTimeout=" + loginTimeout)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, loginTimeout);
Expand All @@ -179,13 +179,13 @@ public void testConnectRetryServerError() {
+ ";connectRetryInterval=" + connectRetryInterval)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

// connect + all retries should always be <= loginTimeout
Expand All @@ -211,13 +211,13 @@ public void testConnectRetryServerErrorDS() {
try (Connection con = PrepUtil.getConnection(connectStr)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

// connect + all retries should always be <= loginTimeout
Expand All @@ -228,8 +228,8 @@ public void testConnectRetryServerErrorDS() {
@Test
public void testConnectRetryTimeout() {
long timerEnd = 0;
long timerStart = System.currentTimeMillis();
int loginTimeout = 2;
long timerStart = System.currentTimeMillis();

// non existent server with very short loginTimeout so there is no time to do all retries
try (Connection con = PrepUtil.getConnection(
Expand All @@ -238,13 +238,13 @@ public void testConnectRetryTimeout() {
+ (new Random().nextInt(defaultTimeout - 1) + 1) + ";loginTimeout=" + loginTimeout)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, loginTimeout);
Expand All @@ -260,13 +260,13 @@ public void testFailoverInstanceResolution() throws SQLException {
+ ";databaseName=FailoverDB_abc;failoverPartner=" + randomServer + "\\foo;user=sa;password=pwd;")) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, defaultTimeout * 2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public void testBasicConnectionAAD() throws Exception {
basicReconnect("jdbc:sqlserver://" + azureServer + ";database=" + azureDatabase + ";user="
+ azureUserName + ";password=" + azurePassword
+ ";loginTimeout=90;Authentication=ActiveDirectoryPassword");
retry = THROTTLE_RETRY_COUNT + 1;
retry = THROTTLE_RETRY_COUNT + 1;
} catch (Exception e) {
if (e.getMessage().matches(TestUtils.formatErrorMsg("R_crClientAllRecoveryAttemptsFailed"))) {
System.out.println(e.getMessage() + ". Recovery failed, retry #" + retry + " in "
Expand Down Expand Up @@ -265,7 +265,8 @@ public void testPooledConnectionDB() throws SQLException {
@Test
public void testPooledConnectionLang() throws SQLException {
SQLServerConnectionPoolDataSource mds = new SQLServerConnectionPoolDataSource();
mds.setURL(connectionString);
mds.setURL(connectionString + ";connectRetryCount=1");

PooledConnection pooledConnection = mds.getPooledConnection();
String lang0 = null, lang1 = null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ private void timeoutVariations(Map<String, String> props, long expectedDuration,
fail("Successfully executed query on a blocked connection.");
} catch (SQLException e) {
double elapsedTime = System.currentTimeMillis() - startTime;

// Timeout should occur after query timeout and not login timeout
assertTrue("Query did not timeout in " + expectedDuration + "ms, elapsed time(ms): " + elapsedTime,
elapsedTime < expectedDuration);
assertTrue("Exception: " + e.getMessage() + ": Query did not timeout in " + expectedDuration
+ "ms, elapsed time(ms): " + elapsedTime, elapsedTime < expectedDuration);
if (expectedErrMsg.isPresent()) {
assertTrue(TestResource.getResource("R_unexpectedErrorMessage") + e.getMessage(),
e.getMessage().matches(TestUtils.formatErrorMsg(expectedErrMsg.get())));
Expand Down Expand Up @@ -82,6 +83,9 @@ public void testDefaultTimeout() throws SQLException {
public void testDefaultRetry() throws SQLException {
Map<String, String> m = new HashMap<>();
m.put("loginTimeout", "5");

// ensure count is not set to something else as this test assumes exactly just 1 retry
m.put("connectRetryCount", "1");
timeoutVariations(m, 6000, Optional.empty());
}

Expand Down

0 comments on commit 0cd01c8

Please sign in to comment.