From 5bc5156b06716e8b50cb0460fb1b2d3fa088eedf Mon Sep 17 00:00:00 2001
From: Sergey Kolomenkin
Date: Mon, 15 May 2023 16:57:42 +0400
Subject: [PATCH] fix race condition causing unexpected delay during connection close

The race occurs when connection close starts and signals the reconnect
condition variable before the reconnect thread has begun waiting on that
same variable. The signal is lost, so the reconnect thread then sleeps for
the full reconnect delay before it notices that the connection has been
closed. The fix checks whether the connection is closed immediately before
the timed wait, on both the custom-delay and the default-delay paths.

Code example to reproduce the problem:

```c++
int main()
{
    printf("Begin...\n");
    natsConnection * connection = nullptr;
    natsOptions * options = nullptr;
    constexpr auto onConnectHandler = [](natsConnection *, void *) { };
    natsOptions_Create(&options);
    natsOptions_SetRetryOnFailedConnect(options, true, onConnectHandler, nullptr);
    natsOptions_SetURL(options, "nats://localhost:54321");
    const auto ts1 = nats_Now();
    const natsStatus err = natsConnection_Connect(&connection, options);
    const auto ts2 = nats_Now();
    if (err == NATS_NOT_YET_CONNECTED || err == NATS_OK)
    {
        natsConnection_Close(connection);
    }
    const auto ts3 = nats_Now();
    printf("connection err code: %s\n", natsStatus_GetText(err));
    printf("Connect took: %lld ms\n", (ts2 - ts1));
    printf("Connection close took: %lld ms\n", (ts3 - ts2));
    return 0;
}
```

Program output:

```
Begin...
connection err code: Not Yet Connected
Connect took: 2017 ms
Connection close took: 2057 ms
```

Here we connect to a NATS server that is offline, so the
NATS_NOT_YET_CONNECTED status is returned. The next connection attempt is
2 seconds away. The connection is expected to close almost immediately,
but instead the close blocks for those 2 seconds.
--- src/conn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/conn.c b/src/conn.c index b769a7711..8e7d66deb 100644 --- a/src/conn.c +++ b/src/conn.c @@ -1581,8 +1581,6 @@ _doReconnect(void *arg) natsConn_Unlock(nc); sleepTime = crd(nc, wlf, crdClosure); natsConn_Lock(nc); - if (natsConn_isClosed(nc)) - break; } else { @@ -1590,6 +1588,8 @@ _doReconnect(void *arg) if (jitter > 0) sleepTime += rand() % jitter; } + if (natsConn_isClosed(nc)) + break; natsCondition_TimedWait(nc->reconnectCond, nc->mu, sleepTime); } else