Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't initiate new connections during a client shutdown (async API) #225

Merged
merged 6 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 24 additions & 4 deletions hircluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -1951,6 +1951,9 @@ int redisClusterConnect2(redisClusterContext *cc) {
if (cc == NULL) {
return REDIS_ERR;
}
/* Clear a previously set shutdown flag since we allow a
* reconnection of an async context using this API (legacy). */
cc->flags &= ~HIRCLUSTER_FLAG_SHUTDOWN;

return _redisClusterConnect2(cc);
}
Expand Down Expand Up @@ -3925,6 +3928,10 @@ static int updateSlotMapAsync(redisClusterAsyncContext *acc,
/* Don't allow concurrent slot map updates. */
return REDIS_ERR;
}
if (acc->cc->flags & HIRCLUSTER_FLAG_SHUTDOWN) {
/* No slot map updates during a client shutdown. */
return REDIS_ERR;
}

if (ac == NULL) {
if (acc->cc->nodes == NULL) {
Expand Down Expand Up @@ -4017,7 +4024,8 @@ static void redisClusterAsyncCallback(redisAsyncContext *ac, void *r,
goto done;
}

if (cad->retry_count == NO_RETRY) /* Skip retry handling */
/* Skip retry handling when not expected, or during a client shutdown. */
if (cad->retry_count == NO_RETRY || cc->flags & HIRCLUSTER_FLAG_SHUTDOWN)
goto done;

error_type = cluster_reply_error_type(reply);
Expand Down Expand Up @@ -4138,6 +4146,12 @@ int redisClusterAsyncFormattedCommand(redisClusterAsyncContext *acc,

cc = acc->cc;

/* Don't accept new commands when the client is about to be shutdown. */
if (cc->flags & HIRCLUSTER_FLAG_SHUTDOWN) {
__redisClusterAsyncSetError(acc, REDIS_ERR_OTHER, "client closing");
return REDIS_ERR;
}

if (cc->err) {
cc->err = 0;
memset(cc->errstr, '\0', strlen(cc->errstr));
Expand Down Expand Up @@ -4243,20 +4257,24 @@ int redisClusterAsyncFormattedCommandToNode(redisClusterAsyncContext *acc,
redisClusterCallbackFn *fn,
void *privdata, char *cmd,
int len) {
redisClusterContext *cc;
redisClusterContext *cc = acc->cc;
redisAsyncContext *ac;
int status;
cluster_async_data *cad = NULL;
struct cmd *command = NULL;

/* Don't accept new commands when the client is about to be shutdown. */
if (cc->flags & HIRCLUSTER_FLAG_SHUTDOWN) {
__redisClusterAsyncSetError(acc, REDIS_ERR_OTHER, "disconnecting");
return REDIS_ERR;
}

ac = actx_get_by_node(acc, node);
if (ac == NULL) {
/* Specific error already set */
return REDIS_ERR;
}

cc = acc->cc;

if (cc->err) {
cc->err = 0;
memset(cc->errstr, '\0', strlen(cc->errstr));
Expand Down Expand Up @@ -4432,6 +4450,7 @@ void redisClusterAsyncDisconnect(redisClusterAsyncContext *acc) {
}

cc = acc->cc;
cc->flags |= HIRCLUSTER_FLAG_SHUTDOWN;

if (cc->nodes == NULL) {
return;
Expand Down Expand Up @@ -4461,6 +4480,7 @@ void redisClusterAsyncFree(redisClusterAsyncContext *acc) {
}

cc = acc->cc;
cc->flags |= HIRCLUSTER_FLAG_SHUTDOWN;

redisClusterFree(cc);

Expand Down
3 changes: 3 additions & 0 deletions hircluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@
/* Flag to enable routing table updates using the command 'cluster slots'.
* Default is the 'cluster nodes' command. */
#define HIRCLUSTER_FLAG_ROUTE_USE_SLOTS 0x4000
/* Flag specific to the async API which means that the user requested a
* client shutdown by a disconnect or free. */
#define HIRCLUSTER_FLAG_SHUTDOWN 0x8000

/* Events, for redisClusterSetEventCallback() */
#define HIRCLUSTER_EVENT_SLOTMAP_UPDATED 1
Expand Down
8 changes: 8 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,11 @@ add_test(NAME slots-not-served-test-async
COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/slots-not-served-test-async.sh"
"$<TARGET_FILE:clusterclient_async>"
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
add_test(NAME client-disconnect-test-async
COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/client-disconnect-test.sh"
"$<TARGET_FILE:clusterclient_async>"
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
add_test(NAME client-disconnect-without-slotmap-update-test-async
COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/client-disconnect-without-slotmap-update-test.sh"
"$<TARGET_FILE:clusterclient_async>"
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
26 changes: 26 additions & 0 deletions tests/clusterclient_async.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
* Will send following commands using the `..ToNode()` API and a
* cluster node iterator to send each command to all known nodes.
*
* !disconnect - Disconnect the client.
*
* An example input of first sending 2 commands and waiting for their responses,
* before sending a single command and waiting for its response:
*
Expand Down Expand Up @@ -135,6 +137,8 @@ void sendNextCommand(evutil_socket_t fd, short kind, void *arg) {
"!all in !resend not supported");
send_to_all = 1;
}
if (strcmp(cmd, "!disconnect") == 0)
redisClusterAsyncDisconnect(acc);
continue; /* Skip line */
}

Expand Down Expand Up @@ -198,16 +202,34 @@ void eventCallback(const redisClusterContext *cc, int event, void *privdata) {
printf("Event: %s\n", e);
}

void connectCallback(const redisAsyncContext *ac, int status) {
char *s = "";
if (status != REDIS_OK)
s = "failed to ";
printf("Event: %sconnect to %s:%d\n", s, ac->c.tcp.host, ac->c.tcp.port);
}

void disconnectCallback(const redisAsyncContext *ac, int status) {
char *s = "";
if (status != REDIS_OK)
s = "failed to ";
printf("Event: %sdisconnect from %s:%d\n", s, ac->c.tcp.host,
ac->c.tcp.port);
}

int main(int argc, char **argv) {
int use_cluster_slots = 1; // Get topology via CLUSTER SLOTS
int show_events = 0;
int show_connection_events = 0;

int optind;
for (optind = 1; optind < argc && argv[optind][0] == '-'; optind++) {
if (strcmp(argv[optind], "--use-cluster-nodes") == 0) {
use_cluster_slots = 0; // Use the default CLUSTER NODES instead
} else if (strcmp(argv[optind], "--events") == 0) {
show_events = 1;
} else if (strcmp(argv[optind], "--connection-events") == 0) {
show_connection_events = 1;
} else {
fprintf(stderr, "Unknown argument: '%s'\n", argv[optind]);
}
Expand All @@ -233,6 +255,10 @@ int main(int argc, char **argv) {
if (show_events) {
redisClusterSetEventCallback(acc->cc, eventCallback, NULL);
}
if (show_connection_events) {
redisClusterAsyncSetConnectCallback(acc, connectCallback);
redisClusterAsyncSetDisconnectCallback(acc, disconnectCallback);
}

if (redisClusterConnect2(acc->cc) != REDIS_OK) {
printf("Connect error: %s\n", acc->cc->errstr);
Expand Down
75 changes: 75 additions & 0 deletions tests/scripts/client-disconnect-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/bin/bash
#
# Verify that a client disconnects from known nodes without following
# redirects, this to avoid reconnecting to already disconnected nodes.
# The client will not accept new commands thereafter.
#
# Usage: $0 /path/to/clusterclient-binary

clientprog=${1:-./clusterclient_async}
testname=client-disconnect-test

# Sync processes waiting for CONT signals.
perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' &
syncpid1=$!;

# Start simulated redis node #1
timeout 5s ./simulated-redis.pl -p 7401 -d --sigcont $syncpid1 <<'EOF' &
EXPECT CONNECT
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid1"]]]
EXPECT CLOSE

EXPECT CONNECT
EXPECT ["SET", "foo", "initial"]
SEND +OK

EXPECT ["SET", "foo", "redirect"]
SEND -MOVED 12182 127.0.0.1:7402

EXPECT CLOSE
EOF
server1=$!

# Wait until node is ready to accept client connections
wait $syncpid1;

# Run client
timeout 4s "$clientprog" --connection-events 127.0.0.1:7401 > "$testname.out" <<'EOF'
SET foo initial

# Send a command that is expected to be redirected just before
# requesting a client disconnect.
!async
SET foo redirect
!disconnect
!sync

# Commands are not accepted after a disconnect.
SET foo not-accepted
EOF
clientexit=$?

# Wait for server to exit
wait $server1; server1exit=$?

# Check exit statuses
if [ $server1exit -ne 0 ]; then
echo "Simulated server #1 exited with status $server1exit"
exit $server1exit
fi
if [ $clientexit -ne 0 ]; then
echo "$clientprog exited with status $clientexit"
exit $clientexit
fi

expected="Event: connect to 127.0.0.1:7401
OK
MOVED 12182 127.0.0.1:7402
Event: disconnect from 127.0.0.1:7401
error: client closing"

echo "$expected" | diff -u - "$testname.out" || exit 99

# Clean up
rm "$testname.out"
104 changes: 104 additions & 0 deletions tests/scripts/client-disconnect-without-slotmap-update-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/bin/bash
#
# Verify that a client disconnects from known nodes without starting new
# slot map updates. A client shouldn't reconnect or connect to new nodes
# while shutting down.
#
# This testcase starts 2 cluster nodes and the client connects to both.
# The client triggers a disconnect right after a command has been sent,
# which will invoke its callback with a NULL reply. This NULL reply
# should not trigger a slot map update which it would in other cases.
#
# Usage: $0 /path/to/clusterclient-binary

clientprog=${1:-./clusterclient_async}
testname=client-disconnect-test

# Sync process just waiting for server to be ready to accept connection.
perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' &
syncpid1=$!
perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' &
syncpid2=$!

# Start simulated redis node #1
timeout 5s ./simulated-redis.pl -p 7401 -d --sigcont $syncpid1 <<'EOF' &
EXPECT CONNECT
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 6000, ["127.0.0.1", 7401, "nodeid1"]],[6001, 16383, ["127.0.0.1", 7402, "nodeid2"]]]
EXPECT CLOSE

EXPECT CONNECT
EXPECT ["SET", "bar", "initial"]
SEND +OK

# Normally a slot map update is expected here, but not during disconnects.

EXPECT CLOSE
EOF
server1=$!

# Start simulated redis node #2
timeout 5s ./simulated-redis.pl -p 7402 -d --sigcont $syncpid2 <<'EOF' &
EXPECT CONNECT
EXPECT ["SET", "foo", "initial"]
SEND +OK

EXPECT ["SET", "foo", "null-reply"]
# The client will invoke callbacks for outstanding requests with a NULL reply.
# Normally this would trigger a slot map update, but not during disconnects.

EXPECT CLOSE
EOF
server2=$!

# Wait until both nodes are ready to accept client connections
wait $syncpid1 $syncpid2;

# Run client
timeout 4s "$clientprog" --connection-events 127.0.0.1:7401 > "$testname.out" <<'EOF'
SET foo initial
SET bar initial

# Make sure a slot map update is not throttled.
!sleep

# Send a command just before requesting a client disconnect.
# A NULL reply should not trigger a slot map update after a disconnect.
!async
SET foo null-reply
!disconnect
!sync

EOF
clientexit=$?

# Wait for servers to exit
wait $server1; server1exit=$?
wait $server2; server2exit=$?

# Check exit statuses
if [ $server1exit -ne 0 ]; then
echo "Simulated server #1 exited with status $server1exit"
exit $server1exit
fi
if [ $server2exit -ne 0 ]; then
echo "Simulated server #2 exited with status $server2exit"
exit $server2exit
fi
if [ $clientexit -ne 0 ]; then
echo "$clientprog exited with status $clientexit"
exit $clientexit
fi

expected="Event: connect to 127.0.0.1:7402
OK
Event: connect to 127.0.0.1:7401
OK
Event: disconnect from 127.0.0.1:7401
error: Timeout
Event: failed to disconnect from 127.0.0.1:7402"

echo "$expected" | diff -u - "$testname.out" || exit 99

# Clean up
rm "$testname.out"
Loading