Skip to content

Commit

Permalink
Add new DEBUG dict-resizing command to disable the dict resize
Browse files Browse the repository at this point in the history
The test fails here and there:
```
*** [err]: expire scan should skip dictionaries with lot's of empty buckets in tests/unit/expire.tcl
scan didn't handle slot skipping logic.
```

There are two cases:
1. When the test passes: we use a child process to avoid the dict
resize, but that cannot completely prevent it, since dictDelete still
has a chance to trigger the resize (when the force ratio is hit). The
reason our test passed before is that the expire dict was still in
the rehashing process, so dictShrinkIfNeeded, called during dictDelete,
could not trigger the resize.

2. When the test fails: the expire dict has finished rehashing, so on
the last dictDelete, dictShrinkIfNeeded triggers the dict resize because
the force ratio is hit, and the slot-skipping logic fails.

This PR adds a new DEBUG command to disable the dict resize.
  • Loading branch information
enjoy-binbin committed Feb 8, 2024
1 parent 81666a6 commit 779f62b
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 27 deletions.
5 changes: 5 additions & 0 deletions src/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,8 @@ void debugCommand(client *c) {
" In case RESET is provided the peak reset time will be restored to the default value",
"REPLYBUFFER RESIZING <0|1>",
" Enable or disable the reply buffer resize cron job",
"DICT-RESIZING <0|1>",
" Enable or disable the main dict and expire dict resizing.",
NULL
};
addExtendedReplyHelp(c, help, clusterDebugCommandExtendedHelp());
Expand Down Expand Up @@ -1021,6 +1023,9 @@ NULL
return;
}
addReply(c, shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr, "dict-resizing") && c->argc == 3) {
server.dict_resizing = atoi(c->argv[2]->ptr);
addReply(c, shared.ok);
} else if(!handleDebugClusterCommand(c)) {
addReplySubcommandSyntaxError(c);
return;
Expand Down
4 changes: 4 additions & 0 deletions src/server.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,9 @@ uint64_t dictEncObjHash(const void *key) {
* but to guarantee the performance of redis, we still allow dict to expand
* if dict load factor exceeds HASHTABLE_MAX_LOAD_FACTOR. */
int dictResizeAllowed(size_t moreMem, double usedRatio) {
/* for debug purposes: dict is not allowed to be resized. */
if (!server.dict_resizing) return 0;

if (usedRatio <= HASHTABLE_MAX_LOAD_FACTOR) {
return !overMaxmemoryAfterAlloc(moreMem);
} else {
Expand Down Expand Up @@ -2079,6 +2082,7 @@ void initServerConfig(void) {
server.next_client_id = 1; /* Client IDs, start from 1 .*/
server.page_size = sysconf(_SC_PAGESIZE);
server.pause_cron = 0;
server.dict_resizing = 1;

server.latency_tracking_info_percentiles_len = 3;
server.latency_tracking_info_percentiles = zmalloc(sizeof(double)*(server.latency_tracking_info_percentiles_len));
Expand Down
1 change: 1 addition & 0 deletions src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -1754,6 +1754,7 @@ struct redisServer {
char *proc_title_template; /* Process title template format */
clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
int pause_cron; /* Don't run cron tasks (debug) */
int dict_resizing; /* Whether to allow main dict and expired dict to be resized (debug) */
int latency_tracking_enabled; /* 1 if extended latency tracking is enabled, 0 otherwise. */
double *latency_tracking_info_percentiles; /* Extended latency tracking info output percentile list configuration. */
int latency_tracking_info_percentiles_len;
Expand Down
13 changes: 4 additions & 9 deletions tests/unit/expire.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -854,8 +854,7 @@ start_cluster 1 0 {tags {"expire external:skip cluster slow"}} {
r psetex key 500 val

# disable resizing
r config set rdb-key-save-delay 10000000
r bgsave
r debug dict-resizing 0

# delete data to have lot's (99%) of empty buckets (slot 12182 should be skipped)
for {set j 1} {$j <= 99} {incr j} {
Expand All @@ -872,20 +871,16 @@ start_cluster 1 0 {tags {"expire external:skip cluster slow"}} {
[r dbsize] eq 1
} else {
if {[r dbsize] eq 0} {
puts [r debug htstats 0]
fail "scan didn't handle slot skipping logic."
} else {
puts [r debug htstats 0]
fail "scan didn't process all valid slots."
}
}

# Enable resizing
r config set rdb-key-save-delay 0
catch {exec kill -9 [get_child_pid 0]}
wait_for_condition 1000 10 {
[s rdb_bgsave_in_progress] eq 0
} else {
fail "bgsave did not stop in time."
}
r debug dict-resizing 1

# put some data into slot 12182 and trigger the resize
r psetex "{foo}0" 500 a
Expand Down
22 changes: 4 additions & 18 deletions tests/unit/other.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -439,8 +439,7 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {
assert_match "*table size: 128*" [r debug HTSTATS 0]

# disable resizing
r config set rdb-key-save-delay 10000000
r bgsave
r debug dict-resizing 0

# delete data to have lot's (96%) of empty buckets
for {set j 1} {$j <= 123} {incr j} {
Expand All @@ -449,13 +448,7 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {
assert_match "*table size: 128*" [r debug HTSTATS 0]

# enable resizing
r config set rdb-key-save-delay 0
catch {exec kill -9 [get_child_pid 0]}
wait_for_condition 1000 10 {
[s rdb_bgsave_in_progress] eq 0
} else {
fail "bgsave did not stop in time."
}
r debug dict-resizing 1

# waiting for serverCron to resize the tables
wait_for_condition 1000 10 {
Expand All @@ -475,21 +468,14 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {
}

# disable resizing
r config set rdb-key-save-delay 10000000
r bgsave
r debug dict-resizing 0

for {set j 1} {$j <= 123} {incr j} {
r del "{alice}$j"
}

# enable resizing
r config set rdb-key-save-delay 0
catch {exec kill -9 [get_child_pid 0]}
wait_for_condition 1000 10 {
[s rdb_bgsave_in_progress] eq 0
} else {
fail "bgsave did not stop in time."
}
r debug dict-resizing 1

# waiting for serverCron to resize the tables
wait_for_condition 1000 10 {
Expand Down

0 comments on commit 779f62b

Please sign in to comment.