From f78e30b01d107f7d34b9d7eb05d635b39515714a Mon Sep 17 00:00:00 2001
From: Dmitry Antipov
Date: Fri, 19 Aug 2022 11:10:37 +0300
Subject: [PATCH] Work around qemu-img hang when comparing gluster:// images

Work around (not an actual fix, see below) a 'qemu-img' hang (caused
by undefined behavior) when comparing two gluster:// images.
Discovered as follows.

Storage configuration:

Volume Name: test0
Type: Distribute
Volume ID: 2930e059-29b3-440f-9a45-d5da735aa8cf
Status: Started
Snapshot Count: 0
Number of Bricks: 3
Transport-type: tcp
Bricks:
Brick1: 192.168.222.111:/pool/0
Brick2: 192.168.222.111:/pool/1
Brick3: 192.168.222.111:/pool/2
Options Reconfigured:
storage.fips-mode-rchecksum: on
transport.address-family: inet
nfs.disable: on

Volume Name: test1
Type: Distribute
Volume ID: 5531b9fe-59cc-4b7d-a47a-7d3097a5cedb
Status: Started
Snapshot Count: 0
Number of Bricks: 3
Transport-type: tcp
Bricks:
Brick1: 192.168.222.111:/pool/3
Brick2: 192.168.222.111:/pool/4
Brick3: 192.168.222.111:/pool/5
Options Reconfigured:
storage.fips-mode-rchecksum: on
transport.address-family: inet
nfs.disable: on

Test:

qemu-img create gluster://192.168.222.111/test0/test0.img 16G
qemu-img create gluster://192.168.222.111/test1/test1.img 16G
qemu-img compare gluster://192.168.222.111/test0/test0.img \
    gluster://192.168.222.111/test1/test1.img
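For reference, per image the reproducer boils down to roughly the
following libgfapi sequence (an illustrative sketch, not the actual
qemu-img code; error handling omitted, volume and host names taken
from the setup above):

#include <fcntl.h>
#include <glusterfs/api/glfs.h>

int
main(void)
{
    /* Connect to the 'test0' volume served by 192.168.222.111. */
    glfs_t *fs = glfs_new("test0");
    glfs_set_volfile_server(fs, "tcp", "192.168.222.111", 24007);
    glfs_init(fs);

    /* Image I/O, e.g. the reads done by 'qemu-img compare'. */
    glfs_fd_t *fd = glfs_open(fs, "test0.img", O_RDONLY);
    /* ... glfs_pread() loop ... */
    glfs_close(fd);

    /* Teardown: sets ctx->cleanup_started while RPC ping/bail
       timers may still be pending, which triggers the hang. */
    glfs_fini(fs);
    return 0;
}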
Thread 14 "glfs_timer" hit Breakpoint 2, rpc_clnt_start_ping (rpc_ptr=0x7fffc003b808) at rpc-clnt-ping.c:281 281 struct rpc_clnt *rpc = NULL; (gdb) bt #0 rpc_clnt_start_ping (rpc_ptr=0x7fffc003b808) at rpc-clnt-ping.c:281 ;; RPC object 0x7fffc003b808 is used after free #1 0x00007ffff6a93c5b in gf_timer_proc (data=0x5555561dab58) at timer.c:152 #2 0x00007ffff77cbfdd in start_thread (arg=) at pthread_create.c:442 #3 0x00007ffff784d5a0 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 The problem is when ctx->cleanup_started is true, gf_timer_call_cancel() refuses to remove RPC client's ping timer (rpc->conn.ping_timer), and the latter ticks _after_ the corresponding RPC client object is destroyed. This issue raises a design questions: what should be done when GFAPI client calls 'glfs_fini()' but there is an internal timer scheduled 10 seconds thereafter? Should 'glfs_fini()' cancel any internal timers at the very beginning? I'm not sure, but the quick and dirty workaround is to indicate success returned from gf_timer_call_cancel() but mark the corresponding timer as canceled and do not call its callback before freeing the timer itself. Signed-off-by: Dmitry Antipov Updates: #1000 --- libglusterfs/src/glusterfs/timer.h | 1 + libglusterfs/src/timer.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/libglusterfs/src/glusterfs/timer.h b/libglusterfs/src/glusterfs/timer.h index 40428ebadaa..d255248528b 100644 --- a/libglusterfs/src/glusterfs/timer.h +++ b/libglusterfs/src/glusterfs/timer.h @@ -30,6 +30,7 @@ struct _gf_timer { void *data; xlator_t *xl; gf_boolean_t fired; + gf_boolean_t canceled; }; struct _gf_timer_registry { diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c index 8882aba4c0b..2449a28a541 100644 --- a/libglusterfs/src/timer.c +++ b/libglusterfs/src/timer.c @@ -83,7 +83,8 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event) if (ctx->cleanup_started) { gf_msg_callingfn("timer", GF_LOG_INFO, 0, LG_MSG_CTX_CLEANUP_STARTED, "ctx cleanup started"); - return -1; + event->canceled = _gf_true; + return 0; } LOCK(&ctx->lock); @@ -149,7 +150,8 @@ gf_timer_proc(void *data) old_THIS = THIS; THIS = event->xl; } - event->callbk(event->data); + if (!event->canceled) + event->callbk(event->data); GF_FREE(event); if (old_THIS) { THIS = old_THIS;