From 841d8444e786383b9aaec7178c6029d7335da7ac Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Wed, 27 May 2020 17:33:16 +0200 Subject: [PATCH] prov/tcp Fix a use-after-free triggered by client_send_connreq I'm not entirely sure if it is fixes the issue our QA is seeing (as they get err_entry.err=-104 - a wrong negative value), but with error injection I could easily trigger a use-after-free with the root from this function (with err_entry.err=104, though, so I still don't know where the wrong error sign came from). In my error injection reproducer ofi_send_socket() fails sometimes, which then triggers free of cm_ctx without removing the fd and cm_ctx from polling. Next poll round will then access cm_ctx and trigger a use-after-free. client_send_connreq tx_cm_data ofi_send_socket -> fails goto err ... err: free(cm_ctx) ASAN reports READ of size 4 at 0x6120000106c8 thread T4 (rpc_poll-0) #0 0x7f77005e0f21 in process_cm_ctx prov/tcp/src/tcpx_conn_mgr.c:482 #1 0x7f77005e15ef in tcpx_conn_mgr_run prov/tcp/src/tcpx_conn_mgr.c:535 #2 0x7f77005fc429 in tcpx_eq_read prov/tcp/src/tcpx_eq.c:48 #3 0x4926dd in fi_eq_read /home/bschubert/local/rhel7/libfabric/include/rdma/fi_eq.h:352 0x6120000106c8 is located 8 bytes inside of 280-byte region [0x6120000106c0,0x6120000107d8) freed by thread T4 (rpc_poll-0) here: #0 0x7f77015915e7 in __interceptor_free #1 0x7f77005e083b in client_send_connreq prov/tcp/src/tcpx_conn_mgr.c:422 #2 0x7f77005e0f7e in process_cm_ctx prov/tcp/src/tcpx_conn_mgr.c:487 #3 0x7f77005e15ef in tcpx_conn_mgr_run prov/tcp/src/tcpx_conn_mgr.c:535 #4 0x7f77005fc429 in tcpx_eq_read prov/tcp/src/tcpx_eq.c:48 previously allocated by thread T5 (rpc_conn_mgr) here: #0 0x7f7701591b7e in __interceptor_calloc #1 0x7f77005edb5c in tcpx_ep_connect prov/tcp/src/tcpx_ep.c:103 #2 0x478b2f in fi_connect /home/bschubert/local/rhel7/libfabric/include/rdma/fi_cm.h:98 Signed-off-by: Bernd Schubert --- prov/tcp/src/tcpx_conn_mgr.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/prov/tcp/src/tcpx_conn_mgr.c b/prov/tcp/src/tcpx_conn_mgr.c index 286fd9e9fdc..dc86bb516da 100644 --- a/prov/tcp/src/tcpx_conn_mgr.c +++ b/prov/tcp/src/tcpx_conn_mgr.c @@ -378,31 +378,47 @@ static void client_send_connreq(struct util_wait *wait, if (ret < 0 || status) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "connection failure\n"); ret = (ret < 0)? -ofi_sockerr() : status; - goto err; + goto err_del; } ret = tx_cm_data(ep->sock, ofi_ctrl_connreq, cm_ctx); if (ret) - goto err; + goto err_del; ret = ofi_wait_del_fd(wait, ep->sock); - if (ret) - goto err; + if (ret) { + FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, + "Could not remove fd from wait: %s\n", + fi_strerror(-ret)); + goto err_report; + } cm_ctx->type = CLIENT_RECV_CONNRESP; ret = ofi_wait_add_fd(wait, ep->sock, POLLIN, tcpx_eq_wait_try_func, NULL, cm_ctx); if (ret) - goto err; + goto err_report; return; -err: + +err_del: + ret = ofi_wait_del_fd(wait, ep->sock); + if (ret) + FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, + "Could not remove fd from wait: %s\n", + fi_strerror(-ret)); +err_report: memset(&err_entry, 0, sizeof err_entry); err_entry.fid = cm_ctx->fid; err_entry.context = cm_ctx->fid->context; err_entry.err = -ret; + /* cm_ctx must only be freed once its wait fd was + * removed from polling, otherwise next poll round will trigger a + * use-after-free + */ free(cm_ctx); + fi_eq_write(&ep->util_ep.eq->eq_fid, FI_SHUTDOWN, &err_entry, sizeof(err_entry), UTIL_FLAG_ERROR); }