From 9164e26e2f323c43c9a671cb510bb4df03e45628 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 2 Oct 2018 11:37:18 -0400 Subject: [PATCH] Provide the correct socklen to bind. Get Brian's patch from #5825 and his log message: Fix a failure in binding the initiating side of a connection on MacOS. MacOS doesn't like passing the size of the storage structure (sockaddr_storage) instead of the expected size of the structure (sockaddr_in or sockaddr_in6), which was causing bind() failures. This patch simply changes the structure size to the expected size. Add a more clear error message in debug mode. Signed-off-by: George Bosilca --- Reviewer note: the IPv6 bind-failure message now reports tcp_ifaddr_6 / sin6_port (the address actually passed to bind) instead of the IPv4 fields, and ports are converted with ntohs() for display. opal/mca/btl/tcp/btl_tcp_endpoint.c | 31 +++++++++++++++++------------ 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.c b/opal/mca/btl/tcp/btl_tcp_endpoint.c index 6a4221de8ca..99fcd7610eb 100644 --- a/opal/mca/btl/tcp/btl_tcp_endpoint.c +++ b/opal/mca/btl/tcp/btl_tcp_endpoint.c @@ -728,34 +728,39 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo /* start the connect - will likely fail with EINPROGRESS */ mca_btl_tcp_proc_tosocks(btl_endpoint->endpoint_addr, &endpoint_addr); - + /* Bind the socket to one of the addresses associated with * this btl module. 
This sets the source IP to one of the * addresses shared in modex, so that the destination rank * can properly pair btl modules, even in cases where Linux * might do something unexpected with routing */ - opal_socklen_t sockaddr_addrlen = sizeof(struct sockaddr_storage); if (endpoint_addr.ss_family == AF_INET) { assert(NULL != &btl_endpoint->endpoint_btl->tcp_ifaddr); if (bind(btl_endpoint->endpoint_sd, (struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr, - sockaddr_addrlen) < 0) { - BTL_ERROR(("bind() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); + sizeof(struct sockaddr_in)) < 0) { + BTL_ERROR(("bind on local address (%s:%d) failed: %s (%d)", + opal_net_get_hostname((struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr), + ntohs(((struct sockaddr_in*)&btl_endpoint->endpoint_btl->tcp_ifaddr)->sin_port), + strerror(opal_socket_errno), opal_socket_errno)); - CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd); - return OPAL_ERROR; - } + CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd); + return OPAL_ERROR; + } } #if OPAL_ENABLE_IPV6 if (endpoint_addr.ss_family == AF_INET6) { assert(NULL != &btl_endpoint->endpoint_btl->tcp_ifaddr_6); if (bind(btl_endpoint->endpoint_sd, (struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr_6, - sockaddr_addrlen) < 0) { - BTL_ERROR(("bind() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); + sizeof(struct sockaddr_in6)) < 0) { + BTL_ERROR(("bind on local address (%s:%d) failed: %s (%d)", + opal_net_get_hostname((struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr_6), + ntohs(((struct sockaddr_in6*)&btl_endpoint->endpoint_btl->tcp_ifaddr_6)->sin6_port), + strerror(opal_socket_errno), opal_socket_errno)); - CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd); - return OPAL_ERROR; - } - } + CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd); + return OPAL_ERROR; + } + } #endif opal_output_verbose(10, opal_btl_base_framework.framework_output, "btl: tcp: attempting to connect() to %s address %s on port %d",