Skip to content

Commit

Permalink
Provide the correct socklen to bind.
Browse files Browse the repository at this point in the history
Get Brian's patch from open-mpi#5825 and his log message:
Fix a failure in binding the initiating side of a connection
on macOS. macOS rejects bind() when the address length passed is
the size of the generic storage structure (sockaddr_storage)
rather than the size of the address-family-specific structure
(sockaddr_in or sockaddr_in6), which was causing bind() failures.
This patch simply passes the expected structure size instead.

Add a clearer error message in debug mode.

Signed-off-by: George Bosilca <[email protected]>
  • Loading branch information
bosilca committed Oct 2, 2018
1 parent fcc1d30 commit 9164e26
Showing 1 changed file with 18 additions and 13 deletions.
31 changes: 18 additions & 13 deletions opal/mca/btl/tcp/btl_tcp_endpoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -728,34 +728,39 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo

/* start the connect - will likely fail with EINPROGRESS */
mca_btl_tcp_proc_tosocks(btl_endpoint->endpoint_addr, &endpoint_addr);

/* Bind the socket to one of the addresses associated with
* this btl module. This sets the source IP to one of the
* addresses shared in modex, so that the destination rank
* can properly pair btl modules, even in cases where Linux
* might do something unexpected with routing */
opal_socklen_t sockaddr_addrlen = sizeof(struct sockaddr_storage);
if (endpoint_addr.ss_family == AF_INET) {
assert(NULL != &btl_endpoint->endpoint_btl->tcp_ifaddr);
if (bind(btl_endpoint->endpoint_sd, (struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr,
sockaddr_addrlen) < 0) {
BTL_ERROR(("bind() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno));
sizeof(struct sockaddr_in)) < 0) {
BTL_ERROR(("bind on local address (%s:%d) failed: %s (%d)",
opal_net_get_hostname((struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr),
htons(((struct sockaddr_in*)&btl_endpoint->endpoint_btl->tcp_ifaddr)->sin_port),
strerror(opal_socket_errno), opal_socket_errno));

CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd);
return OPAL_ERROR;
}
CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd);
return OPAL_ERROR;
}
}
#if OPAL_ENABLE_IPV6
if (endpoint_addr.ss_family == AF_INET6) {
assert(NULL != &btl_endpoint->endpoint_btl->tcp_ifaddr_6);
if (bind(btl_endpoint->endpoint_sd, (struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr_6,
sockaddr_addrlen) < 0) {
BTL_ERROR(("bind() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno));
sizeof(struct sockaddr_in6)) < 0) {
BTL_ERROR(("bind on local address (%s:%d) failed: %s (%d)",
opal_net_get_hostname((struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr),
htons(((struct sockaddr_in*)&btl_endpoint->endpoint_btl->tcp_ifaddr)->sin_port),
strerror(opal_socket_errno), opal_socket_errno));

CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd);
return OPAL_ERROR;
}
}
CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd);
return OPAL_ERROR;
}
}
#endif
opal_output_verbose(10, opal_btl_base_framework.framework_output,
"btl: tcp: attempting to connect() to %s address %s on port %d",
Expand Down

0 comments on commit 9164e26

Please sign in to comment.