Skip to content

Commit

Permalink
connectd: don't insist on ping replies when other traffic is flowing.
Browse files Browse the repository at this point in the history
Got complaints about us hanging up on some nodes because they don't respond
to pings in a timely manner (e.g. ACINQ?).  This only matters to us if no
other traffic is flowing, so mitigate that.

This may solve the problem because we've previously seen implementations
badly prioritize gossip traffic, and thus important messages can get queued
behind gossip dumps!

Signed-off-by: Rusty Russell <[email protected]>
Changelog-Fixed: connectd: give busy peers more time to respond to pings.
  • Loading branch information
rustyrussell committed Jun 26, 2022
1 parent 92f10f2 commit e3a5fa9
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 14 deletions.
1 change: 1 addition & 0 deletions connectd/connectd.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ static struct peer *new_peer(struct daemon *daemon,
peer->ready_to_die = false;
peer->active = false;
peer->peer_outq = msg_queue_new(peer, false);
peer->last_recv_time = time_now();

#if DEVELOPER
peer->dev_writes_enabled = NULL;
Expand Down
3 changes: 3 additions & 0 deletions connectd/connectd.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ struct peer {
/* Random ping timer, to detect dead connections. */
struct oneshot *ping_timer;

/* Last time we received traffic */
struct timeabs last_recv_time;

#if DEVELOPER
bool dev_read_enabled;
/* If non-NULL, this counts down; 0 means disable */
Expand Down
26 changes: 18 additions & 8 deletions connectd/multiplex.c
Original file line number Diff line number Diff line change
Expand Up @@ -423,16 +423,23 @@ static void set_ping_timer(struct peer *peer)

static void send_ping(struct peer *peer)
{
/* Already have a ping in flight? */
if (peer->expecting_pong != PONG_UNEXPECTED) {
status_peer_debug(&peer->id, "Last ping unreturned: hanging up");
if (peer->to_peer)
io_close(peer->to_peer);
return;
/* If it's still sending us traffic, maybe ping reply is backed up?
* That's OK, ping is just to make sure it's still alive, and clearly
* it is. */
if (time_before(peer->last_recv_time,
timeabs_sub(time_now(), time_from_sec(60)))) {
/* Already have a ping in flight? */
if (peer->expecting_pong != PONG_UNEXPECTED) {
status_peer_debug(&peer->id, "Last ping unreturned: hanging up");
if (peer->to_peer)
io_close(peer->to_peer);
return;
}

inject_peer_msg(peer, take(make_ping(NULL, 1, 0)));
peer->expecting_pong = PONG_EXPECTED_PROBING;
}

inject_peer_msg(peer, take(make_ping(NULL, 1, 0)));
peer->expecting_pong = PONG_EXPECTED_PROBING;
set_ping_timer(peer);
}

Expand Down Expand Up @@ -987,6 +994,9 @@ static struct io_plan *read_body_from_peer_done(struct io_conn *peer_conn,
}
tal_free(peer->peer_in);

/* We got something! */
peer->last_recv_time = time_now();

/* dev_disconnect can disable read */
if (!IFDEV(peer->dev_read_enabled, true))
return read_hdr_from_peer(peer_conn, peer);
Expand Down
11 changes: 5 additions & 6 deletions tests/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -3847,12 +3847,11 @@ def test_ping_timeout(node_factory):

l1, l2 = node_factory.line_graph(2, opts=[{'dev-no-reconnect': None,
'disconnect': l1_disconnects},
{}])
# Takes 15-45 seconds, then another to try second ping
# Because of ping timer randomness we don't know which side hangs up first
wait_for(lambda: l1.rpc.getpeer(l2.info['id'])['connected'] is False, timeout=45 + 45 + 5)
wait_for(lambda: (l1.daemon.is_in_log('Last ping unreturned: hanging up')
or l2.daemon.is_in_log('Last ping unreturned: hanging up')))
{'dev-no-ping-timer': None}])
# Will set timers at 15-45 seconds, but won't send unless 60 seconds
# after last recd traffic, and has to send *second* ping before getting upset.
wait_for(lambda: l1.rpc.getpeer(l2.info['id'])['connected'] is False, timeout=60 + 45 + 45 + 5)
wait_for(lambda: l1.daemon.is_in_log('Last ping unreturned: hanging up'))


@pytest.mark.openchannel('v1')
Expand Down

0 comments on commit e3a5fa9

Please sign in to comment.