-
Notifications
You must be signed in to change notification settings - Fork 901
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Return an error if we can't connect and limit the number of retries #813
Changes from 4 commits
2157d5b
a564763
6fdbc0c
bfe7ddd
0726ff9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -100,6 +100,15 @@ struct reaching { | |
|
||
/* Did we succeed? */ | ||
bool succeeded; | ||
|
||
/* How many times have we attempted to connect? */ | ||
u32 attempts; | ||
|
||
/* How many timest to attempt */ | ||
u32 max_attempts; | ||
|
||
/* Timestamp of the first attempt */ | ||
u32 first_attempt; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Mildly prefer timestamps as struct timeabs, which is clearer, but not worth another round of feedback for. |
||
}; | ||
|
||
/* Things we need when we're talking direct to the peer. */ | ||
|
@@ -1557,13 +1566,25 @@ static void try_connect(struct reaching *reach); | |
|
||
static void connect_failed(struct io_conn *conn, struct reaching *reach) | ||
{ | ||
status_trace("Failed connected out for %s, will try again", | ||
type_to_string(trc, struct pubkey, &reach->id)); | ||
|
||
/* FIXME: Configurable timer! */ | ||
new_reltimer(&reach->daemon->timers, reach, | ||
time_from_sec(5), | ||
try_connect, reach); | ||
u32 diff = time_now().ts.tv_sec - reach->first_attempt; | ||
reach->attempts++; | ||
|
||
if (reach->attempts >= reach->max_attempts) { | ||
status_trace("Failed to connect after %d attempts, giving up " | ||
"after %d seconds", | ||
reach->attempts, diff); | ||
daemon_conn_send( | ||
&reach->daemon->master, | ||
take(towire_gossip_peer_connection_failed( | ||
conn, &reach->id, diff, reach->attempts, false))); | ||
tal_free(reach); | ||
} else { | ||
status_trace("Failed connected out for %s, will try again", | ||
type_to_string(trc, struct pubkey, &reach->id)); | ||
/* FIXME: Configurable timer! */ | ||
new_reltimer(&reach->daemon->timers, reach, time_from_sec(5), | ||
try_connect, reach); | ||
} | ||
} | ||
|
||
static struct io_plan *conn_init(struct io_conn *conn, struct reaching *reach) | ||
|
@@ -1625,6 +1646,12 @@ static void try_connect(struct reaching *reach) | |
/* FIXME: add reach_failed message */ | ||
status_trace("No address known for %s, giving up", | ||
type_to_string(trc, struct pubkey, &reach->id)); | ||
daemon_conn_send( | ||
&reach->daemon->master, | ||
take(towire_gossip_peer_connection_failed( | ||
reach, &reach->id, | ||
time_now().ts.tv_sec - reach->first_attempt, | ||
reach->attempts, true))); | ||
tal_free(reach); | ||
return; | ||
} | ||
|
@@ -1683,6 +1710,9 @@ static bool try_reach_peer(struct daemon *daemon, const struct pubkey *id) | |
reach->succeeded = false; | ||
reach->daemon = daemon; | ||
reach->id = *id; | ||
reach->first_attempt = time_now().ts.tv_sec; | ||
reach->attempts = 0; | ||
reach->max_attempts = 10; | ||
list_add_tail(&daemon->reaching, &reach->list); | ||
tal_add_destructor(reach, destroy_reaching); | ||
|
||
|
@@ -1913,6 +1943,7 @@ static struct io_plan *recv_req(struct io_conn *conn, struct daemon_conn *master | |
case WIRE_GOSSIP_RESOLVE_CHANNEL_REPLY: | ||
case WIRE_GOSSIP_PEER_CONNECTED: | ||
case WIRE_GOSSIP_PEER_ALREADY_CONNECTED: | ||
case WIRE_GOSSIP_PEER_CONNECTION_FAILED: | ||
case WIRE_GOSSIP_PEER_NONGOSSIP: | ||
case WIRE_GOSSIP_GET_UPDATE: | ||
case WIRE_GOSSIP_GET_UPDATE_REPLY: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,13 @@ gossip_peer_connected,,lfeatures,lflen*u8 | |
gossip_peer_already_connected,3015 | ||
gossip_peer_already_connected,,id,struct pubkey | ||
|
||
# gossipd -> master: attempted to connect, unsuccessful, gave up | ||
gossip_peer_connection_failed,3020 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need some good way to allocate message numbers, now I need to bump the message number in #638 again... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, fortunately compile will fail with a clash, so whoever is applying it can rebase and fixup... |
||
gossip_peer_connection_failed,,id,struct pubkey | ||
gossip_peer_connection_failed,,timeout,u32 | ||
gossip_peer_connection_failed,,attempts,u32 | ||
gossip_peer_connection_failed,,addr_unknown,bool | ||
|
||
# Gossipd -> master: peer sent non-gossip packet. Two fds: peer and gossip | ||
gossip_peer_nongossip,3003 | ||
gossip_peer_nongossip,,id,struct pubkey | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -606,6 +606,41 @@ void peer_already_connected(struct lightningd *ld, const u8 *msg) | |
connect_succeeded(ld, &id); | ||
} | ||
|
||
void peer_connection_failed(struct lightningd *ld, const u8 *msg) | ||
{ | ||
struct pubkey id; | ||
u32 attempts, timediff; | ||
struct connect *i, *next; | ||
bool addr_unknown; | ||
char *error; | ||
|
||
if (!fromwire_gossip_peer_connection_failed(msg, NULL, &id, &attempts, | ||
&timediff, &addr_unknown)) | ||
fatal( | ||
"Gossip gave bad GOSSIP_PEER_CONNECTION_FAILED message %s", | ||
tal_hex(msg, msg)); | ||
|
||
if (addr_unknown) { | ||
error = tal_fmt( | ||
msg, "No address known for node %s, please provide one", | ||
type_to_string(msg, struct pubkey, &id)); | ||
} else { | ||
error = tal_fmt(msg, "Could not connect to %s after %d seconds and %d attempts", | ||
type_to_string(msg, struct pubkey, &id), timediff, | ||
attempts); | ||
} | ||
|
||
/* Careful! Completing command frees connect. */ | ||
list_for_each_safe(&ld->connects, i, next, list) { | ||
if (!pubkey_eq(&i->id, &id)) | ||
continue; | ||
|
||
command_fail(i->cmd, "%s", error); | ||
} | ||
} | ||
|
||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems like a lot of vertical space :) |
||
void peer_sent_nongossip(struct lightningd *ld, | ||
const struct pubkey *id, | ||
const struct wireaddr *addr, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: comment misspelling "timest"