From 23a58e05ea1ad0728df92838b00fa0e3c2d42d8b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Jan 2018 11:14:06 +1030 Subject: [PATCH 1/5] channeld: handle unexpected messages better. In particular, decode error messages correctly and do the right thing with messages about other channels. Signed-off-by: Rusty Russell --- channeld/channel.c | 94 ++++++++++++++++++++++++++++++---------- tests/test_lightningd.py | 2 +- 2 files changed, 73 insertions(+), 23 deletions(-) diff --git a/channeld/channel.c b/channeld/channel.c index 70f3421b0e31..253cefcffaa2 100644 --- a/channeld/channel.c +++ b/channeld/channel.c @@ -1555,7 +1555,6 @@ static void handle_peer_shutdown(struct peer *peer, const u8 *shutdown) static void peer_in(struct peer *peer, const u8 *msg) { enum wire_type type = fromwire_peektype(msg); - status_trace("peer_in %s", wire_type_name(type)); /* FIXME: We don't support concurrent channels with same peer. */ if (type == WIRE_OPEN_CHANNEL) { @@ -1767,6 +1766,73 @@ static void resend_commitment(struct peer *peer, const struct changed_htlc *last assert(peer->revocations_received == peer->next_index[REMOTE] - 2); } +/* Handle random messages we might get, returning NULL if we handled it. */ +static u8 *read_peer_msg(struct peer *peer) +{ + u8 *msg; + struct channel_id channel_id; + + msg = sync_crypto_read(peer, &peer->cs, PEER_FD); + if (!msg) + peer_conn_broken(peer); + + status_trace("peer_in %s", wire_type_name(fromwire_peektype(msg))); + + if (is_gossip_msg(msg)) { + /* Forward to gossip daemon */ + wire_sync_write(GOSSIP_FD, take(msg)); + return NULL; + } + + if (fromwire_peektype(msg) == WIRE_PING) { + handle_ping(peer, msg); + return tal_free(msg); + } + + if (fromwire_peektype(msg) == WIRE_ERROR) { + struct channel_id chanid; + char *err = sanitize_error(msg, msg, &chanid); + + /* BOLT #1: + * + * The channel is referred to by `channel_id`, unless + * `channel_id` is 0 (i.e. all bytes are 0), in which + * case it refers to all channels. + * ... + + * The receiving node: + * - upon receiving `error`: + * - MUST fail the channel referred to by the error + * message. + * - if no existing channel is referred to by the + * message: + * - MUST ignore the message. + */ + if (channel_id_is_all(&chanid) + || structeq(&chanid, &peer->channel_id)) { + status_failed(STATUS_FAIL_PEER_BAD, + "Received ERROR %s", err); + } + return tal_free(msg); + } + + /* They're talking about a different channel? */ + if (extract_channel_id(msg, &channel_id) + && !structeq(&channel_id, &peer->channel_id)) { + status_trace("Rejecting %s for unknown channel_id %s", + wire_type_name(fromwire_peektype(msg)), + type_to_string(msg, struct channel_id, + &channel_id)); + enqueue_peer_msg(peer, + take(towire_errorfmt(msg, &channel_id, + "Multiple channels" + " unsupported"))); + return tal_free(msg); + } + + return msg; +} + static void peer_reconnect(struct peer *peer) { struct channel_id channel_id; @@ -1797,22 +1863,8 @@ static void peer_reconnect(struct peer *peer) status_failed(STATUS_FAIL_PEER_IO, "Failed writing reestablish: %s", strerror(errno)); -again: - msg = sync_crypto_read(peer, &peer->cs, PEER_FD); - if (!msg) - status_failed(STATUS_FAIL_PEER_IO, - "Failed reading reestablish: %s", strerror(errno)); - - if (is_gossip_msg(msg)) { - /* Forward to gossip daemon */ - wire_sync_write(GOSSIP_FD, take(msg)); - goto again; - } - - if (fromwire_peektype(msg) == WIRE_PING) { - handle_ping(peer, msg); - goto again; - } + /* Read until they say something interesting */ + while ((msg = read_peer_msg(peer)) == NULL); if (!fromwire_channel_reestablish(msg, NULL, &channel_id, &next_local_commitment_number, @@ -2706,11 +2758,9 @@ int main(int argc, char *argv[]) gossip_in(peer, msg); } else if (FD_ISSET(PEER_FD, &rfds)) { /* This could take forever, but who cares? */ - msg = sync_crypto_read(peer, &peer->cs, PEER_FD); - - if (!msg) - peer_conn_broken(peer); - peer_in(peer, msg); + msg = read_peer_msg(peer); + if (msg) + peer_in(peer, msg); } else msg = NULL; tal_free(msg); diff --git a/tests/test_lightningd.py b/tests/test_lightningd.py index adf31df4a273..79df7544af32 100644 --- a/tests/test_lightningd.py +++ b/tests/test_lightningd.py @@ -917,7 +917,7 @@ def test_permfail(self): closetxid = l1.bitcoin.rpc.getrawmempool(False)[0] # "Internal error" in hex - l1.daemon.wait_for_log('WIRE_ERROR.*496e7465726e616c206572726f72') + l1.daemon.wait_for_log(r'ERROR.*Internal error') # l2 will send out tx (l1 considers it a transient error) bitcoind.generate_block(1) From ec3e858d4edc18dc9d225e8dab44aa035cd0f9b9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Jan 2018 11:14:08 +1030 Subject: [PATCH 2/5] closingd: handle unexpected messages better. In particular, handle pings. The rest is modelled on the channeld one, but annoyingly different enough that it's hard to share code without significant work. Signed-off-by: Rusty Russell --- closingd/Makefile | 1 + closingd/closing.c | 115 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 94 insertions(+), 22 deletions(-) diff --git a/closingd/Makefile b/closingd/Makefile index 5e64e9f142f1..4b7f1e982d95 100644 --- a/closingd/Makefile +++ b/closingd/Makefile @@ -54,6 +54,7 @@ CLOSINGD_COMMON_OBJS := \ common/msg_queue.o \ common/peer_failed.o \ common/permute_tx.o \ + common/ping.o \ common/status.o \ common/subdaemon.o \ common/type_to_string.o \ diff --git a/closingd/closing.c b/closingd/closing.c index 0fbee2bccb76..f370aca62c9d 100644 --- a/closingd/closing.c +++ b/closingd/closing.c @@ -1,15 +1,18 @@ #include +#include #include #include #include #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -80,6 +83,92 @@ static u64 one_towards(u64 target, u64 value) return value; } +static void handle_ping(const u8 *msg, + struct crypto_state *cs, + const struct channel_id *our_channel_id) +{ + u8 *pong; + + if (!check_ping_make_pong(msg, msg, &pong)) + peer_failed(PEER_FD, cs, our_channel_id, "Bad ping"); + + status_trace("Got ping, sending %s", pong ? + wire_type_name(fromwire_peektype(pong)) + : "nothing"); + + if (pong && !sync_crypto_write(cs, PEER_FD, take(pong))) + status_failed(STATUS_FAIL_PEER_IO, + "Failed writing pong: %s", strerror(errno)); +} + +/* Handle random messages we might get, returning NULL if we handled it. */ +static u8 *read_peer_msg(const tal_t *ctx, + struct crypto_state *cs, + const struct channel_id *our_channel_id) +{ + u8 *msg; + struct channel_id channel_id; + + msg = sync_crypto_read(ctx, cs, PEER_FD); + if (!msg) + status_failed(STATUS_FAIL_PEER_IO, + "Failed reading from peer: %s", strerror(errno)); + + if (is_gossip_msg(msg)) { + /* Forward to gossip daemon */ + wire_sync_write(GOSSIP_FD, take(msg)); + return NULL; + } + + if (fromwire_peektype(msg) == WIRE_PING) { + handle_ping(msg, cs, our_channel_id); + return tal_free(msg); + } + + if (fromwire_peektype(msg) == WIRE_ERROR) { + struct channel_id chanid; + char *err = sanitize_error(msg, msg, &chanid); + + /* BOLT #1: + * + * The channel is referred to by `channel_id`, unless + * `channel_id` is 0 (i.e. all bytes are 0), in which + * case it refers to all channels. + * ... + + * The receiving node: + * - upon receiving `error`: + * - MUST fail the channel referred to by the error + * message. + * - if no existing channel is referred to by the + * message: + * - MUST ignore the message. + */ + if (channel_id_is_all(&chanid) + || structeq(&chanid, our_channel_id)) { + status_failed(STATUS_FAIL_PEER_BAD, + "Received ERROR %s", err); + } + return tal_free(msg); + } + + /* They're talking about a different channel? */ + if (extract_channel_id(msg, &channel_id) + && !structeq(&channel_id, our_channel_id)) { + status_trace("Rejecting %s for unknown channel_id %s", + wire_type_name(fromwire_peektype(msg)), + type_to_string(msg, struct channel_id, + &channel_id)); + sync_crypto_write(cs, PEER_FD, + take(towire_errorfmt(msg, &channel_id, + "Multiple channels" + " unsupported"))); + return tal_free(msg); + } + + return msg; +} + static void do_reconnect(struct crypto_state *cs, const struct channel_id *channel_id, const u64 next_index[NUM_SIDES], @@ -108,17 +197,8 @@ static void do_reconnect(struct crypto_state *cs, status_failed(STATUS_FAIL_PEER_IO, "Failed writing reestablish: %s", strerror(errno)); -again: - msg = sync_crypto_read(tmpctx, cs, PEER_FD); - if (!msg) - status_failed(STATUS_FAIL_PEER_IO, - "Failed reading reestablish: %s", strerror(errno)); - - if (is_gossip_msg(msg)) { - if (!wire_sync_write(GOSSIP_FD, take(msg))) - status_failed(STATUS_FAIL_GOSSIP_IO, "Writing gossip"); - goto again; - } + /* Wait for them to say something interesting */ + while ((msg = read_peer_msg(tmpctx, cs, channel_id)) == NULL); if (!fromwire_channel_reestablish(msg, NULL, &their_channel_id, &next_local_commitment_number, @@ -263,17 +343,8 @@ int main(int argc, char *argv[]) break; again: - msg = sync_crypto_read(tmpctx, &cs, PEER_FD); - if (!msg) - status_failed(STATUS_FAIL_PEER_IO, "Reading input"); - - /* We don't send gossip at this stage, but we can recv it */ - if (is_gossip_msg(msg)) { - if (!wire_sync_write(GOSSIP_FD, take(msg))) - status_failed(STATUS_FAIL_GOSSIP_IO, - "Writing gossip"); - goto again; - } + /* Wait for them to say something interesting */ + while ((msg = read_peer_msg(tmpctx, &cs, &channel_id)) == NULL); /* BOLT #2: * From 3feee7a4ab2c1dfc265710af6785f8b51ede1c7a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Jan 2018 11:14:08 +1030 Subject: [PATCH 3/5] openingd: handle unexpected messages better. In particular, this one didn't handle them trying to open a different channel at the same time. Again, deliberately very similar, but unfortunately different enough that sharing is awkward. Signed-off-by: Rusty Russell --- openingd/opening.c | 149 ++++++++++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 64 deletions(-) diff --git a/openingd/opening.c b/openingd/opening.c index f9d0deab9e5d..dd3da50dd161 100644 --- a/openingd/opening.c +++ b/openingd/opening.c @@ -203,60 +203,90 @@ static void temporary_channel_id(struct channel_id *channel_id) channel_id->id[i] = pseudorand(256); } -/* We have to handle random gossip message and pings. */ -static u8 *read_next_peer_msg(struct state *state, const tal_t *ctx) +static void handle_ping(const u8 *msg, + struct crypto_state *cs, + const struct channel_id *our_channel_id) { - for (;;) { - u8 *msg = sync_crypto_read(ctx, &state->cs, PEER_FD); - if (!msg) - return NULL; - - if (fromwire_peektype(msg) == WIRE_PING) { - u8 *pong; - if (!check_ping_make_pong(ctx, msg, &pong)) { - status_trace("Bad ping message"); - return tal_free(msg); - } - if (pong && !sync_crypto_write(&state->cs, PEER_FD, - take(pong))) - status_failed(STATUS_FAIL_PEER_IO, - "Sending pong"); - } else if (is_gossip_msg(msg)) { - /* We relay gossip to gossipd, but don't relay from */ - if (!wire_sync_write(GOSSIP_FD, take(msg))) - status_failed(STATUS_FAIL_PEER_IO, - "Relaying gossip message"); - } else if (fromwire_peektype(msg) == WIRE_ERROR) { - struct channel_id chanid; - char *err = sanitize_error(msg, msg, &chanid); - - /* BOLT #1: - * - * The channel is referred to by `channel_id`, unless - * `channel_id` is 0 (i.e. all bytes are 0), in which - * case it refers to all channels. - * ... - - * The receiving node: - * - upon receiving `error`: - * - MUST fail the channel referred to by the error - * message. - * - if no existing channel is referred to by the - * message: - * - MUST ignore the message. - */ - if (channel_id_is_all(&chanid)) - peer_failed(PEER_FD, &state->cs, - &state->channel_id, - "Error packet: %s", err); - - if (structeq(&chanid, &state->channel_id)) - negotiation_failed(state, false, - "Error packet: %s", err); - } else { - return msg; - } + u8 *pong; + + if (!check_ping_make_pong(msg, msg, &pong)) + peer_failed(PEER_FD, cs, our_channel_id, "Bad ping"); + + status_trace("Got ping, sending %s", pong ? + wire_type_name(fromwire_peektype(pong)) + : "nothing"); + + if (pong && !sync_crypto_write(cs, PEER_FD, take(pong))) + status_failed(STATUS_FAIL_PEER_IO, + "Failed writing pong: %s", strerror(errno)); +} + +/* Handle random messages we might get, returning NULL if we handled it. */ +static u8 *read_peer_msg(struct state *state) +{ + u8 *msg; + struct channel_id channel_id; + + msg = sync_crypto_read(state, &state->cs, PEER_FD); + if (!msg) + status_failed(STATUS_FAIL_PEER_IO, + "Failed reading from peer: %s", strerror(errno)); + + if (is_gossip_msg(msg)) { + /* Forward to gossip daemon */ + wire_sync_write(GOSSIP_FD, take(msg)); + return NULL; } + + if (fromwire_peektype(msg) == WIRE_PING) { + handle_ping(msg, &state->cs, &state->channel_id); + return tal_free(msg); + } + + if (fromwire_peektype(msg) == WIRE_ERROR) { + struct channel_id chanid; + char *err = sanitize_error(msg, msg, &chanid); + + /* BOLT #1: + * + * The channel is referred to by `channel_id`, unless + * `channel_id` is 0 (i.e. all bytes are 0), in which + * case it refers to all channels. + * ... + + * The receiving node: + * - upon receiving `error`: + * - MUST fail the channel referred to by the error + * message. + * - if no existing channel is referred to by the + * message: + * - MUST ignore the message. + */ + if (channel_id_is_all(&chanid)) + status_failed(STATUS_FAIL_PEER_BAD, + "Received ERROR %s", err); + else if (structeq(&chanid, &state->channel_id)) + negotiation_failed(state, false, + "Error packet: %s", err); + + return tal_free(msg); + } + + /* They're talking about a different channel? */ + if (extract_channel_id(msg, &channel_id) + && !structeq(&channel_id, &state->channel_id)) { + status_trace("Rejecting %s for unknown channel_id %s", + wire_type_name(fromwire_peektype(msg)), + type_to_string(msg, struct channel_id, + &channel_id)); + sync_crypto_write(&state->cs, PEER_FD, + take(towire_errorfmt(msg, &channel_id, + "Multiple channels" + " unsupported"))); + return tal_free(msg); + } + + return msg; } static u8 *funder_channel(struct state *state, @@ -323,10 +353,7 @@ static u8 *funder_channel(struct state *state, state->remoteconf = tal(state, struct channel_config); - msg = read_next_peer_msg(state, state); - if (!msg) - status_failed(STATUS_FAIL_PEER_IO, - "Reading accept_channel: %s", strerror(errno)); + while ((msg = read_peer_msg(state)) == NULL); /* BOLT #2: * @@ -447,10 +474,7 @@ static u8 *funder_channel(struct state *state, * commitment transaction, so they can broadcast it knowing they can * redeem their funds if they need to. */ - msg = read_next_peer_msg(state, state); - if (!msg) - status_failed(STATUS_FAIL_PEER_IO, - "Writing funding_signed: %s", strerror(errno)); + while ((msg = read_peer_msg(state)) == NULL); if (!fromwire_funding_signed(msg, NULL, &id_in, &sig)) peer_failed(PEER_FD, &state->cs, &state->channel_id, @@ -633,10 +657,7 @@ static u8 *fundee_channel(struct state *state, status_failed(STATUS_FAIL_PEER_IO, "Writing accept_channel: %s", strerror(errno)); - msg = read_next_peer_msg(state, state); - if (!msg) - status_failed(STATUS_FAIL_PEER_IO, - "Reading funding_created: %s", strerror(errno)); + while ((msg = read_peer_msg(state)) == NULL); if (!fromwire_funding_created(msg, NULL, &id_in, &state->funding_txid, From 402d27d6f5be0154cd577e1bacc30d358e022017 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Jan 2018 11:14:08 +1030 Subject: [PATCH 4/5] subd: use peer log for messages (if any). This makes much more sense when you ask for a specific peer's log. Also, we put the peerid rather than pid (). Signed-off-by: Rusty Russell --- lightningd/peer_control.c | 6 ++---- lightningd/subd.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/lightningd/peer_control.c b/lightningd/peer_control.c index 7cd90e090910..92ef7b23afcb 100644 --- a/lightningd/peer_control.c +++ b/lightningd/peer_control.c @@ -699,12 +699,10 @@ static void copy_to_parent_log(const char *prefix, const char *str, struct peer *peer) { - const char *idstr = type_to_string(peer, struct pubkey, &peer->id); if (continued) - log_add(peer->ld->log, "Peer %s: ... %s", idstr, str); + log_add(peer->ld->log, "%s ... %s", prefix, str); else - log_(peer->ld->log, level, "Peer %s: %s", idstr, str); - tal_free(idstr); + log_(peer->ld->log, level, "%s %s", prefix, str); } void populate_peer(struct lightningd *ld, struct peer *peer) diff --git a/lightningd/subd.c b/lightningd/subd.c index 92c00c264950..6155b8e9ecd5 100644 --- a/lightningd/subd.c +++ b/lightningd/subd.c @@ -625,7 +625,16 @@ static struct subd *new_subd(struct lightningd *ld, return tal_free(sd); } sd->ld = ld; - sd->log = new_log(sd, ld->log_book, "%s(%u):", name, sd->pid); + if (peer) { + /* FIXME: Use minimal unique pubkey prefix for logs! */ + const char *idstr = type_to_string(peer, struct pubkey, + &peer->id); + sd->log = new_log(sd, peer->log_book, "%s(%s):", name, idstr); + tal_free(idstr); + } else { + sd->log = new_log(sd, ld->log_book, "%s(%u):", name, sd->pid); + } + sd->name = name; sd->must_not_exit = false; sd->msgname = msgname; From 04e16b3279ab520003303bac9415350d52eb1c27 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Jan 2018 16:59:02 +1030 Subject: [PATCH 5/5] log: block reporting on minor memleak. Exception: Node /tmp/lightning-t5gxc6gs/test_closing_different_fees/lightning-2/ has memory leaks: [{'value': '0x55caa0a0b8d0', 'label': 'ccan/ccan/tal/str/str.c:90:char[]', 'backtrace': ['ccan/ccan/tal/tal.c:467 (tal_alloc_)', 'ccan/ccan/tal/tal.c:496 (tal_alloc_arr_)', 'ccan/ccan/tal/str/str.c:90 (tal_vfmt)', 'lightningd/log.c:131 (new_log)', 'lightningd/subd.c:632 (new_subd)', 'lightningd/subd.c:686 (new_peer_subd)', 'lightningd/peer_control.c:2487 (peer_accept_channel)', 'lightningd/peer_control.c:674 (peer_sent_nongossip)', 'lightningd/gossip_control.c:55 (peer_nongossip)', 'lightningd/gossip_control.c:142 (gossip_msg)', 'lightningd/subd.c:477 (sd_msg_read)', 'lightningd/subd.c:319 (read_fds)', 'ccan/ccan/io/io.c:59 (next_plan)', 'ccan/ccan/io/io.c:387 (do_plan)', 'ccan/ccan/io/io.c:397 (io_ready)', 'ccan/ccan/io/poll.c:305 (io_loop)', 'lightningd/lightningd.c:347 (main)', '(null):0 ((null))', '(null):0 ((null))', '(null):0 ((null))'], 'parents': ['lightningd/log.c:103:struct log_book', 'lightningd/lightningd.c:43:struct lightningd']}] Technically, true, but we save more memory by sharing the prefix pointer than we lose by leaking it. However, we'd ideally refcount so it's freed if the log is freed and all the entries using it are pruned from the log book. Signed-off-by: Rusty Russell --- lightningd/log.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lightningd/log.c b/lightningd/log.c index 6b731ad41c09..ad288c21b6d2 100644 --- a/lightningd/log.c +++ b/lightningd/log.c @@ -128,7 +128,8 @@ new_log(const tal_t *ctx, struct log_book *record, const char *fmt, ...) log->lr = record; va_start(ap, fmt); /* log->lr owns this, since its entries keep a pointer to it. */ - log->prefix = tal_vfmt(log->lr, fmt, ap); + /* FIXME: Refcount this! */ + log->prefix = notleak(tal_vfmt(log->lr, fmt, ap)); va_end(ap); return log;