Skip to content

Commit

Permalink
Don't remove nodes if there's no channel_update for a temp failure
Browse files Browse the repository at this point in the history
Previously, we were requiring any `UPDATE` onion errors to include
a `channel_update`, as the spec mandates[1]. If we see an onion
error which is missing one we treat it as a misbehaving node that
isn't behaving according to the spec and simply remove the node.

Sadly, it appears at least some versions of CLN are such nodes, and
opt to not include `channel_update` at all if they're returning a
`temporary_channel_failure`. This causes us to completely remove
CLN nodes from our graph after they fail to forward our HTLC.

While CLN is violating the spec here, there's not a lot of reason
to not allow it, so we go ahead and do so here, treating it simply
as any other failure by letting the scorer handle it.

[1] The spec says `Please note that the channel_update field is
mandatory in messages whose failure_code includes the UPDATE flag`
however doesn't repeat it in the requirements section so its not
crazy that someone missed it when implementing.
  • Loading branch information
TheBlueMatt committed Apr 23, 2023
1 parent bc54441 commit 70c2264
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 29 deletions.
51 changes: 36 additions & 15 deletions lightning/src/ln/onion_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -493,21 +493,28 @@ pub(super) fn process_onion_failure<T: secp256k1::Signing, L: Deref>(secp_ctx: &
} else {
log_trace!(logger, "Failure provided features a channel update without type prefix. Deprecated, but allowing for now.");
}
if let Ok(chan_update) = msgs::ChannelUpdate::read(&mut Cursor::new(&update_slice)) {
let update_opt = msgs::ChannelUpdate::read(&mut Cursor::new(&update_slice));
if update_opt.is_ok() || update_slice.is_empty() {
// if channel_update should NOT have caused the failure:
// MAY treat the channel_update as invalid.
let is_chan_update_invalid = match error_code & 0xff {
7 => false,
11 => amt_to_forward > chan_update.contents.htlc_minimum_msat,
12 => amt_to_forward
.checked_mul(chan_update.contents.fee_proportional_millionths as u64)
11 => update_opt.is_ok() &&
amt_to_forward >
update_opt.as_ref().unwrap().contents.htlc_minimum_msat,
12 => update_opt.is_ok() && amt_to_forward
.checked_mul(update_opt.as_ref().unwrap()
.contents.fee_proportional_millionths as u64)
.map(|prop_fee| prop_fee / 1_000_000)
.and_then(|prop_fee| prop_fee.checked_add(chan_update.contents.fee_base_msat as u64))
.and_then(|prop_fee| prop_fee.checked_add(
update_opt.as_ref().unwrap().contents.fee_base_msat as u64))
.map(|fee_msats| route_hop.fee_msat >= fee_msats)
.unwrap_or(false),
13 => route_hop.cltv_expiry_delta as u16 >= chan_update.contents.cltv_expiry_delta,
13 => update_opt.is_ok() &&
route_hop.cltv_expiry_delta as u16 >=
update_opt.as_ref().unwrap().contents.cltv_expiry_delta,
14 => false, // expiry_too_soon; always valid?
20 => chan_update.contents.flags & 2 == 0,
20 => update_opt.as_ref().unwrap().contents.flags & 2 == 0,
_ => false, // unknown error code; take channel_update as valid
};
if is_chan_update_invalid {
Expand All @@ -518,17 +525,31 @@ pub(super) fn process_onion_failure<T: secp256k1::Signing, L: Deref>(secp_ctx: &
is_permanent: true,
});
} else {
// Make sure the ChannelUpdate contains the expected
// short channel id.
if failing_route_hop.short_channel_id == chan_update.contents.short_channel_id {
short_channel_id = Some(failing_route_hop.short_channel_id);
if let Ok(chan_update) = update_opt {
// Make sure the ChannelUpdate contains the expected
// short channel id.
if failing_route_hop.short_channel_id == chan_update.contents.short_channel_id {
short_channel_id = Some(failing_route_hop.short_channel_id);
} else {
log_info!(logger, "Node provided a channel_update for which it was not authoritative, ignoring.");
}
network_update = Some(NetworkUpdate::ChannelUpdateMessage {
msg: chan_update,
})
} else {
log_info!(logger, "Node provided a channel_update for which it was not authoritative, ignoring.");
network_update = Some(NetworkUpdate::ChannelFailure {
short_channel_id: route_hop.short_channel_id,
is_permanent: false,
});
}
network_update = Some(NetworkUpdate::ChannelUpdateMessage {
msg: chan_update,
})
};
} else {
// If the channel_update had a non-zero length (i.e. was
// present) but we coulnd't read it, treat it as a total
// node failure.
log_info!(logger,
"Failed to read a channel_update of len {} in an onion",
update_slice.len());
}
}
}
Expand Down
25 changes: 11 additions & 14 deletions lightning/src/routing/gossip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
let _ = self.update_channel(msg);
},
NetworkUpdate::ChannelFailure { short_channel_id, is_permanent } => {
let action = if is_permanent { "Removing" } else { "Disabling" };
let action = if is_permanent { "Removing" } else { "Not touching" };
log_debug!(self.logger, "{} channel graph entry for {} due to a payment failure.", action, short_channel_id);
self.channel_failed(short_channel_id, is_permanent);
},
Expand Down Expand Up @@ -1633,9 +1633,12 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
}

/// Marks a channel in the graph as failed if a corresponding HTLC fail was sent.
///
/// If permanent, removes a channel from the local storage.
/// May cause the removal of nodes too, if this was their last channel.
/// If not permanent, makes channels unavailable for routing.
///
/// If not permanent, no action is taken as such a failure likely indicates the node simply
/// lacked liquidity and your scorer should handle this instead.
pub fn channel_failed(&self, short_channel_id: u64, is_permanent: bool) {
#[cfg(feature = "std")]
let current_time_unix = Some(SystemTime::now().duration_since(UNIX_EPOCH).expect("Time must be > 1970").as_secs());
Expand All @@ -1646,9 +1649,12 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
}

/// Marks a channel in the graph as failed if a corresponding HTLC fail was sent.
///
/// If permanent, removes a channel from the local storage.
/// May cause the removal of nodes too, if this was their last channel.
/// If not permanent, makes channels unavailable for routing.
///
/// If not permanent, no action is taken as such a failure likely indicates the node simply
/// lacked liquidity and your scorer should handle this instead.
fn channel_failed_with_time(&self, short_channel_id: u64, is_permanent: bool, current_time_unix: Option<u64>) {
let mut channels = self.channels.write().unwrap();
if is_permanent {
Expand All @@ -1657,15 +1663,6 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
self.removed_channels.lock().unwrap().insert(short_channel_id, current_time_unix);
Self::remove_channel_in_nodes(&mut nodes, &chan, short_channel_id);
}
} else {
if let Some(chan) = channels.get_mut(&short_channel_id) {
if let Some(one_to_two) = chan.one_to_two.as_mut() {
one_to_two.enabled = false;
}
if let Some(two_to_one) = chan.two_to_one.as_mut() {
two_to_one.enabled = false;
}
}
}
}

Expand Down Expand Up @@ -2450,7 +2447,7 @@ pub(crate) mod tests {
assert!(network_graph.read_only().channels().get(&short_channel_id).unwrap().one_to_two.is_some());
}

// Non-permanent closing just disables a channel
// Non-permanent failure doesn't touch the channel at all
{
match network_graph.read_only().channels().get(&short_channel_id) {
None => panic!(),
Expand All @@ -2467,7 +2464,7 @@ pub(crate) mod tests {
match network_graph.read_only().channels().get(&short_channel_id) {
None => panic!(),
Some(channel_info) => {
assert!(!channel_info.one_to_two.as_ref().unwrap().enabled);
assert!(channel_info.one_to_two.as_ref().unwrap().enabled);
}
};
}
Expand Down

0 comments on commit 70c2264

Please sign in to comment.