diff --git a/prdoc/pr_6016.prdoc b/prdoc/pr_6016.prdoc new file mode 100644 index 000000000000..967c3a766068 --- /dev/null +++ b/prdoc/pr_6016.prdoc @@ -0,0 +1,15 @@ +title: Litep2p network backend do not disconnect all peers on SetReservedPeers command + +doc: + - audience: [ Node Dev, Node Operator ] + description: | + Previously, when the `SetReservedPeers` was received, all peers except the new + reserved peers were disconnected. + This PR ensures that previously reserved nodes are kept connected as regular nodes if + enough slots are available. + While at it, this PR excludes reserved peers from the candidates of peers obtained from + the peerstore. + +crates: + - name: sc-network + bump: patch diff --git a/substrate/client/network/src/litep2p/shim/notification/peerset.rs b/substrate/client/network/src/litep2p/shim/notification/peerset.rs index 2fd7920909e3..fb822794ccf0 100644 --- a/substrate/client/network/src/litep2p/shim/notification/peerset.rs +++ b/substrate/client/network/src/litep2p/shim/notification/peerset.rs @@ -88,6 +88,8 @@ const DISCONNECT_ADJUSTMENT: Reputation = Reputation::new(-256, "Peer disconnect const OPEN_FAILURE_ADJUSTMENT: Reputation = Reputation::new(-1024, "Open failure"); /// Is the peer reserved? +/// +/// Regular peers count towards slot allocation. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Reserved { Yes, @@ -118,6 +120,15 @@ pub enum Direction { Outbound(Reserved), } +impl Direction { + fn set_reserved(&mut self, new_reserved: Reserved) { + match self { + Direction::Inbound(ref mut reserved) | Direction::Outbound(ref mut reserved) => + *reserved = new_reserved, + } + } +} + impl From for traits::Direction { fn from(direction: Direction) -> traits::Direction { match direction { @@ -784,7 +795,9 @@ impl Peerset { } /// Calculate how many of the connected peers were counted as normal inbound/outbound peers - /// which is needed to adjust slot counts when new reserved peers are added + /// which is needed to adjust slot counts when new reserved peers are added. + /// + /// If the peer is not already in the [`Peerset`], it is added as a disconnected peer. fn calculate_slot_adjustment<'a>( &'a mut self, peers: impl Iterator, @@ -819,6 +832,26 @@ impl Peerset { }) } + /// Checks if the peer should be disconnected based on the current state of the [`Peerset`] + /// and the provided direction. + /// + /// Note: The role of the peer is not checked. + fn should_disconnect(&self, direction: Direction) -> bool { + match direction { + Direction::Inbound(_) => self.num_in >= self.max_in, + Direction::Outbound(_) => self.num_out >= self.max_out, + } + } + + /// Increment the slot count for given peer. + fn increment_slot(&mut self, direction: Direction) { + match direction { + Direction::Inbound(Reserved::No) => self.num_in += 1, + Direction::Outbound(Reserved::No) => self.num_out += 1, + _ => {}, + } + } + /// Get the number of inbound peers. #[cfg(test)] pub fn num_in(&self) -> usize { @@ -949,8 +982,9 @@ impl Stream for Peerset { }, // set new reserved peers for the protocol // - // current reserved peers not in the new set are disconnected and the new reserved - // peers are scheduled for outbound substreams + // Current reserved peers not in the new set are moved to the regular set of peers + // or disconnected (if there are no slots available). The new reserved peers are + // scheduled for outbound substreams PeersetCommand::SetReservedPeers { peers } => { log::debug!(target: LOG_TARGET, "{}: set reserved peers {peers:?}", self.protocol); @@ -960,39 +994,58 @@ impl Stream for Peerset { // // calculate how many of the previously connected peers were counted as regular // peers and substract these counts from `num_out`/`num_in` + // + // If a reserved peer is not already tracked, it is added as disconnected by + // `calculate_slot_adjustment`. This ensures at the next slot allocation (1sec) + // that we'll try to establish a connection with the reserved peer. let (in_peers, out_peers) = self.calculate_slot_adjustment(peers.iter()); self.num_out -= out_peers; self.num_in -= in_peers; - // add all unknown peers to `self.peers` - peers.iter().for_each(|peer| { - if !self.peers.contains_key(peer) { - self.peers.insert(*peer, PeerState::Disconnected); - } - }); - - // collect all peers who are not in the new reserved set - let peers_to_remove = self - .peers - .iter() - .filter_map(|(peer, _)| (!peers.contains(peer)).then_some(*peer)) - .collect::>(); + // collect all *reserved* peers who are not in the new reserved set + let reserved_peers_maybe_remove = + self.reserved_peers.difference(&peers).cloned().collect::>(); self.reserved_peers = peers; - let peers = peers_to_remove + let peers_to_remove = reserved_peers_maybe_remove .into_iter() .filter(|peer| { match self.peers.remove(&peer) { - Some(PeerState::Connected { direction }) => { - log::trace!( - target: LOG_TARGET, - "{}: close connection to {peer:?}, direction {direction:?}", - self.protocol, - ); - - self.peers.insert(*peer, PeerState::Closing { direction }); - true + Some(PeerState::Connected { mut direction }) => { + // The direction contains a `Reserved::Yes` flag, because this + // is a reserve peer that we want to close. + // The `Reserved::Yes` ensures we don't adjust the slot count + // when the substream is closed. + + let disconnect = + self.reserved_only || self.should_disconnect(direction); + + if disconnect { + log::trace!( + target: LOG_TARGET, + "{}: close connection to previously reserved {peer:?}, direction {direction:?}", + self.protocol, + ); + + self.peers.insert(*peer, PeerState::Closing { direction }); + true + } else { + log::trace!( + target: LOG_TARGET, + "{}: {peer:?} is no longer reserved, move to regular peers, direction {direction:?}", + self.protocol, + ); + + // The peer is kept connected as non-reserved. This will + // further count towards the slot count. + direction.set_reserved(Reserved::No); + self.increment_slot(direction); + + self.peers + .insert(*peer, PeerState::Connected { direction }); + false + } }, // substream might have been opening but not yet fully open when // the protocol request the reserved set to be changed @@ -1021,11 +1074,13 @@ impl Stream for Peerset { log::trace!( target: LOG_TARGET, - "{}: close substreams to {peers:?}", + "{}: close substreams to {peers_to_remove:?}", self.protocol, ); - return Poll::Ready(Some(PeersetNotificationCommand::CloseSubstream { peers })) + return Poll::Ready(Some(PeersetNotificationCommand::CloseSubstream { + peers: peers_to_remove, + })) }, PeersetCommand::AddReservedPeers { peers } => { log::debug!(target: LOG_TARGET, "{}: add reserved peers {peers:?}", self.protocol); @@ -1102,6 +1157,7 @@ impl Stream for Peerset { self.peers.insert(*peer, PeerState::Backoff); None }, + // if there is a rapid change in substream state, the peer may // be canceled when the substream is asked to be closed. // @@ -1122,6 +1178,7 @@ impl Stream for Peerset { self.peers.insert(*peer, PeerState::Canceled { direction }); None }, + // substream to the peer might have failed to open which caused // the peer to be backed off // @@ -1138,6 +1195,7 @@ impl Stream for Peerset { self.peers.insert(*peer, PeerState::Disconnected); None }, + // if a node disconnects, it's put into `PeerState::Closing` // which indicates that `Peerset` wants the substream closed and // has asked litep2p to close it but it hasn't yet received a @@ -1167,125 +1225,70 @@ impl Stream for Peerset { // if there are enough slots, the peer is just converted to // a regular peer and the used slot count is increased and if the // peer cannot be accepted, litep2p is asked to close the substream. - PeerState::Connected { direction } => match direction { - Direction::Inbound(_) => match self.num_in < self.max_in { - true => { - log::trace!( - target: LOG_TARGET, - "{}: {peer:?} converted to regular inbound peer (inbound open)", - self.protocol, - ); - - self.num_in += 1; - self.peers.insert( - *peer, - PeerState::Connected { - direction: Direction::Inbound(Reserved::No), - }, - ); - - None - }, - false => { - self.peers.insert( - *peer, - PeerState::Closing { - direction: Direction::Inbound(Reserved::Yes), - }, - ); - - Some(*peer) - }, - }, - Direction::Outbound(_) => match self.num_out < self.max_out { - true => { - log::trace!( - target: LOG_TARGET, - "{}: {peer:?} converted to regular outbound peer (outbound open)", - self.protocol, - ); - - self.num_out += 1; - self.peers.insert( - *peer, - PeerState::Connected { - direction: Direction::Outbound(Reserved::No), - }, - ); - - None - }, - false => { - self.peers.insert( - *peer, - PeerState::Closing { - direction: Direction::Outbound(Reserved::Yes), - }, - ); - - Some(*peer) - }, - }, + PeerState::Connected { mut direction } => { + let disconnect = self.should_disconnect(direction); + + if disconnect { + log::trace!( + target: LOG_TARGET, + "{}: close connection to removed reserved {peer:?}, direction {direction:?}", + self.protocol, + ); + + self.peers.insert(*peer, PeerState::Closing { direction }); + Some(*peer) + } else { + log::trace!( + target: LOG_TARGET, + "{}: {peer:?} converted to regular peer {peer:?} direction {direction:?}", + self.protocol, + ); + + // The peer is kept connected as non-reserved. This will + // further count towards the slot count. + direction.set_reserved(Reserved::No); + self.increment_slot(direction); + + self.peers + .insert(*peer, PeerState::Connected { direction }); + + None + } }, - PeerState::Opening { direction } => match direction { - Direction::Inbound(_) => match self.num_in < self.max_in { - true => { - log::trace!( - target: LOG_TARGET, - "{}: {peer:?} converted to regular inbound peer (inbound opening)", - self.protocol, - ); - - self.num_in += 1; - self.peers.insert( - *peer, - PeerState::Opening { - direction: Direction::Inbound(Reserved::No), - }, - ); - - None - }, - false => { - self.peers.insert( - *peer, - PeerState::Canceled { - direction: Direction::Inbound(Reserved::Yes), - }, - ); - - None - }, - }, - Direction::Outbound(_) => match self.num_out < self.max_out { - true => { - log::trace!( - target: LOG_TARGET, - "{}: {peer:?} converted to regular outbound peer (outbound opening)", - self.protocol, - ); - - self.num_out += 1; - self.peers.insert( - *peer, - PeerState::Opening { - direction: Direction::Outbound(Reserved::No), - }, - ); - - None - }, - false => { - self.peers.insert( - *peer, - PeerState::Canceled { - direction: Direction::Outbound(Reserved::Yes), - }, - ); - - None - }, - }, + + PeerState::Opening { mut direction } => { + let disconnect = self.should_disconnect(direction); + + if disconnect { + log::trace!( + target: LOG_TARGET, + "{}: cancel substream to disconnect removed reserved peer {peer:?}, direction {direction:?}", + self.protocol, + ); + + self.peers.insert( + *peer, + PeerState::Canceled { + direction + }, + ); + } else { + log::trace!( + target: LOG_TARGET, + "{}: {peer:?} converted to regular peer {peer:?} direction {direction:?}", + self.protocol, + ); + + // The peer is kept connected as non-reserved. This will + // further count towards the slot count. + direction.set_reserved(Reserved::No); + self.increment_slot(direction); + + self.peers + .insert(*peer, PeerState::Opening { direction }); + } + + None }, } }) @@ -1373,12 +1376,17 @@ impl Stream for Peerset { // if the number of outbound peers is lower than the desired amount of outbound peers, // query `PeerStore` and try to get a new outbound candidated. if self.num_out < self.max_out && !self.reserved_only { + // From the candidates offered by the peerstore we need to ignore: + // - all peers that are not in the `PeerState::Disconnected` state (ie they are + // connected / closing) + // - reserved peers since we initiated a connection to them in the previous step let ignore: HashSet = self .peers .iter() .filter_map(|(peer, state)| { (!std::matches!(state, PeerState::Disconnected)).then_some(*peer) }) + .chain(self.reserved_peers.iter().cloned()) .collect(); let peers: Vec<_> = diff --git a/substrate/client/network/src/litep2p/shim/notification/tests/peerset.rs b/substrate/client/network/src/litep2p/shim/notification/tests/peerset.rs index 4f7bfffaa1fc..295a5b441b3e 100644 --- a/substrate/client/network/src/litep2p/shim/notification/tests/peerset.rs +++ b/substrate/client/network/src/litep2p/shim/notification/tests/peerset.rs @@ -794,8 +794,6 @@ async fn set_reserved_peers_but_available_slots() { // when `Peerset` is polled (along with two random peers) and later on `SetReservedPeers` // is called with the common peer and with two new random peers let common_peer = *known_peers.iter().next().unwrap(); - let disconnected_peers = known_peers.iter().skip(1).copied().collect::>(); - assert_eq!(disconnected_peers.len(), 2); let (mut peerset, to_peerset) = Peerset::new( ProtocolName::from("/notif/1"), @@ -809,6 +807,8 @@ async fn set_reserved_peers_but_available_slots() { assert_eq!(peerset.num_in(), 0usize); assert_eq!(peerset.num_out(), 0usize); + // We have less than 25 outbound peers connected. At the next slot allocation we + // query the `peerstore_handle` for more peers to connect to. match peerset.next().await { Some(PeersetNotificationCommand::OpenSubstream { peers: out_peers }) => { assert_eq!(out_peers.len(), 3); @@ -845,29 +845,167 @@ async fn set_reserved_peers_but_available_slots() { .unbounded_send(PeersetCommand::SetReservedPeers { peers: reserved_peers.clone() }) .unwrap(); + // The command `SetReservedPeers` might evict currently reserved peers if + // we don't have enough slot capacity to move them to regular nodes. + // In this case, we did not have previously any reserved peers. match peerset.next().await { - Some(PeersetNotificationCommand::CloseSubstream { peers: out_peers }) => { - assert_eq!(out_peers.len(), 2); + Some(PeersetNotificationCommand::CloseSubstream { peers }) => { + // This ensures we don't disconnect peers when receiving `SetReservedPeers`. + assert_eq!(peers.len(), 0); + }, + event => panic!("invalid event: {event:?}"), + } - for peer in &out_peers { - assert!(disconnected_peers.contains(peer)); + // verify that `Peerset` is aware of five peers, with two of them as outbound. + assert_eq!(peerset.peers().len(), 5); + assert_eq!(peerset.num_in(), 0usize); + assert_eq!(peerset.num_out(), 2usize); + assert_eq!(peerset.reserved_peers().len(), 3usize); + + match peerset.next().await { + Some(PeersetNotificationCommand::OpenSubstream { peers }) => { + assert_eq!(peers.len(), 2); + assert!(!peers.contains(&common_peer)); + + for peer in &peers { + assert!(reserved_peers.contains(peer)); + assert!(peerset.reserved_peers().contains(peer)); assert_eq!( peerset.peers().get(peer), - Some(&PeerState::Closing { direction: Direction::Outbound(Reserved::No) }), + Some(&PeerState::Opening { direction: Direction::Outbound(Reserved::Yes) }), + ); + } + }, + event => panic!("invalid event: {event:?}"), + } + + assert_eq!(peerset.peers().len(), 5); + assert_eq!(peerset.num_in(), 0usize); + assert_eq!(peerset.num_out(), 2usize); + assert_eq!(peerset.reserved_peers().len(), 3usize); +} + +#[tokio::test] +async fn set_reserved_peers_move_previously_reserved() { + sp_tracing::try_init_simple(); + + let peerstore_handle = Arc::new(peerstore_handle_test()); + let known_peers = (0..3) + .map(|_| { + let peer = PeerId::random(); + peerstore_handle.add_known_peer(peer); + peer + }) + .collect::>(); + + // We'll keep this peer as reserved and move the the others to regular nodes. + let common_peer = *known_peers.iter().next().unwrap(); + let moved_peers = known_peers.iter().skip(1).copied().collect::>(); + let known_peers = known_peers.into_iter().collect::>(); + assert_eq!(moved_peers.len(), 2); + + let (mut peerset, to_peerset) = Peerset::new( + ProtocolName::from("/notif/1"), + 25, + 25, + false, + known_peers.clone(), + Default::default(), + peerstore_handle, + ); + assert_eq!(peerset.num_in(), 0usize); + assert_eq!(peerset.num_out(), 0usize); + + // We are not connected to the reserved peers. + match peerset.next().await { + Some(PeersetNotificationCommand::OpenSubstream { peers: out_peers }) => { + assert_eq!(out_peers.len(), 3); + + for peer in &out_peers { + assert_eq!( + peerset.peers().get(&peer), + Some(&PeerState::Opening { direction: Direction::Outbound(Reserved::Yes) }) ); } }, event => panic!("invalid event: {event:?}"), } - // verify that `Peerset` is aware of five peers, with two of them as outbound - // (the two disconnected peers) + // verify all three peers are marked as reserved peers and they don't count towards + // slot allocation. + assert_eq!(peerset.num_in(), 0usize); + assert_eq!(peerset.num_out(), 0usize); + assert_eq!(peerset.reserved_peers().len(), 3usize); + + // report that all substreams were opened + for peer in &known_peers { + assert!(std::matches!( + peerset.report_substream_opened(*peer, traits::Direction::Outbound), + OpenResult::Accept { .. } + )); + assert_eq!( + peerset.peers().get(peer), + Some(&PeerState::Connected { direction: Direction::Outbound(Reserved::Yes) }) + ); + } + + // set reserved peers with `common_peer` being one of them + let reserved_peers = HashSet::from_iter([common_peer, PeerId::random(), PeerId::random()]); + to_peerset + .unbounded_send(PeersetCommand::SetReservedPeers { peers: reserved_peers.clone() }) + .unwrap(); + + // The command `SetReservedPeers` might evict currently reserved peers if + // we don't have enough slot capacity to move them to regular nodes. + // In this case, we have enough capacity. + match peerset.next().await { + Some(PeersetNotificationCommand::CloseSubstream { peers }) => { + // This ensures we don't disconnect peers when receiving `SetReservedPeers`. + assert_eq!(peers.len(), 0); + }, + event => panic!("invalid event: {event:?}"), + } + + // verify that `Peerset` is aware of five peers. + // 2 of the previously reserved peers are moved as outbound regular peers and + // count towards slot allocation. assert_eq!(peerset.peers().len(), 5); assert_eq!(peerset.num_in(), 0usize); assert_eq!(peerset.num_out(), 2usize); + assert_eq!(peerset.reserved_peers().len(), 3usize); + + // Ensure the previously reserved are not regular nodes. + for (peer, state) in peerset.peers() { + // This peer was previously reserved and remained reserved after `SetReservedPeers`. + if peer == &common_peer { + assert_eq!( + state, + &PeerState::Connected { direction: Direction::Outbound(Reserved::Yes) } + ); + continue + } + + // Part of the new reserved nodes. + if reserved_peers.contains(peer) { + assert_eq!(state, &PeerState::Disconnected); + continue + } + + // Previously reserved, but remained connected. + if moved_peers.contains(peer) { + // This was previously `Reseved::Yes` but moved to regular nodes. + assert_eq!( + state, + &PeerState::Connected { direction: Direction::Outbound(Reserved::No) } + ); + continue + } + panic!("Invalid state peer={peer:?} state={state:?}"); + } match peerset.next().await { Some(PeersetNotificationCommand::OpenSubstream { peers }) => { + // Open desires with newly reserved. assert_eq!(peers.len(), 2); assert!(!peers.contains(&common_peer)); @@ -885,7 +1023,103 @@ async fn set_reserved_peers_but_available_slots() { assert_eq!(peerset.peers().len(), 5); assert_eq!(peerset.num_in(), 0usize); - - // two substreams are closing still closing assert_eq!(peerset.num_out(), 2usize); + assert_eq!(peerset.reserved_peers().len(), 3usize); +} + +#[tokio::test] +async fn set_reserved_peers_cannot_move_previously_reserved() { + sp_tracing::try_init_simple(); + + let peerstore_handle = Arc::new(peerstore_handle_test()); + let known_peers = (0..3) + .map(|_| { + let peer = PeerId::random(); + peerstore_handle.add_known_peer(peer); + peer + }) + .collect::>(); + + // We'll keep this peer as reserved and move the the others to regular nodes. + let common_peer = *known_peers.iter().next().unwrap(); + let moved_peers = known_peers.iter().skip(1).copied().collect::>(); + let known_peers = known_peers.into_iter().collect::>(); + assert_eq!(moved_peers.len(), 2); + + // We don't have capacity to move peers. + let (mut peerset, to_peerset) = Peerset::new( + ProtocolName::from("/notif/1"), + 0, + 0, + false, + known_peers.clone(), + Default::default(), + peerstore_handle, + ); + assert_eq!(peerset.num_in(), 0usize); + assert_eq!(peerset.num_out(), 0usize); + + // We are not connected to the reserved peers. + match peerset.next().await { + Some(PeersetNotificationCommand::OpenSubstream { peers: out_peers }) => { + assert_eq!(out_peers.len(), 3); + + for peer in &out_peers { + assert_eq!( + peerset.peers().get(&peer), + Some(&PeerState::Opening { direction: Direction::Outbound(Reserved::Yes) }) + ); + } + }, + event => panic!("invalid event: {event:?}"), + } + + // verify all three peers are marked as reserved peers and they don't count towards + // slot allocation. + assert_eq!(peerset.num_in(), 0usize); + assert_eq!(peerset.num_out(), 0usize); + assert_eq!(peerset.reserved_peers().len(), 3usize); + + // report that all substreams were opened + for peer in &known_peers { + assert!(std::matches!( + peerset.report_substream_opened(*peer, traits::Direction::Outbound), + OpenResult::Accept { .. } + )); + assert_eq!( + peerset.peers().get(peer), + Some(&PeerState::Connected { direction: Direction::Outbound(Reserved::Yes) }) + ); + } + + // set reserved peers with `common_peer` being one of them + let reserved_peers = HashSet::from_iter([common_peer, PeerId::random(), PeerId::random()]); + to_peerset + .unbounded_send(PeersetCommand::SetReservedPeers { peers: reserved_peers.clone() }) + .unwrap(); + + // The command `SetReservedPeers` might evict currently reserved peers if + // we don't have enough slot capacity to move them to regular nodes. + // In this case, we don't have enough capacity. + match peerset.next().await { + Some(PeersetNotificationCommand::CloseSubstream { peers }) => { + // This ensures we don't disconnect peers when receiving `SetReservedPeers`. + assert_eq!(peers.len(), 2); + + for peer in peers { + // Ensure common peer is not disconnected. + assert_ne!(common_peer, peer); + + assert_eq!( + peerset.peers().get(&peer), + Some(&PeerState::Closing { direction: Direction::Outbound(Reserved::Yes) }) + ); + } + }, + event => panic!("invalid event: {event:?}"), + } + + assert_eq!(peerset.num_in(), 0usize); + assert_eq!(peerset.num_out(), 0usize); + assert_eq!(peerset.reserved_peers().len(), 3usize); }