diff --git a/zebra-network/src/constants.rs b/zebra-network/src/constants.rs index 4bc3a4fb7ec..8ff7ba9c9f1 100644 --- a/zebra-network/src/constants.rs +++ b/zebra-network/src/constants.rs @@ -245,6 +245,20 @@ pub const ADDR_RESPONSE_LIMIT_DENOMINATOR: usize = 3; pub const MAX_ADDRS_IN_ADDRESS_BOOK: usize = MAX_ADDRS_IN_MESSAGE * (ADDR_RESPONSE_LIMIT_DENOMINATOR + 1); +/// The maximum number of self-connection nonces Zebra will track. +/// +/// This is a tradeoff between: +/// - avoiding peer attacks that maliciously remove nonces by: +/// - providing a fake nonce observed in our outgoing network traffic +/// - making large numbers of connections to clear the nonce set: +/// - 3000 inbound connections takes just under an hour; +/// - 3000 outbound connections take just over 2 minutes, +/// but outbound connection peer selection and reconnections are controlled by Zebra +/// - memory usage: 16 bytes per `Nonce`, 48 kB for 3000 nonces +/// - collision probability: 2^32 has ~50% collision probability, so we use a much lower limit +/// +pub const MAX_SELF_CONNECTION_NONCES: usize = 3000; + /// Truncate timestamps in outbound address messages to this time interval. /// /// ## SECURITY diff --git a/zebra-network/src/peer/handshake.rs b/zebra-network/src/peer/handshake.rs index 80b73c8daee..1833ac81cde 100644 --- a/zebra-network/src/peer/handshake.rs +++ b/zebra-network/src/peer/handshake.rs @@ -2,7 +2,6 @@ use std::{ cmp::min, - collections::HashSet, fmt, future::Future, net::{IpAddr, Ipv4Addr, SocketAddr}, @@ -14,6 +13,7 @@ use std::{ use chrono::{TimeZone, Utc}; use futures::{channel::oneshot, future, pin_mut, FutureExt, SinkExt, StreamExt}; +use indexmap::IndexSet; use tokio::{ io::{AsyncRead, AsyncWrite}, sync::broadcast, @@ -71,7 +71,7 @@ where address_book_updater: tokio::sync::mpsc::Sender, inv_collector: broadcast::Sender, minimum_peer_version: MinimumPeerVersion, - nonces: Arc>>, + nonces: Arc>>, parent_span: Span, } @@ -515,7 +515,7 @@ where let (tx, _rx) = tokio::sync::mpsc::channel(1); tx }); - let nonces = Arc::new(futures::lock::Mutex::new(HashSet::new())); + let nonces = Arc::new(futures::lock::Mutex::new(IndexSet::new())); let user_agent = self.user_agent.unwrap_or_default(); let our_services = self.our_services.unwrap_or_else(PeerServices::empty); let relay = self.relay.unwrap_or(false); @@ -572,7 +572,7 @@ pub async fn negotiate_version( peer_conn: &mut Framed, connected_addr: &ConnectedAddr, config: Config, - nonces: Arc>>, + nonces: Arc>>, user_agent: String, our_services: PeerServices, relay: bool, @@ -588,7 +588,24 @@ where // It is ok to wait for the lock here, because handshakes have a short // timeout, and the async mutex will be released when the task times // out. - let is_unique_nonce = nonces.lock().await.insert(local_nonce); + // + // Do the insert and remove in the same critical section, so we only need + // to remove one nonce at a time. (If they were in separate critical sections, + // the future could be dropped after adding the nonce, but before removing excess nonces.) + let is_unique_nonce = { + let mut locked_nonces = nonces.lock().await; + let is_unique_nonce = locked_nonces.insert(local_nonce); + + // # Security + // + // Limit the amount of memory used for nonces. + // Avoid attacks that replay nonces in order to clear them from the nonce set. + if locked_nonces.len() > constants::MAX_SELF_CONNECTION_NONCES { + locked_nonces.shift_remove_index(0); + } + + is_unique_nonce + }; if !is_unique_nonce { return Err(HandshakeError::LocalDuplicateNonce); } @@ -687,13 +704,12 @@ where // We must wait for the lock before we continue with the connection, to avoid // self-connection. If the connection times out, the async lock will be // released. - let nonce_reuse = { - let mut locked_nonces = nonces.lock().await; - let nonce_reuse = locked_nonces.contains(&remote.nonce); - // Regardless of whether we observed nonce reuse, clean up the nonce set. - locked_nonces.remove(&local_nonce); - nonce_reuse - }; + // + // # Security + // + // We don't remove the nonce here, because peers that observe our network traffic could + // maliciously remove nonces, enabling self-connections. + let nonce_reuse = nonces.lock().await.contains(&remote.nonce); if nonce_reuse { Err(HandshakeError::RemoteNonceReuse)?; }