Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add infrastructure for Subspace networking metrics. #2284

Merged
merged 4 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ pub(super) fn configure_dsn(
bootstrap_addresses: bootstrap_nodes,
kademlia_mode: KademliaMode::Dynamic,
external_addresses,
metrics,
external_metrics: metrics,
disable_bootstrap_on_start,
..default_config
};
Expand Down
2 changes: 1 addition & 1 deletion crates/subspace-networking/examples/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ async fn main() {
let config_1 = Config {
listen_on: vec!["/ip4/0.0.0.0/tcp/0".parse().unwrap()],
allow_non_global_addresses_in_dht: true,
metrics: Some(metrics),
external_metrics: Some(metrics),
..Config::default()
};
let (node_1, mut node_runner_1) = subspace_networking::construct(config_1).unwrap();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::sync::Arc;
use std::time::Duration;
use subspace_metrics::{start_prometheus_metrics_server, RegistryAdapter};
use subspace_networking::libp2p::multiaddr::Protocol;
use subspace_networking::utils::strip_peer_id;
use subspace_networking::utils::{strip_peer_id, SubspaceMetrics};
use subspace_networking::{
peer_id, Config, KademliaMode, KnownPeersManager, KnownPeersManagerConfig,
};
Expand Down Expand Up @@ -155,8 +155,10 @@ async fn main() -> Result<(), Box<dyn Error>> {
// Metrics
let mut metric_registry = Registry::default();
let metrics_endpoints_are_specified = !metrics_endpoints.is_empty();
let metrics =
let external_metrics =
metrics_endpoints_are_specified.then(|| Metrics::new(&mut metric_registry));
let metrics =
metrics_endpoints_are_specified.then(|| SubspaceMetrics::new(&mut metric_registry));

let prometheus_task = metrics_endpoints_are_specified
.then(|| {
Expand Down Expand Up @@ -199,6 +201,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
bootstrap_addresses: bootstrap_nodes,
kademlia_mode: KademliaMode::Static(Mode::Server),
external_addresses,
external_metrics,
metrics,
networking_parameters_registry: known_peers_registry.boxed(),

Expand Down
9 changes: 7 additions & 2 deletions crates/subspace-networking/src/constructor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::protocols::request_response::request_response_factory::RequestHandler
use crate::protocols::reserved_peers::Config as ReservedPeersConfig;
use crate::shared::Shared;
use crate::utils::rate_limiter::RateLimiter;
use crate::utils::strip_peer_id;
use crate::utils::{strip_peer_id, SubspaceMetrics};
use crate::{PeerInfo, PeerInfoConfig};
use backoff::{ExponentialBackoff, SystemClock};
use futures::channel::mpsc;
Expand Down Expand Up @@ -239,7 +239,9 @@ pub struct Config<LocalRecordProvider> {
/// Backoff policy for temporary banning of unreachable peers.
pub temporary_ban_backoff: ExponentialBackoff,
/// Optional external prometheus metrics. None will disable metrics gathering.
pub metrics: Option<Metrics>,
pub external_metrics: Option<Metrics>,
/// Internal prometheus metrics. None will disable metrics gathering.
pub metrics: Option<SubspaceMetrics>,
shamil-gadelshin marked this conversation as resolved.
Show resolved Hide resolved
/// Defines protocol version for the network peers. Affects network partition.
pub protocol_version: String,
/// Specifies a source for peer information. None disables the protocol.
Expand Down Expand Up @@ -379,6 +381,7 @@ where
max_pending_outgoing_connections: SWARM_MAX_PENDING_OUTGOING_CONNECTIONS,
temporary_bans_cache_size: TEMPORARY_BANS_CACHE_SIZE,
temporary_ban_backoff,
external_metrics: None,
metrics: None,
protocol_version,
peer_info_provider,
Expand Down Expand Up @@ -450,6 +453,7 @@ where
max_pending_outgoing_connections,
temporary_bans_cache_size,
temporary_ban_backoff,
external_metrics,
metrics,
protocol_version,
peer_info_provider,
Expand Down Expand Up @@ -639,6 +643,7 @@ where
networking_parameters_registry,
reserved_peers: strip_peer_id(reserved_peers).into_iter().collect(),
temporary_bans,
external_metrics,
metrics,
protocol_version,
general_connection_decision_handler,
Expand Down
23 changes: 18 additions & 5 deletions crates/subspace-networking/src/node_runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::protocols::request_response::request_response_factory::{
};
use crate::shared::{Command, CreatedSubscription, NewPeerInfo, PeerDiscovered, Shared};
use crate::utils::rate_limiter::RateLimiterPermit;
use crate::utils::{is_global_address_or_dns, strip_peer_id, PeerAddress};
use crate::utils::{is_global_address_or_dns, strip_peer_id, PeerAddress, SubspaceMetrics};
use async_mutex::Mutex as AsyncMutex;
use bytes::Bytes;
use event_listener_primitives::HandlerId;
Expand Down Expand Up @@ -126,8 +126,10 @@ where
reserved_peers: HashMap<PeerId, Multiaddr>,
/// Temporarily banned peers.
temporary_bans: Arc<Mutex<TemporaryBans>>,
/// Prometheus metrics.
metrics: Option<Metrics>,
/// External Prometheus metrics.
external_metrics: Option<Metrics>,
/// Subspace Prometheus metrics.
metrics: Option<SubspaceMetrics>,
/// Mapping from specific peer to ip addresses
peer_ip_addresses: HashMap<PeerId, HashSet<IpAddr>>,
/// Defines protocol version for the network peers. Affects network partition.
Expand Down Expand Up @@ -175,7 +177,8 @@ where
pub(crate) networking_parameters_registry: Box<dyn KnownPeersRegistry>,
pub(crate) reserved_peers: HashMap<PeerId, Multiaddr>,
pub(crate) temporary_bans: Arc<Mutex<TemporaryBans>>,
pub(crate) metrics: Option<Metrics>,
pub(crate) external_metrics: Option<Metrics>,
pub(crate) metrics: Option<SubspaceMetrics>,
pub(crate) protocol_version: String,
pub(crate) general_connection_decision_handler: Option<ConnectedPeersHandler>,
pub(crate) special_connection_decision_handler: Option<ConnectedPeersHandler>,
Expand All @@ -198,6 +201,7 @@ where
mut networking_parameters_registry,
reserved_peers,
temporary_bans,
external_metrics,
metrics,
protocol_version,
general_connection_decision_handler,
Expand Down Expand Up @@ -236,6 +240,7 @@ where
networking_parameters_registry,
reserved_peers,
temporary_bans,
external_metrics,
metrics,
peer_ip_addresses: HashMap::new(),
protocol_version,
Expand Down Expand Up @@ -544,6 +549,10 @@ where
if num_established.get() == 1 {
shared.handlers.connected_peer.call_simple(&peer_id);
}

if let Some(metrics) = self.metrics.as_mut() {
metrics.inc_established_connections()
}
}
SwarmEvent::ConnectionClosed {
peer_id,
Expand Down Expand Up @@ -588,6 +597,10 @@ where
if num_established == 0 {
shared.handlers.disconnected_peer.call_simple(&peer_id);
}

if let Some(metrics) = self.metrics.as_mut() {
metrics.dec_established_connections()
};
}
SwarmEvent::OutgoingConnectionError { peer_id, error, .. } => {
if let Some(peer_id) = &peer_id {
Expand Down Expand Up @@ -1550,7 +1563,7 @@ where
}

fn register_event_metrics(&mut self, swarm_event: &SwarmEvent<Event>) {
if let Some(ref mut metrics) = self.metrics {
if let Some(ref mut metrics) = self.external_metrics {
match swarm_event {
SwarmEvent::Behaviour(Event::Ping(ping_event)) => {
metrics.record(ping_event);
Expand Down
35 changes: 35 additions & 0 deletions crates/subspace-networking/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ use event_listener_primitives::Bag;
use futures::future::{Fuse, FusedFuture, FutureExt};
use libp2p::multiaddr::Protocol;
use libp2p::{Multiaddr, PeerId};
use prometheus_client::metrics::gauge::Gauge;
use prometheus_client::registry::Registry;
use std::future::Future;
use std::marker::PhantomData;
use std::num::NonZeroUsize;
Expand All @@ -21,6 +23,39 @@ use tokio::runtime::Handle;
use tokio::task;
use tracing::warn;

const NETWORKING_REGISTRY_PREFIX: &str = "subspace";

/// Metrics for Subspace networking
pub struct SubspaceMetrics {
established_connections: Gauge,
}

impl SubspaceMetrics {
/// Constructor
pub fn new(registry: &mut Registry) -> Self {
let sub_registry = registry.sub_registry_with_prefix(NETWORKING_REGISTRY_PREFIX);

let gauge = Gauge::default();
sub_registry.register(
"established_connections",
"The current number of established connections",
gauge.clone(),
);

Self {
established_connections: gauge,
}
}

pub(crate) fn inc_established_connections(&mut self) {
self.established_connections.inc();
}

pub(crate) fn dec_established_connections(&mut self) {
self.established_connections.dec();
}
}

/// Joins async join handle on drop
pub(crate) struct AsyncJoinOnDrop<T>(Option<Fuse<task::JoinHandle<T>>>);

Expand Down
2 changes: 1 addition & 1 deletion crates/subspace-service/src/dsn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ pub(crate) fn create_dsn_instance(
bootstrap_addresses: dsn_config.bootstrap_nodes,
external_addresses: dsn_config.external_addresses,
kademlia_mode: KademliaMode::Static(Mode::Client),
metrics,
external_metrics: metrics,
disable_bootstrap_on_start: dsn_config.disable_bootstrap_on_start,

..default_networking_config
Expand Down
Loading