Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(metrics): add info nox config metrics #2387

Merged
merged 5 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions crates/chain-listener/src/listener.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,18 @@ impl ChainListener {
async fn refresh_current_commitment_id(&mut self) -> eyre::Result<()> {
match self.chain_connector.get_current_commitment_id().await {
Ok(id) => {
// This is the only place where `current_commitment` is updated, so it should be fine
// to observe the metrics here
if id != self.current_commitment {
if let Some(current_commitment) = &self.current_commitment {
self.observe(|m| {
m.observe_removed_commitment(current_commitment.to_string())
});
}
if let Some(new_commitment) = &id {
self.observe(|m| m.observe_new_commitment(new_commitment.to_string()));
}
}
self.current_commitment = id;
Ok(())
}
Expand Down
27 changes: 27 additions & 0 deletions crates/peer-metrics/src/chain_listener.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use crate::{execution_time_buckets, register};
use prometheus_client::encoding::EncodeLabelSet;
use prometheus_client::metrics::counter::Counter;
use prometheus_client::metrics::exemplar::CounterWithExemplar;
use prometheus_client::metrics::family::Family;
use prometheus_client::metrics::gauge::Gauge;
use prometheus_client::metrics::histogram::Histogram;
use prometheus_client::registry::Registry;
Expand All @@ -30,6 +31,11 @@ struct TxLabel {
tx_hash: String,
}

/// Label set that identifies one commitment by its id.
///
/// Used as the key type of the `current_commitment` gauge family in
/// `ChainListenerMetrics`.
#[derive(EncodeLabelSet, Hash, Clone, Eq, PartialEq, Debug)]
struct CommitmentLabel {
/// Commitment id in string form, as passed to `observe_new_commitment` /
/// `observe_removed_commitment`.
commitment_id: String,
}

#[derive(Clone)]
pub struct ChainListenerMetrics {
// how many request Nox sends to ccp
Expand All @@ -54,6 +60,7 @@ pub struct ChainListenerMetrics {
blocks_processed: Counter,
last_process_block: Gauge,
current_commitment_status: Gauge,
current_commitment: Family<CommitmentLabel, Gauge>,
}

impl ChainListenerMetrics {
Expand Down Expand Up @@ -143,6 +150,13 @@ impl ChainListenerMetrics {
"Current commitment status",
);

let current_commitment = register(
sub_registry,
Family::default(),
"current_commitment",
"Current commitment",
);

Self {
ccp_requests_total,
ccp_replies_total,
Expand All @@ -156,6 +170,7 @@ impl ChainListenerMetrics {
blocks_processed,
last_process_block,
current_commitment_status,
current_commitment,
}
}

Expand Down Expand Up @@ -198,4 +213,16 @@ impl ChainListenerMetrics {
/// Stores the given commitment status in the `current_commitment_status` gauge.
///
/// The gauge holds an `i64`, so the `u64` status is converted with an `as` cast.
pub fn observe_commiment_status(&self, status: u64) {
    let value = status as i64;
    self.current_commitment_status.set(value);
}

/// Marks `commitment_id` as the active commitment.
///
/// Looks up (or creates) the gauge labelled with this commitment id in the
/// `current_commitment` family and sets it to `1`.
pub fn observe_new_commitment(&self, commitment_id: String) {
    let label = CommitmentLabel { commitment_id };
    let gauge = self.current_commitment.get_or_create(&label);
    gauge.set(1);
}

/// Marks `commitment_id` as no longer being the active commitment.
///
/// Looks up (or creates) the gauge labelled with this commitment id in the
/// `current_commitment` family and sets it to `0`. The labelled entry itself is
/// not removed from the family here.
pub fn observe_removed_commitment(&self, commitment_id: String) {
    let label = CommitmentLabel { commitment_id };
    let gauge = self.current_commitment.get_or_create(&label);
    gauge.set(0);
}
}
52 changes: 51 additions & 1 deletion crates/peer-metrics/src/info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,18 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

use prometheus_client::encoding::EncodeLabelSet;
use std::fmt::{Error, Write};

use prometheus_client::encoding::{EncodeLabelSet, EncodeLabelValue, LabelValueEncoder};
use prometheus_client::metrics::info::Info;
use prometheus_client::registry::Registry;

/// Bundle of all static info label sets describing a Nox node.
///
/// Passed by value to `add_info_metrics`, which wraps the parts in `Info`
/// metrics and registers them.
pub struct NoxInfo {
/// Version labels of the node.
pub version: NoxVersion,
/// Chain connector / chain listener configuration labels.
pub chain_info: ChainInfo,
/// VM configuration labels.
pub vm_info: VmInfo,
/// Listening and addressing configuration labels.
pub network_info: NetworkInfo,
/// CPU and timeout configuration labels.
pub system_info: SystemInfo,
}

#[derive(Debug, Clone, Hash, Eq, PartialEq, EncodeLabelSet)]
Expand Down Expand Up @@ -71,6 +76,42 @@ impl ChainInfo {
}
}

/// Label set with system-level configuration of the node.
///
/// Registered by `add_info_metrics` as the `system` info metric.
#[derive(Default, Debug, Clone, Hash, Eq, PartialEq, EncodeLabelSet)]
pub struct SystemInfo {
/// Configured CPU range, in its string form.
pub cpus_range: String,
/// Configured number of system CPUs.
pub system_cpu_count: usize,
/// Particle execution timeout, in seconds.
pub particle_execution_timeout_sec: u64,
/// Maximum spell particle TTL, in seconds.
pub max_spell_particle_ttl_sec: u64,
}

/// Label set with the VM configuration of the node.
///
/// Registered by `add_info_metrics` as the `vm` info metric. The visible caller
/// falls back to `VmInfo::default()` when no VM configuration is present.
#[derive(Default, Debug, Clone, Hash, Eq, PartialEq, EncodeLabelSet)]
pub struct VmInfo {
/// GPU permission flag stored as a number: the visible caller writes `1` when
/// GPU is allowed and `0` otherwise.
pub allow_gpu: u8,
/// Public IP of the VM network, in its string form.
pub public_ip: String,
/// SSH port on the host side.
pub host_ssh_port: u16,
/// SSH port on the VM side.
pub vm_ssh_port: u16,
/// VM network port range, in its string form.
pub port_range: String,
}

/// Label set with the network configuration of the node.
///
/// Registered by `add_info_metrics` as the `network` info metric.
#[derive(Default, Debug, Clone, Hash, Eq, PartialEq, EncodeLabelSet)]
pub struct NetworkInfo {
/// TCP port from the listen configuration.
pub tcp_port: u16,
/// WebSocket port from the listen configuration.
pub websocket_port: u16,
/// Listen IP, in its string form.
pub listen_ip: String,
/// Network the node is configured for, in its string form.
pub network_type: String,
/// Bootstrap node addresses, each in its string form.
pub bootstrap_nodes: Addresses,
/// External address in its string form, if one is configured.
pub external_address: Option<String>,
/// External multiaddresses, each in its string form.
pub external_multiaddresses: Addresses,
}

/// List of addresses that is exported as a single label value.
///
/// The label value is all entries joined with `", "`; an empty list encodes as
/// an empty string.
#[derive(Debug, Clone, Hash, Eq, PartialEq, Default)]
pub struct Addresses(pub Vec<String>);

impl EncodeLabelValue for Addresses {
    /// Writes the comma-separated address list into `encoder`.
    fn encode(&self, encoder: &mut LabelValueEncoder) -> Result<(), Error> {
        let Self(addresses) = self;
        let joined = addresses.join(", ");
        encoder.write_str(&joined)
    }
}

pub fn add_info_metrics(registry: &mut Registry, nox_info: NoxInfo) {
let sub_registry = registry.sub_registry_with_prefix("nox");

Expand All @@ -79,4 +120,13 @@ pub fn add_info_metrics(registry: &mut Registry, nox_info: NoxInfo) {

let chain_info = Info::new(nox_info.chain_info);
sub_registry.register("chain", "Chain Nox Info", chain_info);

let network_info = Info::new(nox_info.network_info);
sub_registry.register("network", "Network Nox Info", network_info);

let vm_info = Info::new(nox_info.vm_info);
sub_registry.register("vm", "VM Nox Info", vm_info);

let system_info = Info::new(nox_info.system_info);
sub_registry.register("system", "System Nox Info", system_info);
}
4 changes: 3 additions & 1 deletion crates/peer-metrics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ pub use connection_pool::ConnectionPoolMetrics;
pub use connectivity::ConnectivityMetrics;
pub use connectivity::Resolution;
pub use dispatcher::DispatcherMetrics;
pub use info::{add_info_metrics, ChainInfo, NoxInfo, NoxVersion};
pub use info::{
add_info_metrics, Addresses, ChainInfo, NetworkInfo, NoxInfo, NoxVersion, SystemInfo, VmInfo,
};
use particle_execution::ParticleParams;
pub use particle_executor::{FunctionKind, ParticleExecutorMetrics, WorkerLabel, WorkerType};
pub use services_metrics::{
Expand Down
18 changes: 18 additions & 0 deletions crates/server-config/src/node_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/

use std::collections::{BTreeMap, HashMap};
use std::fmt::{Display, Formatter};
use std::net::{IpAddr, Ipv4Addr};
use std::ops::Deref;
use std::path::{Path, PathBuf};
Expand Down Expand Up @@ -212,6 +213,17 @@ impl TryFrom<&Network> for StreamProtocol {
}
}

// Human-readable form of `Network`:
// `Dar` -> "dar", `Stage` -> "stage", `Kras` -> "kras",
// `Custom(bytes)` -> "custom:" followed by the hex encoding of `bytes`.
impl Display for Network {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Network::Dar => write!(f, "dar"),
kmd-fl marked this conversation as resolved.
Show resolved Hide resolved
Network::Stage => write!(f, "stage"),
Network::Kras => write!(f, "kras"),
kmd-fl marked this conversation as resolved.
Show resolved Hide resolved
// Custom networks carry raw bytes, so they are rendered as hex.
Network::Custom(bytes) => write!(f, "custom:{}", hex::encode(bytes)),
}
}
}

impl UnresolvedNodeConfig {
pub fn resolve(self, persistent_base_dir: &Path) -> eyre::Result<NodeConfig> {
let bootstrap_nodes = match self.local {
Expand Down Expand Up @@ -682,3 +694,9 @@ fn default_port_range_config() -> PortRangeConfig {
end: 65535,
}
}

// Human-readable form of `PortRangeConfig`: "<start>-<end>".
impl Display for PortRangeConfig {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}-{}", self.start, self.end)
kmd-fl marked this conversation as resolved.
Show resolved Hide resolved
}
}
114 changes: 85 additions & 29 deletions nox/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ use peer_metrics::{
ServicesMetrics, ServicesMetricsBackend, SpellMetrics, VmPoolMetrics,
};
use server_config::system_services_config::ServiceKey;
use server_config::{NetworkConfig, ResolvedConfig};
use server_config::{NetworkConfig, NodeConfig, ResolvedConfig};
use sorcerer::Sorcerer;
use spell_event_bus::api::{PeerEvent, SpellEventBusApi, TriggerEvent};
use spell_event_bus::bus::SpellEventBus;
Expand Down Expand Up @@ -420,34 +420,7 @@ impl<RT: AquaRuntime> Node<RT> {
}),
};
if let Some(m) = metrics_registry.as_mut() {
let mut chain_info = peer_metrics::ChainInfo::default(peer_id.to_string());
if let Some(connector_cfg) = &config.chain_config {
chain_info.http_endpoint = connector_cfg.http_endpoint.clone();
chain_info.diamond_contract_address =
connector_cfg.diamond_contract_address.clone();
chain_info.network_id = connector_cfg.network_id;
chain_info.default_base_fee = connector_cfg.default_base_fee.clone();
chain_info.default_priority_fee = connector_cfg.default_priority_fee.clone();
}

if let Some(chain_listener_cfg) = &config.chain_listener_config {
chain_info.ws_endpoint = chain_listener_cfg.ws_endpoint.clone();
chain_info.proof_poll_period_secs = chain_listener_cfg.proof_poll_period.as_secs();
chain_info.min_batch_count = chain_listener_cfg.min_batch_count;
chain_info.max_batch_count = chain_listener_cfg.max_batch_count;
chain_info.max_proof_batch_size = chain_listener_cfg.max_proof_batch_size;
chain_info.epoch_end_window_secs = chain_listener_cfg.epoch_end_window.as_secs();
}

let nox_info = peer_metrics::NoxInfo {
version: peer_metrics::NoxVersion {
node_version: node_info.node_version.to_string(),
air_version: node_info.air_version.to_string(),
spell_version: node_info.spell_version.to_string(),
},
chain_info,
};

let nox_info = to_nox_info_metrics(&config, &node_info, peer_id.to_string());
peer_metrics::add_info_metrics(m, nox_info);
}
custom_service_functions.extend_one(make_peer_builtin(node_info));
Expand Down Expand Up @@ -866,6 +839,89 @@ fn services_wasm_backend_config(config: &ResolvedConfig) -> WasmBackendConfig {
}
}

/// Builds the static [`peer_metrics::NoxInfo`] label sets from the node configuration.
///
/// * `config` - resolved node configuration the labels are read from.
/// * `node_info` - source of the node, AIR and spell versions.
/// * `peer_id` - peer id in string form, used to seed the chain info.
///
/// Optional configuration sections are handled as follows:
/// * `chain_config` / `chain_listener_config` - when absent, the corresponding
///   `ChainInfo` fields keep the values produced by `ChainInfo::default(peer_id)`.
/// * `vm` - when absent, `VmInfo::default()` is used.
fn to_nox_info_metrics(
    config: &NodeConfig,
    node_info: &NodeInfo,
    peer_id: String,
) -> peer_metrics::NoxInfo {
    use peer_metrics::*;

    let mut chain_info = ChainInfo::default(peer_id);
    if let Some(connector_cfg) = &config.chain_config {
        chain_info.http_endpoint = connector_cfg.http_endpoint.clone();
        chain_info.diamond_contract_address = connector_cfg.diamond_contract_address.clone();
        chain_info.network_id = connector_cfg.network_id;
        chain_info.default_base_fee = connector_cfg.default_base_fee.clone();
        chain_info.default_priority_fee = connector_cfg.default_priority_fee.clone();
    }

    if let Some(chain_listener_cfg) = &config.chain_listener_config {
        chain_info.ws_endpoint = chain_listener_cfg.ws_endpoint.clone();
        chain_info.proof_poll_period_secs = chain_listener_cfg.proof_poll_period.as_secs();
        chain_info.min_batch_count = chain_listener_cfg.min_batch_count;
        chain_info.max_batch_count = chain_listener_cfg.max_batch_count;
        chain_info.max_proof_batch_size = chain_listener_cfg.max_proof_batch_size;
        chain_info.epoch_end_window_secs = chain_listener_cfg.epoch_end_window.as_secs();
    }

    let version = NoxVersion {
        node_version: node_info.node_version.to_string(),
        air_version: node_info.air_version.to_string(),
        spell_version: node_info.spell_version.to_string(),
    };

    let vm_info = config
        .vm
        .as_ref()
        .map(|vm| VmInfo {
            // Label values are numeric here, so the flag is exported as 0/1.
            allow_gpu: u8::from(vm.allow_gpu),
            public_ip: vm.network.public_ip.to_string(),
            host_ssh_port: vm.network.host_ssh_port,
            vm_ssh_port: vm.network.vm_ssh_port,
            port_range: vm.network.port_range.to_string(),
        })
        .unwrap_or_default();

    // The address lists are only read, so they are iterated by reference
    // instead of being cloned first.
    let bootstrap_nodes = Addresses(
        config
            .bootstrap_nodes
            .iter()
            .map(|a| a.to_string())
            .collect(),
    );
    let external_multiaddresses = Addresses(
        config
            .external_multiaddresses
            .iter()
            .map(|a| a.to_string())
            .collect(),
    );

    let network_info = NetworkInfo {
        tcp_port: config.listen_config.tcp_port,
        websocket_port: config.listen_config.websocket_port,
        listen_ip: config.listen_config.listen_ip.to_string(),
        network_type: config.network.to_string(),
        bootstrap_nodes,
        external_address: config.external_address.map(|a| a.to_string()),
        external_multiaddresses,
    };

    let system_info = SystemInfo {
        cpus_range: config.cpus_range.to_string(),
        system_cpu_count: config.system_cpu_count,
        particle_execution_timeout_sec: config.particle_execution_timeout.as_secs(),
        max_spell_particle_ttl_sec: config.max_spell_particle_ttl.as_secs(),
    };

    NoxInfo {
        version,
        chain_info,
        vm_info,
        network_info,
        system_info,
    }
}

#[cfg(test)]
mod tests {
use std::path::PathBuf;
Expand Down
Loading