From 6e0c9069453b70e2169bc8403f77fd2079d610dd Mon Sep 17 00:00:00 2001 From: Jeb Bearer Date: Fri, 6 Dec 2024 17:07:42 -0800 Subject: [PATCH] Add metrics exposing a node's idea of its peers' catchup reliability --- builder/src/non_permissioned.rs | 2 + justfile | 6 +- marketplace-builder/src/builder.rs | 2 + sequencer-sqlite/Cargo.lock | 188 +++++++++++++++++++++-------- sequencer/src/api.rs | 9 ++ sequencer/src/catchup.rs | 34 +++++- sequencer/src/lib.rs | 10 +- 7 files changed, 193 insertions(+), 58 deletions(-) diff --git a/builder/src/non_permissioned.rs b/builder/src/non_permissioned.rs index 9367e60f3..2ee35c03a 100644 --- a/builder/src/non_permissioned.rs +++ b/builder/src/non_permissioned.rs @@ -19,6 +19,7 @@ use hotshot_types::{ data::{fake_commitment, ViewNumber}, traits::{ block_contents::{vid_commitment, GENESIS_VID_NUM_STORAGE_NODES}, + metrics::NoMetrics, node_implementation::Versions, EncodeBytes, }, @@ -53,6 +54,7 @@ pub async fn build_instance_state( Arc::new(StatePeers::::from_urls( state_peers, Default::default(), + &NoMetrics, )), V::Base::VERSION, ); diff --git a/justfile b/justfile index 5d7521a92..e1ea9300b 100644 --- a/justfile +++ b/justfile @@ -10,13 +10,13 @@ demo *args: demo-native *args: build scripts/demo-native {{args}} -build: +build profile="test": #!/usr/bin/env bash set -euxo pipefail # Use the same target dir for both `build` invocations export CARGO_TARGET_DIR=${CARGO_TARGET_DIR:-target} - cargo build --profile test - cargo build --profile test --manifest-path ./sequencer-sqlite/Cargo.toml + cargo build --profile {{profile}} + cargo build --profile {{profile}} --manifest-path ./sequencer-sqlite/Cargo.toml demo-native-mp *args: build scripts/demo-native -f process-compose.yaml -f process-compose-mp.yml {{args}} diff --git a/marketplace-builder/src/builder.rs b/marketplace-builder/src/builder.rs index 4b8d024dc..350503973 100644 --- a/marketplace-builder/src/builder.rs +++ b/marketplace-builder/src/builder.rs @@ -29,6 +29,7 @@ use hotshot_types::{ data::{fake_commitment, Leaf, ViewNumber}, traits::{ block_contents::{vid_commitment, GENESIS_VID_NUM_STORAGE_NODES}, + metrics::NoMetrics, node_implementation::{ConsensusTime, NodeType, Versions}, EncodeBytes, }, @@ -77,6 +78,7 @@ pub async fn build_instance_state( Arc::new(StatePeers::::from_urls( state_peers, Default::default(), + &NoMetrics, )), V::Base::version(), ); diff --git a/sequencer-sqlite/Cargo.lock b/sequencer-sqlite/Cargo.lock index 17a3ef03f..82294d114 100644 --- a/sequencer-sqlite/Cargo.lock +++ b/sequencer-sqlite/Cargo.lock @@ -1046,6 +1046,15 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "backon" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5289ec98f68f28dd809fd601059e6aa908bb8f6108620930828283d4ee23d7" +dependencies = [ + "fastrand 2.2.0", +] + [[package]] name = "backtrace" version = "0.3.74" @@ -1346,13 +1355,31 @@ dependencies = [ "embedded-io", ] +[[package]] +name = "capnp" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bce4e2d41c16cf9188f47ca4d59fdcdca1f33705af211bdb41f0afbd3442f8b5" +dependencies = [ + "embedded-io", +] + [[package]] name = "capnpc" version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c75ba30e0f08582d53c2f3710cf4bb65ff562614b1ba86906d7391adffe189ec" dependencies = [ - "capnp", + "capnp 0.19.8", +] + +[[package]] +name = "capnpc" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aa3d5f01e69ed11656d2c7c47bf34327ea9bfb5c85c7de787fcd7b6c5e45b61" +dependencies = [ + "capnp 0.20.3", ] [[package]] @@ -1378,6 +1405,16 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "cargo_toml" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "599aa35200ffff8f04c1925aa1acc92fa2e08874379ef42e210a80e527e60838" +dependencies = [ + "serde", + "toml 0.7.8", +] + [[package]] name = "cbor4ii" version = "0.3.3" @@ -1401,9 +1438,9 @@ dependencies = [ [[package]] name = "cdn-broker" version = "0.4.0" -source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.5#f6cc7c2fc53eaa52a4901e775d9be7ac820af72c" +source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.7#5406fde54e61058428a7b55e1a98b699f0f606f1" dependencies = [ - "cdn-proto 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.5)", + "cdn-proto 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.7)", "clap", "console-subscriber", "dashmap", @@ -1424,9 +1461,9 @@ dependencies = [ [[package]] name = "cdn-broker" version = "0.4.0" -source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.7#5406fde54e61058428a7b55e1a98b699f0f606f1" +source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.5.1-upgrade#849e7edb32788e42738541ba4d5c64d3e061d86d" dependencies = [ - "cdn-proto 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.7)", + "cdn-proto 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.5.1-upgrade)", "clap", "console-subscriber", "dashmap", @@ -1462,9 +1499,9 @@ dependencies = [ [[package]] name = "cdn-marshal" version = "0.4.0" -source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.5#f6cc7c2fc53eaa52a4901e775d9be7ac820af72c" +source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.7#5406fde54e61058428a7b55e1a98b699f0f606f1" dependencies = [ - "cdn-proto 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.5)", + "cdn-proto 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.7)", "clap", "jf-signature 0.1.0", "tokio", @@ -1475,9 +1512,9 @@ dependencies = [ [[package]] name = "cdn-marshal" version = "0.4.0" -source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.7#5406fde54e61058428a7b55e1a98b699f0f606f1" +source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.5.1-upgrade#849e7edb32788e42738541ba4d5c64d3e061d86d" dependencies = [ - "cdn-proto 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.7)", + "cdn-proto 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.5.1-upgrade)", "clap", "jf-signature 0.1.0", "tokio", @@ -1488,13 +1525,13 @@ dependencies = [ [[package]] name = "cdn-proto" version = "0.4.0" -source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.5#f6cc7c2fc53eaa52a4901e775d9be7ac820af72c" +source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.7#5406fde54e61058428a7b55e1a98b699f0f606f1" dependencies = [ "anyhow", "ark-serialize", "async-trait", - "capnp", - "capnpc", + "capnp 0.19.8", + "capnpc 0.19.0", "derivative", "jf-signature 0.1.0", "kanal", @@ -1506,7 +1543,7 @@ dependencies = [ "quinn", "rand 0.8.5", "rcgen 0.13.1", - "redis 0.25.4", + "redis 0.26.1", "rkyv", "rustls 0.23.17", "rustls-pki-types", @@ -1522,13 +1559,13 @@ dependencies = [ [[package]] name = "cdn-proto" version = "0.4.0" -source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.7#5406fde54e61058428a7b55e1a98b699f0f606f1" +source = "git+https://github.com/EspressoSystems/Push-CDN?tag=0.5.1-upgrade#849e7edb32788e42738541ba4d5c64d3e061d86d" dependencies = [ "anyhow", "ark-serialize", "async-trait", - "capnp", - "capnpc", + "capnp 0.20.3", + "capnpc 0.20.1", "derivative", "jf-signature 0.1.0", "kanal", @@ -1540,7 +1577,7 @@ dependencies = [ "quinn", "rand 0.8.5", "rcgen 0.13.1", - "redis 0.26.1", + "redis 0.27.6", "rkyv", "rustls 0.23.17", "rustls-pki-types", @@ -1650,12 +1687,10 @@ name = "client" version = "0.1.0" dependencies = [ "anyhow", - "contract-bindings", "espresso-types", "ethers", "futures", "jf-merkle-tree", - "sequencer-utils", "surf-disco", "tokio", "tracing", @@ -1776,7 +1811,7 @@ dependencies = [ "rust-ini", "serde", "serde_json", - "toml", + "toml 0.8.19", "yaml-rust2", ] @@ -2764,7 +2799,6 @@ dependencies = [ "fluent-asserter", "futures", "hotshot", - "hotshot-orchestrator", "hotshot-query-service", "hotshot-types", "itertools 0.12.1", @@ -2940,7 +2974,7 @@ dependencies = [ "serde", "serde_json", "syn 2.0.89", - "toml", + "toml 0.8.19", "walkdir", ] @@ -3962,7 +3996,7 @@ dependencies = [ "tagged-base64", "thiserror 1.0.69", "tide-disco", - "toml", + "toml 0.8.19", "vbs", ] @@ -3972,8 +4006,6 @@ version = "0.1.0" dependencies = [ "anyhow", "ark-bn254", - "ark-ec", - "ark-ed-on-bn254", "ark-ff", "ark-poly", "ark-serialize", @@ -4009,7 +4041,7 @@ dependencies = [ "tagged-base64", "tide-disco", "tokio", - "toml", + "toml 0.8.19", "tracing", "tracing-subscriber 0.3.18", "vbs", @@ -4063,7 +4095,7 @@ dependencies = [ "serde", "tide-disco", "tokio", - "toml", + "toml 0.8.19", "tracing", "vbs", ] @@ -4100,7 +4132,7 @@ dependencies = [ "thiserror 1.0.69", "tide-disco", "tokio", - "toml", + "toml 0.8.19", "tracing", "vbs", "vec1", @@ -4145,7 +4177,7 @@ dependencies = [ "tide-disco", "time 0.3.36", "tokio", - "toml", + "toml 0.8.19", "tracing", "tracing-subscriber 0.3.18", "trait-variant", @@ -4211,7 +4243,7 @@ dependencies = [ "tide-disco", "time 0.3.36", "tokio", - "toml", + "toml 0.8.19", "tracing", "url", "vbs", @@ -4361,7 +4393,7 @@ dependencies = [ "thiserror 1.0.69", "time 0.3.36", "tokio", - "toml", + "toml 0.8.19", "tracing", "typenum", "url", @@ -6091,23 +6123,20 @@ dependencies = [ "async-trait", "bincode", "clap", - "cld", "committable", "espresso-types", "futures", "hotshot", "hotshot-events-service", "hotshot-types", - "jf-signature 0.2.0", "rand 0.8.5", "serde", - "serde_json", "sqlx", "surf-disco", "thiserror 1.0.69", "tide-disco", "tokio", - "toml", + "toml 0.8.19", "tracing", "vbs", ] @@ -7187,13 +7216,24 @@ dependencies = [ "uint", ] +[[package]] +name = "priority-queue" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714c75db297bc88a63783ffc6ab9f830698a6705aa0201416931759ef4c8183d" +dependencies = [ + "autocfg", + "equivalent", + "indexmap 2.6.0", +] + [[package]] name = "proc-macro-crate" version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" dependencies = [ - "toml_edit", + "toml_edit 0.22.22", ] [[package]] @@ -7593,9 +7633,9 @@ dependencies = [ [[package]] name = "redis" -version = "0.25.4" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d7a6955c7511f60f3ba9e86c6d02b3c3f144f8c24b288d1f4e18074ab8bbec" +checksum = "e902a69d09078829137b4a5d9d082e0490393537badd7c91a3d69d14639e115f" dependencies = [ "arc-swap", "async-trait", @@ -7604,6 +7644,7 @@ dependencies = [ "futures", "futures-util", "itoa", + "num-bigint", "percent-encoding", "pin-project-lite 0.2.15", "ryu", @@ -7615,23 +7656,24 @@ dependencies = [ [[package]] name = "redis" -version = "0.26.1" +version = "0.27.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e902a69d09078829137b4a5d9d082e0490393537badd7c91a3d69d14639e115f" +checksum = "09d8f99a4090c89cc489a94833c901ead69bfbf3877b4867d5482e321ee875bc" dependencies = [ "arc-swap", "async-trait", + "backon", "bytes 1.8.0", "combine", "futures", "futures-util", + "itertools 0.13.0", "itoa", "num-bigint", "percent-encoding", "pin-project-lite 0.2.15", "ryu", "tokio", - "tokio-retry", "tokio-util", "url", ] @@ -7682,7 +7724,7 @@ dependencies = [ "time 0.3.36", "tokio", "tokio-postgres", - "toml", + "toml 0.8.19", "url", "walkdir", ] @@ -8393,8 +8435,8 @@ dependencies = [ "async-once-cell", "async-trait", "bincode", - "cdn-broker 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.5)", - "cdn-marshal 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.4.5)", + "cdn-broker 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.5.1-upgrade)", + "cdn-marshal 0.4.0 (git+https://github.com/EspressoSystems/Push-CDN?tag=0.5.1-upgrade)", "clap", "client", "committable", @@ -8428,6 +8470,7 @@ dependencies = [ "num_enum", "parking_lot", "portpicker", + "priority-queue", "rand 0.8.5", "rand_chacha 0.3.1", "rand_distr", @@ -8443,8 +8486,9 @@ dependencies = [ "tagged-base64", "tide-disco", "time 0.3.36", + "todo_by", "tokio", - "toml", + "toml 0.8.19", "tracing", "tracing-subscriber 0.3.18", "url", @@ -9703,7 +9747,7 @@ dependencies = [ "tagged-base64", "tide", "tide-websockets", - "toml", + "toml 0.8.19", "tracing", "tracing-distributed", "tracing-futures", @@ -9836,6 +9880,20 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "todo_by" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e25529b77ab1841ec52a4e8356632b417698b53ea9ad62c6be7d04d5e420dc4" +dependencies = [ + "cargo_toml", + "chrono", + "proc-macro2", + "quote", + "semver 1.0.23", + "syn 2.0.89", +] + [[package]] name = "tokio" version = "1.41.1" @@ -9982,6 +10040,18 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit 0.19.15", +] + [[package]] name = "toml" version = "0.8.19" @@ -9991,7 +10061,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit", + "toml_edit 0.22.22", ] [[package]] @@ -10003,6 +10073,19 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap 2.6.0", + "serde", + "serde_spanned", + "toml_datetime", + "winnow 0.5.40", +] + [[package]] name = "toml_edit" version = "0.22.22" @@ -10013,7 +10096,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow", + "winnow 0.6.20", ] [[package]] @@ -10990,6 +11073,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + [[package]] name = "winnow" version = "0.6.20" diff --git a/sequencer/src/api.rs b/sequencer/src/api.rs index eba2108c4..80c9d3a1c 100644 --- a/sequencer/src/api.rs +++ b/sequencer/src/api.rs @@ -1535,6 +1535,7 @@ mod test { StatePeers::>::from_urls( vec![format!("http://localhost:{port}").parse().unwrap()], Default::default(), + &NoMetrics, ) })) .build(); @@ -1579,6 +1580,7 @@ mod test { StatePeers::>::from_urls( vec![format!("http://localhost:{port}").parse().unwrap()], Default::default(), + &NoMetrics, ), &NoMetrics, test_helpers::STAKE_TABLE_CAPACITY_FOR_TEST, @@ -1644,6 +1646,7 @@ mod test { StatePeers::>::from_urls( vec![format!("http://localhost:{port}").parse().unwrap()], Default::default(), + &NoMetrics, ) })) .network_config(TestConfigBuilder::default().l1_url(l1).build()) @@ -1721,6 +1724,7 @@ mod test { StatePeers::>::from_urls( vec![format!("http://localhost:{port}").parse().unwrap()], Default::default(), + &NoMetrics, ) })) .network_config(TestConfigBuilder::default().l1_url(l1).build()) @@ -1781,6 +1785,7 @@ mod test { StatePeers::::from_urls( vec![format!("http://localhost:{port}").parse().unwrap()], BackoffParams::default(), + &NoMetrics, ) }); @@ -1788,6 +1793,7 @@ mod test { peers[2] = StatePeers::::from_urls( vec![url.clone()], BackoffParams::default(), + &NoMetrics, ); let config = TestNetworkConfigBuilder::::with_num_nodes() @@ -1974,6 +1980,7 @@ mod test { StatePeers::::from_urls( vec![format!("http://localhost:{port}").parse().unwrap()], Default::default(), + &NoMetrics, ) })) .network_config( @@ -2143,6 +2150,7 @@ mod test { StatePeers::>::from_urls( vec![format!("http://localhost:{port}").parse().unwrap()], Default::default(), + &NoMetrics, ) })) .network_config(TestConfigBuilder::default().l1_url(l1).build()) @@ -2207,6 +2215,7 @@ mod test { let peers = StatePeers::>::from_urls( vec!["https://notarealnode.network".parse().unwrap(), url], Default::default(), + &NoMetrics, ); // Fetch the config from node 1, a different node than the one running the service. diff --git a/sequencer/src/catchup.rs b/sequencer/src/catchup.rs index a388c8241..67dcc595c 100644 --- a/sequencer/src/catchup.rs +++ b/sequencer/src/catchup.rs @@ -12,7 +12,12 @@ use espresso_types::{ }; use futures::future::{Future, FutureExt, TryFuture, TryFutureExt}; use hotshot_types::{ - data::ViewNumber, network::NetworkConfig, traits::node_implementation::ConsensusTime as _, + data::ViewNumber, + network::NetworkConfig, + traits::{ + metrics::{Counter, CounterFamily, Metrics}, + node_implementation::ConsensusTime as _, + }, ValidatorConfig, }; use itertools::Itertools; @@ -37,12 +42,20 @@ use crate::{ struct Client { inner: surf_disco::Client, url: Url, + requests: Arc>, + failures: Arc>, } impl Client { - pub fn new(url: Url) -> Self { + pub fn new( + url: Url, + requests: &(impl CounterFamily + ?Sized), + failures: &(impl CounterFamily + ?Sized), + ) -> Self { Self { inner: surf_disco::Client::new(url.clone()), + requests: Arc::new(requests.create(vec![url.to_string()])), + failures: Arc::new(failures.create(vec![url.to_string()])), url, } } @@ -169,8 +182,10 @@ impl StatePeers { for (id, success) in requests { scores.change_priority_by(&id, |score| { score.requests += 1; + self.clients[id].requests.add(1); if !success { score.failures += 1; + self.clients[id].failures.add(1); } }); } @@ -178,17 +193,28 @@ impl StatePeers { res } - pub fn from_urls(urls: Vec, backoff: BackoffParams) -> Self { + pub fn from_urls( + urls: Vec, + backoff: BackoffParams, + metrics: &(impl Metrics + ?Sized), + ) -> Self { if urls.is_empty() { panic!("Cannot create StatePeers with no peers"); } + let metrics = metrics.subgroup("catchup".into()); + let requests = metrics.counter_family("requests".into(), vec!["peer".into()]); + let failures = metrics.counter_family("request_failures".into(), vec!["peer".into()]); + let scores = urls .iter() .enumerate() .map(|(i, _)| (i, PeerScore::default())) .collect(); - let clients = urls.into_iter().map(Client::new).collect(); + let clients = urls + .into_iter() + .map(|url| Client::new(url, &*requests, &*failures)) + .collect(); Self { clients, diff --git a/sequencer/src/lib.rs b/sequencer/src/lib.rs index 79475b6ae..25b5d9cc7 100644 --- a/sequencer/src/lib.rs +++ b/sequencer/src/lib.rs @@ -53,7 +53,7 @@ use hotshot_types::{ light_client::{StateKeyPair, StateSignKey}, signature_key::{BLSPrivKey, BLSPubKey}, traits::{ - metrics::Metrics, + metrics::{Metrics, NoMetrics}, network::{ConnectedNetwork, Topic}, node_implementation::{NodeImplementation, NodeType, Versions}, }, @@ -312,8 +312,11 @@ pub async fn init_node( // If we were told to fetch the config from an already-started peer, do so. (None, Some(peers)) => { tracing::info!(?peers, "loading network config from peers"); - let peers = - StatePeers::::from_urls(peers, network_params.catchup_backoff); + let peers = StatePeers::::from_urls( + peers, + network_params.catchup_backoff, + &NoMetrics, + ); let config = peers.fetch_config(validator_config.clone()).await?; tracing::info!( @@ -521,6 +524,7 @@ pub async fn init_node( StatePeers::::from_urls( network_params.state_peers, network_params.catchup_backoff, + metrics, ), ) .await,