Skip to content

Commit

Permalink
Add metrics exposing a node's idea of its peers' catchup reliability
Browse files Browse the repository at this point in the history
  • Loading branch information
jbearer committed Dec 7, 2024
1 parent 662e3fd commit 67c3e17
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions builder/src/non_permissioned.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use hotshot_types::{
data::{fake_commitment, ViewNumber},
traits::{
block_contents::{vid_commitment, GENESIS_VID_NUM_STORAGE_NODES},
metrics::NoMetrics,
node_implementation::Versions,
EncodeBytes,
},
Expand Down Expand Up @@ -53,6 +54,7 @@ pub async fn build_instance_state<V: Versions>(
Arc::new(StatePeers::<SequencerApiVersion>::from_urls(
state_peers,
Default::default(),
&NoMetrics,
)),
V::Base::VERSION,
);
Expand Down
6 changes: 3 additions & 3 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ demo *args:
demo-native *args: build
scripts/demo-native {{args}}

build:
build profile="test":
#!/usr/bin/env bash
set -euxo pipefail
# Use the same target dir for both `build` invocations
export CARGO_TARGET_DIR=${CARGO_TARGET_DIR:-target}
cargo build --profile test
cargo build --profile test --manifest-path ./sequencer-sqlite/Cargo.toml
cargo build --profile {{profile}}
cargo build --profile {{profile}} --manifest-path ./sequencer-sqlite/Cargo.toml
demo-native-mp *args: build
scripts/demo-native -f process-compose.yaml -f process-compose-mp.yml {{args}}
Expand Down
2 changes: 2 additions & 0 deletions marketplace-builder/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use hotshot_types::{
data::{fake_commitment, Leaf, ViewNumber},
traits::{
block_contents::{vid_commitment, Transaction as _, GENESIS_VID_NUM_STORAGE_NODES},
metrics::NoMetrics,
node_implementation::{ConsensusTime, NodeType, Versions},
EncodeBytes,
},
Expand Down Expand Up @@ -74,6 +75,7 @@ pub async fn build_instance_state<V: Versions>(
Arc::new(StatePeers::<SequencerApiVersion>::from_urls(
state_peers,
Default::default(),
&NoMetrics,
)),
V::Base::version(),
);
Expand Down
14 changes: 13 additions & 1 deletion sequencer-sqlite/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions sequencer/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1527,6 +1527,7 @@ mod test {
StatePeers::<StaticVersion<0, 1>>::from_urls(
vec![format!("http://localhost:{port}").parse().unwrap()],
Default::default(),
&NoMetrics,
)
}))
.build();
Expand Down Expand Up @@ -1571,6 +1572,7 @@ mod test {
StatePeers::<StaticVersion<0, 1>>::from_urls(
vec![format!("http://localhost:{port}").parse().unwrap()],
Default::default(),
&NoMetrics,
),
&NoMetrics,
test_helpers::STAKE_TABLE_CAPACITY_FOR_TEST,
Expand Down Expand Up @@ -1636,6 +1638,7 @@ mod test {
StatePeers::<StaticVersion<0, 1>>::from_urls(
vec![format!("http://localhost:{port}").parse().unwrap()],
Default::default(),
&NoMetrics,
)
}))
.network_config(TestConfigBuilder::default().l1_url(l1).build())
Expand Down Expand Up @@ -1713,6 +1716,7 @@ mod test {
StatePeers::<StaticVersion<0, 1>>::from_urls(
vec![format!("http://localhost:{port}").parse().unwrap()],
Default::default(),
&NoMetrics,
)
}))
.network_config(TestConfigBuilder::default().l1_url(l1).build())
Expand Down Expand Up @@ -1773,13 +1777,15 @@ mod test {
StatePeers::<SequencerApiVersion>::from_urls(
vec![format!("http://localhost:{port}").parse().unwrap()],
BackoffParams::default(),
&NoMetrics,
)
});

// one of the node has dishonest peer. This list of peers is for node#1
peers[2] = StatePeers::<SequencerApiVersion>::from_urls(
vec![url.clone()],
BackoffParams::default(),
&NoMetrics,
);

let config = TestNetworkConfigBuilder::<NUM_NODES, _, _>::with_num_nodes()
Expand Down Expand Up @@ -1966,6 +1972,7 @@ mod test {
StatePeers::<SequencerApiVersion>::from_urls(
vec![format!("http://localhost:{port}").parse().unwrap()],
Default::default(),
&NoMetrics,
)
}))
.network_config(
Expand Down Expand Up @@ -2139,6 +2146,7 @@ mod test {
StatePeers::<StaticVersion<0, 1>>::from_urls(
vec![format!("http://localhost:{port}").parse().unwrap()],
Default::default(),
&NoMetrics,
)
}))
.network_config(TestConfigBuilder::default().l1_url(l1).build())
Expand Down Expand Up @@ -2203,6 +2211,7 @@ mod test {
let peers = StatePeers::<StaticVersion<0, 1>>::from_urls(
vec!["https://notarealnode.network".parse().unwrap(), url],
Default::default(),
&NoMetrics,
);

// Fetch the config from node 1, a different node than the one running the service.
Expand Down
34 changes: 30 additions & 4 deletions sequencer/src/catchup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@ use espresso_types::{
};
use futures::future::{Future, FutureExt, TryFuture, TryFutureExt};
use hotshot_types::{
data::ViewNumber, network::NetworkConfig, traits::node_implementation::ConsensusTime as _,
data::ViewNumber,
network::NetworkConfig,
traits::{
metrics::{Counter, CounterFamily, Metrics},
node_implementation::ConsensusTime as _,
},
ValidatorConfig,
};
use itertools::Itertools;
Expand All @@ -37,12 +42,20 @@ use crate::{
struct Client<ServerError, ApiVer: StaticVersionType> {
inner: surf_disco::Client<ServerError, ApiVer>,
url: Url,
requests: Arc<Box<dyn Counter>>,
failures: Arc<Box<dyn Counter>>,
}

impl<ApiVer: StaticVersionType> Client<ServerError, ApiVer> {
pub fn new(url: Url) -> Self {
pub fn new(
url: Url,
requests: &(impl CounterFamily + ?Sized),
failures: &(impl CounterFamily + ?Sized),
) -> Self {
Self {
inner: surf_disco::Client::new(url.clone()),
requests: Arc::new(requests.create(vec![url.to_string()])),
failures: Arc::new(failures.create(vec![url.to_string()])),
url,
}
}
Expand Down Expand Up @@ -169,26 +182,39 @@ impl<ApiVer: StaticVersionType> StatePeers<ApiVer> {
for (id, success) in requests {
scores.change_priority_by(&id, |score| {
score.requests += 1;
self.clients[id].requests.add(1);
if !success {
score.failures += 1;
self.clients[id].failures.add(1);
}
});
}

res
}

pub fn from_urls(urls: Vec<Url>, backoff: BackoffParams) -> Self {
pub fn from_urls(
urls: Vec<Url>,
backoff: BackoffParams,
metrics: &(impl Metrics + ?Sized),
) -> Self {
if urls.is_empty() {
panic!("Cannot create StatePeers with no peers");
}

let metrics = metrics.subgroup("catchup".into());
let requests = metrics.counter_family("requests".into(), vec!["peer".into()]);
let failures = metrics.counter_family("request_failures".into(), vec!["peer".into()]);

let scores = urls
.iter()
.enumerate()
.map(|(i, _)| (i, PeerScore::default()))
.collect();
let clients = urls.into_iter().map(Client::new).collect();
let clients = urls
.into_iter()
.map(|url| Client::new(url, &*requests, &*failures))
.collect();

Self {
clients,
Expand Down
10 changes: 7 additions & 3 deletions sequencer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ use hotshot_types::{
light_client::{StateKeyPair, StateSignKey},
signature_key::{BLSPrivKey, BLSPubKey},
traits::{
metrics::Metrics,
metrics::{Metrics, NoMetrics},
network::ConnectedNetwork,
node_implementation::{NodeImplementation, NodeType, Versions},
},
Expand Down Expand Up @@ -312,8 +312,11 @@ pub async fn init_node<P: SequencerPersistence, V: Versions>(
// If we were told to fetch the config from an already-started peer, do so.
(None, Some(peers)) => {
tracing::info!(?peers, "loading network config from peers");
let peers =
StatePeers::<SequencerApiVersion>::from_urls(peers, network_params.catchup_backoff);
let peers = StatePeers::<SequencerApiVersion>::from_urls(
peers,
network_params.catchup_backoff,
&NoMetrics,
);
let config = peers.fetch_config(validator_config.clone()).await?;

tracing::info!(
Expand Down Expand Up @@ -509,6 +512,7 @@ pub async fn init_node<P: SequencerPersistence, V: Versions>(
StatePeers::<SequencerApiVersion>::from_urls(
network_params.state_peers,
network_params.catchup_backoff,
metrics,
),
)
.await,
Expand Down

0 comments on commit 67c3e17

Please sign in to comment.