Merge pull request #2412 from EspressoSystems/ed/liveness_check_fix
check liveness check even if we cannot find the parent in storage
elliedavidson authored Jan 16, 2024
2 parents debf1c0 + 862e5f8 commit 9e02a5a
Showing 8 changed files with 53 additions and 21 deletions.
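
The behavioral change is concentrated in crates/task-impls/src/consensus.rs: when a proposal's parent leaf cannot be found in storage, the replica previously logged an error and returned without voting; with this change it still evaluates the liveness check and may vote (and even re-propose) on that path alone. Below is a minimal, self-contained sketch of that decision rule; the names (ViewNumber, VoteDecision, should_vote and its parameters) are illustrative assumptions, not the HotShot API.

// Illustrative sketch of the voting rule this commit changes, not the actual
// HotShot code. When the parent is missing, the usual safety check cannot run,
// but the replica may still vote if the liveness check passes, i.e. the
// proposal's justify QC is from a view newer than the locked view.

type ViewNumber = u64;

#[derive(Debug, PartialEq)]
enum VoteDecision {
    /// Either the safety check or the liveness check passed.
    Vote,
    /// Neither check passed; do not vote.
    NoVote,
}

/// `parent_found`: whether the justify QC's leaf was found in saved leaves.
/// `safety_check`: result of the usual "extends the locked branch" check,
/// only meaningful when the parent is available.
fn should_vote(
    parent_found: bool,
    safety_check: bool,
    justify_qc_view: ViewNumber,
    locked_view: ViewNumber,
) -> VoteDecision {
    // Liveness check: the justify QC is newer than the locked view.
    let liveness_check = justify_qc_view > locked_view;

    if !parent_found {
        // Before this commit the replica returned early here and never voted.
        // After this commit it still votes when the liveness check succeeds.
        return if liveness_check {
            VoteDecision::Vote
        } else {
            VoteDecision::NoVote
        };
    }

    // Parent available: vote if either check passes (unchanged behavior).
    if safety_check || liveness_check {
        VoteDecision::Vote
    } else {
        VoteDecision::NoVote
    }
}

fn main() {
    // Parent missing, justify QC (view 11) newer than locked view (view 9): vote.
    assert_eq!(should_vote(false, false, 11, 9), VoteDecision::Vote);
    // Parent missing, justify QC not newer than locked view: no vote.
    assert_eq!(should_vote(false, false, 9, 9), VoteDecision::NoVote);
}
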
2 changes: 1 addition & 1 deletion crates/hotshot/examples/infra/mod.rs
@@ -413,7 +413,7 @@ pub trait RunDA<
block_size,
} => {
// this might be a obob
if let Some(leaf) = leaf_chain.get(0) {
if let Some(leaf) = leaf_chain.first() {
info!("Decide event for leaf: {}", *leaf.view_number);

let new_anchor = leaf.view_number;
3 changes: 1 addition & 2 deletions crates/hotshot/src/traits/networking.rs
@@ -17,8 +17,7 @@ use std::{
use custom_debug::Debug;
use hotshot_types::traits::metrics::{Counter, Gauge, Histogram, Label, Metrics, NoMetrics};
pub use hotshot_types::traits::network::{
ChannelSendSnafu, CouldNotDeliverSnafu, FailedToDeserializeSnafu, FailedToSerializeSnafu,
NetworkError, NetworkReliability, NoSuchNodeSnafu, ShutDownSnafu,
FailedToSerializeSnafu, NetworkError, NetworkReliability,
};

/// Contains several `NetworkingMetrics` that we're interested in from the networking interfaces
1 change: 1 addition & 0 deletions crates/hotshot/src/traits/networking/combined_network.rs
@@ -44,6 +44,7 @@ struct Cache {
/// The maximum number of items to store in the cache
capacity: usize,
/// The cache itself
#[allow(clippy::struct_field_names)]
cache: HashSet<u64>,
/// The hashes of the messages in the cache, in order of insertion
hashes: Vec<u64>,
@@ -65,6 +65,7 @@ pub struct Cache {
config: Config,

/// the cache for records (key -> value)
#[allow(clippy::struct_field_names)]
cache: Arc<DashMap<Vec<u8>, Vec<u8>>>,
/// the expiries for the dht cache, in order (expiry time -> key)
expiries: Arc<RwLock<BTreeMap<SystemTime, Vec<u8>>>>,
4 changes: 1 addition & 3 deletions crates/libp2p-networking/src/network/node.rs
@@ -5,9 +5,7 @@ pub use self::{
config::{
MeshParams, NetworkNodeConfig, NetworkNodeConfigBuilder, NetworkNodeConfigBuilderError,
},
handle::{
network_node_handle_error, NetworkNodeHandle, NetworkNodeHandleError, NetworkNodeReceiver,
},
handle::{network_node_handle_error, NetworkNodeHandle, NetworkNodeHandleError},
};

use super::{
2 changes: 1 addition & 1 deletion crates/libp2p-networking/tests/common/mod.rs
@@ -189,7 +189,7 @@ pub async fn spin_up_swarms<S: Debug + Default>(
.map(|(a, b)| (Some(*a), b.clone()))
.collect::<Vec<_>>()
);

#[allow(clippy::unused_enumerate_index)]
for (_idx, handle) in handles[0..num_of_nodes].iter().enumerate() {
let to_share = bootstrap_addrs.clone();
handle
1 change: 1 addition & 0 deletions crates/libp2p-networking/tests/counter.rs
@@ -371,6 +371,7 @@ async fn run_request_response_increment_all(
requestee_handle.modify_state(|s| *s += 1).await;
info!("RR REQUESTEE IS {:?}", requestee_handle.peer_id());
let mut futs = Vec::new();
#[allow(clippy::unused_enumerate_index)]
for (_i, h) in handles.iter().enumerate() {
if h.lookup_pid(requestee_handle.peer_id()).await.is_err() {
error!("ERROR LOOKING UP REQUESTEE ADDRS");
60 changes: 46 additions & 14 deletions crates/task-impls/src/consensus.rs
@@ -495,8 +495,6 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, A: ConsensusApi<TYPES, I> +
// NOTE: We could update our view with a valid TC but invalid QC, but that is not what we do here
self.update_view(view).await;

self.current_proposal = Some(proposal.data.clone());

let consensus = self.consensus.upgradable_read().await;

// Construct the leaf.
@@ -509,10 +507,15 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, A: ConsensusApi<TYPES, I> +
.cloned()
};

//
let mut consensus = RwLockUpgradableReadGuard::upgrade(consensus).await;

if justify_qc.get_view_number() > consensus.high_qc.view_number {
debug!("Updating high QC");
consensus.high_qc = justify_qc.clone();
}

// Justify qc's leaf commitment is not the same as the parent's leaf commitment, but it should be (in this case)
let Some(parent) = parent else {
// If no parent then just update our state map and return. We will not vote.
error!(
"Proposal's parent missing from storage with commitment: {:?}",
justify_qc.get_data().leaf_commit
@@ -521,14 +524,13 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, A: ConsensusApi<TYPES, I> +
view_number: view,
justify_qc: justify_qc.clone(),
parent_commitment: justify_qc.get_data().leaf_commit,
block_header: proposal.data.block_header,
block_header: proposal.data.block_header.clone(),
block_payload: None,
rejected: Vec::new(),
timestamp: time::OffsetDateTime::now_utc().unix_timestamp_nanos(),
proposer_id: sender.to_bytes(),
};

let mut consensus = RwLockUpgradableReadGuard::upgrade(consensus).await;
consensus.state_map.insert(
view,
View {
@@ -539,14 +541,48 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, A: ConsensusApi<TYPES, I> +
);
consensus.saved_leaves.insert(leaf.commit(), leaf.clone());

// If we are missing the parent from storage, the safety check will fail. But we can
// still vote if the liveness check succeeds.
let liveness_check = justify_qc.get_view_number() > consensus.locked_view;

let high_qc = consensus.high_qc.clone();
let locked_view = consensus.locked_view;


drop(consensus);

if liveness_check {
self.current_proposal = Some(proposal.data.clone());
let new_view = proposal.data.view_number + 1;

// This is for the case where we form a QC but have not yet seen the previous proposal ourselves
let should_propose = self.quorum_membership.get_leader(new_view)
== self.public_key
&& high_qc.view_number
== self.current_proposal.clone().unwrap().view_number;
let qc = high_qc.clone();
if should_propose {
debug!(
"Attempting to publish proposal after voting; now in view: {}",
*new_view
);
self.publish_proposal_if_able(qc.view_number + 1, None)
.await;
}
if self.vote_if_able().await {
self.current_proposal = None;
}
}
warn!("Failed liveneess check; cannot find parent either\n High QC is {:?} Proposal QC is {:?} Locked view is {:?}", high_qc, proposal.data.clone(), locked_view);

return;
};
let parent_commitment = parent.commit();
let leaf: Leaf<_> = Leaf {
view_number: view,
justify_qc: justify_qc.clone(),
parent_commitment,
block_header: proposal.data.block_header,
block_header: proposal.data.block_header.clone(),
block_payload: None,
rejected: Vec::new(),
timestamp: time::OffsetDateTime::now_utc().unix_timestamp_nanos(),
@@ -589,11 +625,12 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, A: ConsensusApi<TYPES, I> +

// Skip if both saftey and liveness checks fail.
if !safety_check && !liveness_check {
error!("Failed safety check and liveness check");
error!("Failed safety and liveness check \n High QC is {:?} Proposal QC is {:?} Locked view is {:?}", consensus.high_qc, proposal.data.clone(), consensus.locked_view);
return;
}

let high_qc = leaf.justify_qc.clone();
self.current_proposal = Some(proposal.data.clone());

let mut new_anchor_view = consensus.last_decided_view;
let mut new_locked_view = consensus.locked_view;
let mut last_view_number_visited = view;
@@ -679,11 +716,6 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, A: ConsensusApi<TYPES, I> +
HashSet::new()
};

// promote lock here to add proposal to statemap
let mut consensus = RwLockUpgradableReadGuard::upgrade(consensus).await;
if high_qc.view_number > consensus.high_qc.view_number {
consensus.high_qc = high_qc;
}
consensus.state_map.insert(
view,
View {
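
The consensus.rs hunk above also handles the case called out in its comment: a node may already hold a QC for the proposal's view (for example, one it just formed as the next leader) without having seen the previous proposal itself, in which case it should publish the next proposal after voting. A hedged sketch of that condition follows; the names are illustrative assumptions, not the HotShot API.

// Illustrative sketch of the "propose after voting" condition added alongside
// the liveness-only vote path: if this node leads the next view and its high QC
// already sits at the view of the proposal it just processed, it publishes the
// next proposal itself.

type ViewNumber = u64;

fn should_propose_after_vote(
    is_leader_of_next_view: bool,
    high_qc_view: ViewNumber,
    current_proposal_view: ViewNumber,
) -> bool {
    // Mirrors the check in the diff: we hold a QC for the current proposal's
    // view even though we may not have seen the previous proposal ourselves.
    is_leader_of_next_view && high_qc_view == current_proposal_view
}

fn main() {
    // Leader of view 13 holding a high QC for view 12, processing the view-12
    // proposal: it should go ahead and publish the view-13 proposal.
    assert!(should_propose_after_vote(true, 12, 12));
    // High QC lags the proposal's view: do not propose yet.
    assert!(!should_propose_after_vote(true, 11, 12));
}
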
