Skip to content

Commit

Permalink
Store upgrade certificate on decide and load on restart (#3679)
Browse files Browse the repository at this point in the history
  • Loading branch information
ss-es authored Sep 25, 2024
1 parent 9df629c commit 735c71b
Show file tree
Hide file tree
Showing 10 changed files with 82 additions and 11 deletions.
15 changes: 14 additions & 1 deletion crates/example-types/src/storage_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use hotshot_types::{
data::{DaProposal, Leaf, QuorumProposal, VidDisperseShare},
event::HotShotAction,
message::Proposal,
simple_certificate::QuorumCertificate,
simple_certificate::{QuorumCertificate, UpgradeCertificate},
traits::{
node_implementation::{ConsensusTime, NodeType},
storage::Storage,
Expand Down Expand Up @@ -60,6 +60,7 @@ pub struct TestStorage<TYPES: NodeType> {
/// `should_return_err` is a testing utility to validate negative cases.
pub should_return_err: bool,
pub delay_config: DelayConfig,
pub decided_upgrade_certificate: Arc<RwLock<Option<UpgradeCertificate<TYPES>>>>,
}

impl<TYPES: NodeType> Default for TestStorage<TYPES> {
Expand All @@ -68,6 +69,7 @@ impl<TYPES: NodeType> Default for TestStorage<TYPES> {
inner: Arc::new(RwLock::new(TestStorageState::default())),
should_return_err: false,
delay_config: DelayConfig::default(),
decided_upgrade_certificate: Arc::new(RwLock::new(None)),
}
}
}
Expand All @@ -91,6 +93,9 @@ impl<TYPES: NodeType> TestStorage<TYPES> {
pub async fn high_qc_cloned(&self) -> Option<QuorumCertificate<TYPES>> {
self.inner.read().await.high_qc.clone()
}
pub async fn decided_upgrade_certificate(&self) -> Option<UpgradeCertificate<TYPES>> {
self.decided_upgrade_certificate.read().await.clone()
}
pub async fn last_actioned_view(&self) -> TYPES::Time {
self.inner.read().await.action
}
Expand Down Expand Up @@ -183,4 +188,12 @@ impl<TYPES: NodeType> Storage<TYPES> for TestStorage<TYPES> {
Self::run_delay_settings_from_config(&self.delay_config).await;
Ok(())
}
async fn update_decided_upgrade_certificate(
&self,
decided_upgrade_certificate: Option<UpgradeCertificate<TYPES>>,
) -> Result<()> {
*self.decided_upgrade_certificate.write().await = decided_upgrade_certificate;

Ok(())
}
}
10 changes: 8 additions & 2 deletions crates/hotshot/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ use hotshot_types::{
data::{Leaf, QuorumProposal},
event::{EventType, LeafInfo},
message::{DataMessage, Message, MessageKind, Proposal},
simple_certificate::QuorumCertificate,
simple_certificate::{QuorumCertificate, UpgradeCertificate},
traits::{
consensus_api::ConsensusApi,
election::Membership,
Expand Down Expand Up @@ -255,7 +255,8 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> SystemContext<T
let (internal_tx, internal_rx) = internal_channel;
let (mut external_tx, mut external_rx) = external_channel;

let upgrade_lock = UpgradeLock::<TYPES, V>::new();
let upgrade_lock =
UpgradeLock::<TYPES, V>::from_certificate(&initializer.decided_upgrade_certificate);

// Allow overflow on the channel, otherwise sending to it may block.
external_rx.set_overflow(true);
Expand Down Expand Up @@ -959,6 +960,8 @@ pub struct HotShotInitializer<TYPES: NodeType> {
/// than `inner`s view number for the non genesis case because we must have seen higher QCs
/// to decide on the leaf.
high_qc: QuorumCertificate<TYPES>,
/// Previously decided upgrade certificate; this is necessary if an upgrade has happened and we are not restarting with the new version
decided_upgrade_certificate: Option<UpgradeCertificate<TYPES>>,
/// Undecided leafs that were seen, but not yet decided on. These allow a restarting node
/// to vote and propose right away if they didn't miss anything while down.
undecided_leafs: Vec<Leaf<TYPES>>,
Expand Down Expand Up @@ -986,6 +989,7 @@ impl<TYPES: NodeType> HotShotInitializer<TYPES> {
actioned_view: TYPES::Time::new(0),
saved_proposals: BTreeMap::new(),
high_qc,
decided_upgrade_certificate: None,
undecided_leafs: Vec::new(),
undecided_state: BTreeMap::new(),
instance_state,
Expand All @@ -1008,6 +1012,7 @@ impl<TYPES: NodeType> HotShotInitializer<TYPES> {
actioned_view: TYPES::Time,
saved_proposals: BTreeMap<TYPES::Time, Proposal<TYPES, QuorumProposal<TYPES>>>,
high_qc: QuorumCertificate<TYPES>,
decided_upgrade_certificate: Option<UpgradeCertificate<TYPES>>,
undecided_leafs: Vec<Leaf<TYPES>>,
undecided_state: BTreeMap<TYPES::Time, View<TYPES>>,
) -> Self {
Expand All @@ -1020,6 +1025,7 @@ impl<TYPES: NodeType> HotShotInitializer<TYPES> {
actioned_view,
saved_proposals,
high_qc,
decided_upgrade_certificate,
undecided_leafs,
undecided_state,
}
Expand Down
17 changes: 17 additions & 0 deletions crates/hotshot/src/tasks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,23 @@ pub async fn add_consensus_tasks<TYPES: NodeType, I: NodeImplementation<TYPES>,
handle.add_task(DaTaskState::<TYPES, I, V>::create_from(handle).await);
handle.add_task(TransactionTaskState::<TYPES, I, V>::create_from(handle).await);

{
let mut upgrade_certificate_lock = handle
.hotshot
.upgrade_lock
.decided_upgrade_certificate
.write()
.await;

// clear the loaded certificate if it's now outdated
if upgrade_certificate_lock
.as_ref()
.is_some_and(|cert| V::Base::VERSION >= cert.data.new_version)
{
*upgrade_certificate_lock = None;
}
}

// only spawn the upgrade task if we are actually configured to perform an upgrade.
if V::Base::VERSION < V::Upgrade::VERSION {
handle.add_task(UpgradeTaskState::<TYPES, I, V>::create_from(handle).await);
Expand Down
7 changes: 7 additions & 0 deletions crates/task-impls/src/consensus/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,13 @@ pub async fn handle_quorum_proposal_validated<
.await;
*decided_certificate_lock = Some(cert.clone());
drop(decided_certificate_lock);

let _ = task_state
.storage
.write()
.await
.update_decided_upgrade_certificate(Some(cert.clone()))
.await;
}

let mut consensus = task_state.consensus.write().await;
Expand Down
12 changes: 11 additions & 1 deletion crates/task-impls/src/quorum_vote/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ use hotshot_types::{
consensus::OuterConsensus,
data::QuorumProposal,
event::{Event, EventType},
traits::node_implementation::{ConsensusTime, NodeImplementation, NodeType},
traits::{
node_implementation::{ConsensusTime, NodeImplementation, NodeType},
storage::Storage,
},
vote::HasViewNumber,
};
use tracing::{debug, instrument};
Expand Down Expand Up @@ -60,6 +63,13 @@ pub(crate) async fn handle_quorum_proposal_validated<
.await;
*decided_certificate_lock = Some(cert.clone());
drop(decided_certificate_lock);

let _ = task_state
.storage
.write()
.await
.update_decided_upgrade_certificate(Some(cert.clone()))
.await;
}

let mut consensus_writer = task_state.consensus.write().await;
Expand Down
2 changes: 0 additions & 2 deletions crates/task-impls/src/upgrade.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,7 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> UpgradeTaskStat
old_version: V::Base::VERSION,
new_version: V::Upgrade::VERSION,
new_version_hash: V::UPGRADE_HASH.to_vec(),
// We schedule the upgrade to begin 15 views in the future
old_version_last_view: TYPES::Time::new(view + UPGRADE_BEGIN_OFFSET),
// and end 20 views in the future
new_version_first_view: TYPES::Time::new(view + UPGRADE_FINISH_OFFSET),
decide_by: TYPES::Time::new(view + UPGRADE_DECIDE_BY_OFFSET),
};
Expand Down
2 changes: 2 additions & 0 deletions crates/testing/src/spinning_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ where
TYPES::Time::genesis(),
BTreeMap::new(),
self.high_qc.clone(),
None,
Vec::new(),
BTreeMap::new(),
);
Expand Down Expand Up @@ -238,6 +239,7 @@ where
)
.await,
),
read_storage.decided_upgrade_certificate().await,
Vec::new(),
BTreeMap::new(),
);
Expand Down
4 changes: 4 additions & 0 deletions crates/types/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,12 @@ pub const EXTERNAL_EVENT_CHANNEL_SIZE: usize = 100_000;
/// The offset for how far in the future we will send out a `QuorumProposal` with an `UpgradeCertificate` we form. This is also how far in advance of sending a `QuorumProposal` we begin collecting votes on an `UpgradeProposal`.
pub const UPGRADE_PROPOSE_OFFSET: u64 = 5;

#[cfg(test)]
/// The offset for how far in the future the upgrade certificate we attach should be decided on (or else discarded).
pub const UPGRADE_DECIDE_BY_OFFSET: u64 = UPGRADE_PROPOSE_OFFSET + 5;
#[cfg(not(test))]
/// The offset for how far in the future the upgrade certificate we attach should be decided on (or else discarded).
pub const UPGRADE_DECIDE_BY_OFFSET: u64 = UPGRADE_PROPOSE_OFFSET + 100;

/// The offset for how far in the future the upgrade actually begins.
pub const UPGRADE_BEGIN_OFFSET: u64 = UPGRADE_DECIDE_BY_OFFSET + 5;
Expand Down
17 changes: 13 additions & 4 deletions crates/types/src/message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,15 @@ impl<TYPES: NodeType, V: Versions> UpgradeLock<TYPES, V> {
}
}

#[allow(clippy::new_without_default)]
/// Create a new `UpgradeLock` from an optional upgrade certificate
pub fn from_certificate(certificate: &Option<UpgradeCertificate<TYPES>>) -> Self {
Self {
decided_upgrade_certificate: Arc::new(RwLock::new(certificate.clone())),
_pd: PhantomData::<V>,
}
}

/// Calculate the version applied in a view, based on the provided upgrade lock.
///
/// # Errors
Expand Down Expand Up @@ -457,8 +466,8 @@ impl<TYPES: NodeType, V: Versions> UpgradeLock<TYPES, V> {
// Associated constants cannot be used in pattern matches, so we do this trick instead.
v if v == V::Base::VERSION => Serializer::<V::Base>::serialize(&message),
v if v == V::Upgrade::VERSION => Serializer::<V::Upgrade>::serialize(&message),
_ => {
bail!("Attempted to serialize with an incompatible version. This should be impossible.");
v => {
bail!("Attempted to serialize with version {}, which is incompatible. This should be impossible.", v);
}
};

Expand All @@ -481,8 +490,8 @@ impl<TYPES: NodeType, V: Versions> UpgradeLock<TYPES, V> {
let deserialized_message: M = match actual_version {
v if v == V::Base::VERSION => Serializer::<V::Base>::deserialize(message),
v if v == V::Upgrade::VERSION => Serializer::<V::Upgrade>::deserialize(message),
_ => {
bail!("Cannot deserialize message!");
v => {
bail!("Cannot deserialize message with stated version {}", v);
}
}
.context("Failed to deserialize message!")?;
Expand Down
7 changes: 6 additions & 1 deletion crates/types/src/traits/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::{
data::{DaProposal, Leaf, QuorumProposal, VidDisperseShare},
event::HotShotAction,
message::Proposal,
simple_certificate::QuorumCertificate,
simple_certificate::{QuorumCertificate, UpgradeCertificate},
};

/// Abstraction for storing a variety of consensus payload datum.
Expand All @@ -46,4 +46,9 @@ pub trait Storage<TYPES: NodeType>: Send + Sync + Clone {
leafs: CommitmentMap<Leaf<TYPES>>,
state: BTreeMap<TYPES::Time, View<TYPES>>,
) -> Result<()>;
/// Upgrade the current decided upgrade certificate in storage.
async fn update_decided_upgrade_certificate(
&self,
decided_upgrade_certificate: Option<UpgradeCertificate<TYPES>>,
) -> Result<()>;
}

0 comments on commit 735c71b

Please sign in to comment.