Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store upgrade certificate on decide and load on restart #3679

Merged
merged 8 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion crates/example-types/src/storage_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use hotshot_types::{
data::{DaProposal, Leaf, QuorumProposal, VidDisperseShare},
event::HotShotAction,
message::Proposal,
simple_certificate::QuorumCertificate,
simple_certificate::{QuorumCertificate, UpgradeCertificate},
traits::{
node_implementation::{ConsensusTime, NodeType},
storage::Storage,
Expand Down Expand Up @@ -60,6 +60,7 @@ pub struct TestStorage<TYPES: NodeType> {
/// `should_return_err` is a testing utility to validate negative cases.
pub should_return_err: bool,
pub delay_config: DelayConfig,
pub decided_upgrade_certificate: Arc<RwLock<Option<UpgradeCertificate<TYPES>>>>,
}

impl<TYPES: NodeType> Default for TestStorage<TYPES> {
Expand All @@ -68,6 +69,7 @@ impl<TYPES: NodeType> Default for TestStorage<TYPES> {
inner: Arc::new(RwLock::new(TestStorageState::default())),
should_return_err: false,
delay_config: DelayConfig::default(),
decided_upgrade_certificate: Arc::new(RwLock::new(None)),
}
}
}
Expand All @@ -91,6 +93,9 @@ impl<TYPES: NodeType> TestStorage<TYPES> {
pub async fn high_qc_cloned(&self) -> Option<QuorumCertificate<TYPES>> {
self.inner.read().await.high_qc.clone()
}
pub async fn decided_upgrade_certificate(&self) -> Option<UpgradeCertificate<TYPES>> {
self.decided_upgrade_certificate.read().await.clone()
}
pub async fn last_actioned_view(&self) -> TYPES::Time {
self.inner.read().await.action
}
Expand Down Expand Up @@ -183,4 +188,12 @@ impl<TYPES: NodeType> Storage<TYPES> for TestStorage<TYPES> {
Self::run_delay_settings_from_config(&self.delay_config).await;
Ok(())
}
async fn update_decided_upgrade_certificate(
&self,
decided_upgrade_certificate: Option<UpgradeCertificate<TYPES>>,
) -> Result<()> {
*self.decided_upgrade_certificate.write().await = decided_upgrade_certificate;

Ok(())
}
}
10 changes: 8 additions & 2 deletions crates/hotshot/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ use hotshot_types::{
data::{Leaf, QuorumProposal},
event::{EventType, LeafInfo},
message::{DataMessage, Message, MessageKind, Proposal},
simple_certificate::QuorumCertificate,
simple_certificate::{QuorumCertificate, UpgradeCertificate},
traits::{
consensus_api::ConsensusApi,
election::Membership,
Expand Down Expand Up @@ -255,7 +255,8 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> SystemContext<T
let (internal_tx, internal_rx) = internal_channel;
let (mut external_tx, mut external_rx) = external_channel;

let upgrade_lock = UpgradeLock::<TYPES, V>::new();
let upgrade_lock =
UpgradeLock::<TYPES, V>::from_certificate(&initializer.decided_upgrade_certificate);

// Allow overflow on the channel, otherwise sending to it may block.
external_rx.set_overflow(true);
Expand Down Expand Up @@ -961,6 +962,8 @@ pub struct HotShotInitializer<TYPES: NodeType> {
/// than `inner`s view number for the non genesis case because we must have seen higher QCs
/// to decide on the leaf.
high_qc: QuorumCertificate<TYPES>,
/// Previously decided upgrade certificate; this is necessary if an upgrade has happened and we are not restarting with the new version
decided_upgrade_certificate: Option<UpgradeCertificate<TYPES>>,
/// Undecided leafs that were seen, but not yet decided on. These allow a restarting node
/// to vote and propose right away if they didn't miss anything while down.
undecided_leafs: Vec<Leaf<TYPES>>,
Expand Down Expand Up @@ -988,6 +991,7 @@ impl<TYPES: NodeType> HotShotInitializer<TYPES> {
actioned_view: TYPES::Time::new(0),
saved_proposals: BTreeMap::new(),
high_qc,
decided_upgrade_certificate: None,
undecided_leafs: Vec::new(),
undecided_state: BTreeMap::new(),
instance_state,
Expand All @@ -1010,6 +1014,7 @@ impl<TYPES: NodeType> HotShotInitializer<TYPES> {
actioned_view: TYPES::Time,
saved_proposals: BTreeMap<TYPES::Time, Proposal<TYPES, QuorumProposal<TYPES>>>,
high_qc: QuorumCertificate<TYPES>,
decided_upgrade_certificate: Option<UpgradeCertificate<TYPES>>,
undecided_leafs: Vec<Leaf<TYPES>>,
undecided_state: BTreeMap<TYPES::Time, View<TYPES>>,
) -> Self {
Expand All @@ -1022,6 +1027,7 @@ impl<TYPES: NodeType> HotShotInitializer<TYPES> {
actioned_view,
saved_proposals,
high_qc,
decided_upgrade_certificate,
undecided_leafs,
undecided_state,
}
Expand Down
17 changes: 17 additions & 0 deletions crates/hotshot/src/tasks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,23 @@ pub async fn add_consensus_tasks<TYPES: NodeType, I: NodeImplementation<TYPES>,
handle.add_task(DaTaskState::<TYPES, I, V>::create_from(handle).await);
handle.add_task(TransactionTaskState::<TYPES, I, V>::create_from(handle).await);

{
let mut upgrade_certificate_lock = handle
.hotshot
.upgrade_lock
.decided_upgrade_certificate
.write()
.await;

// clear the loaded certificate if it's now outdated
if upgrade_certificate_lock
.as_ref()
.is_some_and(|cert| V::Base::VERSION >= cert.data.new_version)
{
*upgrade_certificate_lock = None;
}
}

// only spawn the upgrade task if we are actually configured to perform an upgrade.
if V::Base::VERSION < V::Upgrade::VERSION {
handle.add_task(UpgradeTaskState::<TYPES, I, V>::create_from(handle).await);
Expand Down
7 changes: 7 additions & 0 deletions crates/task-impls/src/consensus/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,13 @@ pub async fn handle_quorum_proposal_validated<
.await;
*decided_certificate_lock = Some(cert.clone());
drop(decided_certificate_lock);

let _ = task_state
.storage
.write()
.await
.update_decided_upgrade_certificate(Some(cert.clone()))
.await;
}

let mut consensus = task_state.consensus.write().await;
Expand Down
12 changes: 11 additions & 1 deletion crates/task-impls/src/quorum_vote/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ use hotshot_types::{
consensus::OuterConsensus,
data::QuorumProposal,
event::{Event, EventType},
traits::node_implementation::{ConsensusTime, NodeImplementation, NodeType},
traits::{
node_implementation::{ConsensusTime, NodeImplementation, NodeType},
storage::Storage,
},
vote::HasViewNumber,
};
use tracing::{debug, instrument};
Expand Down Expand Up @@ -60,6 +63,13 @@ pub(crate) async fn handle_quorum_proposal_validated<
.await;
*decided_certificate_lock = Some(cert.clone());
drop(decided_certificate_lock);

let _ = task_state
.storage
.write()
.await
.update_decided_upgrade_certificate(Some(cert.clone()))
.await;
}

let mut consensus_writer = task_state.consensus.write().await;
Expand Down
2 changes: 0 additions & 2 deletions crates/task-impls/src/upgrade.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,7 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> UpgradeTaskStat
old_version: V::Base::VERSION,
new_version: V::Upgrade::VERSION,
new_version_hash: V::UPGRADE_HASH.to_vec(),
// We schedule the upgrade to begin 15 views in the future
old_version_last_view: TYPES::Time::new(view + UPGRADE_BEGIN_OFFSET),
// and end 20 views in the future
new_version_first_view: TYPES::Time::new(view + UPGRADE_FINISH_OFFSET),
decide_by: TYPES::Time::new(view + UPGRADE_DECIDE_BY_OFFSET),
};
Expand Down
2 changes: 2 additions & 0 deletions crates/testing/src/spinning_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ where
TYPES::Time::genesis(),
BTreeMap::new(),
self.high_qc.clone(),
None,
Vec::new(),
BTreeMap::new(),
);
Expand Down Expand Up @@ -238,6 +239,7 @@ where
)
.await,
),
read_storage.decided_upgrade_certificate().await,
Vec::new(),
BTreeMap::new(),
);
Expand Down
4 changes: 4 additions & 0 deletions crates/types/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,12 @@ pub const EXTERNAL_EVENT_CHANNEL_SIZE: usize = 100_000;
/// The offset for how far in the future we will send out a `QuorumProposal` with an `UpgradeCertificate` we form. This is also how far in advance of sending a `QuorumProposal` we begin collecting votes on an `UpgradeProposal`.
pub const UPGRADE_PROPOSE_OFFSET: u64 = 5;

#[cfg(test)]
/// The offset for how far in the future the upgrade certificate we attach should be decided on (or else discarded).
pub const UPGRADE_DECIDE_BY_OFFSET: u64 = UPGRADE_PROPOSE_OFFSET + 5;
#[cfg(not(test))]
/// The offset for how far in the future the upgrade certificate we attach should be decided on (or else discarded).
pub const UPGRADE_DECIDE_BY_OFFSET: u64 = UPGRADE_PROPOSE_OFFSET + 100;

/// The offset for how far in the future the upgrade actually begins.
pub const UPGRADE_BEGIN_OFFSET: u64 = UPGRADE_DECIDE_BY_OFFSET + 5;
Expand Down
17 changes: 13 additions & 4 deletions crates/types/src/message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,15 @@ impl<TYPES: NodeType, V: Versions> UpgradeLock<TYPES, V> {
}
}

#[allow(clippy::new_without_default)]
/// Create a new `UpgradeLock` from an optional upgrade certificate
pub fn from_certificate(certificate: &Option<UpgradeCertificate<TYPES>>) -> Self {
Self {
decided_upgrade_certificate: Arc::new(RwLock::new(certificate.clone())),
_pd: PhantomData::<V>,
}
}

/// Calculate the version applied in a view, based on the provided upgrade lock.
///
/// # Errors
Expand Down Expand Up @@ -458,8 +467,8 @@ impl<TYPES: NodeType, V: Versions> UpgradeLock<TYPES, V> {
// Associated constants cannot be used in pattern matches, so we do this trick instead.
v if v == V::Base::VERSION => Serializer::<V::Base>::serialize(&message),
v if v == V::Upgrade::VERSION => Serializer::<V::Upgrade>::serialize(&message),
_ => {
bail!("Attempted to serialize with an incompatible version. This should be impossible.");
v => {
bail!("Attempted to serialize with version {}, which is incompatible. This should be impossible.", v);
}
};

Expand All @@ -482,8 +491,8 @@ impl<TYPES: NodeType, V: Versions> UpgradeLock<TYPES, V> {
let deserialized_message: M = match actual_version {
v if v == V::Base::VERSION => Serializer::<V::Base>::deserialize(message),
v if v == V::Upgrade::VERSION => Serializer::<V::Upgrade>::deserialize(message),
_ => {
bail!("Cannot deserialize message!");
v => {
bail!("Cannot deserialize message with stated version {}", v);
}
}
.context("Failed to deserialize message!")?;
Expand Down
7 changes: 6 additions & 1 deletion crates/types/src/traits/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::{
data::{DaProposal, Leaf, QuorumProposal, VidDisperseShare},
event::HotShotAction,
message::Proposal,
simple_certificate::QuorumCertificate,
simple_certificate::{QuorumCertificate, UpgradeCertificate},
};

/// Abstraction for storing a variety of consensus payload datum.
Expand All @@ -46,4 +46,9 @@ pub trait Storage<TYPES: NodeType>: Send + Sync + Clone {
leafs: CommitmentMap<Leaf<TYPES>>,
state: BTreeMap<TYPES::Time, View<TYPES>>,
) -> Result<()>;
/// Upgrade the current decided upgrade certificate in storage.
async fn update_decided_upgrade_certificate(
&self,
decided_upgrade_certificate: Option<UpgradeCertificate<TYPES>>,
) -> Result<()>;
}
Loading