Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Byzantine Test where we have a dishonest leader #3516

Merged
merged 8 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 79 additions & 1 deletion crates/hotshot/src/tasks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

/// Provides trait to create task states from a `SystemContextHandle`
pub mod task_state;
use std::{sync::Arc, time::Duration};
use std::{collections::HashSet, sync::Arc, time::Duration};

use async_broadcast::broadcast;
use async_compatibility_layer::art::{async_sleep, async_spawn};
Expand All @@ -25,6 +25,8 @@ use hotshot_task_impls::{
};
use hotshot_types::{
constants::EVENT_CHANNEL_SIZE,
data::QuorumProposal,
message::Proposal,
message::{Messages, VersionedMessage},
traits::{
network::ConnectedNetwork,
Expand Down Expand Up @@ -408,6 +410,82 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>> EventTransformerState<TYPES,
}
}

#[derive(Debug)]
/// An `EventHandlerState` that modifies justify_qc on `QuorumProposalSend` to that of a previous view to mock dishonest leader
pub struct DishonestLeader<TYPES: NodeType, I: NodeImplementation<TYPES>> {
    /// Proposals observed via `QuorumProposalValidated`, in arrival order; the source
    /// of stale QCs when acting dishonestly.
    pub validated_proposals: Vec<QuorumProposal<TYPES>>,
    /// Running count of `QuorumProposalSend` events emitted by this node, i.e. how many
    /// times it has been elected leader and sent a proposal.
    pub total_proposals_from_node: u64,
    /// The values of `total_proposals_from_node` at which to send a dishonest proposal.
    pub dishonest_at_proposal_numbers: HashSet<u64>,
    /// How many proposals back in `validated_proposals` to reach for the old QC
    /// (clamped to the oldest cached proposal when history is shorter).
    pub view_look_back: usize,
    /// Marker tying this state to a `NodeImplementation` without storing one.
    pub _phantom: std::marker::PhantomData<I>,
}

/// Helper for mocking a dishonest leader on `QuorumProposalSend`.
/// When `total_proposals_from_node` is listed in `dishonest_at_proposal_numbers` and at
/// least one validated proposal is cached, the outgoing proposal is rewritten to carry
/// an older QC.
impl<TYPES: NodeType, I: NodeImplementation<TYPES>> DishonestLeader<TYPES, I> {
    /// Produce the event a (possibly dishonest) leader should send.
    ///
    /// Looks `view_look_back` proposals behind the newest cached proposal — clamped to
    /// the oldest entry when the history is too short — and substitutes that proposal's
    /// `justify_qc` into the proposal being sent. Returns the event unchanged when this
    /// proposal number is not marked dishonest or when no history is cached yet.
    fn handle_proposal_send_event(
        &self,
        event: &HotShotEvent<TYPES>,
        proposal: &Proposal<TYPES, QuorumProposal<TYPES>>,
        sender: &TYPES::SignatureKey,
    ) -> HotShotEvent<TYPES> {
        let dishonest_now = self
            .dishonest_at_proposal_numbers
            .contains(&self.total_proposals_from_node);
        if !dishonest_now || self.validated_proposals.is_empty() {
            return event.clone();
        }

        // Index of the entry `view_look_back` behind the newest cached proposal;
        // saturates to 0 (the oldest proposal) when the look-back exceeds history.
        let index = self
            .validated_proposals
            .len()
            .saturating_sub(self.view_look_back.saturating_add(1));
        let stale_qc = self.validated_proposals[index].justify_qc.clone();

        // Forge the outgoing proposal with the stale QC.
        let mut forged = proposal.clone();
        forged.data.justify_qc = stale_qc;
        HotShotEvent::QuorumProposalSend(forged, sender.clone())
    }
}

#[async_trait]
impl<TYPES: NodeType, I: NodeImplementation<TYPES> + std::fmt::Debug>
    EventTransformerState<TYPES, I> for DishonestLeader<TYPES, I>
{
    /// Incoming events pass through untouched.
    async fn recv_handler(&mut self, event: &HotShotEvent<TYPES>) -> Vec<HotShotEvent<TYPES>> {
        vec![event.clone()]
    }

    /// Outgoing events: count each proposal this node sends (possibly forging it with an
    /// old QC) and cache every proposal it validated; all other events pass through.
    async fn send_handler(&mut self, event: &HotShotEvent<TYPES>) -> Vec<HotShotEvent<TYPES>> {
        if let HotShotEvent::QuorumProposalSend(proposal, sender) = event {
            self.total_proposals_from_node += 1;
            return vec![self.handle_proposal_send_event(event, proposal, sender)];
        }
        if let HotShotEvent::QuorumProposalValidated(proposal, _) = event {
            self.validated_proposals.push(proposal.clone());
        }
        vec![event.clone()]
    }
}

/// adds tasks for sending/receiving messages to/from the network.
pub async fn add_network_tasks<TYPES: NodeType, I: NodeImplementation<TYPES>>(
handle: &mut SystemContextHandle<TYPES, I>,
Expand Down
2 changes: 1 addition & 1 deletion crates/testing/src/consistency_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ pub struct ConsistencyTask<TYPES: NodeType> {
/// A map from node ids to (leaves keyed on view number)
pub consensus_leaves: NetworkMap<TYPES>,
/// safety task requirements
pub safety_properties: OverallSafetyPropertiesDescription,
pub safety_properties: OverallSafetyPropertiesDescription<TYPES>,
}

#[async_trait]
Expand Down
88 changes: 65 additions & 23 deletions crates/testing/src/overall_safety_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ pub enum OverallSafetyTaskErr<TYPES: NodeType> {

failed_views: HashSet<TYPES::Time>,
},
/// mismatched expected failed view vs actual failed view
InconsistentFailedViews {
expected_failed_views: HashSet<TYPES::Time>,
actual_failed_views: HashSet<TYPES::Time>,
},
}

/// Data availability task state
Expand All @@ -80,13 +85,42 @@ pub struct OverallSafetyTask<TYPES: NodeType, I: TestableNodeImplementation<TYPE
/// ctx
pub ctx: RoundCtx<TYPES>,
/// configure properties
pub properties: OverallSafetyPropertiesDescription,
pub properties: OverallSafetyPropertiesDescription<TYPES>,
/// error
pub error: Option<Box<OverallSafetyTaskErr<TYPES>>>,
/// sender to test event channel
pub test_sender: Sender<TestEvent>,
}

impl<TYPES: NodeType, I: TestableNodeImplementation<TYPES>> OverallSafetyTask<TYPES, I> {
    /// Record a failed view and decide whether the test must shut down.
    ///
    /// Shuts the test down with `TooManyFailures` once the failure count exceeds
    /// `num_failed_views`. Otherwise, when a set of expected-to-fail views was
    /// configured, marks an expected view as seen — or shuts down with
    /// `InconsistentFailedViews` if this view was not one we expected to fail.
    async fn handle_view_failure(&mut self, num_failed_views: usize, view_number: TYPES::Time) {
        self.ctx.failed_views.insert(view_number);

        if self.ctx.failed_views.len() > num_failed_views {
            let _ = self.test_sender.broadcast(TestEvent::Shutdown).await;
            self.error = Some(Box::new(OverallSafetyTaskErr::<TYPES>::TooManyFailures {
                failed_views: self.ctx.failed_views.clone(),
            }));
            return;
        }

        let expected = &mut self.properties.expected_views_to_fail;
        if expected.is_empty() {
            // No expectations configured: any failure within the limit is tolerated.
            return;
        }

        if let Some(seen) = expected.get_mut(&view_number) {
            *seen = true;
        } else {
            let _ = self.test_sender.broadcast(TestEvent::Shutdown).await;
            self.error = Some(Box::new(
                OverallSafetyTaskErr::<TYPES>::InconsistentFailedViews {
                    expected_failed_views: expected.keys().cloned().collect(),
                    actual_failed_views: self.ctx.failed_views.clone(),
                },
            ));
        }
    }
}

#[async_trait]
impl<TYPES: NodeType, I: TestableNodeImplementation<TYPES>> TestTaskState
for OverallSafetyTask<TYPES, I>
Expand All @@ -95,14 +129,15 @@ impl<TYPES: NodeType, I: TestableNodeImplementation<TYPES>> TestTaskState

/// Handles an event from one of multiple receivers.
async fn handle_event(&mut self, (message, id): (Self::Event, usize)) -> Result<()> {
let OverallSafetyPropertiesDescription {
let OverallSafetyPropertiesDescription::<TYPES> {
check_leaf,
check_block,
num_failed_views,
num_successful_views,
threshold_calculator,
transaction_threshold,
}: OverallSafetyPropertiesDescription = self.properties.clone();
..
}: OverallSafetyPropertiesDescription<TYPES> = self.properties.clone();
let Event { view_number, event } = message;
let key = match event {
EventType::Error { error } => {
Expand Down Expand Up @@ -168,14 +203,9 @@ impl<TYPES: NodeType, I: TestableNodeImplementation<TYPES>> TestTaskState
return Ok(());
}
ViewStatus::Failed => {
self.ctx.failed_views.insert(view_number);
if self.ctx.failed_views.len() > num_failed_views {
let _ = self.test_sender.broadcast(TestEvent::Shutdown).await;
self.error =
Some(Box::new(OverallSafetyTaskErr::<TYPES>::TooManyFailures {
failed_views: self.ctx.failed_views.clone(),
}));
}
self.handle_view_failure(num_failed_views, view_number)
.await;

return Ok(());
}
ViewStatus::Err(e) => {
Expand All @@ -189,13 +219,8 @@ impl<TYPES: NodeType, I: TestableNodeImplementation<TYPES>> TestTaskState
}
} else if view.check_if_failed(threshold, len) {
view.status = ViewStatus::Failed;
self.ctx.failed_views.insert(view_number);
if self.ctx.failed_views.len() > num_failed_views {
let _ = self.test_sender.broadcast(TestEvent::Shutdown).await;
self.error = Some(Box::new(OverallSafetyTaskErr::<TYPES>::TooManyFailures {
failed_views: self.ctx.failed_views.clone(),
}));
}
self.handle_view_failure(num_failed_views, view_number)
.await;
return Ok(());
}
Ok(())
Expand All @@ -206,14 +231,15 @@ impl<TYPES: NodeType, I: TestableNodeImplementation<TYPES>> TestTaskState
return TestResult::Fail(e.clone());
}

let OverallSafetyPropertiesDescription {
let OverallSafetyPropertiesDescription::<TYPES> {
check_leaf: _,
check_block: _,
num_failed_views: num_failed_rounds_total,
num_successful_views,
threshold_calculator: _,
transaction_threshold: _,
}: OverallSafetyPropertiesDescription = self.properties.clone();
expected_views_to_fail,
}: OverallSafetyPropertiesDescription<TYPES> = self.properties.clone();

let num_incomplete_views = self.ctx.round_results.len()
- self.ctx.successful_views.len()
Expand All @@ -232,6 +258,18 @@ impl<TYPES: NodeType, I: TestableNodeImplementation<TYPES>> TestTaskState
}));
}

if !expected_views_to_fail
.values()
.all(|&view_failed| view_failed)
{
return TestResult::Fail(Box::new(
OverallSafetyTaskErr::<TYPES>::InconsistentFailedViews {
actual_failed_views: self.ctx.failed_views.clone(),
expected_failed_views: expected_views_to_fail.keys().cloned().collect(),
},
));
}

// We should really be able to include a check like this:
//
// if self.ctx.failed_views.len() < num_failed_rounds_total {
Expand Down Expand Up @@ -495,7 +533,7 @@ impl<TYPES: NodeType> RoundResult<TYPES> {

/// cross node safety properties
#[derive(Clone)]
pub struct OverallSafetyPropertiesDescription {
pub struct OverallSafetyPropertiesDescription<TYPES: NodeType> {
/// required number of successful views
pub num_successful_views: usize,
/// whether or not to check the leaf
Expand All @@ -512,21 +550,24 @@ pub struct OverallSafetyPropertiesDescription {
/// threshold calculator. Given number of live and total nodes, provide number of successes
/// required to mark view as successful
pub threshold_calculator: Arc<dyn Fn(usize, usize) -> usize + Send + Sync>,
/// pass in the views that we expect to fail
pub expected_views_to_fail: HashMap<TYPES::Time, bool>,
}

impl std::fmt::Debug for OverallSafetyPropertiesDescription {
impl<TYPES: NodeType> std::fmt::Debug for OverallSafetyPropertiesDescription<TYPES> {
    /// Manual `Debug`: `threshold_calculator` holds a closure and cannot be derived,
    /// so it is omitted via `finish_non_exhaustive`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Labels mirror the actual field names so debug output can be grepped against
        // the struct definition (previously a mix of spaced/underscored labels, and
        // "num_failed_rounds_total" did not match the `num_failed_views` field).
        f.debug_struct("OverallSafetyPropertiesDescription")
            .field("num_successful_views", &self.num_successful_views)
            .field("check_leaf", &self.check_leaf)
            .field("check_block", &self.check_block)
            .field("num_failed_views", &self.num_failed_views)
            .field("transaction_threshold", &self.transaction_threshold)
            .field("expected_views_to_fail", &self.expected_views_to_fail)
            .finish_non_exhaustive()
    }
}

impl Default for OverallSafetyPropertiesDescription {
impl<TYPES: NodeType> Default for OverallSafetyPropertiesDescription<TYPES> {
fn default() -> Self {
Self {
num_successful_views: 50,
Expand All @@ -536,6 +577,7 @@ impl Default for OverallSafetyPropertiesDescription {
transaction_threshold: 0,
// very strict
threshold_calculator: Arc::new(|_num_live, num_total| 2 * num_total / 3 + 1),
expected_views_to_fail: HashMap::new(),
}
}
}
8 changes: 5 additions & 3 deletions crates/testing/src/test_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ pub struct TestDescription<TYPES: NodeType, I: NodeImplementation<TYPES>> {
/// Size of the non-staked DA committee for the test
pub da_non_staked_committee_size: usize,
/// overall safety property description
pub overall_safety_properties: OverallSafetyPropertiesDescription,
pub overall_safety_properties: OverallSafetyPropertiesDescription<TYPES>,
/// spinning properties
pub spinning_properties: SpinningTaskDescription,
/// txns timing
Expand Down Expand Up @@ -230,13 +230,14 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>> TestDescription<TYPES, I> {
num_nodes_with_stake,
num_nodes_without_stake,
start_nodes: num_nodes_with_stake,
overall_safety_properties: OverallSafetyPropertiesDescription {
overall_safety_properties: OverallSafetyPropertiesDescription::<TYPES> {
num_successful_views: 50,
check_leaf: true,
check_block: true,
num_failed_views: 15,
transaction_threshold: 0,
threshold_calculator: Arc::new(|_active, total| (2 * total / 3 + 1)),
expected_views_to_fail: HashMap::new(),
},
timing_data: TimingData {
next_view_timeout: 2000,
Expand All @@ -263,13 +264,14 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>> TestDescription<TYPES, I> {
num_nodes_with_stake,
num_nodes_without_stake,
start_nodes: num_nodes_with_stake,
overall_safety_properties: OverallSafetyPropertiesDescription {
overall_safety_properties: OverallSafetyPropertiesDescription::<TYPES> {
num_successful_views: 20,
check_leaf: true,
check_block: true,
num_failed_views: 8,
transaction_threshold: 0,
threshold_calculator: Arc::new(|_active, total| (2 * total / 3 + 1)),
expected_views_to_fail: HashMap::new(),
},
timing_data: TimingData {
start_delay: 120_000,
Expand Down
Loading
Loading