Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AUDIT][MEDIUM][O-1] Orchestrator slots may be trivially exhausted #3621

Merged
merged 12 commits into from
Aug 29, 2024
7 changes: 6 additions & 1 deletion crates/examples/infra/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,11 @@ pub async fn main_entry_point<
derive_libp2p_peer_id::<TYPES::SignatureKey>(&my_own_validator_config.private_key)
.expect("failed to derive Libp2p keypair");

// We need this to be able to register our node
let peer_config =
PeerConfig::<TYPES::SignatureKey>::to_bytes(&my_own_validator_config.public_config())
.clone();

// conditionally save/load config from file or orchestrator
// This is a function that will return correct complete config from orchestrator.
// It takes in a valid args.network_config_file when loading from file, or valid validator_config when loading from orchestrator, the invalid one will be ignored.
Expand Down Expand Up @@ -956,7 +961,7 @@ pub async fn main_entry_point<
if let NetworkConfigSource::Orchestrator = source {
info!("Waiting for the start command from orchestrator");
orchestrator_client
.wait_for_all_nodes_ready(run_config.clone().node_index)
.wait_for_all_nodes_ready(peer_config)
.await;
}

Expand Down
24 changes: 16 additions & 8 deletions crates/orchestrator/run-config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,30 @@ seed = [
]
start_delay_seconds = 0
cdn_marshal_address = "127.0.0.1:9000"
public_keys = [
"BLS_VER_KEY~p-JKk1VvO1RoMrDrqyjz0P1VGwtOaEjF5jLjpOZbJi5O747fvYEOg0OvCl_CLe4shh7vsqeG9uMF9RssM12sLSuaiVJkCClxEI5mRLV4qff1UjZAZJIBgeL1_hRhRUkpqC0Trm1qtvXtZ8FwOCIzYXv8c300Au824k7FxjjcWLBL",
"BLS_VER_KEY~bQszS-QKYvUij2g20VqS8asttGSb95NrTu2PUj0uMh1CBUxNy1FqyPDjZqB29M7ZbjWqj79QkEOWkpga84AmDYUeTuWmy-0P1AdKHD3ehc-dKvei78BDj5USwXPJiDUlCxvYs_9rWYhagaq-5_LXENr78xel17spftNd5MA1Mw5U",
"BLS_VER_KEY~4zQnaCOFJ7m95OjxeNls0QOOwWbz4rfxaL3NwmN2zSdnf8t5Nw_dfmMHq05ee8jCegw6Bn5T8inmrnGGAsQJMMWLv77nd7FJziz2ViAbXg-XGGF7o4HyzELCmypDOIYF3X2UWferFE_n72ZX0iQkUhOvYZZ7cfXToXxRTtb_mwRR",
"BLS_VER_KEY~rO2PIjyY30HGfapFcloFe3mNDKMIFi6JlOLkH5ZWBSYoRm5fE2-Rm6Lp3EvmAcB5r7KFJ0c1Uor308x78r04EY_sfjcsDCWt7RSJdL4cJoD_4fSTCv_bisO8k98hs_8BtqQt8BHlPeJohpUXvcfnK8suXJETiJ6Er97pfxRbzgAL",
"BLS_VER_KEY~r6b-Cwzp-b3czlt0MHmYPJIow5kMsXbrNmZsLSYg9RV49oCCO4WEeCRFR02x9bqLCa_sgNFMrIeNdEa11qNiBAohApYFIvrSa-zP5QGj3xbZaMOCrshxYit6E2TR-XsWvv6gjOrypmugjyTAth-iqQzTboSfmO9DD1-gjJIdCaD7",
"BLS_VER_KEY~IBRoz_Q1EXvcm1pNZcmVlyYZU8hZ7qmy337ePAjEMhz8Hl2q8vWPFOd3BaLwgRS1UzAPW3z4E-XIgRDGcRBTAMZX9b_0lKYjlyTlNF2EZfNnKmvv-xJ0yurkfjiveeYEsD2l5d8q_rJJbH1iZdXy-yPEbwI0SIvQfwdlcaKw9po4",
"BLS_VER_KEY~kEUEUJFBtCXl68fM_2roQw856wQlu1ZoDmPn8uu4bQgeZwyb5oz5_kMl-oAJ_OtbYV1serjWE--eXB_qYIpQLZka42-cML6WjCQjNl1hGSejtoBDkExNeUNcweFQBbEsaDiIy3-sgHTrfYpFd1icKeAVihLRn5_RtSU_RUu1TQqR",
"BLS_VER_KEY~PAAQNgOYfj3GiVX7LxSlkXfOCDSnNKZDqPVYQ_jBMxKzOCn0PXbqQ62kKPenWOmCxiCE7X158s-VenBna6MjHJgf61eBAO-3-OyTP5NWVx49RTgHhQf2iMTKk2iqK2gjnjZimBU135YU4lQFtrG-ZgRezwqkC5vy8V-q46fschIG",
"BLS_VER_KEY~96hAcdFZxQT8CEHcyV8j2ILJRsXagquENPkc9AwLSx3u6AE_uMupIKGbNJRiM99oFneK2vI5g1u61HidWeuTLRPM2537xAXeaO8e-wJYx4FaPKw_xTcLPrIm0OZT7SsLAMwFuqfMbDdKM71-RyrLwhff5517xXBKEk5Tg9iT9Qrr",
"BLS_VER_KEY~-pVi7j6TEBeG7ABata4uWWDRM2SrY8wWotWsGnTpIhnOVYJI_lNWyig6VJUuFmBsMS8rLMU7nDxDm8SbObxyA-SLFcr_jCkZqsbx8GcVQrnBAfjNRWuPZP0xcTDMu2IkQqtc3L0OpzbMEgGRGE8Wj09pNqouzl-xhPoYjTmD06Bw",
"BLS_VER_KEY~IUPSdnsNUHgNx_74ZhBPrICcDZ9Bp_DAt-6kFz8vSwJES2Vy1Ws8NJ1mxb9XGE1u13sw0FRe8kn5Ib3p2stbEtR_1Qgbuif6aoLrGaSUzy0MvwrO58u9kHZk3rXIuSAN7n4ok3-KKk2CmnBfx7fchFoqT56FXCd1EJ7XRrYj8wTh",
]
enable_registration_verification = true

[config]
num_nodes_with_stake = 10
num_nodes_without_stake = 0
start_threshold = [
8,
10,
]
start_threshold = [8, 10]
staked_da_nodes = 10
non_staked_da_nodes = 0
fixed_leader_for_gpuvid = 1
next_view_timeout = 30000
timeout_ratio = [
11,
10,
]
timeout_ratio = [11, 10]
round_start_delay = 1
start_delay = 1
num_bootstrap = 5
Expand Down
5 changes: 3 additions & 2 deletions crates/orchestrator/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,12 +442,13 @@ impl OrchestratorClient {
/// # Panics
/// Panics if unable to post.
#[instrument(skip(self), name = "orchestrator ready signal")]
pub async fn wait_for_all_nodes_ready(&self, node_index: u64) -> bool {
pub async fn wait_for_all_nodes_ready(&self, peer_config: Vec<u8>) -> bool {
let send_ready_f = |client: Client<ClientError, OrchestratorVersion>| {
let pk = peer_config.clone();
async move {
let result: Result<_, ClientError> = client
.post("api/ready")
.body_json(&node_index)
.body_binary(&pk)
.unwrap()
.send()
.await
Expand Down
15 changes: 15 additions & 0 deletions crates/orchestrator/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
// along with the HotShot repository. If not, see <https://mit-license.org/>.

use std::{
collections::HashSet,
env, fs,
net::SocketAddr,
num::NonZeroUsize,
Expand Down Expand Up @@ -208,6 +209,10 @@ pub struct NetworkConfig<KEY: SignatureKey> {
pub builder: BuilderType,
/// random builder config
pub random_builder: Option<RandomBuilderConfig>,
/// The list of public keys that are allowed to connect to the orchestrator
pub public_keys: HashSet<KEY>,
/// Whether or not to disable registration verification.
pub enable_registration_verification: bool,
}

/// the source of the network config
Expand Down Expand Up @@ -439,6 +444,8 @@ impl<K: SignatureKey> Default for NetworkConfig<K> {
commit_sha: String::new(),
builder: BuilderType::default(),
random_builder: None,
public_keys: HashSet::new(),
enable_registration_verification: true,
}
}
}
Expand Down Expand Up @@ -491,6 +498,12 @@ pub struct NetworkConfigFile<KEY: SignatureKey> {
/// random builder configuration
#[serde(default)]
pub random_builder: Option<RandomBuilderConfig>,
/// The list of public keys that are allowed to connect to the orchestrator
#[serde(default)]
pub public_keys: HashSet<KEY>,
/// Whether or not to disable registration verification.
#[serde(default)]
pub enable_registration_verification: bool,
}

impl<K: SignatureKey> From<NetworkConfigFile<K>> for NetworkConfig<K> {
Expand Down Expand Up @@ -536,6 +549,8 @@ impl<K: SignatureKey> From<NetworkConfigFile<K>> for NetworkConfig<K> {
commit_sha: String::new(),
builder: val.builder,
random_builder: val.random_builder,
public_keys: val.public_keys,
enable_registration_verification: val.enable_registration_verification,
}
}
}
Expand Down
65 changes: 57 additions & 8 deletions crates/orchestrator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ use client::{BenchResults, BenchResultsDownloadConfig};
use config::BuilderType;
use csv::Writer;
use futures::{stream::FuturesUnordered, FutureExt, StreamExt};
use hotshot_types::{traits::signature_key::SignatureKey, PeerConfig};
use hotshot_types::{
traits::signature_key::{SignatureKey, StakeTableEntryType},
PeerConfig,
};
use libp2p::{
identity::{
ed25519::{Keypair as EdKeypair, SecretKey},
Expand Down Expand Up @@ -225,10 +228,10 @@ pub trait OrchestratorApi<KEY: SignatureKey> {
/// # Errors
/// if unable to serve
fn post_run_results(&mut self, metrics: BenchResults) -> Result<(), ServerError>;
/// post endpoint for whether or not all nodes are ready
/// A node POSTs its public key to let the orchestrator know that it is ready
/// # Errors
/// if unable to serve
fn post_ready(&mut self) -> Result<(), ServerError>;
fn post_ready(&mut self, peer_config: &PeerConfig<KEY>) -> Result<(), ServerError>;
/// post endpoint for manually starting the orchestrator
/// # Errors
/// if unable to serve
Expand Down Expand Up @@ -328,7 +331,22 @@ where

let node_index = self.pub_posted.len() as u64;

// Deserialize the public key
let staked_pubkey = PeerConfig::<KEY>::from_bytes(pubkey).unwrap();

// Check if the node is allowed to connect
if self.config.enable_registration_verification
&& !self
.config
.public_keys
.contains(&staked_pubkey.stake_table_entry.public_key())
{
return Err(ServerError {
status: tide_disco::StatusCode::FORBIDDEN,
message: "You are unauthorized to register with the orchestrator".to_string(),
});
}

self.config
.config
.known_nodes_with_stake
Expand Down Expand Up @@ -371,7 +389,7 @@ where
}
}

println!("Posted public key for node_index {node_index}");
tracing::error!("Posted public key for node_index {node_index}");

// node_index starts at 0, so once it matches `num_nodes_with_stake`
// we will have registered one node too many. hence, we want `node_index + 1`.
Expand Down Expand Up @@ -416,11 +434,24 @@ where
}

// Assumes nodes do not post 'ready' twice
// TODO ED Add a map to verify which nodes have posted they're ready
fn post_ready(&mut self) -> Result<(), ServerError> {
fn post_ready(&mut self, peer_config: &PeerConfig<KEY>) -> Result<(), ServerError> {
// If we have not disabled registration verification.
// Is this node allowed to connect?
if !self
.config
.config
.known_nodes_with_stake
.contains(peer_config)
{
return Err(ServerError {
status: tide_disco::StatusCode::FORBIDDEN,
message: "You are unauthorized to register with the orchestrator".to_string(),
});
}

self.nodes_connected += 1;

println!("Nodes connected: {}", self.nodes_connected);
tracing::error!("Nodes connected: {}", self.nodes_connected);

// i.e. nodes_connected >= num_nodes_with_stake * (start_threshold.0 / start_threshold.1)
if self.nodes_connected * self.config.config.start_threshold.1
Expand Down Expand Up @@ -636,7 +667,21 @@ where
})?
.post(
"post_ready",
|_req, state: &mut <State as ReadState>::State| async move { state.post_ready() }.boxed(),
|req, state: &mut <State as ReadState>::State| {
async move {
let mut body_bytes = req.body_bytes();
jparr721 marked this conversation as resolved.
Show resolved Hide resolved
body_bytes.drain(..12);
// Decode the payload-supplied pubkey
let Some(pubkey) = PeerConfig::<KEY>::from_bytes(&body_bytes) else {
return Err(ServerError {
status: tide_disco::StatusCode::BAD_REQUEST,
message: "Malformed body".to_string(),
});
};
state.post_ready(&pubkey)
}
.boxed()
},
)?
.post(
"post_manual_start",
Expand Down Expand Up @@ -726,6 +771,10 @@ where
network_config.config.known_nodes_with_stake = vec![];
network_config.config.known_da_nodes = vec![];

if network_config.enable_registration_verification {
tracing::error!("REGISTRATION VERIFICATION IS TURNED OFF");
}

let web_api =
define_api().map_err(|_e| io::Error::new(ErrorKind::Other, "Failed to define api"));

Expand Down
7 changes: 6 additions & 1 deletion crates/types/src/stake_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,16 @@ pub struct StakeTableEntry<K: SignatureKey> {
pub stake_amount: U256,
}

impl<K: SignatureKey> StakeTableEntryType for StakeTableEntry<K> {
impl<K: SignatureKey> StakeTableEntryType<K> for StakeTableEntry<K> {
/// Get the stake amount
fn stake(&self) -> U256 {
self.stake_amount
}

/// Get the public key
fn public_key(&self) -> K {
self.stake_key.clone()
}
}

impl<K: SignatureKey> StakeTableEntry<K> {
Expand Down
6 changes: 4 additions & 2 deletions crates/types/src/traits/signature_key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ use super::EncodeBytes;
use crate::{utils::BuilderCommitment, vid::VidSchemeType};

/// Type representing stake table entries in a `StakeTable`
pub trait StakeTableEntryType {
pub trait StakeTableEntryType<K> {
/// Get the stake value
fn stake(&self) -> U256;
/// Get the public key
fn public_key(&self) -> K;
}

/// Trait for abstracting public key signatures
Expand Down Expand Up @@ -60,7 +62,7 @@ pub trait SignatureKey:
+ for<'a> Deserialize<'a>
+ Hash;
/// The type of the entry that contain both public key and stake value
type StakeTableEntry: StakeTableEntryType
type StakeTableEntry: StakeTableEntryType<Self>
+ Send
+ Sync
+ Sized
Expand Down
Loading