From 9248cd1dfbffb90f2adbab8c3c9baf4f4d3522a9 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 3 Apr 2024 10:55:39 +0200 Subject: [PATCH 01/71] Refactor block building and import in collator service --- cumulus/client/consensus/aura/src/collator.rs | 63 +++++++++++++------ 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collator.rs b/cumulus/client/consensus/aura/src/collator.rs index 5b7669c88f47..9030f138186a 100644 --- a/cumulus/client/consensus/aura/src/collator.rs +++ b/cumulus/client/consensus/aura/src/collator.rs @@ -156,15 +156,7 @@ where Ok((paras_inherent_data, other_inherent_data)) } - /// Propose, seal, and import a block, packaging it into a collation. - /// - /// Provide the slot to build at as well as any other necessary pre-digest logs, - /// the inherent data, and the proposal duration and PoV size limits. - /// - /// The Aura pre-digest should not be explicitly provided and is set internally. - /// - /// This does not announce the collation to the parachain network or the relay chain. - pub async fn collate( + pub async fn build_block_and_import( &mut self, parent_header: &Block::Header, slot_claim: &SlotClaim, @@ -172,10 +164,7 @@ where inherent_data: (ParachainInherentData, InherentData), proposal_duration: Duration, max_pov_size: usize, - ) -> Result< - Option<(Collation, ParachainBlockData, Block::Hash)>, - Box, - > { + ) -> Result>, Box> { let mut digest = additional_pre_digest.into().unwrap_or_default(); digest.push(slot_claim.pre_digest.clone()); @@ -205,7 +194,6 @@ where ) .map_err(|e| e as Box)?; - let post_hash = sealed_importable.post_hash(); let block = Block::new( sealed_importable.post_header(), sealed_importable @@ -220,11 +208,46 @@ where .map_err(|e| Box::new(e) as Box) .await?; - if let Some((collation, block_data)) = self.collator_service.build_collation( - parent_header, - post_hash, - ParachainCandidate { block, proof: proposal.proof }, - ) { + Ok(Some(ParachainCandidate { block, proof: proposal.proof })) + } + + /// Propose, seal, and import a block, packaging it into a collation. + /// + /// Provide the slot to build at as well as any other necessary pre-digest logs, + /// the inherent data, and the proposal duration and PoV size limits. + /// + /// The Aura pre-digest should not be explicitly provided and is set internally. + /// + /// This does not announce the collation to the parachain network or the relay chain. 
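+	///
+	/// Internally, this now delegates block production to
+	/// [`Self::build_block_and_import`] and packages the resulting candidate into a
+	/// collation via the collator service.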
+ pub async fn collate( + &mut self, + parent_header: &Block::Header, + slot_claim: &SlotClaim, + additional_pre_digest: impl Into>>, + inherent_data: (ParachainInherentData, InherentData), + proposal_duration: Duration, + max_pov_size: usize, + ) -> Result< + Option<(Collation, ParachainBlockData, Block::Hash)>, + Box, + > { + let maybe_candidate = self + .build_block_and_import( + parent_header, + slot_claim, + additional_pre_digest, + inherent_data, + proposal_duration, + max_pov_size, + ) + .await?; + + let Some(candidate) = maybe_candidate else { return Ok(None) }; + + let hash = candidate.block.header().hash(); + if let Some((collation, block_data)) = + self.collator_service.build_collation(parent_header, hash, candidate) + { tracing::info!( target: crate::LOG_TARGET, "PoV size {{ header: {}kb, extrinsics: {}kb, storage_proof: {}kb }}", @@ -241,7 +264,7 @@ where ); } - Ok(Some((collation, block_data, post_hash))) + Ok(Some((collation, block_data, hash))) } else { Err(Box::::from("Unable to produce collation") as Box) From 70afa254161517352fbf0da5346b25cae21ca40e Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 3 Apr 2024 10:58:44 +0200 Subject: [PATCH 02/71] Remove unused SyncOracle, light reformat --- Cargo.lock | 1 - .../consensus/aura/src/collators/basic.rs | 14 +-- .../consensus/aura/src/collators/lookahead.rs | 12 +- .../common/src/parachain_consensus.rs | 105 +++++++++--------- cumulus/polkadot-parachain/Cargo.toml | 3 +- cumulus/polkadot-parachain/src/service.rs | 20 +--- .../basic-authorship/src/basic_authorship.rs | 6 +- .../client/consensus/aura/src/standalone.rs | 6 +- templates/parachain/node/src/service.rs | 13 +-- 9 files changed, 79 insertions(+), 101 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6467f1e8a14f..0bbeaab00dd8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13330,7 +13330,6 @@ dependencies = [ "sc-consensus", "sc-executor", "sc-network", - "sc-network-sync", "sc-rpc", "sc-service", "sc-sysinfo", diff --git a/cumulus/client/consensus/aura/src/collators/basic.rs b/cumulus/client/consensus/aura/src/collators/basic.rs index a4c22a45266c..8628968cbfbb 100644 --- a/cumulus/client/consensus/aura/src/collators/basic.rs +++ b/cumulus/client/consensus/aura/src/collators/basic.rs @@ -41,19 +41,18 @@ use sc_consensus::BlockImport; use sp_api::{CallApiAt, ProvideRuntimeApi}; use sp_application_crypto::AppPublic; use sp_blockchain::HeaderBackend; -use sp_consensus::SyncOracle; -use sp_consensus_aura::AuraApi; +use sp_consensus_aura::{AuraApi, SlotDuration}; use sp_core::crypto::Pair; use sp_inherents::CreateInherentDataProviders; use sp_keystore::KeystorePtr; use sp_runtime::traits::{Block as BlockT, Header as HeaderT, Member}; use sp_state_machine::Backend as _; -use std::{convert::TryFrom, sync::Arc, time::Duration}; +use std::{sync::Arc, time::Duration}; use crate::collator as collator_util; /// Parameters for [`run`]. -pub struct Params { +pub struct Params { /// Inherent data providers. Only non-consensus inherent data should be provided, i.e. /// the timestamp, slot, and paras inherents should be omitted, as they are set by this /// collator. @@ -64,8 +63,6 @@ pub struct Params { pub para_client: Arc, /// A handle to the relay-chain client. pub relay_client: RClient, - /// A chain synchronization oracle. - pub sync_oracle: SO, /// The underlying keystore, which should contain Aura consensus keys. pub keystore: KeystorePtr, /// The collator key used to sign collations before submitting to validators. 
@@ -89,8 +86,8 @@ pub struct Params { } /// Run bare Aura consensus as a relay-chain-driven collator. -pub fn run( - params: Params, +pub fn run( + params: Params, ) -> impl Future + Send + 'static where Block: BlockT + Send, @@ -108,7 +105,6 @@ where CIDP: CreateInherentDataProviders + Send + 'static, CIDP::InherentDataProviders: Send, BI: BlockImport + ParachainBlockImportMarker + Send + Sync + 'static, - SO: SyncOracle + Send + Sync + Clone + 'static, Proposer: ProposerInterface + Send + Sync + 'static, CS: CollatorServiceInterface + Send + Sync + 'static, P: Pair, diff --git a/cumulus/client/consensus/aura/src/collators/lookahead.rs b/cumulus/client/consensus/aura/src/collators/lookahead.rs index 2b774128c1fb..3dcc6f70db53 100644 --- a/cumulus/client/consensus/aura/src/collators/lookahead.rs +++ b/cumulus/client/consensus/aura/src/collators/lookahead.rs @@ -60,19 +60,18 @@ use sc_consensus_aura::standalone as aura_internal; use sp_api::ProvideRuntimeApi; use sp_application_crypto::AppPublic; use sp_blockchain::HeaderBackend; -use sp_consensus::SyncOracle; use sp_consensus_aura::{AuraApi, Slot}; use sp_core::crypto::Pair; use sp_inherents::CreateInherentDataProviders; use sp_keystore::KeystorePtr; use sp_runtime::traits::{Block as BlockT, Header as HeaderT, Member}; use sp_timestamp::Timestamp; -use std::{convert::TryFrom, sync::Arc, time::Duration}; +use std::{sync::Arc, time::Duration}; use crate::collator::{self as collator_util, SlotClaim}; /// Parameters for [`run`]. -pub struct Params { +pub struct Params { /// Inherent data providers. Only non-consensus inherent data should be provided, i.e. /// the timestamp, slot, and paras inherents should be omitted, as they are set by this /// collator. @@ -87,8 +86,6 @@ pub struct Params { pub relay_client: RClient, /// A validation code hash provider, used to get the current validation code hash. pub code_hash_provider: CHP, - /// A chain synchronization oracle. - pub sync_oracle: SO, /// The underlying keystore, which should contain Aura consensus keys. pub keystore: KeystorePtr, /// The collator key used to sign collations before submitting to validators. @@ -110,8 +107,8 @@ pub struct Params { } /// Run async-backing-friendly Aura. -pub fn run( - mut params: Params, +pub fn run( + mut params: Params, ) -> impl Future + Send + 'static where Block: BlockT, @@ -130,7 +127,6 @@ where CIDP: CreateInherentDataProviders + 'static, CIDP::InherentDataProviders: Send, BI: BlockImport + ParachainBlockImportMarker + Send + Sync + 'static, - SO: SyncOracle + Send + Sync + Clone + 'static, Proposer: ProposerInterface + Send + Sync + 'static, CS: CollatorServiceInterface + Send + Sync + 'static, CHP: consensus_common::ValidationCodeHashProvider + Send + 'static, diff --git a/cumulus/client/consensus/common/src/parachain_consensus.rs b/cumulus/client/consensus/common/src/parachain_consensus.rs index b4b315bb32be..a8f33f93032f 100644 --- a/cumulus/client/consensus/common/src/parachain_consensus.rs +++ b/cumulus/client/consensus/common/src/parachain_consensus.rs @@ -375,60 +375,61 @@ async fn handle_new_best_parachain_head( target: LOG_TARGET, block_hash = ?hash, "Skipping set new best block, because block is already the best.", - ) - } else { - // Make sure the block is already known or otherwise we skip setting new best. 
- match parachain.block_status(hash) { - Ok(BlockStatus::InChainWithState) => { - unset_best_header.take(); - tracing::debug!( - target: LOG_TARGET, - ?hash, - "Importing block as new best for parachain.", - ); - import_block_as_new_best(hash, parachain_head, parachain).await; - }, - Ok(BlockStatus::InChainPruned) => { - tracing::error!( - target: LOG_TARGET, - block_hash = ?hash, - "Trying to set pruned block as new best!", - ); - }, - Ok(BlockStatus::Unknown) => { - *unset_best_header = Some(parachain_head); + ); + return; + } - tracing::debug!( - target: LOG_TARGET, - block_hash = ?hash, - "Parachain block not yet imported, waiting for import to enact as best block.", - ); - - if let Some(ref mut recovery_chan_tx) = recovery_chan_tx { - // Best effort channel to actively encourage block recovery. - // An error here is not fatal; the relay chain continuously re-announces - // the best block, thus we will have other opportunities to retry. - let req = RecoveryRequest { hash, kind: RecoveryKind::Full }; - if let Err(err) = recovery_chan_tx.try_send(req) { - tracing::warn!( - target: LOG_TARGET, - block_hash = ?hash, - error = ?err, - "Unable to notify block recovery subsystem" - ) - } + // Make sure the block is already known or otherwise we skip setting new best. + match parachain.block_status(hash) { + Ok(BlockStatus::InChainWithState) => { + unset_best_header.take(); + tracing::debug!( + target: LOG_TARGET, + ?hash, + "Importing block as new best for parachain.", + ); + import_block_as_new_best(hash, parachain_head, parachain).await; + }, + Ok(BlockStatus::InChainPruned) => { + tracing::error!( + target: LOG_TARGET, + block_hash = ?hash, + "Trying to set pruned block as new best!", + ); + }, + Ok(BlockStatus::Unknown) => { + *unset_best_header = Some(parachain_head); + + tracing::debug!( + target: LOG_TARGET, + block_hash = ?hash, + "Parachain block not yet imported, waiting for import to enact as best block.", + ); + + if let Some(ref mut recovery_chan_tx) = recovery_chan_tx { + // Best effort channel to actively encourage block recovery. + // An error here is not fatal; the relay chain continuously re-announces + // the best block, thus we will have other opportunities to retry. 
+ let req = RecoveryRequest { hash, kind: RecoveryKind::Full }; + if let Err(err) = recovery_chan_tx.try_send(req) { + tracing::warn!( + target: LOG_TARGET, + block_hash = ?hash, + error = ?err, + "Unable to notify block recovery subsystem" + ) } - }, - Err(e) => { - tracing::error!( - target: LOG_TARGET, - block_hash = ?hash, - error = ?e, - "Failed to get block status of block.", - ); - }, - _ => {}, - } + } + }, + Err(e) => { + tracing::error!( + target: LOG_TARGET, + block_hash = ?hash, + error = ?e, + "Failed to get block status of block.", + ); + }, + _ => {}, } } diff --git a/cumulus/polkadot-parachain/Cargo.toml b/cumulus/polkadot-parachain/Cargo.toml index 280ece30fb68..6e74c2587fed 100644 --- a/cumulus/polkadot-parachain/Cargo.toml +++ b/cumulus/polkadot-parachain/Cargo.toml @@ -66,7 +66,6 @@ sc-telemetry = { path = "../../substrate/client/telemetry" } sc-transaction-pool = { path = "../../substrate/client/transaction-pool" } sp-transaction-pool = { path = "../../substrate/primitives/transaction-pool" } sc-network = { path = "../../substrate/client/network" } -sc-network-sync = { path = "../../substrate/client/network/sync" } sc-basic-authorship = { path = "../../substrate/client/basic-authorship" } sp-timestamp = { path = "../../substrate/primitives/timestamp" } sp-blockchain = { path = "../../substrate/primitives/blockchain" } @@ -112,6 +111,7 @@ cumulus-primitives-aura = { path = "../primitives/aura" } cumulus-primitives-core = { path = "../primitives/core" } cumulus-relay-chain-interface = { path = "../client/relay-chain-interface" } color-print = "0.3.4" +tokio = { version = "1.32.0", features = ["macros", "parking_lot", "time"] } [build-dependencies] substrate-build-script-utils = { path = "../../substrate/utils/build-script-utils" } @@ -120,7 +120,6 @@ substrate-build-script-utils = { path = "../../substrate/utils/build-script-util assert_cmd = "2.0" nix = { version = "0.26.1", features = ["signal"] } tempfile = "3.8.0" -tokio = { version = "1.32.0", features = ["macros", "parking_lot", "time"] } wait-timeout = "0.2" [features] diff --git a/cumulus/polkadot-parachain/src/service.rs b/cumulus/polkadot-parachain/src/service.rs index 2dd3541e85f4..c4facd3ef8d5 100644 --- a/cumulus/polkadot-parachain/src/service.rs +++ b/cumulus/polkadot-parachain/src/service.rs @@ -52,7 +52,6 @@ use sc_consensus::{ }; use sc_executor::{HeapAllocStrategy, WasmExecutor, DEFAULT_HEAP_ALLOC_STRATEGY}; use sc_network::{config::FullNetworkConfiguration, service::traits::NetworkBackend, NetworkBlock}; -use sc_network_sync::SyncingService; use sc_service::{Configuration, PartialComponents, TFullBackend, TFullClient, TaskManager}; use sc_telemetry::{Telemetry, TelemetryHandle, TelemetryWorker, TelemetryWorkerHandle}; use sp_api::{ApiExt, ConstructRuntimeApi, ProvideRuntimeApi}; @@ -235,7 +234,6 @@ where &TaskManager, Arc, Arc>>, - Arc>, KeystorePtr, Duration, ParaId, @@ -369,7 +367,6 @@ where &task_manager, relay_chain_interface.clone(), transaction_pool, - sync_service.clone(), params.keystore_container.keystore(), relay_chain_slot_duration, para_id, @@ -710,7 +707,6 @@ pub async fn start_generic_aura_node>( task_manager, relay_chain_interface, transaction_pool, - sync_oracle, keystore, relay_chain_slot_duration, para_id, @@ -739,7 +735,6 @@ pub async fn start_generic_aura_node>( block_import, para_client: client, relay_client: relay_chain_interface, - sync_oracle, keystore, collator_key, para_id, @@ -753,7 +748,7 @@ pub async fn start_generic_aura_node>( }; let fut = - basic_aura::run::::Pair, _, _, 
_, _, _, _, _>(params); + basic_aura::run::::Pair, _, _, _, _, _, _>(params); task_manager.spawn_essential_handle().spawn("aura", None, fut); Ok(()) @@ -828,7 +823,6 @@ where task_manager, relay_chain_interface, transaction_pool, - sync_oracle, keystore, relay_chain_slot_duration, para_id, @@ -896,7 +890,6 @@ where block_import, para_client: client, relay_client: relay_chain_interface2, - sync_oracle, keystore, collator_key, para_id, @@ -909,7 +902,7 @@ where collation_request_receiver: Some(request_stream), }; - basic_aura::run::::Pair, _, _, _, _, _, _, _>(params) + basic_aura::run::::Pair, _, _, _, _, _, _>(params) .await }); @@ -972,7 +965,6 @@ where task_manager, relay_chain_interface, transaction_pool, - sync_oracle, keystore, relay_chain_slot_duration, para_id, @@ -1045,7 +1037,6 @@ where code_hash_provider: move |block_hash| { client.code_at(block_hash).ok().map(|c| ValidationCode::from(c).hash()) }, - sync_oracle, keystore, collator_key, para_id, @@ -1058,7 +1049,7 @@ where * to aura */ }; - aura::run::::Pair, _, _, _, _, _, _, _, _, _>(params) + aura::run::::Pair, _, _, _, _, _, _, _, _>(params) .await }); @@ -1082,7 +1073,6 @@ fn start_relay_chain_consensus( task_manager: &TaskManager, relay_chain_interface: Arc, transaction_pool: Arc>>, - _sync_oracle: Arc>, _keystore: KeystorePtr, _relay_chain_slot_duration: Duration, para_id: ParaId, @@ -1153,7 +1143,6 @@ fn start_lookahead_aura_consensus( task_manager: &TaskManager, relay_chain_interface: Arc, transaction_pool: Arc>>, - sync_oracle: Arc>, keystore: KeystorePtr, relay_chain_slot_duration: Duration, para_id: ParaId, @@ -1186,7 +1175,6 @@ fn start_lookahead_aura_consensus( code_hash_provider: move |block_hash| { client.code_at(block_hash).ok().map(|c| ValidationCode::from(c).hash()) }, - sync_oracle, keystore, collator_key, para_id, @@ -1198,7 +1186,7 @@ fn start_lookahead_aura_consensus( reinitialize: false, }; - let fut = aura::run::::Pair, _, _, _, _, _, _, _, _, _>(params); + let fut = aura::run::::Pair, _, _, _, _, _, _, _, _>(params); task_manager.spawn_essential_handle().spawn("aura", None, fut); Ok(()) diff --git a/substrate/client/basic-authorship/src/basic_authorship.rs b/substrate/client/basic-authorship/src/basic_authorship.rs index 1519c76c42c0..74805488792a 100644 --- a/substrate/client/basic-authorship/src/basic_authorship.rs +++ b/substrate/client/basic-authorship/src/basic_authorship.rs @@ -205,7 +205,11 @@ where ) -> Proposer { let parent_hash = parent_header.hash(); - info!("🙌 Starting consensus session on top of parent {:?}", parent_hash); + info!( + "🙌 Starting consensus session on top of parent {:?} (#{})", + parent_hash, + parent_header.number() + ); let proposer = Proposer::<_, _, _, PR> { spawn_handle: self.spawn_handle.clone(), diff --git a/substrate/client/consensus/aura/src/standalone.rs b/substrate/client/consensus/aura/src/standalone.rs index 0f9b8668d447..c1536d9ef73f 100644 --- a/substrate/client/consensus/aura/src/standalone.rs +++ b/substrate/client/consensus/aura/src/standalone.rs @@ -24,7 +24,7 @@ use log::trace; use codec::Codec; -use sc_client_api::{backend::AuxStore, UsageProvider}; +use sc_client_api::UsageProvider; use sp_api::{Core, ProvideRuntimeApi}; use sp_application_crypto::{AppCrypto, AppPublic}; use sp_blockchain::Result as CResult; @@ -48,7 +48,7 @@ pub fn slot_duration(client: &C) -> CResult where A: Codec, B: BlockT, - C: AuxStore + ProvideRuntimeApi + UsageProvider, + C: ProvideRuntimeApi + UsageProvider, C::Api: AuraApi, { slot_duration_at(client, 
client.usage_info().chain.best_hash) @@ -59,7 +59,7 @@ pub fn slot_duration_at(client: &C, block_hash: B::Hash) -> CResult, + C: ProvideRuntimeApi, C::Api: AuraApi, { client.runtime_api().slot_duration(block_hash).map_err(|err| err.into()) diff --git a/templates/parachain/node/src/service.rs b/templates/parachain/node/src/service.rs index 373df01b0c43..c97a41ae8232 100644 --- a/templates/parachain/node/src/service.rs +++ b/templates/parachain/node/src/service.rs @@ -27,8 +27,7 @@ use frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE; use sc_client_api::Backend; use sc_consensus::ImportQueue; use sc_executor::{HeapAllocStrategy, WasmExecutor, DEFAULT_HEAP_ALLOC_STRATEGY}; -use sc_network::NetworkBlock; -use sc_network_sync::SyncingService; +use sc_network::{NetworkBackend, NetworkBlock}; use sc_service::{Configuration, PartialComponents, TFullBackend, TFullClient, TaskManager}; use sc_telemetry::{Telemetry, TelemetryHandle, TelemetryWorker, TelemetryWorkerHandle}; use sc_transaction_pool_api::OffchainTransactionPoolFactory; @@ -162,7 +161,6 @@ fn start_consensus( task_manager: &TaskManager, relay_chain_interface: Arc, transaction_pool: Arc>, - sync_oracle: Arc>, keystore: KeystorePtr, relay_chain_slot_duration: Duration, para_id: ParaId, @@ -196,7 +194,6 @@ fn start_consensus( block_import, para_client: client, relay_client: relay_chain_interface, - sync_oracle, keystore, collator_key, para_id, @@ -209,10 +206,9 @@ fn start_consensus( collation_request_receiver: None, }; - let fut = - basic_aura::run::( - params, - ); + let fut = basic_aura::run::( + params, + ); task_manager.spawn_essential_handle().spawn("aura", None, fut); Ok(()) @@ -388,7 +384,6 @@ pub async fn start_parachain_node( &task_manager, relay_chain_interface.clone(), transaction_pool, - sync_service.clone(), params.keystore_container.keystore(), relay_chain_slot_duration, para_id, From 7e01dca92e9e8393db934edb5a6f5f72dd53d420 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 3 Apr 2024 11:50:27 +0200 Subject: [PATCH 03/71] Add slot-based collator --- cumulus/client/consensus/aura/Cargo.toml | 2 + .../consensus/aura/src/collators/mod.rs | 1 + .../slot_based/block_builder_task.rs | 461 ++++++++++++++++++ .../collators/slot_based/collation_task.rs | 197 ++++++++ .../aura/src/collators/slot_based/mod.rs | 214 ++++++++ cumulus/client/consensus/common/src/lib.rs | 14 +- cumulus/client/consensus/common/src/tests.rs | 13 +- cumulus/client/network/src/tests.rs | 14 +- .../src/lib.rs | 13 +- .../client/relay-chain-interface/src/lib.rs | 18 +- .../relay-chain-rpc-interface/src/lib.rs | 9 +- cumulus/pallets/aura-ext/Cargo.toml | 1 + .../pallets/aura-ext/src/consensus_hook.rs | 17 +- cumulus/pallets/aura-ext/src/lib.rs | 2 +- substrate/client/consensus/slots/src/lib.rs | 2 +- 15 files changed, 960 insertions(+), 18 deletions(-) create mode 100644 cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs create mode 100644 cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs create mode 100644 cumulus/client/consensus/aura/src/collators/slot_based/mod.rs diff --git a/cumulus/client/consensus/aura/Cargo.toml b/cumulus/client/consensus/aura/Cargo.toml index 70dd67cb9a00..637c5288e28c 100644 --- a/cumulus/client/consensus/aura/Cargo.toml +++ b/cumulus/client/consensus/aura/Cargo.toml @@ -15,6 +15,7 @@ codec = { package = "parity-scale-codec", version = "3.0.0", features = ["derive futures = "0.3.28" tracing = "0.1.37" schnellru = "0.2.1" +tokio = { version = "1.36.0", features = ["sync"] } # 
Substrate sc-client-api = { path = "../../../../substrate/client/api" } @@ -51,3 +52,4 @@ polkadot-primitives = { path = "../../../../polkadot/primitives" } polkadot-node-primitives = { path = "../../../../polkadot/node/primitives" } polkadot-node-subsystem = { path = "../../../../polkadot/node/subsystem" } polkadot-overseer = { path = "../../../../polkadot/node/overseer" } +sc-service = { version = "0.35.0", path = "../../../../substrate/client/service" } diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs index 6e0067d0cedb..7ee236e910da 100644 --- a/cumulus/client/consensus/aura/src/collators/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/mod.rs @@ -27,6 +27,7 @@ use polkadot_primitives::{ pub mod basic; pub mod lookahead; +pub mod slot_based; /// Check the `local_validation_code_hash` against the validation code hash in the relay chain /// state. diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs new file mode 100644 index 000000000000..30ebcfee9561 --- /dev/null +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -0,0 +1,461 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Cumulus. + +// Cumulus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Cumulus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Cumulus. If not, see . + +//! A collator for Aura that looks ahead of the most recently included parachain block +//! when determining what to build upon. +//! +//! The block building mechanism consists of two parts: +//! 1. A block-builder task that builds parachain blocks at each of our slot. +//! 2. A collator task that transforms the blocks into a collation and submits them to the relay +//! chain. +//! +//! This collator also builds additional blocks when the maximum backlog is not saturated. +//! The size of the backlog is determined by invoking a runtime API. If that runtime API +//! is not supported, this assumes a maximum backlog size of 1. +//! +//! This takes more advantage of asynchronous backing, though not complete advantage. +//! When the backlog is not saturated, this approach lets the backlog temporarily 'catch up' +//! with periods of higher throughput. When the backlog is saturated, we typically +//! fall back to the limited cadence of a single parachain block per relay-chain block. +//! +//! Despite this, the fact that there is a backlog at all allows us to spend more time +//! building the block, as there is some buffer before it can get posted to the relay-chain. +//! The main limitation is block propagation time - i.e. the new blocks created by an author +//! must be propagated to the next author before their turn. 
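+//!
+//! A minimal sketch of the slot timing that drives the builder task (illustrative
+//! only; `SlotTimer` and `SlotAndTime` are the private helpers defined below, and
+//! the 6-second slot duration is an assumed example value):
+//!
+//! ```ignore
+//! let timer = SlotTimer::new(SlotDuration::from_millis(6000));
+//! loop {
+//!     // Sleep until the next parachain slot boundary, then obtain the slot
+//!     // number together with the timestamp it was derived from.
+//!     let SlotAndTime { slot, timestamp } = timer.wait_until_next_slot().await;
+//!     // ... claim the slot and attempt to build a block ...
+//! }
+//! ```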
+ +use codec::{Codec, Encode}; + +use cumulus_client_collator::service::ServiceInterface as CollatorServiceInterface; +use cumulus_client_consensus_common::{ + self as consensus_common, load_abridged_host_configuration, ParachainBlockImportMarker, + ParentSearchParams, +}; +use cumulus_client_consensus_proposer::ProposerInterface; +use cumulus_primitives_aura::AuraUnincludedSegmentApi; +use cumulus_primitives_core::{ + relay_chain::Hash as PHash, CollectCollationInfo, PersistedValidationData, +}; +use cumulus_relay_chain_interface::RelayChainInterface; + +use polkadot_primitives::{BlockId, Id as ParaId, OccupiedCoreAssumption}; + +use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf, UsageProvider}; +use sc_consensus::BlockImport; +use sc_consensus_aura::standalone as aura_internal; +use sc_consensus_slots::time_until_next_slot; +use sp_api::ProvideRuntimeApi; +use sp_application_crypto::AppPublic; +use sp_blockchain::HeaderBackend; +use sp_consensus_aura::{AuraApi, Slot, SlotDuration}; +use sp_core::crypto::Pair; +use sp_inherents::CreateInherentDataProviders; +use sp_keystore::KeystorePtr; +use sp_runtime::traits::{Block as BlockT, Header as HeaderT, Member}; +use sp_timestamp::Timestamp; +use std::{sync::Arc, time::Duration}; + +use super::{scheduled_cores, CollatorMessage}; +use crate::{ + collator::{self as collator_util, SlotClaim}, + collators::check_validation_code_or_log, + LOG_TARGET, +}; + +const PARENT_SEARCH_DEPTH: usize = 10; + +/// Parameters for [`run`]. +pub struct BuilderTaskParams { + /// Inherent data providers. Only non-consensus inherent data should be provided, i.e. + /// the timestamp, slot, and paras inherents should be omitted, as they are set by this + /// collator. + pub create_inherent_data_providers: CIDP, + /// Used to actually import blocks. + pub block_import: BI, + /// The underlying para client. + pub para_client: Arc, + /// The para client's backend, used to access the database. + pub para_backend: Arc, + /// A handle to the relay-chain client. + pub relay_client: RClient, + /// A validation code hash provider, used to get the current validation code hash. + pub code_hash_provider: CHP, + /// The underlying keystore, which should contain Aura consensus keys. + pub keystore: KeystorePtr, + /// The para's ID. + pub para_id: ParaId, + /// The underlying block proposer this should call into. + pub proposer: Proposer, + /// The generic collator service used to plug into this consensus engine. + pub collator_service: CS, + /// The amount of time to spend authoring each block. + pub authoring_duration: Duration, + /// Channel to send built blocks to the collation task. + pub collator_sender: tokio::sync::mpsc::Sender>, +} + +#[derive(Debug)] +struct SlotAndTime { + timestamp: Timestamp, + slot: Slot, +} + +#[derive(Debug)] +struct SlotTimer { + slot_duration: SlotDuration, +} + +impl SlotTimer { + pub fn new(slot_duration: SlotDuration) -> Self { + Self { slot_duration } + } + + pub async fn wait_until_next_slot(&self) -> SlotAndTime { + let time_until_next_slot = time_until_next_slot(self.slot_duration.as_duration()); + tokio::time::sleep(time_until_next_slot).await; + let timestamp = sp_timestamp::Timestamp::current(); + SlotAndTime { slot: Slot::from_timestamp(timestamp, self.slot_duration), timestamp } + } +} + +/// Reads allowed ancestry length parameter from the relay chain storage at the given relay parent. +/// +/// Falls back to 0 in case of an error. 
+async fn max_ancestry_lookback( + relay_parent: PHash, + relay_client: &impl RelayChainInterface, +) -> usize { + match load_abridged_host_configuration(relay_parent, relay_client).await { + Ok(Some(config)) => config.async_backing_params.allowed_ancestry_len as usize, + Ok(None) => { + tracing::error!( + target: crate::LOG_TARGET, + "Active config is missing in relay chain storage", + ); + 0 + }, + Err(err) => { + tracing::error!( + target: crate::LOG_TARGET, + ?err, + ?relay_parent, + "Failed to read active config from relay chain client", + ); + 0 + }, + } +} + +// Checks if we own the slot at the given block and whether there +// is space in the unincluded segment. +async fn can_build_upon( + slot: Slot, + timestamp: Timestamp, + parent_hash: Block::Hash, + included_block: Block::Hash, + client: &Client, + keystore: &KeystorePtr, +) -> Option> +where + Client: ProvideRuntimeApi, + Client::Api: AuraApi + AuraUnincludedSegmentApi, + P: Pair, + P::Public: Codec, + P::Signature: Codec, +{ + let runtime_api = client.runtime_api(); + let authorities = runtime_api.authorities(parent_hash).ok()?; + let author_pub = aura_internal::claim_slot::
<P>
(slot, &authorities, keystore).await?; + + // Here we lean on the property that building on an empty unincluded segment must always + // be legal. Skipping the runtime API query here allows us to seamlessly run this + // collator against chains which have not yet upgraded their runtime. + if parent_hash != included_block { + if !runtime_api.can_build_upon(parent_hash, included_block, slot).ok()? { + tracing::debug!( + target: crate::LOG_TARGET, + ?parent_hash, + ?included_block, + ?slot, + "Cannot build on top of the current block, skipping slot." + ); + return None + } + } + + Some(SlotClaim::unchecked::
<P>
(author_pub, slot, timestamp)) +} + +/// Run block-builder. +pub async fn run_block_builder( + params: BuilderTaskParams, +) where + Block: BlockT, + Client: ProvideRuntimeApi + + UsageProvider + + BlockOf + + AuxStore + + HeaderBackend + + BlockBackend + + Send + + Sync + + 'static, + Client::Api: + AuraApi + CollectCollationInfo + AuraUnincludedSegmentApi, + Backend: sc_client_api::Backend + 'static, + RClient: RelayChainInterface + Clone + 'static, + CIDP: CreateInherentDataProviders + 'static, + CIDP::InherentDataProviders: Send, + BI: BlockImport + ParachainBlockImportMarker + Send + Sync + 'static, + Proposer: ProposerInterface + Send + Sync + 'static, + CS: CollatorServiceInterface + Send + Sync + 'static, + CHP: consensus_common::ValidationCodeHashProvider + Send + 'static, + P: Pair, + P::Public: AppPublic + Member + Codec, + P::Signature: TryFrom> + Member + Codec, +{ + let BuilderTaskParams { + relay_client, + create_inherent_data_providers, + para_client, + keystore, + block_import, + para_id, + proposer, + collator_service, + collator_sender, + code_hash_provider, + authoring_duration, + para_backend, + } = params; + + let slot_duration = match crate::slot_duration(&*para_client) { + Ok(s) => s, + Err(e) => { + tracing::error!(target: crate::LOG_TARGET, ?e, "Failed to fetch slot duration from runtime. Killing collator task."); + return + }, + }; + + let slot_timer = SlotTimer::new(slot_duration); + + let mut collator = { + let params = collator_util::Params { + create_inherent_data_providers, + block_import, + relay_client: relay_client.clone(), + keystore: keystore.clone(), + para_id, + proposer, + collator_service, + }; + + collator_util::Collator::::new(params) + }; + + loop { + // We wait here until the next slot arrives. + let para_slot = slot_timer.wait_until_next_slot().await; + + let Ok(relay_parent) = relay_client.best_block_hash().await else { + tracing::warn!("Unable to fetch latest relay chain block hash, skipping slot."); + continue; + }; + + let scheduled_cores = scheduled_cores(relay_parent, para_id, &relay_client).await; + if scheduled_cores.is_empty() { + tracing::debug!(target: LOG_TARGET, "Parachain not scheduled, skipping slot."); + continue; + } + + let Ok(Some(relay_parent_header)) = relay_client.header(BlockId::Hash(relay_parent)).await + else { + tracing::warn!("Unable to fetch latest relay chain block header."); + continue; + }; + + let max_pov_size = match relay_client + .persisted_validation_data(relay_parent, para_id, OccupiedCoreAssumption::Included) + .await + { + Ok(None) => continue, + Ok(Some(pvd)) => pvd.max_pov_size, + Err(err) => { + tracing::error!(target: crate::LOG_TARGET, ?err, "Failed to gather information from relay-client"); + continue + }, + }; + + let (included_block, parent) = + match find_parent(relay_parent, para_id, &*para_backend, &relay_client).await { + Some(value) => value, + None => continue, + }; + + let parent_header = parent.header; + let parent_hash = parent.hash; + + // We mainly call this to inform users at genesis if there is a mismatch with the + // on-chain data. + collator.collator_service().check_block_status(parent_hash, &parent_header); + + let slot_claim = match can_build_upon::<_, _, P>( + para_slot.slot, + para_slot.timestamp, + parent_hash, + included_block, + &*para_client, + &keystore, + ) + .await + { + Some(slot) => slot, + None => continue, + }; + + tracing::debug!( + target: crate::LOG_TARGET, + ?relay_parent, + slot_claim = ?para_slot.slot, + unincluded_segment_len = parent.depth, + "Slot claimed. 
Building" + ); + + let validation_data = PersistedValidationData { + parent_head: parent_header.encode().into(), + relay_parent_number: *relay_parent_header.number(), + relay_parent_storage_root: *relay_parent_header.state_root(), + max_pov_size, + }; + + // Build and announce collations recursively until + // `can_build_upon` fails or building a collation fails. + let (parachain_inherent_data, other_inherent_data) = match collator + .create_inherent_data( + relay_parent, + &validation_data, + parent_hash, + slot_claim.timestamp(), + ) + .await + { + Err(err) => { + tracing::error!(target: crate::LOG_TARGET, ?err); + break + }, + Ok(x) => x, + }; + + let validation_code_hash = match code_hash_provider.code_hash_at(parent_hash) { + None => { + tracing::error!(target: crate::LOG_TARGET, ?parent_hash, "Could not fetch validation code hash"); + break + }, + Some(v) => v, + }; + + check_validation_code_or_log(&validation_code_hash, para_id, &relay_client, relay_parent) + .await; + + let Ok(Some(candidate)) = collator + .build_block_and_import( + &parent_header, + &slot_claim, + None, + (parachain_inherent_data, other_inherent_data), + authoring_duration, + // Set the block limit to 50% of the maximum PoV size. + // + // TODO: If we got benchmarking that includes the proof size, + // we should be able to use the maximum pov size. + (validation_data.max_pov_size / 2) as usize, + ) + .await + else { + tracing::error!(target: crate::LOG_TARGET, "Unable to build block at slot."); + continue; + }; + + let new_block_hash = candidate.block.header().hash(); + + // Announce the newly built block to our peers. + collator.collator_service().announce_block(new_block_hash, None); + + if let Err(err) = collator_sender + .send(CollatorMessage { + relay_parent: relay_parent_header.hash(), + parent_header, + parachain_candidate: candidate, + hash: new_block_hash, + validation_code_hash, + }) + .await + { + tracing::error!(target: crate::LOG_TARGET, ?err, "Unable to send block to collation task."); + } + } +} + +async fn find_parent( + relay_parent: PHash, + para_id: ParaId, + para_backend: &impl sc_client_api::Backend, + relay_client: &impl RelayChainInterface, +) -> Option<(::Hash, consensus_common::PotentialParent)> +where + Block: BlockT, +{ + let parent_search_params = ParentSearchParams { + relay_parent, + para_id, + ancestry_lookback: max_ancestry_lookback(relay_parent, relay_client).await, + max_depth: PARENT_SEARCH_DEPTH, + ignore_alternative_branches: true, + }; + + let potential_parents = cumulus_client_consensus_common::find_potential_parents::( + parent_search_params, + para_backend, + relay_client, + ) + .await; + + let mut potential_parents = match potential_parents { + Err(e) => { + tracing::error!( + target: crate::LOG_TARGET, + ?relay_parent, + err = ?e, + "Could not fetch potential parents to build upon" + ); + + return None + }, + Ok(x) => x, + }; + + let included_block = match potential_parents.iter().find(|x| x.depth == 0) { + None => return None, // also serves as an `is_empty` check. 
+ Some(b) => b.hash, + }; + potential_parents.sort_by_key(|a| a.depth); + + let parent = match potential_parents.pop() { + None => return None, + Some(p) => p, + }; + + Some((included_block, parent)) +} diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs new file mode 100644 index 000000000000..8958fb33ccdb --- /dev/null +++ b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs @@ -0,0 +1,197 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Cumulus. + +// Cumulus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Cumulus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Cumulus. If not, see . + +use std::collections::VecDeque; + +use codec::Encode; + +use cumulus_client_collator::service::ServiceInterface as CollatorServiceInterface; +use cumulus_relay_chain_interface::RelayChainInterface; + +use polkadot_node_primitives::{MaybeCompressedPoV, SubmitCollationParams}; +use polkadot_node_subsystem::messages::CollationGenerationMessage; +use polkadot_overseer::Handle as OverseerHandle; +use polkadot_primitives::{CollatorPair, CoreIndex, Id as ParaId}; + +use futures::prelude::*; + +use sp_runtime::traits::{Block as BlockT, Header}; + +use super::{scheduled_cores, CollatorMessage}; + +const LOG_TARGET: &str = "aura::cumulus::collation_task"; + +/// Parameters for the collation task. +pub struct Params { + /// A handle to the relay-chain client. + pub relay_client: RClient, + /// The collator key used to sign collations before submitting to validators. + pub collator_key: CollatorPair, + /// The para's ID. + pub para_id: ParaId, + /// A handle to the relay-chain client's "Overseer" or task orchestrator. + pub overseer_handle: OverseerHandle, + /// Whether we should reinitialize the collator config (i.e. we are transitioning to aura). + pub reinitialize: bool, + /// Collator service interface + pub collator_service: CS, + /// Receiver channel for communication with the block builder task. + pub collator_receiver: tokio::sync::mpsc::Receiver>, +} + +/// Asynchronously executes the collation task for a parachain. +/// +/// This function initializes the collator subsystems necessary for producing and submitting +/// collations to the relay chain. It listens for new best relay chain block notifications and +/// handles collator messages. If our parachain is scheduled on a core and we have a candidate, +/// the task will build a collation and send it to the relay chain. 
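+///
+/// Each new best relay chain block refreshes the queue of cores scheduled for the
+/// parachain; queued candidates are then submitted to the relay chain, one per
+/// scheduled core.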
+pub async fn run_collation_task(mut params: Params) +where + Block: BlockT, + CS: CollatorServiceInterface + Send + Sync + 'static, + RClient: RelayChainInterface + Clone + 'static, +{ + cumulus_client_collator::initialize_collator_subsystems( + &mut params.overseer_handle, + params.collator_key, + params.para_id, + params.reinitialize, + ) + .await; + + let collator_service = params.collator_service; + let mut best_notifications = match params.relay_client.new_best_notification_stream().await { + Ok(s) => s, + Err(err) => { + tracing::error!( + target: LOG_TARGET, + ?err, + "Failed to initialize consensus: no relay chain import notification stream" + ); + + return + }, + }; + + let mut overseer_handle = params.overseer_handle; + let mut core_queue = Default::default(); + let mut messages = VecDeque::new(); + loop { + tokio::select! { + // Check for scheduled cores. + Some(notification) = best_notifications.next() => { + core_queue = + scheduled_cores(notification.hash(), params.para_id, ¶ms.relay_client).await; + tracing::debug!( + target: LOG_TARGET, + relay_parent = ?notification.hash(), + ?params.para_id, + cores = ?core_queue, + "New best relay block.", + ); + }, + // Add new message from the block builder to the queue. + collator_message = params.collator_receiver.recv() => { + if let Some(message) = collator_message { + tracing::debug!( + target: LOG_TARGET, + hash = ?message.hash, + "Pushing new message.", + ); + messages.push_back(message); + } + } + } + + while !core_queue.is_empty() { + // If there are no more messages to process, we wait for new messages. + let Some(message) = messages.pop_front() else { + break; + }; + + handle_collation_message( + message, + &collator_service, + &mut overseer_handle, + &mut core_queue, + ) + .await; + } + } +} + +async fn handle_collation_message( + message: CollatorMessage, + collator_service: &impl CollatorServiceInterface, + overseer_handle: &mut OverseerHandle, + core_queue: &mut VecDeque, +) { + let CollatorMessage { + parent_header, + hash, + parachain_candidate, + validation_code_hash, + relay_parent, + } = message; + + if core_queue.is_empty() { + tracing::warn!(target: crate::LOG_TARGET, cores_for_para = core_queue.len(), "Not submitting since we have no cores left!."); + return; + } + + let number = parachain_candidate.block.header().number().clone(); + let (collation, block_data) = + match collator_service.build_collation(&parent_header, hash, parachain_candidate) { + Some(collation) => collation, + None => { + tracing::warn!(target: LOG_TARGET, ?hash, ?number, "Unable to build collation."); + return; + }, + }; + + tracing::info!( + target: LOG_TARGET, + "PoV size {{ header: {}kb, extrinsics: {}kb, storage_proof: {}kb }}", + block_data.header().encode().len() as f64 / 1024f64, + block_data.extrinsics().encode().len() as f64 / 1024f64, + block_data.storage_proof().encode().len() as f64 / 1024f64, + ); + + if let MaybeCompressedPoV::Compressed(ref pov) = collation.proof_of_validity { + tracing::info!( + target: LOG_TARGET, + "Compressed PoV size: {}kb", + pov.block_data.0.len() as f64 / 1024f64, + ); + } + + if let Some(core) = core_queue.pop_front() { + tracing::debug!(target: LOG_TARGET, ?core, ?hash, ?number, "Submitting collation for core."); + overseer_handle + .send_msg( + CollationGenerationMessage::SubmitCollation(SubmitCollationParams { + relay_parent, + collation, + parent_head: parent_header.encode().into(), + validation_code_hash, + core_index: core, + result_sender: None, + }), + "SubmitCollation", + ) + .await; + } +} 
diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs new file mode 100644 index 000000000000..d4525058063a --- /dev/null +++ b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs @@ -0,0 +1,214 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Cumulus. + +// Cumulus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Cumulus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Cumulus. If not, see . + +//! A collator for Aura that looks ahead of the most recently included parachain block +//! when determining what to build upon. +//! +//! The block building mechanism consists of two parts: +//! 1. A block-builder task that builds parachain blocks at each of our slots. +//! 2. A collator task that transforms the blocks into a collation and submits them to the relay +//! chain. +//! +//! This collator also builds additional blocks when the maximum backlog is not saturated. +//! The size of the backlog is determined by invoking a runtime API. If that runtime API +//! is not supported, this assumes a maximum backlog size of 1. +//! +//! This takes more advantage of asynchronous backing, though not complete advantage. +//! When the backlog is not saturated, this approach lets the backlog temporarily 'catch up' +//! with periods of higher throughput. When the backlog is saturated, we typically +//! fall back to the limited cadence of a single parachain block per relay-chain block. +//! +//! Despite this, the fact that there is a backlog at all allows us to spend more time +//! building the block, as there is some buffer before it can get posted to the relay-chain. +//! The main limitation is block propagation time - i.e. the new blocks created by an author +//! must be propagated to the next author before their turn. 
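+//!
+//! A rough usage sketch (assuming a typical node service with a task manager;
+//! parameter construction is elided and the task names are arbitrary):
+//!
+//! ```ignore
+//! let (collation_future, block_builder_future) =
+//!     slot_based::run::<Block, AuraPair, _, _, _, _, _, _, _, _>(params);
+//! task_manager
+//!     .spawn_essential_handle()
+//!     .spawn("collation-task", None, collation_future);
+//! task_manager
+//!     .spawn_essential_handle()
+//!     .spawn("block-builder-task", None, block_builder_future);
+//! ```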
+ +use codec::Codec; +use consensus_common::ParachainCandidate; +use cumulus_client_collator::service::ServiceInterface as CollatorServiceInterface; +use cumulus_client_consensus_common::{self as consensus_common, ParachainBlockImportMarker}; +use cumulus_client_consensus_proposer::ProposerInterface; +use cumulus_primitives_aura::AuraUnincludedSegmentApi; +use cumulus_primitives_core::{relay_chain::Hash as PHash, CollectCollationInfo}; +use cumulus_relay_chain_interface::RelayChainInterface; +use polkadot_overseer::Handle as OverseerHandle; +use polkadot_primitives::{ + CollatorPair, CoreIndex, Hash as RelayHash, Id as ParaId, ValidationCodeHash, +}; + +use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf, UsageProvider}; +use sc_consensus::BlockImport; + +use sp_api::ProvideRuntimeApi; +use sp_application_crypto::AppPublic; +use sp_blockchain::HeaderBackend; +use sp_consensus_aura::AuraApi; +use sp_core::crypto::Pair; +use sp_inherents::CreateInherentDataProviders; +use sp_keystore::KeystorePtr; +use sp_runtime::traits::{Block as BlockT, Member}; + +use std::{collections::VecDeque, sync::Arc, time::Duration}; + +use crate::LOG_TARGET; + +use self::{block_builder_task::run_block_builder, collation_task::run_collation_task}; + +mod block_builder_task; +mod collation_task; + +/// Parameters for [`run`]. +pub struct Params { + /// Inherent data providers. Only non-consensus inherent data should be provided, i.e. + /// the timestamp, slot, and paras inherents should be omitted, as they are set by this + /// collator. + pub create_inherent_data_providers: CIDP, + /// Used to actually import blocks. + pub block_import: BI, + /// The underlying para client. + pub para_client: Arc, + /// The para client's backend, used to access the database. + pub para_backend: Arc, + /// A handle to the relay-chain client. + pub relay_client: RClient, + /// A validation code hash provider, used to get the current validation code hash. + pub code_hash_provider: CHP, + /// The underlying keystore, which should contain Aura consensus keys. + pub keystore: KeystorePtr, + /// The collator key used to sign collations before submitting to validators. + pub collator_key: CollatorPair, + /// The para's ID. + pub para_id: ParaId, + /// A handle to the relay-chain client's "Overseer" or task orchestrator. + pub overseer_handle: OverseerHandle, + /// The length of slots in the relay chain. + pub relay_chain_slot_duration: Duration, + /// The underlying block proposer this should call into. + pub proposer: Proposer, + /// The generic collator service used to plug into this consensus engine. + pub collator_service: CS, + /// The amount of time to spend authoring each block. + pub authoring_duration: Duration, + /// Whether we should reinitialize the collator config (i.e. we are transitioning to aura). + pub reinitialize: bool, +} + +/// Run aura-based block building and collation task. 
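+///
+/// Returns a pair of futures: the collation task first, the block-builder task
+/// second. Neither future completes during normal operation, so the caller should
+/// spawn both as long-running tasks.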
+pub fn run( + params: Params, +) -> (impl futures::Future, impl futures::Future) +where + Block: BlockT, + Client: ProvideRuntimeApi + + BlockOf + + AuxStore + + HeaderBackend + + BlockBackend + + UsageProvider + + Send + + Sync + + 'static, + Client::Api: + AuraApi + CollectCollationInfo + AuraUnincludedSegmentApi, + Backend: sc_client_api::Backend + 'static, + RClient: RelayChainInterface + Clone + 'static, + CIDP: CreateInherentDataProviders + 'static, + CIDP::InherentDataProviders: Send, + BI: BlockImport + ParachainBlockImportMarker + Send + Sync + 'static, + Proposer: ProposerInterface + Send + Sync + 'static, + CS: CollatorServiceInterface + Send + Sync + Clone + 'static, + CHP: consensus_common::ValidationCodeHashProvider + Send + 'static, + P: Pair + 'static, + P::Public: AppPublic + Member + Codec, + P::Signature: TryFrom> + Member + Codec, +{ + let (tx, rx) = tokio::sync::mpsc::channel(100); + + let collator_task_params = collation_task::Params { + relay_client: params.relay_client.clone(), + collator_key: params.collator_key.clone(), + para_id: params.para_id, + overseer_handle: params.overseer_handle.clone(), + reinitialize: params.reinitialize, + collator_service: params.collator_service.clone(), + collator_receiver: rx, + }; + + let collation_task_fut = run_collation_task::(collator_task_params); + + let slot_params = block_builder_task::BuilderTaskParams { + create_inherent_data_providers: params.create_inherent_data_providers, + block_import: params.block_import, + para_client: params.para_client, + para_backend: params.para_backend, + relay_client: params.relay_client, + code_hash_provider: params.code_hash_provider, + keystore: params.keystore, + para_id: params.para_id, + proposer: params.proposer, + collator_service: params.collator_service, + authoring_duration: params.authoring_duration, + collator_sender: tx, + }; + + let block_builder_fut = run_block_builder::(slot_params); + + (collation_task_fut, block_builder_fut) +} + +/// Message to be sent from the block builder to the collation task. +/// +/// Contains all data necessary to submit a collation to the relay chain. +struct CollatorMessage { + /// The hash of the relay chain block that provides the context for the parachain block. + pub relay_parent: RelayHash, + /// The header of the parent block. + pub parent_header: Block::Header, + /// The parachain block candidate. + pub parachain_candidate: ParachainCandidate, + /// The hash of the parachain block. + pub hash: Block::Hash, + /// The validation code hash at the parent block. + pub validation_code_hash: ValidationCodeHash, +} + +/// Retrieve the scheduled cores for the parachain with id `para_id` from the relay chain. 
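+///
+/// Falls back to an empty queue if the `availability_cores` runtime API call
+/// fails; the error is logged.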
+async fn scheduled_cores( + relay_parent: PHash, + para_id: ParaId, + relay_chain_interface: &RClient, +) -> VecDeque { + let cores = match relay_chain_interface.availability_cores(relay_parent).await { + Ok(cores) => cores, + Err(error) => { + tracing::error!( + target: LOG_TARGET, + ?error, + ?relay_parent, + "Failed to query availability cores runtime API", + ); + return VecDeque::new() + }, + }; + + cores + .iter() + .enumerate() + .filter_map(|(idx, core)| { + (core.para_id() == Some(para_id)).then_some(CoreIndex(idx as u32)) + }) + .collect() +} diff --git a/cumulus/client/consensus/common/src/lib.rs b/cumulus/client/consensus/common/src/lib.rs index cebe34e7ea58..6ee576f9c2df 100644 --- a/cumulus/client/consensus/common/src/lib.rs +++ b/cumulus/client/consensus/common/src/lib.rs @@ -46,6 +46,8 @@ pub use level_monitor::{LevelLimit, MAX_LEAVES_PER_LEVEL_SENSIBLE_DEFAULT}; pub mod import_queue; +const PARENT_SEARCH_LOG_TARGET: &str = "consensus::common::find_potential_parents"; + /// Provides the hash of validation code used for authoring/execution of blocks at a given /// hash. pub trait ValidationCodeHashProvider { @@ -350,6 +352,7 @@ pub async fn find_potential_parents( let included_hash = included_header.hash(); let pending_hash = pending_header.as_ref().map(|hdr| hdr.hash()); + tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?included_hash, included_num = ?included_header.number(), ?pending_hash ,?rp_ancestry, "Searching relay chain ancestry."); let mut frontier = vec![PotentialParent:: { hash: included_hash, header: included_header, @@ -361,6 +364,8 @@ pub async fn find_potential_parents( // relay parents. let mut potential_parents = Vec::new(); while let Some(entry) = frontier.pop() { + // TODO find_potential_parents The assumption that entry.depth = 1 is the pending block is + // not correct if we produce sub 6s blocks. let is_pending = entry.depth == 1 && pending_hash.as_ref().map_or(false, |h| &entry.hash == h); let is_included = entry.depth == 0; @@ -370,16 +375,21 @@ pub async fn find_potential_parents( // because they have already been posted on chain. 
let is_potential = is_pending || is_included || { let digest = entry.header.digest(); - cumulus_primitives_core::extract_relay_parent(digest).map_or(false, is_hash_in_ancestry) || + let is_hash_in_ancestry_check = cumulus_primitives_core::extract_relay_parent(digest) + .map_or(false, is_hash_in_ancestry); + let is_root_in_ancestry_check = cumulus_primitives_core::rpsr_digest::extract_relay_parent_storage_root(digest) .map(|(r, _n)| r) - .map_or(false, is_root_in_ancestry) + .map_or(false, is_root_in_ancestry); + + is_hash_in_ancestry_check || is_root_in_ancestry_check }; let parent_aligned_with_pending = entry.aligned_with_pending; let child_depth = entry.depth + 1; let hash = entry.hash; + tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, root_in_ancestry = is_potential && !is_pending && !is_included, ?hash, is_pending, is_included, "Checking potential parent."); if is_potential { potential_parents.push(entry); } diff --git a/cumulus/client/consensus/common/src/tests.rs b/cumulus/client/consensus/common/src/tests.rs index aca922657072..08fe5b5c8281 100644 --- a/cumulus/client/consensus/common/src/tests.rs +++ b/cumulus/client/consensus/common/src/tests.rs @@ -20,7 +20,7 @@ use async_trait::async_trait; use codec::Encode; use cumulus_client_pov_recovery::RecoveryKind; use cumulus_primitives_core::{ - relay_chain::{self, BlockId}, + relay_chain::{BlockId, BlockNumber, CoreState}, CumulusDigestItem, InboundDownwardMessage, InboundHrmpMessage, }; use cumulus_relay_chain_interface::{ @@ -45,11 +45,11 @@ use std::{ time::Duration, }; -fn relay_block_num_from_hash(hash: &PHash) -> relay_chain::BlockNumber { +fn relay_block_num_from_hash(hash: &PHash) -> BlockNumber { hash.to_low_u64_be() as u32 } -fn relay_hash_from_block_num(block_number: relay_chain::BlockNumber) -> PHash { +fn relay_hash_from_block_num(block_number: BlockNumber) -> PHash { PHash::from_low_u64_be(block_number as u64) } @@ -247,6 +247,13 @@ impl RelayChainInterface for Relaychain { extrinsics_root: PHash::zero(), })) } + + async fn availability_cores( + &self, + _relay_parent: PHash, + ) -> RelayChainResult>> { + unimplemented!("Not needed for test"); + } } fn sproof_with_best_parent(client: &Client) -> RelayStateSproofBuilder { diff --git a/cumulus/client/network/src/tests.rs b/cumulus/client/network/src/tests.rs index 3f5757d5eac1..8649e7f1901b 100644 --- a/cumulus/client/network/src/tests.rs +++ b/cumulus/client/network/src/tests.rs @@ -26,9 +26,10 @@ use futures::{executor::block_on, poll, task::Poll, FutureExt, Stream, StreamExt use parking_lot::Mutex; use polkadot_node_primitives::{SignedFullStatement, Statement}; use polkadot_primitives::{ - CandidateCommitments, CandidateDescriptor, CollatorPair, CommittedCandidateReceipt, - Hash as PHash, HeadData, InboundDownwardMessage, InboundHrmpMessage, OccupiedCoreAssumption, - PersistedValidationData, SessionIndex, SigningContext, ValidationCodeHash, ValidatorId, + BlockNumber, CandidateCommitments, CandidateDescriptor, CollatorPair, + CommittedCandidateReceipt, CoreState, Hash as PHash, HeadData, InboundDownwardMessage, + InboundHrmpMessage, OccupiedCoreAssumption, PersistedValidationData, SessionIndex, + SigningContext, ValidationCodeHash, ValidatorId, }; use polkadot_test_client::{ Client as PClient, ClientBlockImportExt, DefaultTestClientBuilderExt, FullBackend as PBackend, @@ -264,6 +265,13 @@ impl RelayChainInterface for DummyRelayChainInterface { Ok(header) } + + async fn availability_cores( + &self, + _relay_parent: PHash, + ) -> RelayChainResult>> { + 
unimplemented!("Not needed for test"); + } } fn make_validator_and_api() -> ( diff --git a/cumulus/client/relay-chain-inprocess-interface/src/lib.rs b/cumulus/client/relay-chain-inprocess-interface/src/lib.rs index 6ea02b2e7c1f..7c8f4376357f 100644 --- a/cumulus/client/relay-chain-inprocess-interface/src/lib.rs +++ b/cumulus/client/relay-chain-inprocess-interface/src/lib.rs @@ -19,9 +19,9 @@ use std::{pin::Pin, sync::Arc, time::Duration}; use async_trait::async_trait; use cumulus_primitives_core::{ relay_chain::{ - runtime_api::ParachainHost, Block as PBlock, BlockId, CommittedCandidateReceipt, - Hash as PHash, Header as PHeader, InboundHrmpMessage, OccupiedCoreAssumption, SessionIndex, - ValidationCodeHash, ValidatorId, + runtime_api::ParachainHost, Block as PBlock, BlockId, BlockNumber, + CommittedCandidateReceipt, CoreState, Hash as PHash, Header as PHeader, InboundHrmpMessage, + OccupiedCoreAssumption, SessionIndex, ValidationCodeHash, ValidatorId, }, InboundDownwardMessage, ParaId, PersistedValidationData, }; @@ -251,6 +251,13 @@ impl RelayChainInterface for RelayChainInProcessInterface { }); Ok(Box::pin(notifications_stream)) } + + async fn availability_cores( + &self, + relay_parent: PHash, + ) -> RelayChainResult>> { + Ok(self.full_client.runtime_api().availability_cores(relay_parent)?) + } } pub enum BlockCheckStatus { diff --git a/cumulus/client/relay-chain-interface/src/lib.rs b/cumulus/client/relay-chain-interface/src/lib.rs index bb93e6a168c8..aacf35483ada 100644 --- a/cumulus/client/relay-chain-interface/src/lib.rs +++ b/cumulus/client/relay-chain-interface/src/lib.rs @@ -29,8 +29,8 @@ use sp_api::ApiError; use cumulus_primitives_core::relay_chain::BlockId; pub use cumulus_primitives_core::{ relay_chain::{ - CommittedCandidateReceipt, Hash as PHash, Header as PHeader, InboundHrmpMessage, - OccupiedCoreAssumption, SessionIndex, ValidationCodeHash, ValidatorId, + BlockNumber, CommittedCandidateReceipt, CoreState, Hash as PHash, Header as PHeader, + InboundHrmpMessage, OccupiedCoreAssumption, SessionIndex, ValidationCodeHash, ValidatorId, }, InboundDownwardMessage, ParaId, PersistedValidationData, }; @@ -203,6 +203,13 @@ pub trait RelayChainInterface: Send + Sync { para_id: ParaId, occupied_core_assumption: OccupiedCoreAssumption, ) -> RelayChainResult>; + + /// Yields information on all availability cores as relevant to the child block. + /// Cores are either free or occupied. Free cores can have paras assigned to them. 
+	async fn availability_cores(
+		&self,
+		relay_parent: PHash,
+	) -> RelayChainResult<Vec<CoreState<PHash, BlockNumber>>>;
 }
 
 #[async_trait]
@@ -321,4 +328,11 @@ where
 			.validation_code_hash(relay_parent, para_id, occupied_core_assumption)
 			.await
 	}
+
+	async fn availability_cores(
+		&self,
+		relay_parent: PHash,
+	) -> RelayChainResult<Vec<CoreState<PHash, BlockNumber>>> {
+		(**self).availability_cores(relay_parent).await
+	}
 }
diff --git a/cumulus/client/relay-chain-rpc-interface/src/lib.rs b/cumulus/client/relay-chain-rpc-interface/src/lib.rs
index 3a4c186e301e..bfe13f83f5b1 100644
--- a/cumulus/client/relay-chain-rpc-interface/src/lib.rs
+++ b/cumulus/client/relay-chain-rpc-interface/src/lib.rs
@@ -24,7 +24,7 @@ use cumulus_primitives_core::{
 	InboundDownwardMessage, ParaId, PersistedValidationData,
 };
 use cumulus_relay_chain_interface::{
-	PHeader, RelayChainError, RelayChainInterface, RelayChainResult,
+	BlockNumber, CoreState, PHeader, RelayChainError, RelayChainInterface, RelayChainResult,
 };
 use futures::{FutureExt, Stream, StreamExt};
 use polkadot_overseer::Handle;
@@ -237,4 +237,11 @@ impl RelayChainInterface for RelayChainRpcInterface {
 		let imported_headers_stream = self.rpc_client.get_best_heads_stream()?;
 		Ok(imported_headers_stream.boxed())
 	}
+
+	async fn availability_cores(
+		&self,
+		relay_parent: RelayHash,
+	) -> RelayChainResult<Vec<CoreState<RelayHash, BlockNumber>>> {
+		self.rpc_client.parachain_host_availability_cores(relay_parent).await
+	}
 }
diff --git a/cumulus/pallets/aura-ext/Cargo.toml b/cumulus/pallets/aura-ext/Cargo.toml
index fe717596f9b3..12feeab6010b 100644
--- a/cumulus/pallets/aura-ext/Cargo.toml
+++ b/cumulus/pallets/aura-ext/Cargo.toml
@@ -25,6 +25,7 @@ sp-std = { path = "../../../substrate/primitives/std", default-features = false
 
 # Cumulus
 cumulus-pallet-parachain-system = { path = "../parachain-system", default-features = false }
+log = "0.4.20"
 
 [dev-dependencies]
 
diff --git a/cumulus/pallets/aura-ext/src/consensus_hook.rs b/cumulus/pallets/aura-ext/src/consensus_hook.rs
index 592029803391..bd1c9bd01192 100644
--- a/cumulus/pallets/aura-ext/src/consensus_hook.rs
+++ b/cumulus/pallets/aura-ext/src/consensus_hook.rs
@@ -65,8 +65,17 @@ where
 		let para_slot_from_relay =
 			Slot::from_timestamp(relay_chain_timestamp.into(), para_slot_duration);
 
-		// Perform checks.
-		assert_eq!(slot, para_slot_from_relay, "slot number mismatch");
+		// Check that we are not too far in the future. Since we expect `V` parachain blocks
+		// during the relay chain slot, we can allow for `V` parachain slots into the future.
+		if *slot > *para_slot_from_relay + u64::from(velocity) {
+			panic!(
+				"Parachain slot is too far in the future: parachain_slot: {:?}, derived_from_relay_slot: {:?}, velocity: {:?}",
+				slot,
+				para_slot_from_relay,
+				velocity
+			);
+		}
+
 		if authored > velocity + 1 {
 			panic!("authored blocks limit is reached for the slot")
 		}
@@ -113,6 +122,10 @@ impl<
 			return false
 		}
 
+		// TODO: This logic needs to be adjusted.
+		// It checks that we have not authored more than `V + 1` blocks in the slot.
+		// However, the slot used here is the parachain slot, while velocity should
+		// be measured in relation to the relay chain slot.
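+		// With 2s parachain slots and velocity 3, for example, every block gets its
+		// own parachain slot, so the `V + 1` bound does not constrain authoring; it
+		// only works as intended when parachain and relay chain slot durations match.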
if last_slot == new_slot { authored_so_far < velocity + 1 } else { diff --git a/cumulus/pallets/aura-ext/src/lib.rs b/cumulus/pallets/aura-ext/src/lib.rs index 7ca84dff7c51..4605dd325bee 100644 --- a/cumulus/pallets/aura-ext/src/lib.rs +++ b/cumulus/pallets/aura-ext/src/lib.rs @@ -83,7 +83,7 @@ pub mod pallet { SlotInfo::::put((new_slot, authored)); - T::DbWeight::get().reads_writes(2, 1) + T::DbWeight::get().reads_writes(4, 2) } } diff --git a/substrate/client/consensus/slots/src/lib.rs b/substrate/client/consensus/slots/src/lib.rs index d9d792005312..7cdf90877dff 100644 --- a/substrate/client/consensus/slots/src/lib.rs +++ b/substrate/client/consensus/slots/src/lib.rs @@ -29,8 +29,8 @@ mod aux_schema; mod slots; pub use aux_schema::{check_equivocation, MAX_SLOT_CAPACITY, PRUNING_BOUND}; -pub use slots::SlotInfo; use slots::Slots; +pub use slots::{time_until_next_slot, SlotInfo}; use futures::{future::Either, Future, TryFutureExt}; use futures_timer::Delay; From 08ec3e7e791519416e835542cc0675617ca08690 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 3 Apr 2024 11:51:04 +0200 Subject: [PATCH 04/71] Prepare cumulus test node & runtime --- Cargo.lock | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 0bbeaab00dd8..42e8e46d8a77 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3631,6 +3631,7 @@ dependencies = [ "sc-consensus-aura", "sc-consensus-babe", "sc-consensus-slots", + "sc-service", "sc-telemetry", "schnellru", "sp-api", @@ -3646,6 +3647,7 @@ dependencies = [ "sp-state-machine", "sp-timestamp", "substrate-prometheus-endpoint", + "tokio", "tracing", ] @@ -3844,6 +3846,7 @@ dependencies = [ "cumulus-pallet-parachain-system", "frame-support", "frame-system", + "log", "pallet-aura", "pallet-timestamp", "parity-scale-codec", From 38482391abfde0b93bd3779be9a1d4f1c23cef4f Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 3 Apr 2024 11:51:04 +0200 Subject: [PATCH 05/71] Prepare cumulus test node & runtime --- Cargo.lock | 10 +-- cumulus/test/client/src/lib.rs | 1 + cumulus/test/runtime/Cargo.toml | 1 + cumulus/test/runtime/build.rs | 7 ++ cumulus/test/runtime/src/lib.rs | 26 +++++-- cumulus/test/service/Cargo.toml | 3 - cumulus/test/service/src/chain_spec.rs | 19 ++++- cumulus/test/service/src/cli.rs | 15 +++- cumulus/test/service/src/lib.rs | 102 ++++++++++++++++++------- 9 files changed, 141 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 42e8e46d8a77..9a7af7b30842 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4407,7 +4407,6 @@ dependencies = [ "sp-blockchain", "sp-consensus", "sp-consensus-aura", - "sp-consensus-grandpa", "sp-core", "sp-io", "sp-keyring", @@ -21123,10 +21122,11 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" dependencies = [ + "cfg-if", "log", "pin-project-lite 0.2.12", "tracing-attributes", @@ -21135,9 +21135,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", diff --git 
a/cumulus/test/client/src/lib.rs b/cumulus/test/client/src/lib.rs index a39a662553b0..6ccea43fb7cb 100644 --- a/cumulus/test/client/src/lib.rs +++ b/cumulus/test/client/src/lib.rs @@ -94,6 +94,7 @@ impl substrate_test_client::GenesisInit for GenesisParameters { cumulus_test_service::chain_spec::get_chain_spec_with_extra_endowed( None, self.endowed_accounts.clone(), + cumulus_test_runtime::WASM_BINARY.expect("WASM binary not compiled!"), ) .build_storage() .expect("Builds test runtime genesis storage") diff --git a/cumulus/test/runtime/Cargo.toml b/cumulus/test/runtime/Cargo.toml index 1969045640ed..aebc1033db8d 100644 --- a/cumulus/test/runtime/Cargo.toml +++ b/cumulus/test/runtime/Cargo.toml @@ -93,3 +93,4 @@ std = [ "substrate-wasm-builder", ] increment-spec-version = [] +elastic-scaling = [] diff --git a/cumulus/test/runtime/build.rs b/cumulus/test/runtime/build.rs index 5e5f6a35a505..bf01795ac127 100644 --- a/cumulus/test/runtime/build.rs +++ b/cumulus/test/runtime/build.rs @@ -30,6 +30,13 @@ fn main() { .import_memory() .set_file_name("wasm_binary_spec_version_incremented.rs") .build(); + + WasmBuilder::new() + .with_current_project() + .enable_feature("elastic-scaling") + .import_memory() + .set_file_name("wasm_binary_elastic_scaling.rs") + .build(); } #[cfg(not(feature = "std"))] diff --git a/cumulus/test/runtime/src/lib.rs b/cumulus/test/runtime/src/lib.rs index 22dc5d857b7c..74588344fa5c 100644 --- a/cumulus/test/runtime/src/lib.rs +++ b/cumulus/test/runtime/src/lib.rs @@ -27,6 +27,11 @@ pub mod wasm_spec_version_incremented { include!(concat!(env!("OUT_DIR"), "/wasm_binary_spec_version_incremented.rs")); } +pub mod elastic_scaling { + #[cfg(feature = "std")] + include!(concat!(env!("OUT_DIR"), "/wasm_binary_elastic_scaling.rs")); +} + mod test_pallet; use frame_support::{derive_impl, traits::OnRuntimeUpgrade, PalletId}; use sp_api::{decl_runtime_apis, impl_runtime_apis}; @@ -83,8 +88,23 @@ impl_opaque_keys! { /// The para-id used in this runtime. pub const PARACHAIN_ID: u32 = 100; -const UNINCLUDED_SEGMENT_CAPACITY: u32 = 3; +#[cfg(not(feature = "elastic-scaling"))] +const UNINCLUDED_SEGMENT_CAPACITY: u32 = 4; +#[cfg(not(feature = "elastic-scaling"))] const BLOCK_PROCESSING_VELOCITY: u32 = 1; + +#[cfg(feature = "elastic-scaling")] +const UNINCLUDED_SEGMENT_CAPACITY: u32 = 9; +#[cfg(feature = "elastic-scaling")] +const BLOCK_PROCESSING_VELOCITY: u32 = 3; + +#[cfg(not(feature = "elastic-scaling"))] +pub const MILLISECS_PER_BLOCK: u64 = 6000; +#[cfg(feature = "elastic-scaling")] +pub const MILLISECS_PER_BLOCK: u64 = 2000; + +pub const SLOT_DURATION: u64 = MILLISECS_PER_BLOCK; + const RELAY_CHAIN_SLOT_DURATION_MILLIS: u32 = 6000; // The only difference between the two declarations below is the `spec_version`. With the @@ -126,10 +146,6 @@ pub const VERSION: RuntimeVersion = RuntimeVersion { state_version: 1, }; -pub const MILLISECS_PER_BLOCK: u64 = 6000; - -pub const SLOT_DURATION: u64 = MILLISECS_PER_BLOCK; - pub const EPOCH_DURATION_IN_BLOCKS: u32 = 10 * MINUTES; // These time units are defined in number of blocks. 
diff --git a/cumulus/test/service/Cargo.toml b/cumulus/test/service/Cargo.toml
index 18213b2f6326..5bda2aaf0913 100644
--- a/cumulus/test/service/Cargo.toml
+++ b/cumulus/test/service/Cargo.toml
@@ -92,8 +92,6 @@ pallet-timestamp = { path = "../../../substrate/frame/timestamp" }
 [dev-dependencies]
 futures = "0.3.28"
 portpicker = "0.1.1"
-rococo-parachain-runtime = { path = "../../parachains/runtimes/testing/rococo-parachain" }
-sp-consensus-grandpa = { path = "../../../substrate/primitives/consensus/grandpa" }
 sp-authority-discovery = { path = "../../../substrate/primitives/authority-discovery" }
 cumulus-test-client = { path = "../client" }
 
@@ -116,7 +114,6 @@ runtime-benchmarks = [
 	"polkadot-primitives/runtime-benchmarks",
 	"polkadot-service/runtime-benchmarks",
 	"polkadot-test-service/runtime-benchmarks",
-	"rococo-parachain-runtime/runtime-benchmarks",
 	"sc-service/runtime-benchmarks",
 	"sp-runtime/runtime-benchmarks",
 ]
diff --git a/cumulus/test/service/src/chain_spec.rs b/cumulus/test/service/src/chain_spec.rs
index 4db2513e2b63..7b44bdf2fee0 100644
--- a/cumulus/test/service/src/chain_spec.rs
+++ b/cumulus/test/service/src/chain_spec.rs
@@ -66,9 +66,10 @@ where
 pub fn get_chain_spec_with_extra_endowed(
 	id: Option<ParaId>,
 	extra_endowed_accounts: Vec<AccountId>,
+	code: &[u8],
 ) -> ChainSpec {
 	ChainSpec::builder(
-		cumulus_test_runtime::WASM_BINARY.expect("WASM binary was not built, please build it!"),
+		code,
 		Extensions { para_id: id.unwrap_or(cumulus_test_runtime::PARACHAIN_ID.into()).into() },
 	)
 	.with_name("Local Testnet")
@@ -83,7 +84,21 @@ pub fn get_chain_spec_with_extra_endowed(
 
 /// Get the chain spec for a specific parachain ID.
 pub fn get_chain_spec(id: Option<ParaId>) -> ChainSpec {
-	get_chain_spec_with_extra_endowed(id, Default::default())
+	get_chain_spec_with_extra_endowed(
+		id,
+		Default::default(),
+		cumulus_test_runtime::WASM_BINARY.expect("WASM binary was not built, please build it!"),
+	)
+}
+
+/// Get the elastic scaling chain spec for a specific parachain ID.
+pub fn get_elastic_scaling_chain_spec(id: Option<ParaId>) -> ChainSpec {
+	get_chain_spec_with_extra_endowed(
+		id,
+		Default::default(),
+		cumulus_test_runtime::elastic_scaling::WASM_BINARY
+			.expect("WASM binary was not built, please build it!"),
+	)
 }
 
 /// Local testnet genesis for testing.
diff --git a/cumulus/test/service/src/cli.rs b/cumulus/test/service/src/cli.rs index 87d1d4af8a95..79d557e9db82 100644 --- a/cumulus/test/service/src/cli.rs +++ b/cumulus/test/service/src/cli.rs @@ -45,6 +45,9 @@ pub struct TestCollatorCli { #[arg(long)] pub use_null_consensus: bool, + #[arg(long)] + pub use_slot_authoring: bool, + #[arg(long)] pub disable_block_announcements: bool, @@ -253,8 +256,16 @@ impl SubstrateCli for TestCollatorCli { fn load_spec(&self, id: &str) -> std::result::Result, String> { Ok(match id { - "" => - Box::new(cumulus_test_service::get_chain_spec(Some(ParaId::from(2000)))) as Box<_>, + "" => { + tracing::info!("Using default test service chain spec."); + Box::new(cumulus_test_service::get_chain_spec(Some(ParaId::from(2000)))) as Box<_> + }, + "elastic-scaling" => { + tracing::info!("Using elastic-scaling chain spec."); + Box::new(cumulus_test_service::get_elastic_scaling_chain_spec(Some(ParaId::from( + 2100, + )))) as Box<_> + }, path => { let chain_spec = cumulus_test_service::chain_spec::ChainSpec::from_json_file(path.into())?; diff --git a/cumulus/test/service/src/lib.rs b/cumulus/test/service/src/lib.rs index 11aa2e5b9f35..0efa8f3d9c51 100644 --- a/cumulus/test/service/src/lib.rs +++ b/cumulus/test/service/src/lib.rs @@ -25,7 +25,10 @@ pub mod chain_spec; use cumulus_client_collator::service::CollatorService; use cumulus_client_consensus_aura::{ - collators::lookahead::{self as aura, Params as AuraParams}, + collators::{ + lookahead::{self as aura, Params as AuraParams}, + slot_based::{self as slot_based, Params as SlotBasedParams}, + }, ImportQueueParams, }; use cumulus_client_consensus_proposer::Proposer; @@ -303,7 +306,7 @@ async fn build_relay_chain_interface( /// Start a node with the given parachain `Configuration` and relay chain `Configuration`. /// /// This is the actual implementation that is abstract over the executor and the runtime api. 
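+///
+/// If `slot_based_authoring` is `true`, the node runs the slot-based collator
+/// instead of the lookahead collator.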
-#[sc_tracing::logging::prefix_logs_with(parachain_config.network.node_name.as_str())]
+#[sc_tracing::logging::prefix_logs_with("Parachain")]
 pub async fn start_node_impl>(
 	parachain_config: Configuration,
 	collator_key: Option,
@@ -315,6 +318,7 @@ pub async fn start_node_impl>(
 	consensus: Consensus,
 	collator_options: CollatorOptions,
 	proof_recording_during_import: bool,
+	slot_based_authoring: bool,
 ) -> sc_service::error::Result<(
 	TaskManager,
 	Arc,
@@ -460,29 +464,72 @@ where
 		);
 
 		let client_for_aura = client.clone();
-		let params = AuraParams {
-			create_inherent_data_providers: move |_, ()| async move { Ok(()) },
-			block_import,
-			para_client: client.clone(),
-			para_backend: backend.clone(),
-			relay_client: relay_chain_interface,
-			code_hash_provider: move |block_hash| {
-				client_for_aura.code_at(block_hash).ok().map(|c| ValidationCode::from(c).hash())
-			},
-			sync_oracle: sync_service,
-			keystore,
-			collator_key,
-			para_id,
-			overseer_handle,
-			relay_chain_slot_duration,
-			proposer,
-			collator_service,
-			authoring_duration: Duration::from_millis(2000),
-			reinitialize: false,
-		};
-		let fut = aura::run::(params);
-		task_manager.spawn_essential_handle().spawn("aura", None, fut);
+		if slot_based_authoring {
+			tracing::info!(target: LOG_TARGET, "Starting block authoring with slot-based collator.");
+			let params = SlotBasedParams {
+				create_inherent_data_providers: move |_, ()| async move { Ok(()) },
+				block_import,
+				para_client: client.clone(),
+				para_backend: backend.clone(),
+				relay_client: relay_chain_interface,
+				code_hash_provider: move |block_hash| {
+					client_for_aura
+						.code_at(block_hash)
+						.ok()
+						.map(|c| ValidationCode::from(c).hash())
+				},
+				keystore,
+				collator_key,
+				para_id,
+				overseer_handle,
+				relay_chain_slot_duration,
+				proposer,
+				collator_service,
+				authoring_duration: Duration::from_millis(2000),
+				reinitialize: false,
+			};
+
+			let (collation_future, block_builder_future) =
+				slot_based::run::(params);
+			task_manager.spawn_essential_handle().spawn(
+				"collation-task",
+				None,
+				collation_future,
+			);
+			task_manager.spawn_essential_handle().spawn(
+				"block-builder-task",
+				None,
+				block_builder_future,
+			);
+		} else {
+			tracing::info!(target: LOG_TARGET, "Starting block authoring with lookahead collator.");
+			let params = AuraParams {
+				create_inherent_data_providers: move |_, ()| async move { Ok(()) },
+				block_import,
+				para_client: client.clone(),
+				para_backend: backend.clone(),
+				relay_client: relay_chain_interface,
+				code_hash_provider: move |block_hash| {
+					client_for_aura
+						.code_at(block_hash)
+						.ok()
+						.map(|c| ValidationCode::from(c).hash())
+				},
+				keystore,
+				collator_key,
+				para_id,
+				overseer_handle,
+				relay_chain_slot_duration,
+				proposer,
+				collator_service,
+				authoring_duration: Duration::from_millis(2000),
+				reinitialize: false,
+			};
+
+			let fut = aura::run::(params);
+			task_manager.spawn_essential_handle().spawn("aura", None, fut);
+		}
 	}
 }
 
@@ -765,8 +812,11 @@ pub fn node_config(
 	let root = base_path.path().join(format!("cumulus_test_service_{}", key));
 	let role = if is_collator { Role::Authority } else { Role::Full };
 	let key_seed = key.to_seed();
-	let mut spec =
-		Box::new(chain_spec::get_chain_spec_with_extra_endowed(Some(para_id), endowed_accounts));
+	let mut spec = Box::new(chain_spec::get_chain_spec_with_extra_endowed(
+		Some(para_id),
+		endowed_accounts,
+		cumulus_test_runtime::WASM_BINARY.expect("WASM binary was not built, please build it!"),
+	));
 
 	let mut storage = spec.as_storage_builder().build_storage().expect("could not build storage");
From 7a057ed5ee19b1fe9b6373b5462f3e174b491021 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 3 Apr 2024 11:51:33 +0200 Subject: [PATCH 06/71] Add zombienet test --- .gitlab/pipeline/zombienet/cumulus.yml | 12 +++ .../zombienet/tests/0008-configure-broker.js | 82 +++++++++++++++++++ .../zombienet/tests/0008-configure-relay.js | 78 ++++++++++++++++++ .../tests/0008-elastic_authoring.toml | 66 +++++++++++++++ .../tests/0008-elastic_authoring.zndsl | 18 ++++ 5 files changed, 256 insertions(+) create mode 100644 cumulus/zombienet/tests/0008-configure-broker.js create mode 100644 cumulus/zombienet/tests/0008-configure-relay.js create mode 100644 cumulus/zombienet/tests/0008-elastic_authoring.toml create mode 100644 cumulus/zombienet/tests/0008-elastic_authoring.zndsl diff --git a/.gitlab/pipeline/zombienet/cumulus.yml b/.gitlab/pipeline/zombienet/cumulus.yml index c473f5c5fed7..746c77ee7cf3 100644 --- a/.gitlab/pipeline/zombienet/cumulus.yml +++ b/.gitlab/pipeline/zombienet/cumulus.yml @@ -150,3 +150,15 @@ zombienet-cumulus-0007-full_node_warp_sync: --local-dir="${LOCAL_DIR}" --concurrency=1 --test="0007-full_node_warp_sync.zndsl" + +zombienet-cumulus-0008-elastic_authoring: + extends: + - .zombienet-cumulus-common + - .zombienet-refs + - .zombienet-before-script + - .zombienet-after-script + script: + - /home/nonroot/zombie-net/scripts/ci/run-test-local-env-manager.sh + --local-dir="${LOCAL_DIR}" + --concurrency=1 + --test="0008-elastic_authoring.zndsl" diff --git a/cumulus/zombienet/tests/0008-configure-broker.js b/cumulus/zombienet/tests/0008-configure-broker.js new file mode 100644 index 000000000000..d93f28ed0984 --- /dev/null +++ b/cumulus/zombienet/tests/0008-configure-broker.js @@ -0,0 +1,82 @@ +const assert = require("assert"); + +async function run(nodeName, networkInfo, _jsArgs) { + const { wsUri, userDefinedTypes } = networkInfo.nodesByName[nodeName]; + const api = await zombie.connect(wsUri, userDefinedTypes); + + await zombie.util.cryptoWaitReady(); + + // account to submit tx + const keyring = new zombie.Keyring({ type: "sr25519" }); + const alice = keyring.addFromUri("//Alice"); + + const calls = [ + // Default broker configuration + api.tx.broker.configure({ + advanceNotice: 5, + interludeLength: 1, + leadinLength: 1, + regionLength: 1, + idealBulkProportion: 100, + limitCoresOffered: null, + renewalBump: 10, + contributionTimeout: 5, + }), + // We need MOARE cores. + api.tx.broker.requestCoreCount(7), + // Set a lease for the broker chain itself. + api.tx.broker.setLease( + 1005, + 1000, + ), + // Three cores for para 2100. + api.tx.broker.setLease( + 2100, + 1000, + ), + api.tx.broker.setLease( + 2100, + 1000, + ), + api.tx.broker.setLease( + 2100, + 1000, + ), + // One cores for para 2000. + api.tx.broker.setLease( + 2000, + 1000, + ), + // Start sale to make the broker "work", but we don't offer any cores + // as we have fixed leases only anyway. 
+ api.tx.broker.startSales(1, 0), + ]; + const sudo_batch = api.tx.sudo.sudo(api.tx.utility.batch(calls)); + + await new Promise(async (resolve, reject) => { + const unsub = await sudo_batch.signAndSend(alice, (result) => { + console.log(`Current status is ${result.status}`); + if (result.status.isInBlock) { + console.log( + `Transaction included at blockHash ${result.status.asInBlock}` + ); + } else if (result.status.isFinalized) { + console.log( + `Transaction finalized at blockHash ${result.status.asFinalized}` + ); + unsub(); + return resolve(); + } else if (result.isError) { + // Probably happens because of: https://github.com/paritytech/polkadot-sdk/issues/1202. + console.log(`Transaction error`); + // We ignore the error because it is very likely misleading, because of the issue mentioned above. + unsub(); + return resolve(); + } + }); + }); + + return 0; +} + +module.exports = { run }; diff --git a/cumulus/zombienet/tests/0008-configure-relay.js b/cumulus/zombienet/tests/0008-configure-relay.js new file mode 100644 index 000000000000..c546ac3f5fdd --- /dev/null +++ b/cumulus/zombienet/tests/0008-configure-relay.js @@ -0,0 +1,78 @@ +const assert = require("assert"); + +async function run(nodeName, networkInfo, _jsArgs) { + const init = networkInfo.nodesByName[nodeName]; + let wsUri = init.wsUri; + let userDefinedTypes = init.userDefinedTypes; + const api = await zombie.connect(wsUri, userDefinedTypes); + + const collatorElastic = networkInfo.nodesByName["collator-elastic"]; + wsUri = collatorElastic.wsUri; + userDefinedTypes = collatorElastic.userDefinedTypes; + const apiCollatorElastic = await zombie.connect(wsUri, userDefinedTypes); + + const collatorSingleCore = networkInfo.nodesByName["collator-single-core"]; + wsUriSingleCore = collatorSingleCore.wsUri; + userDefinedTypes6s = collatorSingleCore.userDefinedTypes; + + const apiCollatorSingleCore = await zombie.connect(wsUriSingleCore, userDefinedTypes6s); + + await zombie.util.cryptoWaitReady(); + + // Get the genesis header and the validation code of parachain 2100 + const genesisHeaderElastic = await apiCollatorElastic.rpc.chain.getHeader(); + const validationCodeElastic = await apiCollatorElastic.rpc.state.getStorage("0x3A636F6465"); + + // Get the genesis header and the validation code of parachain 2000 + const genesisHeaderSingleCore = await apiCollatorSingleCore.rpc.chain.getHeader(); + const validationCodeSingleCore = await apiCollatorSingleCore.rpc.state.getStorage("0x3A636F6465"); + + // account to submit tx + const keyring = new zombie.Keyring({ type: "sr25519" }); + const alice = keyring.addFromUri("//Alice"); + + const calls = [ + api.tx.configuration.setCoretimeCores({ new: 7 }), + api.tx.coretime.assignCore(0, 20,[[ { task: 1005 }, 57600 ]], null), + api.tx.registrar.forceRegister( + alice.address, + 0, + 2100, + genesisHeaderElastic.toHex(), + validationCodeElastic.toHex(), + ), + api.tx.registrar.forceRegister( + alice.address, + 0, + 2000, + genesisHeaderSingleCore.toHex(), + validationCodeSingleCore.toHex(), + ) + ]; + const sudo_batch = api.tx.sudo.sudo(api.tx.utility.batch(calls)); + + await new Promise(async (resolve, reject) => { + const unsub = await sudo_batch.signAndSend(alice, (result) => { + console.log(`Current status is ${result.status}`); + if (result.status.isInBlock) { + console.log( + `Transaction included at blockHash ${result.status.asInBlock}` + ); + } else if (result.status.isFinalized) { + console.log( + `Transaction finalized at blockHash ${result.status.asFinalized}` + ); + unsub(); + 
return resolve();
+      } else if (result.isError) {
+        console.log(`Transaction Error`);
+        unsub();
+        return reject();
+      }
+    });
+  });
+
+  return 0;
+}
+
+module.exports = { run };
diff --git a/cumulus/zombienet/tests/0008-elastic_authoring.toml b/cumulus/zombienet/tests/0008-elastic_authoring.toml
new file mode 100644
index 000000000000..00d9ca74b6ef
--- /dev/null
+++ b/cumulus/zombienet/tests/0008-elastic_authoring.toml
@@ -0,0 +1,66 @@
+[settings]
+timeout = 1000
+
+[relaychain.genesis.runtimeGenesis.patch.configuration.config.async_backing_params]
+  max_candidate_depth = 6
+  allowed_ancestry_len = 4
+
+[relaychain.genesis.runtimeGenesis.patch.configuration.config.scheduler_params]
+  max_validators_per_core = 1
+  scheduling_lookahead = 2
+  num_cores = 7
+
+[relaychain.genesis.runtimeGenesis.patch.configuration.config.approval_voting_params]
+  needed_approvals = 3
+  max_approval_coalesce_count = 5
+
+[relaychain]
+default_image = "{{ZOMBIENET_INTEGRATION_TEST_IMAGE}}"
+chain = "rococo-local"
+command = "polkadot"
+
+  [[relaychain.nodes]]
+  name = "alice"
+  args = [""]
+
+  [[relaychain.node_groups]]
+  name = "validator"
+  args = ["-lruntime=debug,parachain=trace"]
+  count = 8
+
+[[parachains]]
+id = 1005
+chain = "coretime-rococo-local"
+
+  [parachains.collator]
+  name = "coretime-collator"
+  image = "{{CUMULUS_IMAGE}}"
+  command = "polkadot-parachain"
+  args = ["-lruntime=info", "--force-authoring"]
+
+# Slot based authoring with 3 cores and 2s slot duration
+[[parachains]]
+id = 2100
+add_to_genesis = false
+register_para = false
+onboard_as_parachain = false
+chain = "elastic-scaling"
+
+  [[parachains.collators]]
+  name = "collator-elastic"
+  image = "{{CUMULUS_IMAGE}}"
+  command = "test-parachain"
+  args = ["-laura=trace,runtime=info,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--use-slot-authoring"]
+
+# Slot based authoring with a single core and 6s slot duration
+[[parachains]]
+id = 2000
+add_to_genesis = false
+register_para = false
+onboard_as_parachain = false
+
+  [[parachains.collators]]
+  name = "collator-single-core"
+  image = "{{CUMULUS_IMAGE}}"
+  command = "test-parachain"
+  args = ["-laura=trace,runtime=info,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--use-slot-authoring"]
diff --git a/cumulus/zombienet/tests/0008-elastic_authoring.zndsl b/cumulus/zombienet/tests/0008-elastic_authoring.zndsl
new file mode 100644
index 000000000000..8f83ba8b1a2c
--- /dev/null
+++ b/cumulus/zombienet/tests/0008-elastic_authoring.zndsl
@@ -0,0 +1,18 @@
+Description: Slot based authoring for elastic scaling
+Network: ./0008-elastic_authoring.toml
+Creds: config
+
+alice: is up
+coretime-collator: is up
+
+# configure relay chain
+alice: js-script ./0008-configure-relay.js with "" return is 0 within 600 secs
+
+# configure broker chain
+coretime-collator: js-script ./0008-configure-broker.js with "" return is 0 within 600 secs
+
+# Ensure that parachain 2100 got onboarded
+alice: parachain 2100 block height is at least 30 within 45000 seconds
+
+# Ensure that parachain 2000 got onboarded
+alice: parachain 2000 block height is at least 10 within 45000 seconds

From 3f4a2609e2dcce9848981d16adf0c8ee9354b3b1 Mon Sep 17 00:00:00 2001
From: Sebastian Kunert
Date: Wed, 3 Apr 2024 11:57:08 +0200
Subject: [PATCH 07/71] Misc

---
 .../consensus/aura/src/collators/slot_based/collation_task.rs | 3 ++-
cumulus/pallets/aura-ext/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs index 8958fb33ccdb..451551fa2697 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs @@ -109,6 +109,7 @@ where tracing::debug!( target: LOG_TARGET, hash = ?message.hash, + num_messages = ?messages.len() + 1, "Pushing new message.", ); messages.push_back(message); @@ -152,7 +153,7 @@ async fn handle_collation_message( return; } - let number = parachain_candidate.block.header().number().clone(); + let number = *parachain_candidate.block.header().number(); let (collation, block_data) = match collator_service.build_collation(&parent_header, hash, parachain_candidate) { Some(collation) => collation, diff --git a/cumulus/pallets/aura-ext/Cargo.toml b/cumulus/pallets/aura-ext/Cargo.toml index 12feeab6010b..76548fd66abb 100644 --- a/cumulus/pallets/aura-ext/Cargo.toml +++ b/cumulus/pallets/aura-ext/Cargo.toml @@ -25,7 +25,7 @@ sp-std = { path = "../../../substrate/primitives/std", default-features = false # Cumulus cumulus-pallet-parachain-system = { path = "../parachain-system", default-features = false } -log = "0.4.20" +log = { workspace = true } [dev-dependencies] From c049f5062cfa42739551a37620615a77ce2d0211 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 3 Apr 2024 13:04:37 +0200 Subject: [PATCH 08/71] Nits --- cumulus/client/consensus/aura/Cargo.toml | 2 +- cumulus/client/consensus/aura/src/collator.rs | 1 + .../src/collators/slot_based/block_builder_task.rs | 3 ++- .../consensus/aura/src/collators/slot_based/mod.rs | 5 +++-- cumulus/zombienet/tests/0008-configure-broker.js | 2 +- cumulus/zombienet/tests/0008-elastic_authoring.toml | 11 +---------- 6 files changed, 9 insertions(+), 15 deletions(-) diff --git a/cumulus/client/consensus/aura/Cargo.toml b/cumulus/client/consensus/aura/Cargo.toml index 637c5288e28c..a4ac78efb688 100644 --- a/cumulus/client/consensus/aura/Cargo.toml +++ b/cumulus/client/consensus/aura/Cargo.toml @@ -36,6 +36,7 @@ sp-keystore = { path = "../../../../substrate/primitives/keystore" } sp-runtime = { path = "../../../../substrate/primitives/runtime" } sp-timestamp = { path = "../../../../substrate/primitives/timestamp" } sp-state-machine = { path = "../../../../substrate/primitives/state-machine" } +sc-service = { path = "../../../../substrate/client/service" } substrate-prometheus-endpoint = { path = "../../../../substrate/utils/prometheus" } # Cumulus @@ -52,4 +53,3 @@ polkadot-primitives = { path = "../../../../polkadot/primitives" } polkadot-node-primitives = { path = "../../../../polkadot/node/primitives" } polkadot-node-subsystem = { path = "../../../../polkadot/node/subsystem" } polkadot-overseer = { path = "../../../../polkadot/node/overseer" } -sc-service = { version = "0.35.0", path = "../../../../substrate/client/service" } diff --git a/cumulus/client/consensus/aura/src/collator.rs b/cumulus/client/consensus/aura/src/collator.rs index 9030f138186a..bac1c27c4837 100644 --- a/cumulus/client/consensus/aura/src/collator.rs +++ b/cumulus/client/consensus/aura/src/collator.rs @@ -156,6 +156,7 @@ where Ok((paras_inherent_data, other_inherent_data)) } + /// Build and import a parachain block on the given parent header, using the given slot claim. 
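+	///
+	/// On success, returns the sealed and imported candidate, i.e. the block
+	/// together with its storage proof, or `None` if no block could be built.
+	/// Packaging the candidate into a collation is left to the caller.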
pub async fn build_block_and_import( &mut self, parent_header: &Block::Header, diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 30ebcfee9561..83c2a8c859b7 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -76,7 +76,7 @@ use crate::{ const PARENT_SEARCH_DEPTH: usize = 10; -/// Parameters for [`run`]. +/// Parameters for [`run_block_builder`]. pub struct BuilderTaskParams { /// Inherent data providers. Only non-consensus inherent data should be provided, i.e. /// the timestamp, slot, and paras inherents should be omitted, as they are set by this @@ -122,6 +122,7 @@ impl SlotTimer { Self { slot_duration } } + /// Returns a future that resolves when the next slot arrives. pub async fn wait_until_next_slot(&self) -> SlotAndTime { let time_until_next_slot = time_until_next_slot(self.slot_duration.as_duration()); tokio::time::sleep(time_until_next_slot).await; diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs index d4525058063a..d56840425470 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs @@ -149,7 +149,7 @@ where let collation_task_fut = run_collation_task::(collator_task_params); - let slot_params = block_builder_task::BuilderTaskParams { + let block_builder_params = block_builder_task::BuilderTaskParams { create_inherent_data_providers: params.create_inherent_data_providers, block_import: params.block_import, para_client: params.para_client, @@ -164,7 +164,8 @@ where collator_sender: tx, }; - let block_builder_fut = run_block_builder::(slot_params); + let block_builder_fut = + run_block_builder::(block_builder_params); (collation_task_fut, block_builder_fut) } diff --git a/cumulus/zombienet/tests/0008-configure-broker.js b/cumulus/zombienet/tests/0008-configure-broker.js index d93f28ed0984..f38a3bc03331 100644 --- a/cumulus/zombienet/tests/0008-configure-broker.js +++ b/cumulus/zombienet/tests/0008-configure-broker.js @@ -23,7 +23,7 @@ async function run(nodeName, networkInfo, _jsArgs) { contributionTimeout: 5, }), // We need MOARE cores. - api.tx.broker.requestCoreCount(7), + api.tx.broker.requestCoreCount(5), // Set a lease for the broker chain itself. 
api.tx.broker.setLease( 1005, diff --git a/cumulus/zombienet/tests/0008-elastic_authoring.toml b/cumulus/zombienet/tests/0008-elastic_authoring.toml index 00d9ca74b6ef..183fa10b6a96 100644 --- a/cumulus/zombienet/tests/0008-elastic_authoring.toml +++ b/cumulus/zombienet/tests/0008-elastic_authoring.toml @@ -3,16 +3,7 @@ timeout = 1000 [relaychain.genesis.runtimeGenesis.patch.configuration.config.async_backing_params] max_candidate_depth = 6 - allowed_ancestry_len = 4 - -[relaychain.genesis.runtimeGenesis.patch.configuration.config.scheduler_params] - max_validators_per_core = 1 - scheduling_lookahead = 2 - num_cores = 7 - -[relaychain.genesis.runtimeGenesis.patch.configuration.config.approval_voting_params] - needed_approvals = 3 - max_approval_coalesce_count = 5 + allowed_ancestry_len = 3 [relaychain] default_image = "{{ZOMBIENET_INTEGRATION_TEST_IMAGE}}" From c1c96bc69d98c3c7cabec344438e929058238270 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 3 Apr 2024 14:12:05 +0200 Subject: [PATCH 09/71] Remove unused dep, add issues to todos --- Cargo.lock | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 9a7af7b30842..b1ce537a07b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4382,7 +4382,6 @@ dependencies = [ "polkadot-test-service", "portpicker", "rand 0.8.5", - "rococo-parachain-runtime", "sc-basic-authorship", "sc-block-builder", "sc-chain-spec", From 7af20a69069901a3518e6f7c5f2b3e90bfa1b0a8 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 3 Apr 2024 14:12:05 +0200 Subject: [PATCH 10/71] Remove unused dep, add issues to todos --- Cargo.lock | 1 - cumulus/client/consensus/common/src/lib.rs | 1 + cumulus/pallets/aura-ext/Cargo.toml | 1 - cumulus/pallets/aura-ext/src/consensus_hook.rs | 1 + 4 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b1ce537a07b7..a727f5809d68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3846,7 +3846,6 @@ dependencies = [ "cumulus-pallet-parachain-system", "frame-support", "frame-system", - "log", "pallet-aura", "pallet-timestamp", "parity-scale-codec", diff --git a/cumulus/client/consensus/common/src/lib.rs b/cumulus/client/consensus/common/src/lib.rs index 6ee576f9c2df..c59eda7bb744 100644 --- a/cumulus/client/consensus/common/src/lib.rs +++ b/cumulus/client/consensus/common/src/lib.rs @@ -366,6 +366,7 @@ pub async fn find_potential_parents( while let Some(entry) = frontier.pop() { // TODO find_potential_parents The assumption that entry.depth = 1 is the pending block is // not correct if we produce sub 6s blocks. 
+		// https://github.com/paritytech/polkadot-sdk/issues/3967
 		let is_pending =
 			entry.depth == 1 && pending_hash.as_ref().map_or(false, |h| &entry.hash == h);
 		let is_included = entry.depth == 0;
diff --git a/cumulus/pallets/aura-ext/Cargo.toml b/cumulus/pallets/aura-ext/Cargo.toml
index 76548fd66abb..fe717596f9b3 100644
--- a/cumulus/pallets/aura-ext/Cargo.toml
+++ b/cumulus/pallets/aura-ext/Cargo.toml
@@ -25,7 +25,6 @@ sp-std = { path = "../../../substrate/primitives/std", default-features = false
 
 # Cumulus
 cumulus-pallet-parachain-system = { path = "../parachain-system", default-features = false }
-log = { workspace = true }
 
 [dev-dependencies]
 
diff --git a/cumulus/pallets/aura-ext/src/consensus_hook.rs b/cumulus/pallets/aura-ext/src/consensus_hook.rs
index bd1c9bd01192..40276c035f6d 100644
--- a/cumulus/pallets/aura-ext/src/consensus_hook.rs
+++ b/cumulus/pallets/aura-ext/src/consensus_hook.rs
@@ -126,6 +126,7 @@ impl<
 		// It checks that we have not authored more than `V + 1` blocks in the slot.
 		// However, the slot used here is the parachain slot, while velocity should
 		// be measured in relation to the relay chain slot.
+		// https://github.com/paritytech/polkadot-sdk/issues/3967
 		// With 2s parachain slots and velocity 3, for example, every block gets its
 		// own parachain slot, so the `V + 1` bound does not constrain authoring; it
 		// only works as intended when parachain and relay chain slot durations match.
 		if last_slot == new_slot {
 			authored_so_far < velocity + 1
 		} else {

From 560a907a82151b6bdeffced5145d264ef1348604 Mon Sep 17 00:00:00 2001
From: Sebastian Kunert
Date: Fri, 5 Apr 2024 17:03:19 +0200
Subject: [PATCH 11/71] Enable "elastic-scaling-experimental" for cumulus test
 node

---
 cumulus/test/service/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cumulus/test/service/Cargo.toml b/cumulus/test/service/Cargo.toml
index 5bda2aaf0913..3444fd6192d2 100644
--- a/cumulus/test/service/Cargo.toml
+++ b/cumulus/test/service/Cargo.toml
@@ -62,7 +62,7 @@ sc-executor-common = { path = "../../../substrate/client/executor/common" }
 
 # Polkadot
 polkadot-primitives = { path = "../../../polkadot/primitives" }
-polkadot-service = { path = "../../../polkadot/node/service" }
+polkadot-service = { path = "../../../polkadot/node/service", features = ["elastic-scaling-experimental"] }
 polkadot-test-service = { path = "../../../polkadot/node/test/service" }
 polkadot-cli = { path = "../../../polkadot/cli" }
 polkadot-node-subsystem = { path = "../../../polkadot/node/subsystem" }

From 245a11a7e8841dc6a41f41b2e39197e893070e88 Mon Sep 17 00:00:00 2001
From: Sebastian Kunert
Date: Fri, 5 Apr 2024 17:03:19 +0200
Subject: [PATCH 12/71] Adjust parent search to allow for pending blocks with
 depth > 1

---
 cumulus/client/consensus/common/src/lib.rs    | 132 ++++++++++---
 .../common/src/parachain_consensus.rs         |   2 +-
 cumulus/client/consensus/common/src/tests.rs  | 179 ++++++++++++++++++
 cumulus/test/runtime/src/lib.rs               |   2 +-
 .../tests/0008-elastic_authoring.toml         |   4 +-
 5 files changed, 291 insertions(+), 28 deletions(-)

diff --git a/cumulus/client/consensus/common/src/lib.rs b/cumulus/client/consensus/common/src/lib.rs
index c59eda7bb744..db9fe5c9c258 100644
--- a/cumulus/client/consensus/common/src/lib.rs
+++ b/cumulus/client/consensus/common/src/lib.rs
@@ -250,7 +250,7 @@ pub struct ParentSearchParams {
 }
 
 /// A potential parent block returned from [`find_potential_parents`]
-#[derive(Debug, PartialEq)]
+#[derive(PartialEq)]
 pub struct PotentialParent<B: BlockT> {
 	/// The hash of the block.
pub hash: B::Hash, @@ -263,6 +263,17 @@ pub struct PotentialParent { pub aligned_with_pending: bool, } +impl std::fmt::Debug for PotentialParent { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PotentialParent") + .field("hash", &self.hash) + .field("depth", &self.depth) + .field("aligned_with_pending", &self.aligned_with_pending) + .field("number", &self.header.number()) + .finish() + } +} + /// Perform a recursive search through blocks to find potential /// parent blocks for a new block. /// @@ -280,10 +291,11 @@ pub struct PotentialParent { /// * the block number is within `max_depth` blocks of the included block pub async fn find_potential_parents( params: ParentSearchParams, - client: &impl Backend, + backend: &impl Backend, relay_client: &impl RelayChainInterface, ) -> Result>, RelayChainError> { // 1. Build up the ancestry record of the relay chain to compare against. + tracing::trace!("Parent search parameters: {params:?}"); let rp_ancestry = { let mut ancestry = Vec::with_capacity(params.ancestry_lookback + 1); let mut current_rp = params.relay_parent; @@ -352,24 +364,96 @@ pub async fn find_potential_parents( let included_hash = included_header.hash(); let pending_hash = pending_header.as_ref().map(|hdr| hdr.hash()); + if params.max_depth == 0 { + return Ok(vec![PotentialParent:: { + hash: included_hash, + header: included_header, + depth: 0, + aligned_with_pending: true, + }]) + }; + + let maybe_route = pending_hash + .map(|pending| sp_blockchain::tree_route(backend.blockchain(), included_hash, pending)) + .transpose()?; + + // The distance between pending and included block. Is later used to check if a child + // is aligned with pending when it is between pending and included block. + let pending_distance = maybe_route.as_ref().map(|route| route.enacted().len()); + + // If we want to ignore alternative branches there is no reason to start + // the parent search at the included block. We can add the included block and + // the path to the pending block to the potential parents directly (limited by max_depth). + let (mut frontier, mut potential_parents) = if let (Some(pending), true, Some(ref route)) = + (pending_header, params.ignore_alternative_branches, &maybe_route) + { + let mut potential_parents = Vec::new(); + // Included block is always a potential parent + potential_parents.push(PotentialParent:: { + hash: included_hash, + header: included_header.clone(), + depth: 0, + aligned_with_pending: true, + }); + + // Add all items on the path included -> pending - 1 to the potential parents, but not + // more than `max_depth`. + let num_parents_on_path = route.enacted().len().saturating_sub(1).min(params.max_depth); + for (num, block) in route.enacted().iter().take(num_parents_on_path).enumerate() { + let header = match backend.blockchain().header(block.hash) { + Ok(Some(h)) => h, + Ok(None) => continue, + Err(_) => continue, + }; + + potential_parents.push(PotentialParent:: { + hash: block.hash, + header, + depth: 1 + num, + aligned_with_pending: true, + }); + } + + // The search for additional potential parents should now start at the + // pending block. 
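+		// I.e. frontier = [pending block], potential parents = included block plus
+		// the blocks on the path below pending (bounded by `max_depth`).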
+ ( + vec![PotentialParent:: { + hash: pending.hash(), + header: pending.clone(), + depth: route.enacted().len(), + aligned_with_pending: true, + }], + potential_parents, + ) + } else { + ( + vec![PotentialParent:: { + hash: included_hash, + header: included_header.clone(), + depth: 0, + aligned_with_pending: true, + }], + Default::default(), + ) + }; + + if potential_parents.len() > params.max_depth { + return Ok(potential_parents); + } + + // If a block is on the path included -> pending, we consider it `aligned_with_pending`. + let is_child_in_path_to_pending = |hash| { + maybe_route + .as_ref() + .map_or(true, |route| route.enacted().iter().any(|x| x.hash == hash)) + }; + tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?included_hash, included_num = ?included_header.number(), ?pending_hash ,?rp_ancestry, "Searching relay chain ancestry."); - let mut frontier = vec![PotentialParent:: { - hash: included_hash, - header: included_header, - depth: 0, - aligned_with_pending: true, - }]; - - // Recursive search through descendants of the included block which have acceptable - // relay parents. - let mut potential_parents = Vec::new(); while let Some(entry) = frontier.pop() { - // TODO find_potential_parents The assumption that entry.depth = 1 is the pending block is - // not correct if we produce sub 6s blocks. + // TODO Adjust once we can fetch multiple pending blocks. // https://github.com/paritytech/polkadot-sdk/issues/3967 - let is_pending = - entry.depth == 1 && pending_hash.as_ref().map_or(false, |h| &entry.hash == h); - let is_included = entry.depth == 0; + let is_pending = pending_hash.as_ref().map_or(false, |h| &entry.hash == h); + let is_included = included_hash == entry.hash; // note: even if the pending block or included block have a relay parent // outside of the expected part of the relay chain, they are always allowed @@ -400,19 +484,19 @@ pub async fn find_potential_parents( } // push children onto search frontier. 
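+		// A child counts as aligned with the pending block if its parent was aligned
+		// and the child is the pending block itself, lies on the included -> pending
+		// path, or sits at a greater depth than the pending block.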
- for child in client.blockchain().children(hash).ok().into_iter().flatten() { + for child in backend.blockchain().children(hash).ok().into_iter().flatten() { + tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?child, child_depth, ?pending_distance, "Looking at child."); let aligned_with_pending = parent_aligned_with_pending && - if child_depth == 1 { - pending_hash.as_ref().map_or(true, |h| &child == h) - } else { - true - }; + (pending_distance.map_or(true, |dist| child_depth > dist) || + pending_hash.as_ref().map_or(true, |h| &child == h) || + is_child_in_path_to_pending(child)); if params.ignore_alternative_branches && !aligned_with_pending { + tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?child, "Child is not aligned with pending block."); continue } - let header = match client.blockchain().header(child) { + let header = match backend.blockchain().header(child) { Ok(Some(h)) => h, Ok(None) => continue, Err(_) => continue, diff --git a/cumulus/client/consensus/common/src/parachain_consensus.rs b/cumulus/client/consensus/common/src/parachain_consensus.rs index a8f33f93032f..944917673b11 100644 --- a/cumulus/client/consensus/common/src/parachain_consensus.rs +++ b/cumulus/client/consensus/common/src/parachain_consensus.rs @@ -385,7 +385,7 @@ async fn handle_new_best_parachain_head( unset_best_header.take(); tracing::debug!( target: LOG_TARGET, - ?hash, + included = ?hash, "Importing block as new best for parachain.", ); import_block_as_new_best(hash, parachain_head, parachain).await; diff --git a/cumulus/client/consensus/common/src/tests.rs b/cumulus/client/consensus/common/src/tests.rs index 08fe5b5c8281..a66828248920 100644 --- a/cumulus/client/consensus/common/src/tests.rs +++ b/cumulus/client/consensus/common/src/tests.rs @@ -1132,6 +1132,177 @@ fn find_potential_parents_with_max_depth() { } } +/// Test where there is an additional block between included and pending block. +#[test] +fn find_potential_parents_aligned_with_late_pending() { + sp_tracing::try_init_simple(); + + const NON_INCLUDED_CHAIN_LEN: usize = 5; + + let backend = Arc::new(Backend::new_test(1000, 1)); + let client = Arc::new(TestClientBuilder::with_backend(backend.clone()).build()); + let mut para_import = + ParachainBlockImport::new_with_delayed_best_block(client.clone(), backend.clone()); + + let relay_parent = relay_hash_from_block_num(10); + // Choose different relay parent for alternative chain to get new hashes. 
+	let search_relay_parent = relay_hash_from_block_num(11);
+	let included_block = build_and_import_block_ext(
+		&client,
+		BlockOrigin::NetworkInitialSync,
+		true,
+		&mut para_import,
+		None,
+		None,
+		Some(relay_parent),
+	);
+
+	let in_between_block = build_and_import_block_ext(
+		&client,
+		BlockOrigin::NetworkInitialSync,
+		true,
+		&mut para_import,
+		Some(included_block.header().hash()),
+		None,
+		Some(relay_parent),
+	);
+
+	let pending_block = build_and_import_block_ext(
+		&client,
+		BlockOrigin::Own,
+		true,
+		&mut para_import,
+		Some(in_between_block.header().hash()),
+		None,
+		Some(relay_parent),
+	);
+
+	let relay_chain = Relaychain::new();
+	{
+		let relay_inner = &mut relay_chain.inner.lock().unwrap();
+		relay_inner
+			.relay_chain_hash_to_header
+			.insert(search_relay_parent, included_block.header().clone());
+		relay_inner
+			.relay_chain_hash_to_header_pending
+			.insert(search_relay_parent, in_between_block.header().clone());
+		relay_inner
+			.relay_chain_hash_to_header_pending
+			.insert(search_relay_parent, pending_block.header().clone());
+	}
+
+	// Build the aligned chain on top of the pending block and an alternative
+	// chain on top of the included block.
+	let mut aligned_blocks = Vec::new();
+	let mut parent = pending_block.header().hash();
+	for _ in 2..NON_INCLUDED_CHAIN_LEN {
+		let block = build_and_import_block_ext(
+			&client,
+			BlockOrigin::Own,
+			true,
+			&mut para_import,
+			Some(parent),
+			None,
+			Some(relay_parent),
+		);
+		parent = block.header().hash();
+		aligned_blocks.push(block);
+	}
+
+	let mut alt_blocks = Vec::new();
+	let mut parent = included_block.header().hash();
+	for _ in 0..NON_INCLUDED_CHAIN_LEN {
+		let block = build_and_import_block_ext(
+			&client,
+			BlockOrigin::NetworkInitialSync,
+			true,
+			&mut para_import,
+			Some(parent),
+			None,
+			Some(search_relay_parent),
+		);
+		parent = block.header().hash();
+		alt_blocks.push(block);
+	}
+
+	// Ignore alternative branch:
+	for max_depth in 0..=NON_INCLUDED_CHAIN_LEN {
+		let potential_parents = block_on(find_potential_parents(
+			ParentSearchParams {
+				relay_parent: search_relay_parent,
+				para_id: ParaId::from(100),
+				ancestry_lookback: 1, // aligned chain is in ancestry.
+				max_depth,
+				ignore_alternative_branches: true,
+			},
+			&*backend,
+			&relay_chain,
+		))
+		.unwrap();
+
+		assert_eq!(potential_parents.len(), max_depth + 1);
+		let expected_parents: Vec<_> = [&included_block, &in_between_block, &pending_block]
+			.into_iter()
+			.chain(aligned_blocks.iter())
+			.take(max_depth + 1)
+			.collect();
+
+		for i in 0..(max_depth + 1) {
+			let parent = &potential_parents[i];
+			let expected = &expected_parents[i];
+
+			assert_eq!(parent.hash, expected.hash());
+			assert_eq!(&parent.header, expected.header());
+			assert_eq!(parent.depth, i);
+			assert!(parent.aligned_with_pending);
+		}
+	}
+
+	// Do not ignore:
+	for max_depth in 0..=NON_INCLUDED_CHAIN_LEN {
+		let potential_parents = block_on(find_potential_parents(
+			ParentSearchParams {
+				relay_parent: search_relay_parent,
+				para_id: ParaId::from(100),
+				ancestry_lookback: 1, // aligned chain is in ancestry.
+				max_depth,
+				ignore_alternative_branches: false,
+			},
+			&*backend,
+			&relay_chain,
+		))
+		.unwrap();
+
+		let expected_len = 2 * max_depth + 1;
+		assert_eq!(potential_parents.len(), expected_len);
+		let expected_aligned: Vec<_> = [&included_block, &in_between_block, &pending_block]
+			.into_iter()
+			.chain(aligned_blocks.iter())
+			.take(max_depth + 1)
+			.collect();
+		let expected_alt = alt_blocks.iter().take(max_depth);
+
+		let expected_parents: Vec<_> =
+			expected_aligned.clone().into_iter().chain(expected_alt).collect();
+		// Check correctness.
+		assert_eq!(expected_parents.len(), expected_len);
+
+		for i in 0..expected_len {
+			let parent = &potential_parents[i];
+			let expected = expected_parents
+				.iter()
+				.find(|block| block.header().hash() == parent.hash)
+				.expect("missing parent");
+
+			let is_aligned = expected_aligned.contains(&expected);
+
+			assert_eq!(parent.hash, expected.hash());
+			assert_eq!(&parent.header, expected.header());
+
+			assert_eq!(parent.aligned_with_pending, is_aligned);
+		}
+	}
+}
+
 #[test]
 fn find_potential_parents_aligned_with_pending() {
 	sp_tracing::try_init_simple();
@@ -1243,6 +1414,7 @@ fn find_potential_parents_aligned_with_pending() {
 
 	// Do not ignore:
 	for max_depth in 0..=NON_INCLUDED_CHAIN_LEN {
+		log::info!("Ran with max_depth = {max_depth}");
 		let potential_parents = block_on(find_potential_parents(
 			ParentSearchParams {
 				relay_parent: search_relay_parent,
@@ -1270,6 +1442,7 @@ fn find_potential_parents_aligned_with_pending() {
 		// Check correctness.
 		assert_eq!(expected_parents.len(), expected_len);
 
+		potential_parents.iter().for_each(|p| log::info!("result: {:?}", p));
 		for i in 0..expected_len {
 			let parent = &potential_parents[i];
 			let expected = expected_parents
@@ -1282,6 +1455,12 @@ fn find_potential_parents_aligned_with_pending() {
 
 			assert_eq!(parent.hash, expected.hash());
 			assert_eq!(&parent.header, expected.header());
 
+			log::info!(
+				"Check hash: {:?} expected: {} is: {}",
+				parent.hash,
+				is_aligned,
+				parent.aligned_with_pending,
+			);
 			assert_eq!(parent.aligned_with_pending, is_aligned);
 		}
 	}
diff --git a/cumulus/test/runtime/src/lib.rs b/cumulus/test/runtime/src/lib.rs
index 74588344fa5c..da9fd2768b43 100644
--- a/cumulus/test/runtime/src/lib.rs
+++ b/cumulus/test/runtime/src/lib.rs
@@ -94,7 +94,7 @@ const UNINCLUDED_SEGMENT_CAPACITY: u32 = 4;
 const BLOCK_PROCESSING_VELOCITY: u32 = 1;
 
 #[cfg(feature = "elastic-scaling")]
-const UNINCLUDED_SEGMENT_CAPACITY: u32 = 9;
+const UNINCLUDED_SEGMENT_CAPACITY: u32 = 6;
 #[cfg(feature = "elastic-scaling")]
 const BLOCK_PROCESSING_VELOCITY: u32 = 3;
 
diff --git a/cumulus/zombienet/tests/0008-elastic_authoring.toml b/cumulus/zombienet/tests/0008-elastic_authoring.toml
index 183fa10b6a96..9d3769d7cb0a 100644
--- a/cumulus/zombienet/tests/0008-elastic_authoring.toml
+++ b/cumulus/zombienet/tests/0008-elastic_authoring.toml
@@ -41,7 +41,7 @@ chain = "elastic-scaling"
   name = "collator-elastic"
   image = "{{CUMULUS_IMAGE}}"
   command = "test-parachain"
-  args = ["-laura=trace,runtime=info,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--use-slot-authoring"]
+  args = ["-laura=trace,runtime=info,cumulus-consensus=trace,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--use-slot-authoring"]
 
 # Slot based authoring with a single core and 6s slot duration
 [[parachains]]
 id = 2000
 add_to_genesis = false
 register_para = false
 onboard_as_parachain = false
 
   [[parachains.collators]]
   name = "collator-single-core"
   image =
"{{CUMULUS_IMAGE}}" command = "test-parachain" - args = ["-laura=trace,runtime=info,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--use-slot-authoring"] + args = ["-laura=trace,runtime=info,cumulus-consensus=trace,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--use-slot-authoring"] From 734bdcbaf175239f43883f9cedea8cdc6c8c4e64 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 9 Apr 2024 16:00:08 +0200 Subject: [PATCH 13/71] Merge fixes --- cumulus/client/consensus/aura/src/collators/basic.rs | 2 +- cumulus/test/service/src/lib.rs | 2 ++ cumulus/test/service/src/main.rs | 2 ++ templates/parachain/node/src/service.rs | 2 +- 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/basic.rs b/cumulus/client/consensus/aura/src/collators/basic.rs index 8628968cbfbb..4efd50a04ec6 100644 --- a/cumulus/client/consensus/aura/src/collators/basic.rs +++ b/cumulus/client/consensus/aura/src/collators/basic.rs @@ -41,7 +41,7 @@ use sc_consensus::BlockImport; use sp_api::{CallApiAt, ProvideRuntimeApi}; use sp_application_crypto::AppPublic; use sp_blockchain::HeaderBackend; -use sp_consensus_aura::{AuraApi, SlotDuration}; +use sp_consensus_aura::AuraApi; use sp_core::crypto::Pair; use sp_inherents::CreateInherentDataProviders; use sp_keystore::KeystorePtr; diff --git a/cumulus/test/service/src/lib.rs b/cumulus/test/service/src/lib.rs index 0efa8f3d9c51..836cf8bbc531 100644 --- a/cumulus/test/service/src/lib.rs +++ b/cumulus/test/service/src/lib.rs @@ -766,6 +766,7 @@ impl TestNodeBuilder { self.consensus, collator_options, self.record_proof_during_import, + false, ) .await .expect("could not create Cumulus test service"), @@ -781,6 +782,7 @@ impl TestNodeBuilder { self.consensus, collator_options, self.record_proof_during_import, + false, ) .await .expect("could not create Cumulus test service"), diff --git a/cumulus/test/service/src/main.rs b/cumulus/test/service/src/main.rs index 90d37173dd59..96a1155f3abe 100644 --- a/cumulus/test/service/src/main.rs +++ b/cumulus/test/service/src/main.rs @@ -118,6 +118,7 @@ fn main() -> Result<(), sc_cli::Error> { consensus, collator_options, true, + cli.use_slot_authoring, ) .await, sc_network::config::NetworkBackendType::Litep2p => @@ -135,6 +136,7 @@ fn main() -> Result<(), sc_cli::Error> { consensus, collator_options, true, + cli.use_slot_authoring, ) .await, } diff --git a/templates/parachain/node/src/service.rs b/templates/parachain/node/src/service.rs index c97a41ae8232..0b1b6259c72d 100644 --- a/templates/parachain/node/src/service.rs +++ b/templates/parachain/node/src/service.rs @@ -27,7 +27,7 @@ use frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE; use sc_client_api::Backend; use sc_consensus::ImportQueue; use sc_executor::{HeapAllocStrategy, WasmExecutor, DEFAULT_HEAP_ALLOC_STRATEGY}; -use sc_network::{NetworkBackend, NetworkBlock}; +use sc_network::NetworkBlock; use sc_service::{Configuration, PartialComponents, TFullBackend, TFullClient, TaskManager}; use sc_telemetry::{Telemetry, TelemetryHandle, TelemetryWorker, TelemetryWorkerHandle}; use sc_transaction_pool_api::OffchainTransactionPoolFactory; From c439d2161c07b311605de68dbe26cfe0dd18ac1b Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 11 Apr 2024 09:17:15 +0200 Subject: [PATCH 14/71] Modernize zombienet test: --- 
.../zombienet/tests/0008-configure-broker.js | 82 ------------------- .../zombienet/tests/0008-configure-relay.js | 78 ------------------ .../tests/0008-elastic_authoring.toml | 27 +++--- .../tests/0008-elastic_authoring.zndsl | 17 ++-- cumulus/zombienet/tests/assign-core.js | 41 ++++++++++ 5 files changed, 59 insertions(+), 186 deletions(-) delete mode 100644 cumulus/zombienet/tests/0008-configure-broker.js delete mode 100644 cumulus/zombienet/tests/0008-configure-relay.js create mode 100644 cumulus/zombienet/tests/assign-core.js diff --git a/cumulus/zombienet/tests/0008-configure-broker.js b/cumulus/zombienet/tests/0008-configure-broker.js deleted file mode 100644 index f38a3bc03331..000000000000 --- a/cumulus/zombienet/tests/0008-configure-broker.js +++ /dev/null @@ -1,82 +0,0 @@ -const assert = require("assert"); - -async function run(nodeName, networkInfo, _jsArgs) { - const { wsUri, userDefinedTypes } = networkInfo.nodesByName[nodeName]; - const api = await zombie.connect(wsUri, userDefinedTypes); - - await zombie.util.cryptoWaitReady(); - - // account to submit tx - const keyring = new zombie.Keyring({ type: "sr25519" }); - const alice = keyring.addFromUri("//Alice"); - - const calls = [ - // Default broker configuration - api.tx.broker.configure({ - advanceNotice: 5, - interludeLength: 1, - leadinLength: 1, - regionLength: 1, - idealBulkProportion: 100, - limitCoresOffered: null, - renewalBump: 10, - contributionTimeout: 5, - }), - // We need MOARE cores. - api.tx.broker.requestCoreCount(5), - // Set a lease for the broker chain itself. - api.tx.broker.setLease( - 1005, - 1000, - ), - // Three cores for para 2100. - api.tx.broker.setLease( - 2100, - 1000, - ), - api.tx.broker.setLease( - 2100, - 1000, - ), - api.tx.broker.setLease( - 2100, - 1000, - ), - // One cores for para 2000. - api.tx.broker.setLease( - 2000, - 1000, - ), - // Start sale to make the broker "work", but we don't offer any cores - // as we have fixed leases only anyway. - api.tx.broker.startSales(1, 0), - ]; - const sudo_batch = api.tx.sudo.sudo(api.tx.utility.batch(calls)); - - await new Promise(async (resolve, reject) => { - const unsub = await sudo_batch.signAndSend(alice, (result) => { - console.log(`Current status is ${result.status}`); - if (result.status.isInBlock) { - console.log( - `Transaction included at blockHash ${result.status.asInBlock}` - ); - } else if (result.status.isFinalized) { - console.log( - `Transaction finalized at blockHash ${result.status.asFinalized}` - ); - unsub(); - return resolve(); - } else if (result.isError) { - // Probably happens because of: https://github.com/paritytech/polkadot-sdk/issues/1202. - console.log(`Transaction error`); - // We ignore the error because it is very likely misleading, because of the issue mentioned above. 
- unsub(); - return resolve(); - } - }); - }); - - return 0; -} - -module.exports = { run }; diff --git a/cumulus/zombienet/tests/0008-configure-relay.js b/cumulus/zombienet/tests/0008-configure-relay.js deleted file mode 100644 index c546ac3f5fdd..000000000000 --- a/cumulus/zombienet/tests/0008-configure-relay.js +++ /dev/null @@ -1,78 +0,0 @@ -const assert = require("assert"); - -async function run(nodeName, networkInfo, _jsArgs) { - const init = networkInfo.nodesByName[nodeName]; - let wsUri = init.wsUri; - let userDefinedTypes = init.userDefinedTypes; - const api = await zombie.connect(wsUri, userDefinedTypes); - - const collatorElastic = networkInfo.nodesByName["collator-elastic"]; - wsUri = collatorElastic.wsUri; - userDefinedTypes = collatorElastic.userDefinedTypes; - const apiCollatorElastic = await zombie.connect(wsUri, userDefinedTypes); - - const collatorSingleCore = networkInfo.nodesByName["collator-single-core"]; - wsUriSingleCore = collatorSingleCore.wsUri; - userDefinedTypes6s = collatorSingleCore.userDefinedTypes; - - const apiCollatorSingleCore = await zombie.connect(wsUriSingleCore, userDefinedTypes6s); - - await zombie.util.cryptoWaitReady(); - - // Get the genesis header and the validation code of parachain 2100 - const genesisHeaderElastic = await apiCollatorElastic.rpc.chain.getHeader(); - const validationCodeElastic = await apiCollatorElastic.rpc.state.getStorage("0x3A636F6465"); - - // Get the genesis header and the validation code of parachain 2000 - const genesisHeaderSingleCore = await apiCollatorSingleCore.rpc.chain.getHeader(); - const validationCodeSingleCore = await apiCollatorSingleCore.rpc.state.getStorage("0x3A636F6465"); - - // account to submit tx - const keyring = new zombie.Keyring({ type: "sr25519" }); - const alice = keyring.addFromUri("//Alice"); - - const calls = [ - api.tx.configuration.setCoretimeCores({ new: 7 }), - api.tx.coretime.assignCore(0, 20,[[ { task: 1005 }, 57600 ]], null), - api.tx.registrar.forceRegister( - alice.address, - 0, - 2100, - genesisHeaderElastic.toHex(), - validationCodeElastic.toHex(), - ), - api.tx.registrar.forceRegister( - alice.address, - 0, - 2000, - genesisHeaderSingleCore.toHex(), - validationCodeSingleCore.toHex(), - ) - ]; - const sudo_batch = api.tx.sudo.sudo(api.tx.utility.batch(calls)); - - await new Promise(async (resolve, reject) => { - const unsub = await sudo_batch.signAndSend(alice, (result) => { - console.log(`Current status is ${result.status}`); - if (result.status.isInBlock) { - console.log( - `Transaction included at blockHash ${result.status.asInBlock}` - ); - } else if (result.status.isFinalized) { - console.log( - `Transaction finalized at blockHash ${result.status.asFinalized}` - ); - unsub(); - return resolve(); - } else if (result.isError) { - console.log(`Transaction Error`); - unsub(); - return reject(); - } - }); - }); - - return 0; -} - -module.exports = { run }; diff --git a/cumulus/zombienet/tests/0008-elastic_authoring.toml b/cumulus/zombienet/tests/0008-elastic_authoring.toml index 9d3769d7cb0a..07e1298ce248 100644 --- a/cumulus/zombienet/tests/0008-elastic_authoring.toml +++ b/cumulus/zombienet/tests/0008-elastic_authoring.toml @@ -5,6 +5,15 @@ timeout = 1000 max_candidate_depth = 6 allowed_ancestry_len = 3 +[relaychain.genesis.runtimeGenesis.patch.configuration.config.scheduler_params] + max_validators_per_core = 1 + scheduling_lookahead = 2 + num_cores = 4 + +[relaychain.genesis.runtimeGenesis.patch.configuration.config.approval_voting_params] + needed_approvals = 3 + 
max_approval_coalesce_count = 5 + [relaychain] default_image = "{{ZOMBIENET_INTEGRATION_TEST_IMAGE}}" chain = "rococo-local" @@ -19,23 +28,11 @@ command = "polkadot" args = ["-lruntime=debug,parachain=trace" ] count = 8 -[[parachains]] -id = 1005 -chain = "coretime-rococo-local" - - [parachains.collator] - name = "coretime-collator" - image = "{{CUMULUS_IMAGE}}" - command = "polkadot-parachain" - args = [ "-lruntime=info" ,"--force-authoring"] - # Slot based authoring with 3 cores and 2s slot duration [[parachains]] id = 2100 -add_to_genesis = false -register_para = false -onboard_as_parachain = false chain = "elastic-scaling" +add_to_genesis = true [[parachains.collators]] name = "collator-elastic" @@ -46,9 +43,7 @@ chain = "elastic-scaling" # Slot based authoring with 3 cores and 2s slot duration [[parachains]] id = 2000 -add_to_genesis = false -register_para = false -onboard_as_parachain = false +add_to_genesis = true [[parachains.collators]] name = "collator-single-core" diff --git a/cumulus/zombienet/tests/0008-elastic_authoring.zndsl b/cumulus/zombienet/tests/0008-elastic_authoring.zndsl index 8f83ba8b1a2c..bfc756530c82 100644 --- a/cumulus/zombienet/tests/0008-elastic_authoring.zndsl +++ b/cumulus/zombienet/tests/0008-elastic_authoring.zndsl @@ -3,16 +3,13 @@ Network: ./0008-elastic_authoring.toml Creds: config alice: is up -coretime-collator: is up +collator-elastic: is up +collator-single-core: is up -# configure relay chain -alice: js-script ./0008-configure-relay.js with "" return is 0 within 600 secs - -# configure broker chain -coretime-collator: js-script ./0008-configure-broker.js with "" return is 0 within 600 secs -# Ensure that parachain 2100 got onboarded -alice: parachain 2100 block height is at least 30 within 45000 seconds +# configure relay chain +alice: js-script ./assign-core.js with "2100,0" return is 0 within 600 seconds +alice: js-script ./assign-core.js with "2100,1" return is 0 within 600 seconds -# Ensure that parachain 2000 got onboarded -alice: parachain 2000 block height is at least 10 within 45000 seconds +collator-single-core: reports block height is at least 20 within 225 seconds +collator-elastic: reports block height is at least 40 within 225 seconds diff --git a/cumulus/zombienet/tests/assign-core.js b/cumulus/zombienet/tests/assign-core.js new file mode 100644 index 000000000000..2e5f9d8cfa58 --- /dev/null +++ b/cumulus/zombienet/tests/assign-core.js @@ -0,0 +1,41 @@ +async function run(nodeName, networkInfo, args) { + const { wsUri, userDefinedTypes } = networkInfo.nodesByName[nodeName]; + const api = await zombie.connect(wsUri, userDefinedTypes); + + let para = Number(args[0]); + let core = Number(args[1]); + console.log(`Assigning para ${para} to core ${core}`); + + await zombie.util.cryptoWaitReady(); + + // account to submit tx + const keyring = new zombie.Keyring({ type: "sr25519" }); + const alice = keyring.addFromUri("//Alice"); + + await new Promise(async (resolve, reject) => { + const unsub = await api.tx.sudo + .sudo(api.tx.coretime.assignCore(core, 0, [[{ task: para }, 57600]], null)) + .signAndSend(alice, ({ status, isError }) => { + if (status.isInBlock) { + console.log( + `Transaction included at blockhash ${status.asInBlock}`, + ); + } else if (status.isFinalized) { + console.log( + `Transaction finalized at blockHash ${status.asFinalized}`, + ); + unsub(); + return resolve(); + } else if (isError) { + console.log(`Transaction error`); + reject(`Transaction error`); + } + }); + }); + + + + return 0; +} + +module.exports = { run }; 
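A note on the magic number in the assignCore call above: 57600 is the denominator of a core's assignment fraction -- the relay runtime accounts core shares in parts of 57,600 (`PartsOf57600`) -- so `[[{ task: para }, 57600]]` hands the entire core to a single task. Below is a minimal sketch, assuming the same zombienet environment and `api`/`alice` setup as assign-core.js, of how one core could instead be split between two paras; the para IDs and the 50/50 split are illustrative only and are not part of the test above:

// Hypothetical variant of assign-core.js: split one core between two
// parachains. 28800 + 28800 = 57600, i.e. the full core.
const splitAssignment = api.tx.coretime.assignCore(
  core,
  0, // assignment takes effect immediately
  [
    [{ task: 2100 }, 28800], // half the core to para 2100
    [{ task: 2000 }, 28800], // half the core to para 2000
  ],
  null, // no end hint
);
await api.tx.sudo.sudo(splitAssignment).signAndSend(alice);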
From fb9f383282027bb41049633deb137deea8da3c29 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 11 Apr 2024 11:35:54 +0200 Subject: [PATCH 15/71] Unify core schedule fetching methods --- .../consensus/aura/src/collators/lookahead.rs | 110 ++---------------- .../consensus/aura/src/collators/mod.rs | 79 ++++++++++++- .../slot_based/block_builder_task.rs | 6 +- .../collators/slot_based/collation_task.rs | 5 +- .../aura/src/collators/slot_based/mod.rs | 38 +----- 5 files changed, 94 insertions(+), 144 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/lookahead.rs b/cumulus/client/consensus/aura/src/collators/lookahead.rs index 3dcc6f70db53..16df18325610 100644 --- a/cumulus/client/consensus/aura/src/collators/lookahead.rs +++ b/cumulus/client/consensus/aura/src/collators/lookahead.rs @@ -34,26 +34,19 @@ use codec::{Codec, Encode}; use cumulus_client_collator::service::ServiceInterface as CollatorServiceInterface; use cumulus_client_consensus_common::{ - self as consensus_common, load_abridged_host_configuration, ParachainBlockImportMarker, - ParentSearchParams, + self as consensus_common, ParachainBlockImportMarker, ParentSearchParams, }; use cumulus_client_consensus_proposer::ProposerInterface; use cumulus_primitives_aura::AuraUnincludedSegmentApi; -use cumulus_primitives_core::{ - relay_chain::Hash as PHash, CollectCollationInfo, PersistedValidationData, -}; +use cumulus_primitives_core::{CollectCollationInfo, PersistedValidationData}; use cumulus_relay_chain_interface::RelayChainInterface; use polkadot_node_primitives::SubmitCollationParams; -use polkadot_node_subsystem::messages::{ - CollationGenerationMessage, RuntimeApiMessage, RuntimeApiRequest, -}; +use polkadot_node_subsystem::messages::CollationGenerationMessage; use polkadot_overseer::Handle as OverseerHandle; -use polkadot_primitives::{ - AsyncBackingParams, CollatorPair, CoreIndex, CoreState, Id as ParaId, OccupiedCoreAssumption, -}; +use polkadot_primitives::{CollatorPair, Id as ParaId, OccupiedCoreAssumption}; -use futures::{channel::oneshot, prelude::*}; +use futures::prelude::*; use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf}; use sc_consensus::BlockImport; use sc_consensus_aura::standalone as aura_internal; @@ -184,10 +177,9 @@ where // TODO: Currently we use just the first core here, but for elastic scaling // we iterate and build on all of the cores returned. - let core_index = if let Some(core_index) = cores_scheduled_for_para( + let core_index = if let Some(core_index) = super::cores_scheduled_for_para( relay_parent, params.para_id, - &mut params.overseer_handle, &mut params.relay_client, ) .await @@ -225,7 +217,7 @@ where let parent_search_params = ParentSearchParams { relay_parent, para_id: params.para_id, - ancestry_lookback: async_backing_params(relay_parent, ¶ms.relay_client) + ancestry_lookback: super::async_backing_params(relay_parent, ¶ms.relay_client) .await .map(|c| c.allowed_ancestry_len as usize) .unwrap_or(0), @@ -465,91 +457,3 @@ where Some(SlotClaim::unchecked::
<P>
(author_pub, slot, timestamp)) } - -/// Reads async backing parameters from the relay chain storage at the given relay parent. -async fn async_backing_params( - relay_parent: PHash, - relay_client: &impl RelayChainInterface, -) -> Option { - match load_abridged_host_configuration(relay_parent, relay_client).await { - Ok(Some(config)) => Some(config.async_backing_params), - Ok(None) => { - tracing::error!( - target: crate::LOG_TARGET, - "Active config is missing in relay chain storage", - ); - None - }, - Err(err) => { - tracing::error!( - target: crate::LOG_TARGET, - ?err, - ?relay_parent, - "Failed to read active config from relay chain client", - ); - None - }, - } -} - -// Return all the cores assigned to the para at the provided relay parent. -async fn cores_scheduled_for_para( - relay_parent: PHash, - para_id: ParaId, - overseer_handle: &mut OverseerHandle, - relay_client: &impl RelayChainInterface, -) -> Vec { - // Get `AvailabilityCores` from runtime - let (tx, rx) = oneshot::channel(); - let request = RuntimeApiRequest::AvailabilityCores(tx); - overseer_handle - .send_msg(RuntimeApiMessage::Request(relay_parent, request), "LookaheadCollator") - .await; - - let cores = match rx.await { - Ok(Ok(cores)) => cores, - Ok(Err(error)) => { - tracing::error!( - target: crate::LOG_TARGET, - ?error, - ?relay_parent, - "Failed to query availability cores runtime API", - ); - return Vec::new() - }, - Err(oneshot::Canceled) => { - tracing::error!( - target: crate::LOG_TARGET, - ?relay_parent, - "Sender for availability cores runtime request dropped", - ); - return Vec::new() - }, - }; - - let max_candidate_depth = async_backing_params(relay_parent, relay_client) - .await - .map(|c| c.max_candidate_depth) - .unwrap_or(0); - - cores - .iter() - .enumerate() - .filter_map(|(index, core)| { - let core_para_id = match core { - CoreState::Scheduled(scheduled_core) => Some(scheduled_core.para_id), - CoreState::Occupied(occupied_core) if max_candidate_depth >= 1 => occupied_core - .next_up_on_available - .as_ref() - .map(|scheduled_core| scheduled_core.para_id), - CoreState::Free | CoreState::Occupied(_) => None, - }; - - if core_para_id == Some(para_id) { - Some(CoreIndex(index as u32)) - } else { - None - } - }) - .collect() -} diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs index 7ee236e910da..e184e9953039 100644 --- a/cumulus/client/consensus/aura/src/collators/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/mod.rs @@ -20,9 +20,13 @@ //! included parachain block, as well as the [`lookahead`] collator, which prospectively //! builds on parachain blocks which have not yet been included in the relay chain. +use std::collections::VecDeque; + +use cumulus_client_consensus_common::load_abridged_host_configuration; use cumulus_relay_chain_interface::RelayChainInterface; use polkadot_primitives::{ - Hash as RHash, Id as ParaId, OccupiedCoreAssumption, ValidationCodeHash, + AsyncBackingParams, CoreIndex, CoreState, Hash as RHash, Id as ParaId, OccupiedCoreAssumption, + ValidationCodeHash, }; pub mod basic; @@ -78,3 +82,76 @@ async fn check_validation_code_or_log( }, } } + +/// Reads async backing parameters from the relay chain storage at the given relay parent. 
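+///
+/// The returned `AsyncBackingParams` carry two limits: `max_candidate_depth`, the
+/// maximum number of para blocks allowed between the para head at the relay parent
+/// and a new candidate, and `allowed_ancestry_len`, the number of relay-chain
+/// ancestors of the tip that are still acceptable as relay parents.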
+async fn async_backing_params( + relay_parent: RHash, + relay_client: &impl RelayChainInterface, +) -> Option { + match load_abridged_host_configuration(relay_parent, relay_client).await { + Ok(Some(config)) => Some(config.async_backing_params), + Ok(None) => { + tracing::error!( + target: crate::LOG_TARGET, + "Active config is missing in relay chain storage", + ); + None + }, + Err(err) => { + tracing::error!( + target: crate::LOG_TARGET, + ?err, + ?relay_parent, + "Failed to read active config from relay chain client", + ); + None + }, + } +} + +// Return all the cores assigned to the para at the provided relay parent. +async fn cores_scheduled_for_para( + relay_parent: polkadot_primitives::Hash, + para_id: ParaId, + relay_client: &impl RelayChainInterface, +) -> VecDeque { + // Get `AvailabilityCores` from runtime + let cores = match relay_client.availability_cores(relay_parent).await { + Ok(cores) => cores, + Err(error) => { + tracing::error!( + target: crate::LOG_TARGET, + ?error, + ?relay_parent, + "Failed to query availability cores runtime API", + ); + return VecDeque::new() + }, + }; + + let max_candidate_depth = async_backing_params(relay_parent, relay_client) + .await + .map(|c| c.max_candidate_depth) + .unwrap_or(0); + + cores + .iter() + .enumerate() + .filter_map(|(index, core)| { + let core_para_id = match core { + CoreState::Scheduled(scheduled_core) => Some(scheduled_core.para_id), + CoreState::Occupied(occupied_core) if max_candidate_depth >= 1 => occupied_core + .next_up_on_available + .as_ref() + .map(|scheduled_core| scheduled_core.para_id), + CoreState::Free | CoreState::Occupied(_) => None, + }; + + if core_para_id == Some(para_id) { + Some(CoreIndex(index as u32)) + } else { + None + } + }) + .collect() +} diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 83c2a8c859b7..62e173481170 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -67,10 +67,10 @@ use sp_runtime::traits::{Block as BlockT, Header as HeaderT, Member}; use sp_timestamp::Timestamp; use std::{sync::Arc, time::Duration}; -use super::{scheduled_cores, CollatorMessage}; +use super::CollatorMessage; use crate::{ collator::{self as collator_util, SlotClaim}, - collators::check_validation_code_or_log, + collators::{check_validation_code_or_log, cores_scheduled_for_para}, LOG_TARGET, }; @@ -275,7 +275,7 @@ pub async fn run_block_builder { core_queue = - scheduled_cores(notification.hash(), params.para_id, ¶ms.relay_client).await; + cores_scheduled_for_para(notification.hash(), params.para_id, ¶ms.relay_client).await; tracing::debug!( target: LOG_TARGET, relay_parent = ?notification.hash(), diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs index d56840425470..5c401862f03b 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs @@ -42,12 +42,10 @@ use cumulus_client_collator::service::ServiceInterface as CollatorServiceInterfa use cumulus_client_consensus_common::{self as consensus_common, ParachainBlockImportMarker}; use cumulus_client_consensus_proposer::ProposerInterface; use cumulus_primitives_aura::AuraUnincludedSegmentApi; -use cumulus_primitives_core::{relay_chain::Hash as PHash, 
CollectCollationInfo}; +use cumulus_primitives_core::CollectCollationInfo; use cumulus_relay_chain_interface::RelayChainInterface; use polkadot_overseer::Handle as OverseerHandle; -use polkadot_primitives::{ - CollatorPair, CoreIndex, Hash as RelayHash, Id as ParaId, ValidationCodeHash, -}; +use polkadot_primitives::{CollatorPair, Hash as RelayHash, Id as ParaId, ValidationCodeHash}; use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf, UsageProvider}; use sc_consensus::BlockImport; @@ -61,9 +59,7 @@ use sp_inherents::CreateInherentDataProviders; use sp_keystore::KeystorePtr; use sp_runtime::traits::{Block as BlockT, Member}; -use std::{collections::VecDeque, sync::Arc, time::Duration}; - -use crate::LOG_TARGET; +use std::{sync::Arc, time::Duration}; use self::{block_builder_task::run_block_builder, collation_task::run_collation_task}; @@ -185,31 +181,3 @@ struct CollatorMessage { /// The validation code hash at the parent block. pub validation_code_hash: ValidationCodeHash, } - -/// Retrieve the scheduled cores for the parachain with id `para_id` from the relay chain. -async fn scheduled_cores( - relay_parent: PHash, - para_id: ParaId, - relay_chain_interface: &RClient, -) -> VecDeque { - let cores = match relay_chain_interface.availability_cores(relay_parent).await { - Ok(cores) => cores, - Err(error) => { - tracing::error!( - target: LOG_TARGET, - ?error, - ?relay_parent, - "Failed to query availability cores runtime API", - ); - return VecDeque::new() - }, - }; - - cores - .iter() - .enumerate() - .filter_map(|(idx, core)| { - (core.para_id() == Some(para_id)).then_some(CoreIndex(idx as u32)) - }) - .collect() -} From 06f8fc79bfc9233d3e6da0b3ea8514936e74f585 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 11 Apr 2024 11:44:24 +0200 Subject: [PATCH 16/71] Remove overseer_handle from parameters --- .../aura/src/collators/slot_based/collation_task.rs | 10 ++++++---- .../consensus/aura/src/collators/slot_based/mod.rs | 3 --- cumulus/test/service/src/lib.rs | 1 - 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs index 4a8478bd0e8a..b57c9f7ba0c2 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs @@ -43,8 +43,6 @@ pub struct Params { pub collator_key: CollatorPair, /// The para's ID. pub para_id: ParaId, - /// A handle to the relay-chain client's "Overseer" or task orchestrator. - pub overseer_handle: OverseerHandle, /// Whether we should reinitialize the collator config (i.e. we are transitioning to aura). 
pub reinitialize: bool, /// Collator service interface @@ -65,8 +63,13 @@ where CS: CollatorServiceInterface + Send + Sync + 'static, RClient: RelayChainInterface + Clone + 'static, { + let Ok(mut overseer_handle) = params.relay_client.overseer_handle() else { + tracing::error!(target: LOG_TARGET, "Failed to get overseer handle."); + return + }; + cumulus_client_collator::initialize_collator_subsystems( - &mut params.overseer_handle, + &mut overseer_handle, params.collator_key, params.para_id, params.reinitialize, @@ -87,7 +90,6 @@ where }, }; - let mut overseer_handle = params.overseer_handle; let mut core_queue = Default::default(); let mut messages = VecDeque::new(); loop { diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs index 5c401862f03b..3d61b1d4cafd 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs @@ -88,8 +88,6 @@ pub struct Params { pub collator_key: CollatorPair, /// The para's ID. pub para_id: ParaId, - /// A handle to the relay-chain client's "Overseer" or task orchestrator. - pub overseer_handle: OverseerHandle, /// The length of slots in the relay chain. pub relay_chain_slot_duration: Duration, /// The underlying block proposer this should call into. @@ -137,7 +135,6 @@ where relay_client: params.relay_client.clone(), collator_key: params.collator_key.clone(), para_id: params.para_id, - overseer_handle: params.overseer_handle.clone(), reinitialize: params.reinitialize, collator_service: params.collator_service.clone(), collator_receiver: rx, diff --git a/cumulus/test/service/src/lib.rs b/cumulus/test/service/src/lib.rs index 836cf8bbc531..76cee331cd8e 100644 --- a/cumulus/test/service/src/lib.rs +++ b/cumulus/test/service/src/lib.rs @@ -482,7 +482,6 @@ where keystore, collator_key, para_id, - overseer_handle, relay_chain_slot_duration, proposer, collator_service, From cf64247e2b6c1beb4185fcc0d6a54f2d87b758bf Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 11 Apr 2024 14:12:04 +0200 Subject: [PATCH 17/71] Improve logging --- .../aura/src/collators/slot_based/block_builder_task.rs | 8 +++++--- .../client/consensus/aura/src/collators/slot_based/mod.rs | 1 - cumulus/client/consensus/common/src/lib.rs | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 62e173481170..5c74eddda174 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -328,10 +328,12 @@ pub async fn run_block_builder( .map_or(true, |route| route.enacted().iter().any(|x| x.hash == hash)) }; - tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?included_hash, included_num = ?included_header.number(), ?pending_hash ,?rp_ancestry, "Searching relay chain ancestry."); + tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?included_hash, included_num = ?included_header.number(), ?pending_hash , ?rp_ancestry, "Searching relay chain ancestry."); while let Some(entry) = frontier.pop() { // TODO Adjust once we can fetch multiple pending blocks. 
// https://github.com/paritytech/polkadot-sdk/issues/3967 From 44b489f6ef7c9478d8261644c6b5e0f3ee6204fd Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Fri, 12 Apr 2024 17:18:54 +0200 Subject: [PATCH 18/71] Do not return potential_parents that are not available locally --- cumulus/client/consensus/common/src/lib.rs | 26 +++++ cumulus/client/consensus/common/src/tests.rs | 108 +++++++++++++++++++ 2 files changed, 134 insertions(+) diff --git a/cumulus/client/consensus/common/src/lib.rs b/cumulus/client/consensus/common/src/lib.rs index 3aee1942144a..655e1f7946fb 100644 --- a/cumulus/client/consensus/common/src/lib.rs +++ b/cumulus/client/consensus/common/src/lib.rs @@ -364,6 +364,32 @@ pub async fn find_potential_parents( let included_hash = included_header.hash(); let pending_hash = pending_header.as_ref().map(|hdr| hdr.hash()); + match backend.blockchain().header(included_hash) { + Ok(None) | Err(_) => { + tracing::warn!("Failed to get header for included block at hash {:?}", included_hash); + return Ok(Default::default()) + }, + _ => {}, + }; + + if let Some(pending_hash) = pending_hash { + match backend.blockchain().header(pending_hash) { + Ok(None) | Err(_) => { + tracing::warn!( + "Failed to get header for included block at hash {:?}", + included_hash + ); + return Ok(vec![PotentialParent:: { + hash: included_hash, + header: included_header.clone(), + depth: 0, + aligned_with_pending: true, + }]) + }, + _ => {}, + }; + } + if params.max_depth == 0 { return Ok(vec![PotentialParent:: { hash: included_hash, diff --git a/cumulus/client/consensus/common/src/tests.rs b/cumulus/client/consensus/common/src/tests.rs index a66828248920..f5a4273b2cb3 100644 --- a/cumulus/client/consensus/common/src/tests.rs +++ b/cumulus/client/consensus/common/src/tests.rs @@ -1132,6 +1132,114 @@ fn find_potential_parents_with_max_depth() { } } +#[test] +fn find_potential_parents_unknown_included() { + sp_tracing::try_init_simple(); + + const NON_INCLUDED_CHAIN_LEN: usize = 5; + + let backend = Arc::new(Backend::new_test(1000, 1)); + let client = Arc::new(TestClientBuilder::with_backend(backend.clone()).build()); + let relay_parent = relay_hash_from_block_num(10); + // Choose different relay parent for alternative chain to get new hashes. + let search_relay_parent = relay_hash_from_block_num(11); + + let sproof = sproof_with_best_parent(&client); + let included_but_unknown = build_block(&*client, sproof, None, None, Some(relay_parent)); + + let relay_chain = Relaychain::new(); + { + let relay_inner = &mut relay_chain.inner.lock().unwrap(); + relay_inner + .relay_chain_hash_to_header + .insert(search_relay_parent, included_but_unknown.header().clone()); + } + + // Ignore alternative branch: + let potential_parents = block_on(find_potential_parents( + ParentSearchParams { + relay_parent: search_relay_parent, + para_id: ParaId::from(100), + ancestry_lookback: 1, // aligned chain is in ancestry. 
+ max_depth: NON_INCLUDED_CHAIN_LEN, + ignore_alternative_branches: true, + }, + &*backend, + &relay_chain, + )) + .unwrap(); + + assert_eq!(potential_parents.len(), 0); +} + +#[test] +fn find_potential_parents_unknown_pending() { + sp_tracing::try_init_simple(); + + const NON_INCLUDED_CHAIN_LEN: usize = 5; + + let backend = Arc::new(Backend::new_test(1000, 1)); + let client = Arc::new(TestClientBuilder::with_backend(backend.clone()).build()); + let mut para_import = + ParachainBlockImport::new_with_delayed_best_block(client.clone(), backend.clone()); + + let relay_parent = relay_hash_from_block_num(10); + // Choose different relay parent for alternative chain to get new hashes. + let search_relay_parent = relay_hash_from_block_num(11); + let included_block = build_and_import_block_ext( + &client, + BlockOrigin::NetworkInitialSync, + true, + &mut para_import, + None, + None, + Some(relay_parent), + ); + + let sproof = sproof_with_parent_by_hash(&client, included_block.header().hash()); + let pending_but_unknown = build_block( + &*client, + sproof, + Some(included_block.header().hash()), + None, + Some(relay_parent), + ); + + let relay_chain = Relaychain::new(); + { + let relay_inner = &mut relay_chain.inner.lock().unwrap(); + relay_inner + .relay_chain_hash_to_header + .insert(search_relay_parent, included_block.header().clone()); + relay_inner + .relay_chain_hash_to_header_pending + .insert(search_relay_parent, pending_but_unknown.header().clone()); + } + + // Ignore alternative branch: + let potential_parents = block_on(find_potential_parents( + ParentSearchParams { + relay_parent: search_relay_parent, + para_id: ParaId::from(100), + ancestry_lookback: 1, // aligned chain is in ancestry. + max_depth: NON_INCLUDED_CHAIN_LEN, + ignore_alternative_branches: true, + }, + &*backend, + &relay_chain, + )) + .unwrap(); + + assert_eq!(potential_parents.len(), 1); + let expected = included_block; + let parent = &potential_parents[0]; + + assert_eq!(parent.hash, expected.hash()); + assert_eq!(&parent.header, expected.header()); + assert_eq!(parent.depth, 0); + assert!(parent.aligned_with_pending); +} + /// Test where there is an additional block between included and pending block. #[test] fn find_potential_parents_aligned_with_late_pending() { From 8df558f6da6eafa1d9a7755e3260605877c62075 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 16 Apr 2024 11:42:48 +0200 Subject: [PATCH 19/71] Use commong async backing params fetching methods --- .../slot_based/block_builder_task.rs | 32 ++----------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 5c74eddda174..3f36c99d7c1c 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -131,34 +131,6 @@ impl SlotTimer { } } -/// Reads allowed ancestry length parameter from the relay chain storage at the given relay parent. -/// -/// Falls back to 0 in case of an error. 
-async fn max_ancestry_lookback( - relay_parent: PHash, - relay_client: &impl RelayChainInterface, -) -> usize { - match load_abridged_host_configuration(relay_parent, relay_client).await { - Ok(Some(config)) => config.async_backing_params.allowed_ancestry_len as usize, - Ok(None) => { - tracing::error!( - target: crate::LOG_TARGET, - "Active config is missing in relay chain storage", - ); - 0 - }, - Err(err) => { - tracing::error!( - target: crate::LOG_TARGET, - ?err, - ?relay_parent, - "Failed to read active config from relay chain client", - ); - 0 - }, - } -} - // Checks if we own the slot at the given block and whether there // is space in the unincluded segment. async fn can_build_upon( @@ -423,7 +395,9 @@ where let parent_search_params = ParentSearchParams { relay_parent, para_id, - ancestry_lookback: max_ancestry_lookback(relay_parent, relay_client).await, + ancestry_lookback: crate::collators::async_backing_params(relay_parent, relay_client) + .await + .map_or(0, |params| params.allowed_ancestry_len as usize), max_depth: PARENT_SEARCH_DEPTH, ignore_alternative_branches: true, }; From 7705314ef40481d80abfb5a811dfd93de55b2c0d Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 16 Apr 2024 16:01:32 +0200 Subject: [PATCH 20/71] Assign cores during block building --- Cargo.lock | 1 + cumulus/client/consensus/aura/Cargo.toml | 1 + .../slot_based/block_builder_task.rs | 46 +++++--- .../collators/slot_based/collation_task.rs | 109 ++++-------------- .../aura/src/collators/slot_based/mod.rs | 12 +- 5 files changed, 65 insertions(+), 104 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 60c45c8cb91d..367f19ddbfe7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3724,6 +3724,7 @@ dependencies = [ "sc-consensus-slots", "sc-service", "sc-telemetry", + "sc-utils", "schnellru", "sp-api", "sp-application-crypto", diff --git a/cumulus/client/consensus/aura/Cargo.toml b/cumulus/client/consensus/aura/Cargo.toml index a4ac78efb688..9a52b9fe4e5c 100644 --- a/cumulus/client/consensus/aura/Cargo.toml +++ b/cumulus/client/consensus/aura/Cargo.toml @@ -53,3 +53,4 @@ polkadot-primitives = { path = "../../../../polkadot/primitives" } polkadot-node-primitives = { path = "../../../../polkadot/node/primitives" } polkadot-node-subsystem = { path = "../../../../polkadot/node/subsystem" } polkadot-overseer = { path = "../../../../polkadot/node/overseer" } +sc-utils = { version = "14.0.0", path = "../../../../substrate/client/utils" } diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 3f36c99d7c1c..00e5ee3a31ee 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -40,8 +40,7 @@ use codec::{Codec, Encode}; use cumulus_client_collator::service::ServiceInterface as CollatorServiceInterface; use cumulus_client_consensus_common::{ - self as consensus_common, load_abridged_host_configuration, ParachainBlockImportMarker, - ParentSearchParams, + self as consensus_common, ParachainBlockImportMarker, ParentSearchParams, }; use cumulus_client_consensus_proposer::ProposerInterface; use cumulus_primitives_aura::AuraUnincludedSegmentApi; @@ -103,7 +102,9 @@ pub struct BuilderTaskParams>, + pub collator_sender: sc_utils::mpsc::TracingUnboundedSender>, + /// Slot duration of the relay chain + pub relay_chain_slot_duration: Duration, } #[derive(Debug)] @@ -212,6 +213,7 @@ pub 
async fn run_block_builder::new(params) }; + let Ok(velocity) = u64::try_from( + relay_chain_slot_duration.as_millis() / slot_duration.as_duration().as_millis(), + ) else { + tracing::error!(target: LOG_TARGET, ?relay_chain_slot_duration, ?slot_duration, "Unable to calculate expected parachain velocity."); + return; + }; + loop { // We wait here until the next slot arrives. let para_slot = slot_timer.wait_until_next_slot().await; @@ -253,6 +262,12 @@ pub async fn run_block_builder. -use std::collections::VecDeque; - use codec::Encode; use cumulus_client_collator::service::ServiceInterface as CollatorServiceInterface; @@ -24,14 +22,14 @@ use cumulus_relay_chain_interface::RelayChainInterface; use polkadot_node_primitives::{MaybeCompressedPoV, SubmitCollationParams}; use polkadot_node_subsystem::messages::CollationGenerationMessage; use polkadot_overseer::Handle as OverseerHandle; -use polkadot_primitives::{CollatorPair, CoreIndex, Id as ParaId}; +use polkadot_primitives::{CollatorPair, Id as ParaId}; use futures::prelude::*; +use sc_utils::mpsc::TracingUnboundedReceiver; use sp_runtime::traits::{Block as BlockT, Header}; use super::CollatorMessage; -use crate::collators::cores_scheduled_for_para; const LOG_TARGET: &str = "aura::cumulus::collation_task"; @@ -48,7 +46,7 @@ pub struct Params { /// Collator service interface pub collator_service: CS, /// Receiver channel for communication with the block builder task. - pub collator_receiver: tokio::sync::mpsc::Receiver>, + pub collator_receiver: TracingUnboundedReceiver>, } /// Asynchronously executes the collation task for a parachain. @@ -77,63 +75,13 @@ where .await; let collator_service = params.collator_service; - let mut best_notifications = match params.relay_client.new_best_notification_stream().await { - Ok(s) => s, - Err(err) => { - tracing::error!( - target: LOG_TARGET, - ?err, - "Failed to initialize consensus: no relay chain import notification stream" - ); - - return - }, - }; - - let mut core_queue = Default::default(); - let mut messages = VecDeque::new(); - loop { - tokio::select! { - // Check for scheduled cores. - Some(notification) = best_notifications.next() => { - core_queue = - cores_scheduled_for_para(notification.hash(), params.para_id, ¶ms.relay_client).await; - tracing::debug!( - target: LOG_TARGET, - relay_parent = ?notification.hash(), - ?params.para_id, - cores = ?core_queue, - "New best relay block.", - ); - }, - // Add new message from the block builder to the queue. - collator_message = params.collator_receiver.recv() => { - if let Some(message) = collator_message { - tracing::debug!( - target: LOG_TARGET, - hash = ?message.hash, - num_messages = ?messages.len() + 1, - "Pushing new message.", - ); - messages.push_back(message); - } - } - } - - while !core_queue.is_empty() { - // If there are no more messages to process, we wait for new messages. 
- let Some(message) = messages.pop_front() else { - break; - }; - - handle_collation_message( - message, - &collator_service, - &mut overseer_handle, - &mut core_queue, - ) - .await; - } + while let Some(collator_message) = params.collator_receiver.next().await { + tracing::debug!( + target: LOG_TARGET, + hash = ?collator_message.hash, + "Handling new message from builder task.", + ); + handle_collation_message(collator_message, &collator_service, &mut overseer_handle).await; } } @@ -141,7 +89,6 @@ async fn handle_collation_message( message: CollatorMessage, collator_service: &impl CollatorServiceInterface, overseer_handle: &mut OverseerHandle, - core_queue: &mut VecDeque, ) { let CollatorMessage { parent_header, @@ -149,13 +96,9 @@ async fn handle_collation_message( parachain_candidate, validation_code_hash, relay_parent, + core_index: core_idx, } = message; - if core_queue.is_empty() { - tracing::warn!(target: crate::LOG_TARGET, cores_for_para = core_queue.len(), "Not submitting since we have no cores left!."); - return; - } - let number = *parachain_candidate.block.header().number(); let (collation, block_data) = match collator_service.build_collation(&parent_header, hash, parachain_candidate) { @@ -182,20 +125,18 @@ async fn handle_collation_message( ); } - if let Some(core) = core_queue.pop_front() { - tracing::debug!(target: LOG_TARGET, ?core, ?hash, ?number, "Submitting collation for core."); - overseer_handle - .send_msg( - CollationGenerationMessage::SubmitCollation(SubmitCollationParams { - relay_parent, - collation, - parent_head: parent_header.encode().into(), - validation_code_hash, - core_index: core, - result_sender: None, - }), - "SubmitCollation", - ) - .await; - } + tracing::debug!(target: LOG_TARGET, ?core_idx, ?hash, ?number, "Submitting collation for core."); + overseer_handle + .send_msg( + CollationGenerationMessage::SubmitCollation(SubmitCollationParams { + relay_parent, + collation, + parent_head: parent_header.encode().into(), + validation_code_hash, + core_index: core_idx, + result_sender: None, + }), + "SubmitCollation", + ) + .await; } diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs index 6dcedf575bf8..243174b9d5e8 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs @@ -44,10 +44,13 @@ use cumulus_client_consensus_proposer::ProposerInterface; use cumulus_primitives_aura::AuraUnincludedSegmentApi; use cumulus_primitives_core::CollectCollationInfo; use cumulus_relay_chain_interface::RelayChainInterface; -use polkadot_primitives::{CollatorPair, Hash as RelayHash, Id as ParaId, ValidationCodeHash}; +use polkadot_primitives::{ + CollatorPair, CoreIndex, Hash as RelayHash, Id as ParaId, ValidationCodeHash, +}; use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf, UsageProvider}; use sc_consensus::BlockImport; +use sc_utils::mpsc::tracing_unbounded; use sp_api::ProvideRuntimeApi; use sp_application_crypto::AppPublic; @@ -128,11 +131,10 @@ where P::Public: AppPublic + Member + Codec, P::Signature: TryFrom> + Member + Codec, { - let (tx, rx) = tokio::sync::mpsc::channel(100); - + let (tx, rx) = tracing_unbounded("mpsc_builder_to_collator", 100); let collator_task_params = collation_task::Params { relay_client: params.relay_client.clone(), - collator_key: params.collator_key.clone(), + collator_key: params.collator_key, para_id: params.para_id, reinitialize: params.reinitialize, 
collator_service: params.collator_service.clone(), @@ -154,6 +156,7 @@ where collator_service: params.collator_service, authoring_duration: params.authoring_duration, collator_sender: tx, + relay_chain_slot_duration: params.relay_chain_slot_duration, }; let block_builder_fut = @@ -176,4 +179,5 @@ struct CollatorMessage { pub hash: Block::Hash, /// The validation code hash at the parent block. pub validation_code_hash: ValidationCodeHash, + pub core_index: CoreIndex, } From f84a6f3501b151f29953546d6f9da05eedd5bd73 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 16 Apr 2024 17:46:11 +0200 Subject: [PATCH 21/71] Add more comments, cleanup --- .../collators/slot_based/block_builder_task.rs | 8 ++++---- .../src/collators/slot_based/collation_task.rs | 18 ++++++++---------- .../aura/src/collators/slot_based/mod.rs | 3 +-- cumulus/client/consensus/common/src/lib.rs | 11 ++++++++--- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 00e5ee3a31ee..f644ab022eac 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -331,8 +331,6 @@ pub async fn run_block_builder( relay_parent: PHash, para_id: ParaId, diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs index 204c8ff8bc7d..d377cc7ae338 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs @@ -76,15 +76,13 @@ where let collator_service = params.collator_service; while let Some(collator_message) = params.collator_receiver.next().await { - tracing::debug!( - target: LOG_TARGET, - hash = ?collator_message.hash, - "Handling new message from builder task.", - ); handle_collation_message(collator_message, &collator_service, &mut overseer_handle).await; } } +/// Handle an incoming collation message from the block builder task. +/// This builds the collation from the [`CollatorMessage`] and submits it to +/// the collation-generation subsystem of the relay chain. 
async fn handle_collation_message( message: CollatorMessage, collator_service: &impl CollatorServiceInterface, @@ -92,19 +90,19 @@ async fn handle_collation_message( ) { let CollatorMessage { parent_header, - hash, parachain_candidate, validation_code_hash, relay_parent, - core_index: core_idx, + core_index, } = message; + let hash = parachain_candidate.block.header().hash(); let number = *parachain_candidate.block.header().number(); let (collation, block_data) = match collator_service.build_collation(&parent_header, hash, parachain_candidate) { Some(collation) => collation, None => { - tracing::warn!(target: LOG_TARGET, ?hash, ?number, "Unable to build collation."); + tracing::warn!(target: LOG_TARGET, %hash, ?number, ?core_index, "Unable to build collation."); return; }, }; @@ -125,7 +123,7 @@ async fn handle_collation_message( ); } - tracing::debug!(target: LOG_TARGET, ?core_idx, ?hash, ?number, "Submitting collation for core."); + tracing::debug!(target: LOG_TARGET, ?core_index, %hash, %number, "Submitting collation for core."); overseer_handle .send_msg( CollationGenerationMessage::SubmitCollation(SubmitCollationParams { @@ -133,7 +131,7 @@ async fn handle_collation_message( collation, parent_head: parent_header.encode().into(), validation_code_hash, - core_index: core_idx, + core_index, result_sender: None, }), "SubmitCollation", diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs index 243174b9d5e8..021ba8036542 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs @@ -175,9 +175,8 @@ struct CollatorMessage { pub parent_header: Block::Header, /// The parachain block candidate. pub parachain_candidate: ParachainCandidate, - /// The hash of the parachain block. - pub hash: Block::Hash, /// The validation code hash at the parent block. 
pub validation_code_hash: ValidationCodeHash, + /// Core index that this block should be submitted on pub core_index: CoreIndex, } diff --git a/cumulus/client/consensus/common/src/lib.rs b/cumulus/client/consensus/common/src/lib.rs index 655e1f7946fb..9b20eeb6d94f 100644 --- a/cumulus/client/consensus/common/src/lib.rs +++ b/cumulus/client/consensus/common/src/lib.rs @@ -366,7 +366,11 @@ pub async fn find_potential_parents( match backend.blockchain().header(included_hash) { Ok(None) | Err(_) => { - tracing::warn!("Failed to get header for included block at hash {:?}", included_hash); + tracing::warn!( + target: PARENT_SEARCH_LOG_TARGET, + %included_hash, + "Failed to get header for included block.", + ); return Ok(Default::default()) }, _ => {}, @@ -376,8 +380,9 @@ pub async fn find_potential_parents( match backend.blockchain().header(pending_hash) { Ok(None) | Err(_) => { tracing::warn!( - "Failed to get header for included block at hash {:?}", - included_hash + target: PARENT_SEARCH_LOG_TARGET, + %pending_hash, + "Failed to get header for pending block.", ); return Ok(vec![PotentialParent:: { hash: included_hash, From e1a58796dd4da537d0375f8f019a595ab7bc4721 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 16 Apr 2024 20:47:05 +0200 Subject: [PATCH 22/71] Refactor `find_potential_parent` --- .../slot_based/block_builder_task.rs | 2 +- cumulus/client/consensus/common/src/lib.rs | 352 +++++++++++------- cumulus/client/consensus/common/src/tests.rs | 77 ++++ 3 files changed, 289 insertions(+), 142 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index f644ab022eac..e68dc0792237 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -387,7 +387,7 @@ pub async fn run_block_builder( backend: &impl Backend, relay_client: &impl RelayChainInterface, ) -> Result>, RelayChainError> { - // 1. Build up the ancestry record of the relay chain to compare against. tracing::trace!("Parent search parameters: {params:?}"); - let rp_ancestry = { - let mut ancestry = Vec::with_capacity(params.ancestry_lookback + 1); - let mut current_rp = params.relay_parent; - let mut required_session = None; - - while ancestry.len() <= params.ancestry_lookback { - let header = match relay_client.header(RBlockId::hash(current_rp)).await? { - None => break, - Some(h) => h, - }; - - let session = relay_client.session_index_for_child(current_rp).await?; - if let Some(required_session) = required_session { - // Respect the relay-chain rule not to cross session boundaries. - if session != required_session { - break - } - } else { - required_session = Some(session); - } - - ancestry.push((current_rp, *header.state_root())); - current_rp = *header.parent_hash(); - - // don't iterate back into the genesis block. - if header.number == 1 { - break - } - } - - ancestry - }; - - let is_hash_in_ancestry = |hash| rp_ancestry.iter().any(|x| x.0 == hash); - let is_root_in_ancestry = |root| rp_ancestry.iter().any(|x| x.1 == root); - - // 2. Get the included and pending availability blocks. 
- let included_header = relay_client - .persisted_validation_data( - params.relay_parent, - params.para_id, - OccupiedCoreAssumption::TimedOut, - ) - .await?; - - let included_header = match included_header { - Some(pvd) => pvd.parent_head, - None => return Ok(Vec::new()), // this implies the para doesn't exist. - }; - - let pending_header = relay_client - .persisted_validation_data( - params.relay_parent, - params.para_id, - OccupiedCoreAssumption::Included, - ) - .await? - .and_then(|x| if x.parent_head != included_header { Some(x.parent_head) } else { None }); + // Get the included block. + let (included_header, included_hash, pending_pvd) = { + let included_header = relay_client + .persisted_validation_data( + params.relay_parent, + params.para_id, + OccupiedCoreAssumption::TimedOut, + ) + .await?; + let included_header = match included_header { + Some(pvd) => pvd.parent_head, + None => return Ok(Vec::new()), // this implies the para doesn't exist. + }; - let included_header = match B::Header::decode(&mut &included_header.0[..]).ok() { - None => return Ok(Vec::new()), - Some(x) => x, - }; - // Silently swallow if pending block can't decode. - let pending_header = pending_header.and_then(|p| B::Header::decode(&mut &p.0[..]).ok()); - let included_hash = included_header.hash(); - let pending_hash = pending_header.as_ref().map(|hdr| hdr.hash()); - - match backend.blockchain().header(included_hash) { - Ok(None) | Err(_) => { - tracing::warn!( - target: PARENT_SEARCH_LOG_TARGET, - %included_hash, - "Failed to get header for included block.", + // Fetch the pending header from the relay chain. + let pending_pvd = relay_client + .persisted_validation_data( + params.relay_parent, + params.para_id, + OccupiedCoreAssumption::Included, + ) + .await? + .and_then( + |x| if x.parent_head != included_header { Some(x.parent_head) } else { None }, ); - return Ok(Default::default()) - }, - _ => {}, - }; - if let Some(pending_hash) = pending_hash { - match backend.blockchain().header(pending_hash) { + let included_header = match B::Header::decode(&mut &included_header.0[..]).ok() { + None => return Ok(Vec::new()), + Some(x) => x, + }; + + let included_hash = included_header.hash(); + // If the included block is not locally known, we can't do anything. + match backend.blockchain().header(included_hash) { Ok(None) | Err(_) => { tracing::warn!( target: PARENT_SEARCH_LOG_TARGET, - %pending_hash, - "Failed to get header for pending block.", + %included_hash, + "Failed to get header for included block.", ); - return Ok(vec![PotentialParent:: { - hash: included_hash, - header: included_header.clone(), - depth: 0, - aligned_with_pending: true, - }]) + return Ok(Default::default()) }, _ => {}, }; - } + + (included_header, included_hash, pending_pvd) + }; + + // Get the pending block. + let (pending_header, pending_hash) = { + // Fetch the pending header from the relay chain. + let pending_header = pending_pvd.and_then(|p| B::Header::decode(&mut &p.0[..]).ok()); + + let pending_hash = pending_header.as_ref().map(|hdr| hdr.hash()); + + // If the pending block is not locally known, we can't do anything. + if let Some(hash) = pending_hash { + match backend.blockchain().header(hash) { + // We are supposed to ignore branches that don't contain the pending block, but we + // do not know the pending block locally. 
+ Ok(None) | Err(_) if params.ignore_alternative_branches => { + tracing::warn!( + target: PARENT_SEARCH_LOG_TARGET, + %hash, + "Failed to get header for pending block.", + ); + return Ok(vec![PotentialParent { + hash: included_hash, + header: included_header, + depth: 0, + aligned_with_pending: true, + }]) + }, + Ok(Some(_)) => (pending_header, pending_hash), + _ => (None, None), + } + } else { + (None, None) + } + }; if params.max_depth == 0 { - return Ok(vec![PotentialParent:: { + return Ok(vec![PotentialParent { hash: included_hash, header: included_header, depth: 0, @@ -408,73 +390,160 @@ pub async fn find_potential_parents( .map(|pending| sp_blockchain::tree_route(backend.blockchain(), included_hash, pending)) .transpose()?; - // The distance between pending and included block. Is later used to check if a child - // is aligned with pending when it is between pending and included block. - let pending_distance = maybe_route.as_ref().map(|route| route.enacted().len()); - // If we want to ignore alternative branches there is no reason to start // the parent search at the included block. We can add the included block and // the path to the pending block to the potential parents directly (limited by max_depth). - let (mut frontier, mut potential_parents) = if let (Some(pending), true, Some(ref route)) = - (pending_header, params.ignore_alternative_branches, &maybe_route) - { - let mut potential_parents = Vec::new(); - // Included block is always a potential parent - potential_parents.push(PotentialParent:: { - hash: included_hash, - header: included_header.clone(), - depth: 0, - aligned_with_pending: true, - }); - - // Add all items on the path included -> pending - 1 to the potential parents, but not - // more than `max_depth`. - let num_parents_on_path = route.enacted().len().saturating_sub(1).min(params.max_depth); - for (num, block) in route.enacted().iter().take(num_parents_on_path).enumerate() { - let header = match backend.blockchain().header(block.hash) { - Ok(Some(h)) => h, - Ok(None) => continue, - Err(_) => continue, - }; - - potential_parents.push(PotentialParent:: { - hash: block.hash, - header, - depth: 1 + num, + let (frontier, potential_parents) = match ( + pending_header, + params.ignore_alternative_branches, + &maybe_route, + ) { + (Some(pending), true, Some(ref route_to_pending)) => { + let mut potential_parents = Vec::new(); + // Included block is always a potential parent + potential_parents.push(PotentialParent { + hash: included_hash, + header: included_header.clone(), + depth: 0, aligned_with_pending: true, }); - } - // The search for additional potential parents should now start at the - // pending block. - ( - vec![PotentialParent:: { - hash: pending.hash(), - header: pending.clone(), - depth: route.enacted().len(), - aligned_with_pending: true, - }], - potential_parents, - ) - } else { - ( - vec![PotentialParent:: { + // This is a defensive check, should never happen. + if !route_to_pending.retracted().is_empty() { + tracing::warn!(target: PARENT_SEARCH_LOG_TARGET, "Pending block not an ancestor of included block. This should not happen."); + return Ok(Default::default()) + } + + // Add all items on the path included -> pending - 1 to the potential parents, but + // not more than `max_depth`. 
+ let num_parents_on_path = + route_to_pending.enacted().len().saturating_sub(1).min(params.max_depth); + for (num, block) in + route_to_pending.enacted().iter().take(num_parents_on_path).enumerate() + { + let header = match backend.blockchain().header(block.hash) { + Ok(Some(h)) => h, + Ok(None) => continue, + Err(_) => continue, + }; + + potential_parents.push(PotentialParent { + hash: block.hash, + header, + depth: 1 + num, + aligned_with_pending: true, + }); + } + + // The search for additional potential parents should now start at the children of + // the pending block. + ( + vec![PotentialParent { + hash: pending.hash(), + header: pending.clone(), + depth: route_to_pending.enacted().len(), + aligned_with_pending: true, + }], + potential_parents, + ) + }, + _ => ( + vec![PotentialParent { hash: included_hash, header: included_header.clone(), depth: 0, aligned_with_pending: true, }], Default::default(), - ) + ), }; if potential_parents.len() > params.max_depth { return Ok(potential_parents); } + // Build up the ancestry record of the relay chain to compare against. + let rp_ancestry = + build_relay_parent_ancestry(params.ancestry_lookback, params.relay_parent, relay_client) + .await?; + + Ok(search_child_branches_for_parents( + frontier, + maybe_route, + included_header, + pending_hash, + backend, + params.max_depth, + params.ignore_alternative_branches, + rp_ancestry, + potential_parents, + )) +} + +/// Build an ancestry of relay parents that are acceptable. +/// +/// An acceptable relay parent is one that is no more than `ancestry_lookback` + 1 blocks above the +/// relay parent we want to build on. Parachain blocks anchored on relay parents older than that can +/// not be considered potential parents for block building. They have no chance of still getting +/// included, so our newly build parachain block would also not get included. +async fn build_relay_parent_ancestry( + ancestry_lookback: usize, + relay_parent: PHash, + relay_client: &impl RelayChainInterface, +) -> Result, RelayChainError> { + let mut ancestry = Vec::with_capacity(ancestry_lookback + 1); + let mut current_rp = relay_parent; + let mut required_session = None; + while ancestry.len() <= ancestry_lookback { + let header = match relay_client.header(RBlockId::hash(current_rp)).await? { + None => break, + Some(h) => h, + }; + + let session = relay_client.session_index_for_child(current_rp).await?; + if let Some(required_session) = required_session { + // Respect the relay-chain rule not to cross session boundaries. + if session != required_session { + break + } + } else { + required_session = Some(session); + } + + ancestry.push((current_rp, *header.state_root())); + current_rp = *header.parent_hash(); + + // don't iterate back into the genesis block. + if header.number == 1 { + break + } + } + Ok(ancestry) +} + +/// Start search for child blocks that can be used as parents. +pub fn search_child_branches_for_parents( + mut frontier: Vec>, + maybe_route_to_pending: Option>, + included_header: Block::Header, + pending_hash: Option, + backend: &impl Backend, + max_depth: usize, + ignore_alternative_branches: bool, + rp_ancestry: Vec<(H256, H256)>, + mut potential_parents: Vec>, +) -> Vec> { + let included_hash = included_header.hash(); + let is_hash_in_ancestry = |hash| rp_ancestry.iter().any(|x| x.0 == hash); + let is_root_in_ancestry = |root| rp_ancestry.iter().any(|x| x.1 == root); + + // The distance between pending and included block. 
Is later used to check if a child + // is aligned with pending when it is between pending and included block. + let pending_distance = maybe_route_to_pending.as_ref().map(|route| route.enacted().len()); + // If a block is on the path included -> pending, we consider it `aligned_with_pending`. let is_child_in_path_to_pending = |hash| { - maybe_route + maybe_route_to_pending .as_ref() .map_or(true, |route| route.enacted().iter().any(|x| x.hash == hash)) }; @@ -510,19 +579,20 @@ pub async fn find_potential_parents( potential_parents.push(entry); } - if !is_potential || child_depth > params.max_depth { + if !is_potential || child_depth > max_depth { continue } // push children onto search frontier. for child in backend.blockchain().children(hash).ok().into_iter().flatten() { tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?child, child_depth, ?pending_distance, "Looking at child."); + let aligned_with_pending = parent_aligned_with_pending && (pending_distance.map_or(true, |dist| child_depth > dist) || pending_hash.as_ref().map_or(true, |h| &child == h) || is_child_in_path_to_pending(child)); - if params.ignore_alternative_branches && !aligned_with_pending { + if ignore_alternative_branches && !aligned_with_pending { tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?child, "Child is not aligned with pending block."); continue } @@ -542,7 +612,7 @@ pub async fn find_potential_parents( } } - Ok(potential_parents) + potential_parents } /// Get the relay-parent slot and timestamp from a header. diff --git a/cumulus/client/consensus/common/src/tests.rs b/cumulus/client/consensus/common/src/tests.rs index f5a4273b2cb3..64c803c610d4 100644 --- a/cumulus/client/consensus/common/src/tests.rs +++ b/cumulus/client/consensus/common/src/tests.rs @@ -1240,6 +1240,83 @@ fn find_potential_parents_unknown_pending() { assert!(parent.aligned_with_pending); } +#[test] +fn find_potential_parents_unknown_pending_include_alternative_branches() { + sp_tracing::try_init_simple(); + + const NON_INCLUDED_CHAIN_LEN: usize = 5; + + let backend = Arc::new(Backend::new_test(1000, 1)); + let client = Arc::new(TestClientBuilder::with_backend(backend.clone()).build()); + let mut para_import = + ParachainBlockImport::new_with_delayed_best_block(client.clone(), backend.clone()); + + let relay_parent = relay_hash_from_block_num(10); + + // Choose different relay parent for alternative chain to get new hashes. 
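+	// (A different relay parent changes the block's inherent data and therefore its
+	// hash, so the alternative chain does not collide with the aligned one.)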
+	let search_relay_parent = relay_hash_from_block_num(11);
+
+	let included_block = build_and_import_block_ext(
+		&client,
+		BlockOrigin::NetworkInitialSync,
+		true,
+		&mut para_import,
+		None,
+		None,
+		Some(relay_parent),
+	);
+
+	let alt_block = build_and_import_block_ext(
+		&client,
+		BlockOrigin::NetworkInitialSync,
+		true,
+		&mut para_import,
+		Some(included_block.header().hash()),
+		None,
+		Some(search_relay_parent),
+	);
+
+	tracing::info!(hash = %alt_block.header().hash(), "Alt block.");
+	let sproof = sproof_with_parent_by_hash(&client, included_block.header().hash());
+	let pending_but_unknown = build_block(
+		&*client,
+		sproof,
+		Some(included_block.header().hash()),
+		None,
+		Some(relay_parent),
+	);
+
+	let relay_chain = Relaychain::new();
+	{
+		let relay_inner = &mut relay_chain.inner.lock().unwrap();
+		relay_inner
+			.relay_chain_hash_to_header
+			.insert(search_relay_parent, included_block.header().clone());
+		relay_inner
+			.relay_chain_hash_to_header_pending
+			.insert(search_relay_parent, pending_but_unknown.header().clone());
+	}
+
+	// Do not ignore alternative branches, so the alternative chain is returned as well:
+	let potential_parents = block_on(find_potential_parents(
+		ParentSearchParams {
+			relay_parent: search_relay_parent,
+			para_id: ParaId::from(100),
+			ancestry_lookback: 1, // aligned chain is in ancestry.
+			max_depth: NON_INCLUDED_CHAIN_LEN,
+			ignore_alternative_branches: false,
+		},
+		&*backend,
+		&relay_chain,
+	))
+	.unwrap();
+
+	let expected_parents: Vec<_> = vec![&included_block, &alt_block];
+	assert_eq!(potential_parents.len(), 2);
+	assert_eq!(expected_parents[0].hash(), potential_parents[0].hash);
+	assert_eq!(expected_parents[1].hash(), potential_parents[1].hash);
+}
+
 /// Test where there is an additional block between included and pending block.
 #[test]
 fn find_potential_parents_aligned_with_late_pending() {

From 1b184da46a03bda89608c2deed90c354481fd45a Mon Sep 17 00:00:00 2001
From: Sebastian Kunert
Date: Wed, 17 Apr 2024 12:47:00 +0200
Subject: [PATCH 23/71] Make expected cores work when para slot duration >
 relay slot duration

---
 .../aura/src/collators/slot_based/block_builder_task.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs
index e68dc0792237..4c340ac06653 100644
--- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs
+++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs
@@ -240,12 +240,13 @@ pub async fn run_block_builder
 			::new(params)
 	};
 
-	let Ok(velocity) = u64::try_from(
+	let Ok(expected_cores) = u64::try_from(
 		relay_chain_slot_duration.as_millis() / slot_duration.as_duration().as_millis(),
 	) else {
-		tracing::error!(target: LOG_TARGET, ?relay_chain_slot_duration, ?slot_duration, "Unable to calculate expected parachain velocity.");
+		tracing::error!(target: LOG_TARGET, ?relay_chain_slot_duration, ?slot_duration, "Unable to calculate the expected number of cores.");
 		return;
 	};
+	let expected_cores = expected_cores.max(1);
 
 	loop {
 		// We wait here until the next slot arrives.
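A quick sanity check of the core-count arithmetic above (an illustrative, self-contained sketch; `expected_cores` as a free function is an assumption of this sketch, the patch computes it inline): a 6s relay-chain slot with a 2s parachain slot yields three expected cores, while the `.max(1)` clamp keeps one core whenever the parachain slot is as long as or longer than the relay-chain slot.

use std::time::Duration;

// Stand-in for the inline computation in `run_block_builder`.
fn expected_cores(relay_slot: Duration, para_slot: Duration) -> u64 {
	// Integer division floors, so a parachain slot longer than the relay-chain
	// slot would otherwise yield zero cores.
	let cores = (relay_slot.as_millis() / para_slot.as_millis()) as u64;
	cores.max(1)
}

fn main() {
	assert_eq!(expected_cores(Duration::from_secs(6), Duration::from_secs(2)), 3);
	assert_eq!(expected_cores(Duration::from_secs(6), Duration::from_secs(6)), 1);
	// Para slot longer than the relay slot: floors to 0, clamped to 1.
	assert_eq!(expected_cores(Duration::from_secs(6), Duration::from_secs(12)), 1);
}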
@@ -262,7 +263,7 @@ pub async fn run_block_builder Date: Tue, 23 Apr 2024 16:16:29 +0200 Subject: [PATCH 24/71] Review comments --- cumulus/client/consensus/common/src/lib.rs | 199 ++++++++---------- cumulus/client/consensus/common/src/tests.rs | 4 +- .../client/relay-chain-interface/src/lib.rs | 3 +- cumulus/polkadot-parachain/Cargo.toml | 2 +- .../tests/0008-elastic_authoring.toml | 2 +- cumulus/zombienet/tests/assign-core.js | 7 +- 6 files changed, 106 insertions(+), 111 deletions(-) diff --git a/cumulus/client/consensus/common/src/lib.rs b/cumulus/client/consensus/common/src/lib.rs index e3161a4fa8f7..aa11afda2f87 100644 --- a/cumulus/client/consensus/common/src/lib.rs +++ b/cumulus/client/consensus/common/src/lib.rs @@ -16,7 +16,8 @@ use codec::Decode; use polkadot_primitives::{ - Block as PBlock, Hash as PHash, Header as PHeader, PersistedValidationData, ValidationCodeHash, + Block as PBlock, Hash as PHash, HeadData, Header as PHeader, PersistedValidationData, + ValidationCodeHash, }; use cumulus_primitives_core::{ @@ -275,6 +276,49 @@ impl std::fmt::Debug for PotentialParent { } } +/// Fetch the included and pending block from the relay chain. +async fn fetch_included_pending_from_relay( + relay_client: &impl RelayChainInterface, + backend: &impl Backend, + para_id: ParaId, + relay_parent: PHash, +) -> Result)>, RelayChainError> { + let included_header = relay_client + .persisted_validation_data(relay_parent, para_id, OccupiedCoreAssumption::TimedOut) + .await?; + let included_header = match included_header { + Some(pvd) => pvd.parent_head, + None => return Ok(None), // this implies the para doesn't exist. + }; + + // Fetch the pending header from the relay chain. + let pending_pvd = relay_client + .persisted_validation_data(relay_parent, para_id, OccupiedCoreAssumption::Included) + .await? + .and_then(|x| if x.parent_head != included_header { Some(x.parent_head) } else { None }); + + let included_header = match B::Header::decode(&mut &included_header.0[..]).ok() { + None => return Ok(None), + Some(x) => x, + }; + + let included_hash = included_header.hash(); + // If the included block is not locally known, we can't do anything. + match backend.blockchain().header(included_hash) { + Ok(None) | Err(_) => { + tracing::warn!( + target: PARENT_SEARCH_LOG_TARGET, + %included_hash, + "Failed to get header for included block.", + ); + return Ok(None) + }, + _ => {}, + }; + + Ok(Some((included_header, included_hash, pending_pvd))) +} + /// Perform a recursive search through blocks to find potential /// parent blocks for a new block. /// @@ -297,120 +341,76 @@ pub async fn find_potential_parents( ) -> Result>, RelayChainError> { tracing::trace!("Parent search parameters: {params:?}"); // Get the included block. - let (included_header, included_hash, pending_pvd) = { - let included_header = relay_client - .persisted_validation_data( - params.relay_parent, - params.para_id, - OccupiedCoreAssumption::TimedOut, - ) - .await?; - let included_header = match included_header { - Some(pvd) => pvd.parent_head, - None => return Ok(Vec::new()), // this implies the para doesn't exist. - }; - - // Fetch the pending header from the relay chain. - let pending_pvd = relay_client - .persisted_validation_data( - params.relay_parent, - params.para_id, - OccupiedCoreAssumption::Included, - ) - .await? 
- .and_then( - |x| if x.parent_head != included_header { Some(x.parent_head) } else { None }, - ); - - let included_header = match B::Header::decode(&mut &included_header.0[..]).ok() { - None => return Ok(Vec::new()), - Some(x) => x, - }; + let Some((included_header, included_hash, pending_pvd)) = fetch_included_pending_from_relay( + relay_client, + backend, + params.para_id, + params.relay_parent, + ) + .await? + else { + return Ok(Default::default()) + }; - let included_hash = included_header.hash(); - // If the included block is not locally known, we can't do anything. - match backend.blockchain().header(included_hash) { - Ok(None) | Err(_) => { - tracing::warn!( - target: PARENT_SEARCH_LOG_TARGET, - %included_hash, - "Failed to get header for included block.", - ); - return Ok(Default::default()) - }, - _ => {}, - }; + let only_included = vec![PotentialParent { + hash: included_hash, + header: included_header.clone(), + depth: 0, + aligned_with_pending: true, + }]; - (included_header, included_hash, pending_pvd) + if params.max_depth == 0 { + return Ok(only_included) }; - // Get the pending block. - let (pending_header, pending_hash) = { - // Fetch the pending header from the relay chain. + // Pending header and hash. + let maybe_pending = { + // Try to decode the pending header. let pending_header = pending_pvd.and_then(|p| B::Header::decode(&mut &p.0[..]).ok()); - let pending_hash = pending_header.as_ref().map(|hdr| hdr.hash()); - // If the pending block is not locally known, we can't do anything. - if let Some(hash) = pending_hash { - match backend.blockchain().header(hash) { + if let Some(header) = pending_header { + let pending_hash = header.hash(); + match backend.blockchain().header(pending_hash) { // We are supposed to ignore branches that don't contain the pending block, but we // do not know the pending block locally. Ok(None) | Err(_) if params.ignore_alternative_branches => { tracing::warn!( target: PARENT_SEARCH_LOG_TARGET, - %hash, + %pending_hash, "Failed to get header for pending block.", ); - return Ok(vec![PotentialParent { - hash: included_hash, - header: included_header, - depth: 0, - aligned_with_pending: true, - }]) + return Ok(only_included) }, - Ok(Some(_)) => (pending_header, pending_hash), - _ => (None, None), + Ok(Some(_)) => Some((header, pending_hash)), + _ => None, } } else { - (None, None) + None } }; - if params.max_depth == 0 { - return Ok(vec![PotentialParent { - hash: included_hash, - header: included_header, - depth: 0, - aligned_with_pending: true, - }]) - }; - - let maybe_route = pending_hash - .map(|pending| sp_blockchain::tree_route(backend.blockchain(), included_hash, pending)) + let maybe_route = maybe_pending + .as_ref() + .map(|(_, pending)| { + sp_blockchain::tree_route(backend.blockchain(), included_hash, *pending) + }) .transpose()?; // If we want to ignore alternative branches there is no reason to start // the parent search at the included block. We can add the included block and // the path to the pending block to the potential parents directly (limited by max_depth). 
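 	// (`maybe_route` above is the tree route from the included block to the pending
 	// block; a non-empty `retracted()` set would mean the pending block does not
 	// descend from the included block, which is logged and aborts the search below.)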
let (frontier, potential_parents) = match ( - pending_header, + &maybe_pending, params.ignore_alternative_branches, &maybe_route, ) { - (Some(pending), true, Some(ref route_to_pending)) => { - let mut potential_parents = Vec::new(); - // Included block is always a potential parent - potential_parents.push(PotentialParent { - hash: included_hash, - header: included_header.clone(), - depth: 0, - aligned_with_pending: true, - }); + (Some((pending_header, pending_hash)), true, Some(ref route_to_pending)) => { + let mut potential_parents = only_included; // This is a defensive check, should never happen. if !route_to_pending.retracted().is_empty() { - tracing::warn!(target: PARENT_SEARCH_LOG_TARGET, "Pending block not an ancestor of included block. This should not happen."); + tracing::warn!(target: PARENT_SEARCH_LOG_TARGET, "Included block not an ancestor of pending block. This should not happen."); return Ok(Default::default()) } @@ -421,11 +421,7 @@ pub async fn find_potential_parents( for (num, block) in route_to_pending.enacted().iter().take(num_parents_on_path).enumerate() { - let header = match backend.blockchain().header(block.hash) { - Ok(Some(h)) => h, - Ok(None) => continue, - Err(_) => continue, - }; + let Ok(Some(header)) = backend.blockchain().header(block.hash) else { continue }; potential_parents.push(PotentialParent { hash: block.hash, @@ -439,23 +435,15 @@ pub async fn find_potential_parents( // the pending block. ( vec![PotentialParent { - hash: pending.hash(), - header: pending.clone(), + hash: *pending_hash, + header: pending_header.clone(), depth: route_to_pending.enacted().len(), aligned_with_pending: true, }], potential_parents, ) }, - _ => ( - vec![PotentialParent { - hash: included_hash, - header: included_header.clone(), - depth: 0, - aligned_with_pending: true, - }], - Default::default(), - ), + _ => (only_included, Default::default()), }; if potential_parents.len() > params.max_depth { @@ -471,7 +459,7 @@ pub async fn find_potential_parents( frontier, maybe_route, included_header, - pending_hash, + maybe_pending.map(|(_, hash)| hash), backend, params.max_depth, params.ignore_alternative_branches, @@ -482,10 +470,13 @@ pub async fn find_potential_parents( /// Build an ancestry of relay parents that are acceptable. /// -/// An acceptable relay parent is one that is no more than `ancestry_lookback` + 1 blocks above the +/// An acceptable relay parent is one that is no more than `ancestry_lookback` + 1 blocks below the /// relay parent we want to build on. Parachain blocks anchored on relay parents older than that can /// not be considered potential parents for block building. They have no chance of still getting /// included, so our newly build parachain block would also not get included. +/// +/// On success, returns a vector of `(header_hash, state_root)` of the relevant relay chain +/// ancestry blocks. 
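+/// The walk additionally stops at session boundaries and at block number 1, so the
+/// returned ancestry never crosses the session of `relay_parent` and never reaches
+/// back into the genesis block.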
async fn build_relay_parent_ancestry( ancestry_lookback: usize, relay_parent: PHash, @@ -597,11 +588,7 @@ pub fn search_child_branches_for_parents( continue } - let header = match backend.blockchain().header(child) { - Ok(Some(h)) => h, - Ok(None) => continue, - Err(_) => continue, - }; + let Ok(Some(header)) = backend.blockchain().header(child) else { continue }; frontier.push(PotentialParent { hash: child, diff --git a/cumulus/client/consensus/common/src/tests.rs b/cumulus/client/consensus/common/src/tests.rs index 64c803c610d4..9995d5db3df3 100644 --- a/cumulus/client/consensus/common/src/tests.rs +++ b/cumulus/client/consensus/common/src/tests.rs @@ -1376,7 +1376,9 @@ fn find_potential_parents_aligned_with_late_pending() { .insert(search_relay_parent, pending_block.header().clone()); } - // Build two sibling chains from the included block. + // Build some blocks on the pending block and on the included block. + // We end up with two sibling chains, one is aligned with the pending block, + // the other is not. let mut aligned_blocks = Vec::new(); let mut parent = pending_block.header().hash(); for _ in 2..NON_INCLUDED_CHAIN_LEN { diff --git a/cumulus/client/relay-chain-interface/src/lib.rs b/cumulus/client/relay-chain-interface/src/lib.rs index aacf35483ada..cb667d218d06 100644 --- a/cumulus/client/relay-chain-interface/src/lib.rs +++ b/cumulus/client/relay-chain-interface/src/lib.rs @@ -205,7 +205,8 @@ pub trait RelayChainInterface: Send + Sync { ) -> RelayChainResult>; /// Yields information on all availability cores as relevant to the child block. - /// Cores are either free or occupied. Free cores can have paras assigned to them. + /// + /// Cores are either free, scheduled or occupied. Free cores can have paras assigned to them. async fn availability_cores( &self, relay_parent: PHash, diff --git a/cumulus/polkadot-parachain/Cargo.toml b/cumulus/polkadot-parachain/Cargo.toml index 6e74c2587fed..55cea88adf16 100644 --- a/cumulus/polkadot-parachain/Cargo.toml +++ b/cumulus/polkadot-parachain/Cargo.toml @@ -111,7 +111,6 @@ cumulus-primitives-aura = { path = "../primitives/aura" } cumulus-primitives-core = { path = "../primitives/core" } cumulus-relay-chain-interface = { path = "../client/relay-chain-interface" } color-print = "0.3.4" -tokio = { version = "1.32.0", features = ["macros", "parking_lot", "time"] } [build-dependencies] substrate-build-script-utils = { path = "../../substrate/utils/build-script-utils" } @@ -121,6 +120,7 @@ assert_cmd = "2.0" nix = { version = "0.26.1", features = ["signal"] } tempfile = "3.8.0" wait-timeout = "0.2" +tokio = { version = "1.32.0", features = ["macros", "parking_lot", "time"] } [features] default = [] diff --git a/cumulus/zombienet/tests/0008-elastic_authoring.toml b/cumulus/zombienet/tests/0008-elastic_authoring.toml index 07e1298ce248..7fd53c416ed9 100644 --- a/cumulus/zombienet/tests/0008-elastic_authoring.toml +++ b/cumulus/zombienet/tests/0008-elastic_authoring.toml @@ -40,7 +40,7 @@ add_to_genesis = true command = "test-parachain" args = ["-laura=trace,runtime=info,cumulus-consensus=trace,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--use-slot-authoring"] -# Slot based authoring with 3 cores and 2s slot duration +# Slot based authoring with 1 core and 6s slot duration [[parachains]] id = 2000 add_to_genesis = true diff --git a/cumulus/zombienet/tests/assign-core.js b/cumulus/zombienet/tests/assign-core.js index 2e5f9d8cfa58..4179b68b2e3c 100644 --- 
a/cumulus/zombienet/tests/assign-core.js
+++ b/cumulus/zombienet/tests/assign-core.js
@@ -1,3 +1,7 @@
+// Assign a parachain to a core.
+//
+// First argument should be the parachain id.
+// Second argument should be the core.
 async function run(nodeName, networkInfo, args) {
   const { wsUri, userDefinedTypes } = networkInfo.nodesByName[nodeName];
   const api = await zombie.connect(wsUri, userDefinedTypes);
@@ -8,10 +12,11 @@ async function run(nodeName, networkInfo, args) {
 
   await zombie.util.cryptoWaitReady();
 
-  // account to submit tx
+  // Submit the transaction with the Alice account.
  const keyring = new zombie.Keyring({ type: "sr25519" });
  const alice = keyring.addFromUri("//Alice");
 
+  // Wait for this transaction to be finalized in a block.
  await new Promise(async (resolve, reject) => {
    const unsub = await api.tx.sudo
     .sudo(api.tx.coretime.assignCore(core, 0, [[{ task: para }, 57600]], null))

From e4edbe4282c97863046e64bcac252edbcee6d1b5 Mon Sep 17 00:00:00 2001
From: Sebastian Kunert
Date: Tue, 23 Apr 2024 17:56:02 +0200
Subject: [PATCH 25/71] Extract parent search into module

---
 cumulus/client/consensus/common/src/lib.rs | 386 +----------------
 .../consensus/common/src/parent_search.rs | 401 ++++++++++++++++++
 2 files changed, 409 insertions(+), 378 deletions(-)
 create mode 100644 cumulus/client/consensus/common/src/parent_search.rs

diff --git a/cumulus/client/consensus/common/src/lib.rs b/cumulus/client/consensus/common/src/lib.rs
index aa11afda2f87..08b7f7f59a2d 100644
--- a/cumulus/client/consensus/common/src/lib.rs
+++ b/cumulus/client/consensus/common/src/lib.rs
@@ -16,21 +16,19 @@
 use codec::Decode;
 
 use polkadot_primitives::{
-	Block as PBlock, Hash as PHash, HeadData, Header as PHeader, PersistedValidationData,
-	ValidationCodeHash,
+	Block as PBlock, Hash as PHash, Header as PHeader, PersistedValidationData, ValidationCodeHash,
 };
 
 use cumulus_primitives_core::{
-	relay_chain::{self, BlockId as RBlockId, OccupiedCoreAssumption},
-	AbridgedHostConfiguration, ParaId,
+	relay_chain::{self},
+	AbridgedHostConfiguration,
 };
 
 use cumulus_relay_chain_interface::{RelayChainError, RelayChainInterface};
-use sc_client_api::{Backend, HeaderBackend};
+use sc_client_api::Backend;
 use sc_consensus::{shared_data::SharedData, BlockImport, ImportResult};
-use sp_blockchain::{Backend as BlockchainBackend, TreeRoute};
 use sp_consensus_slots::Slot;
-use sp_core::H256;
+
 use sp_runtime::traits::{Block as BlockT, Header as HeaderT};
 use sp_timestamp::Timestamp;
 
@@ -38,9 +36,12 @@ use std::{sync::Arc, time::Duration};
 
 mod level_monitor;
 mod parachain_consensus;
+mod parent_search;
 #[cfg(test)]
 mod tests;
 
+pub use parent_search::*;
+
 pub use parachain_consensus::run_parachain_consensus;
 
 use level_monitor::LevelMonitor;
@@ -48,8 +49,6 @@ pub use level_monitor::{LevelLimit, MAX_LEAVES_PER_LEVEL_SENSIBLE_DEFAULT};
 
 pub mod import_queue;
 
-const PARENT_SEARCH_LOG_TARGET: &str = "consensus::common::find_potential_parents";
-
 /// Provides the hash of validation code used for authoring/execution of blocks at a given
 /// hash.
 pub trait ValidationCodeHashProvider {
@@ -233,375 +232,6 @@ pub trait ParachainBlockImportMarker {}
 
 impl ParachainBlockImportMarker for ParachainBlockImport {}
 
-/// Parameters when searching for suitable parents to build on top of.
-#[derive(Debug)]
-pub struct ParentSearchParams {
-	/// The relay-parent that is intended to be used.
-	pub relay_parent: PHash,
-	/// The ID of the parachain.
- pub para_id: ParaId, - /// A limitation on the age of relay parents for parachain blocks that are being - /// considered. This is relative to the `relay_parent` number. - pub ancestry_lookback: usize, - /// How "deep" parents can be relative to the included parachain block at the relay-parent. - /// The included block has depth 0. - pub max_depth: usize, - /// Whether to only ignore "alternative" branches, i.e. branches of the chain - /// which do not contain the block pending availability. - pub ignore_alternative_branches: bool, -} - -/// A potential parent block returned from [`find_potential_parents`] -#[derive(PartialEq)] -pub struct PotentialParent { - /// The hash of the block. - pub hash: B::Hash, - /// The header of the block. - pub header: B::Header, - /// The depth of the block. - pub depth: usize, - /// Whether the block is the included block, is itself pending on-chain, or descends - /// from the block pending availability. - pub aligned_with_pending: bool, -} - -impl std::fmt::Debug for PotentialParent { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PotentialParent") - .field("hash", &self.hash) - .field("depth", &self.depth) - .field("aligned_with_pending", &self.aligned_with_pending) - .field("number", &self.header.number()) - .finish() - } -} - -/// Fetch the included and pending block from the relay chain. -async fn fetch_included_pending_from_relay( - relay_client: &impl RelayChainInterface, - backend: &impl Backend, - para_id: ParaId, - relay_parent: PHash, -) -> Result)>, RelayChainError> { - let included_header = relay_client - .persisted_validation_data(relay_parent, para_id, OccupiedCoreAssumption::TimedOut) - .await?; - let included_header = match included_header { - Some(pvd) => pvd.parent_head, - None => return Ok(None), // this implies the para doesn't exist. - }; - - // Fetch the pending header from the relay chain. - let pending_pvd = relay_client - .persisted_validation_data(relay_parent, para_id, OccupiedCoreAssumption::Included) - .await? - .and_then(|x| if x.parent_head != included_header { Some(x.parent_head) } else { None }); - - let included_header = match B::Header::decode(&mut &included_header.0[..]).ok() { - None => return Ok(None), - Some(x) => x, - }; - - let included_hash = included_header.hash(); - // If the included block is not locally known, we can't do anything. - match backend.blockchain().header(included_hash) { - Ok(None) | Err(_) => { - tracing::warn!( - target: PARENT_SEARCH_LOG_TARGET, - %included_hash, - "Failed to get header for included block.", - ); - return Ok(None) - }, - _ => {}, - }; - - Ok(Some((included_header, included_hash, pending_pvd))) -} - -/// Perform a recursive search through blocks to find potential -/// parent blocks for a new block. -/// -/// This accepts a relay-chain block to be used as an anchor and a maximum search depth, -/// along with some arguments for filtering parachain blocks and performs a recursive search -/// for parachain blocks. The search begins at the last included parachain block and returns -/// a set of [`PotentialParent`]s which could be potential parents of a new block with this -/// relay-parent according to the search parameters. -/// -/// A parachain block is a potential parent if it is either the last included parachain block, the -/// pending parachain block (when `max_depth` >= 1), or all of the following hold: -/// * its parent is a potential parent -/// * its relay-parent is within `ancestry_lookback` of the targeted relay-parent. 
-/// * its relay-parent is within the same session as the targeted relay-parent. -/// * the block number is within `max_depth` blocks of the included block -pub async fn find_potential_parents( - params: ParentSearchParams, - backend: &impl Backend, - relay_client: &impl RelayChainInterface, -) -> Result>, RelayChainError> { - tracing::trace!("Parent search parameters: {params:?}"); - // Get the included block. - let Some((included_header, included_hash, pending_pvd)) = fetch_included_pending_from_relay( - relay_client, - backend, - params.para_id, - params.relay_parent, - ) - .await? - else { - return Ok(Default::default()) - }; - - let only_included = vec![PotentialParent { - hash: included_hash, - header: included_header.clone(), - depth: 0, - aligned_with_pending: true, - }]; - - if params.max_depth == 0 { - return Ok(only_included) - }; - - // Pending header and hash. - let maybe_pending = { - // Try to decode the pending header. - let pending_header = pending_pvd.and_then(|p| B::Header::decode(&mut &p.0[..]).ok()); - - // If the pending block is not locally known, we can't do anything. - if let Some(header) = pending_header { - let pending_hash = header.hash(); - match backend.blockchain().header(pending_hash) { - // We are supposed to ignore branches that don't contain the pending block, but we - // do not know the pending block locally. - Ok(None) | Err(_) if params.ignore_alternative_branches => { - tracing::warn!( - target: PARENT_SEARCH_LOG_TARGET, - %pending_hash, - "Failed to get header for pending block.", - ); - return Ok(only_included) - }, - Ok(Some(_)) => Some((header, pending_hash)), - _ => None, - } - } else { - None - } - }; - - let maybe_route = maybe_pending - .as_ref() - .map(|(_, pending)| { - sp_blockchain::tree_route(backend.blockchain(), included_hash, *pending) - }) - .transpose()?; - - // If we want to ignore alternative branches there is no reason to start - // the parent search at the included block. We can add the included block and - // the path to the pending block to the potential parents directly (limited by max_depth). - let (frontier, potential_parents) = match ( - &maybe_pending, - params.ignore_alternative_branches, - &maybe_route, - ) { - (Some((pending_header, pending_hash)), true, Some(ref route_to_pending)) => { - let mut potential_parents = only_included; - - // This is a defensive check, should never happen. - if !route_to_pending.retracted().is_empty() { - tracing::warn!(target: PARENT_SEARCH_LOG_TARGET, "Included block not an ancestor of pending block. This should not happen."); - return Ok(Default::default()) - } - - // Add all items on the path included -> pending - 1 to the potential parents, but - // not more than `max_depth`. - let num_parents_on_path = - route_to_pending.enacted().len().saturating_sub(1).min(params.max_depth); - for (num, block) in - route_to_pending.enacted().iter().take(num_parents_on_path).enumerate() - { - let Ok(Some(header)) = backend.blockchain().header(block.hash) else { continue }; - - potential_parents.push(PotentialParent { - hash: block.hash, - header, - depth: 1 + num, - aligned_with_pending: true, - }); - } - - // The search for additional potential parents should now start at the children of - // the pending block. 
- ( - vec![PotentialParent { - hash: *pending_hash, - header: pending_header.clone(), - depth: route_to_pending.enacted().len(), - aligned_with_pending: true, - }], - potential_parents, - ) - }, - _ => (only_included, Default::default()), - }; - - if potential_parents.len() > params.max_depth { - return Ok(potential_parents); - } - - // Build up the ancestry record of the relay chain to compare against. - let rp_ancestry = - build_relay_parent_ancestry(params.ancestry_lookback, params.relay_parent, relay_client) - .await?; - - Ok(search_child_branches_for_parents( - frontier, - maybe_route, - included_header, - maybe_pending.map(|(_, hash)| hash), - backend, - params.max_depth, - params.ignore_alternative_branches, - rp_ancestry, - potential_parents, - )) -} - -/// Build an ancestry of relay parents that are acceptable. -/// -/// An acceptable relay parent is one that is no more than `ancestry_lookback` + 1 blocks below the -/// relay parent we want to build on. Parachain blocks anchored on relay parents older than that can -/// not be considered potential parents for block building. They have no chance of still getting -/// included, so our newly build parachain block would also not get included. -/// -/// On success, returns a vector of `(header_hash, state_root)` of the relevant relay chain -/// ancestry blocks. -async fn build_relay_parent_ancestry( - ancestry_lookback: usize, - relay_parent: PHash, - relay_client: &impl RelayChainInterface, -) -> Result, RelayChainError> { - let mut ancestry = Vec::with_capacity(ancestry_lookback + 1); - let mut current_rp = relay_parent; - let mut required_session = None; - while ancestry.len() <= ancestry_lookback { - let header = match relay_client.header(RBlockId::hash(current_rp)).await? { - None => break, - Some(h) => h, - }; - - let session = relay_client.session_index_for_child(current_rp).await?; - if let Some(required_session) = required_session { - // Respect the relay-chain rule not to cross session boundaries. - if session != required_session { - break - } - } else { - required_session = Some(session); - } - - ancestry.push((current_rp, *header.state_root())); - current_rp = *header.parent_hash(); - - // don't iterate back into the genesis block. - if header.number == 1 { - break - } - } - Ok(ancestry) -} - -/// Start search for child blocks that can be used as parents. -pub fn search_child_branches_for_parents( - mut frontier: Vec>, - maybe_route_to_pending: Option>, - included_header: Block::Header, - pending_hash: Option, - backend: &impl Backend, - max_depth: usize, - ignore_alternative_branches: bool, - rp_ancestry: Vec<(H256, H256)>, - mut potential_parents: Vec>, -) -> Vec> { - let included_hash = included_header.hash(); - let is_hash_in_ancestry = |hash| rp_ancestry.iter().any(|x| x.0 == hash); - let is_root_in_ancestry = |root| rp_ancestry.iter().any(|x| x.1 == root); - - // The distance between pending and included block. Is later used to check if a child - // is aligned with pending when it is between pending and included block. - let pending_distance = maybe_route_to_pending.as_ref().map(|route| route.enacted().len()); - - // If a block is on the path included -> pending, we consider it `aligned_with_pending`. 
- let is_child_in_path_to_pending = |hash| { - maybe_route_to_pending - .as_ref() - .map_or(true, |route| route.enacted().iter().any(|x| x.hash == hash)) - }; - - tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?included_hash, included_num = ?included_header.number(), ?pending_hash , ?rp_ancestry, "Searching relay chain ancestry."); - while let Some(entry) = frontier.pop() { - // TODO Adjust once we can fetch multiple pending blocks. - // https://github.com/paritytech/polkadot-sdk/issues/3967 - let is_pending = pending_hash.as_ref().map_or(false, |h| &entry.hash == h); - let is_included = included_hash == entry.hash; - - // note: even if the pending block or included block have a relay parent - // outside of the expected part of the relay chain, they are always allowed - // because they have already been posted on chain. - let is_potential = is_pending || is_included || { - let digest = entry.header.digest(); - let is_hash_in_ancestry_check = cumulus_primitives_core::extract_relay_parent(digest) - .map_or(false, is_hash_in_ancestry); - let is_root_in_ancestry_check = - cumulus_primitives_core::rpsr_digest::extract_relay_parent_storage_root(digest) - .map(|(r, _n)| r) - .map_or(false, is_root_in_ancestry); - - is_hash_in_ancestry_check || is_root_in_ancestry_check - }; - - let parent_aligned_with_pending = entry.aligned_with_pending; - let child_depth = entry.depth + 1; - let hash = entry.hash; - - tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, root_in_ancestry = is_potential && !is_pending && !is_included, ?hash, is_pending, is_included, "Checking potential parent."); - if is_potential { - potential_parents.push(entry); - } - - if !is_potential || child_depth > max_depth { - continue - } - - // push children onto search frontier. - for child in backend.blockchain().children(hash).ok().into_iter().flatten() { - tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?child, child_depth, ?pending_distance, "Looking at child."); - - let aligned_with_pending = parent_aligned_with_pending && - (pending_distance.map_or(true, |dist| child_depth > dist) || - pending_hash.as_ref().map_or(true, |h| &child == h) || - is_child_in_path_to_pending(child)); - - if ignore_alternative_branches && !aligned_with_pending { - tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?child, "Child is not aligned with pending block."); - continue - } - - let Ok(Some(header)) = backend.blockchain().header(child) else { continue }; - - frontier.push(PotentialParent { - hash: child, - header, - depth: child_depth, - aligned_with_pending, - }); - } - } - - potential_parents -} - /// Get the relay-parent slot and timestamp from a header. pub fn relay_slot_and_timestamp( relay_parent_header: &PHeader, diff --git a/cumulus/client/consensus/common/src/parent_search.rs b/cumulus/client/consensus/common/src/parent_search.rs new file mode 100644 index 000000000000..ab7f4290e6ab --- /dev/null +++ b/cumulus/client/consensus/common/src/parent_search.rs @@ -0,0 +1,401 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Cumulus. + +// Cumulus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Cumulus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Cumulus. If not, see . + +use codec::Decode; +use polkadot_primitives::{Hash as PHash, HeadData}; + +use cumulus_primitives_core::{ + relay_chain::{BlockId as RBlockId, OccupiedCoreAssumption}, + ParaId, +}; +use cumulus_relay_chain_interface::{RelayChainError, RelayChainInterface}; + +use sc_client_api::{Backend, HeaderBackend}; + +use sp_blockchain::{Backend as BlockchainBackend, TreeRoute}; + +use sp_core::H256; +use sp_runtime::traits::{Block as BlockT, Header as HeaderT}; + +const PARENT_SEARCH_LOG_TARGET: &str = "consensus::common::find_potential_parents"; +/// Parameters when searching for suitable parents to build on top of. +#[derive(Debug)] +pub struct ParentSearchParams { + /// The relay-parent that is intended to be used. + pub relay_parent: PHash, + /// The ID of the parachain. + pub para_id: ParaId, + /// A limitation on the age of relay parents for parachain blocks that are being + /// considered. This is relative to the `relay_parent` number. + pub ancestry_lookback: usize, + /// How "deep" parents can be relative to the included parachain block at the relay-parent. + /// The included block has depth 0. + pub max_depth: usize, + /// Whether to only ignore "alternative" branches, i.e. branches of the chain + /// which do not contain the block pending availability. + pub ignore_alternative_branches: bool, +} + +/// A potential parent block returned from [`find_potential_parents`] +#[derive(PartialEq)] +pub struct PotentialParent { + /// The hash of the block. + pub hash: B::Hash, + /// The header of the block. + pub header: B::Header, + /// The depth of the block. + pub depth: usize, + /// Whether the block is the included block, is itself pending on-chain, or descends + /// from the block pending availability. + pub aligned_with_pending: bool, +} + +impl std::fmt::Debug for PotentialParent { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PotentialParent") + .field("hash", &self.hash) + .field("depth", &self.depth) + .field("aligned_with_pending", &self.aligned_with_pending) + .field("number", &self.header.number()) + .finish() + } +} + +/// Perform a recursive search through blocks to find potential +/// parent blocks for a new block. +/// +/// This accepts a relay-chain block to be used as an anchor and a maximum search depth, +/// along with some arguments for filtering parachain blocks and performs a recursive search +/// for parachain blocks. The search begins at the last included parachain block and returns +/// a set of [`PotentialParent`]s which could be potential parents of a new block with this +/// relay-parent according to the search parameters. +/// +/// A parachain block is a potential parent if it is either the last included parachain block, the +/// pending parachain block (when `max_depth` >= 1), or all of the following hold: +/// * its parent is a potential parent +/// * its relay-parent is within `ancestry_lookback` of the targeted relay-parent. +/// * its relay-parent is within the same session as the targeted relay-parent. +/// * the block number is within `max_depth` blocks of the included block +pub async fn find_potential_parents( + params: ParentSearchParams, + backend: &impl Backend, + relay_client: &impl RelayChainInterface, +) -> Result>, RelayChainError> { + tracing::trace!("Parent search parameters: {params:?}"); + // Get the included block. 
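+	// The included block anchors the search: it is always a valid parent (depth 0),
+	// and if it is not known locally the whole search is abandoned.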
+ let Some((included_header, included_hash, pending_pvd)) = fetch_included_pending_from_relay( + relay_client, + backend, + params.para_id, + params.relay_parent, + ) + .await? + else { + return Ok(Default::default()) + }; + + let only_included = vec![PotentialParent { + hash: included_hash, + header: included_header.clone(), + depth: 0, + aligned_with_pending: true, + }]; + + if params.max_depth == 0 { + return Ok(only_included) + }; + + // Pending header and hash. + let maybe_pending = { + // Try to decode the pending header. + let pending_header = pending_pvd.and_then(|p| B::Header::decode(&mut &p.0[..]).ok()); + + // If the pending block is not locally known, we can't do anything. + if let Some(header) = pending_header { + let pending_hash = header.hash(); + match backend.blockchain().header(pending_hash) { + // We are supposed to ignore branches that don't contain the pending block, but we + // do not know the pending block locally. + Ok(None) | Err(_) if params.ignore_alternative_branches => { + tracing::warn!( + target: PARENT_SEARCH_LOG_TARGET, + %pending_hash, + "Failed to get header for pending block.", + ); + return Ok(only_included) + }, + Ok(Some(_)) => Some((header, pending_hash)), + _ => None, + } + } else { + None + } + }; + + let maybe_route = maybe_pending + .as_ref() + .map(|(_, pending)| { + sp_blockchain::tree_route(backend.blockchain(), included_hash, *pending) + }) + .transpose()?; + + // If we want to ignore alternative branches there is no reason to start + // the parent search at the included block. We can add the included block and + // the path to the pending block to the potential parents directly (limited by max_depth). + let (frontier, potential_parents) = match ( + &maybe_pending, + params.ignore_alternative_branches, + &maybe_route, + ) { + (Some((pending_header, pending_hash)), true, Some(ref route_to_pending)) => { + let mut potential_parents = only_included; + + // This is a defensive check, should never happen. + if !route_to_pending.retracted().is_empty() { + tracing::warn!(target: PARENT_SEARCH_LOG_TARGET, "Included block not an ancestor of pending block. This should not happen."); + return Ok(Default::default()) + } + + // Add all items on the path included -> pending - 1 to the potential parents, but + // not more than `max_depth`. + let num_parents_on_path = + route_to_pending.enacted().len().saturating_sub(1).min(params.max_depth); + for (num, block) in + route_to_pending.enacted().iter().take(num_parents_on_path).enumerate() + { + let Ok(Some(header)) = backend.blockchain().header(block.hash) else { continue }; + + potential_parents.push(PotentialParent { + hash: block.hash, + header, + depth: 1 + num, + aligned_with_pending: true, + }); + } + + // The search for additional potential parents should now start at the children of + // the pending block. + ( + vec![PotentialParent { + hash: *pending_hash, + header: pending_header.clone(), + depth: route_to_pending.enacted().len(), + aligned_with_pending: true, + }], + potential_parents, + ) + }, + _ => (only_included, Default::default()), + }; + + if potential_parents.len() > params.max_depth { + return Ok(potential_parents); + } + + // Build up the ancestry record of the relay chain to compare against. 
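+	// Each ancestry entry is a `(block_hash, state_root)` pair; a child block is
+	// accepted if the relay parent recorded in its digest matches either component
+	// (see `search_child_branches_for_parents` below).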
+ let rp_ancestry = + build_relay_parent_ancestry(params.ancestry_lookback, params.relay_parent, relay_client) + .await?; + + Ok(search_child_branches_for_parents( + frontier, + maybe_route, + included_header, + maybe_pending.map(|(_, hash)| hash), + backend, + params.max_depth, + params.ignore_alternative_branches, + rp_ancestry, + potential_parents, + )) +} + +/// Fetch the included and pending block from the relay chain. +async fn fetch_included_pending_from_relay( + relay_client: &impl RelayChainInterface, + backend: &impl Backend, + para_id: ParaId, + relay_parent: PHash, +) -> Result)>, RelayChainError> { + let included_header = relay_client + .persisted_validation_data(relay_parent, para_id, OccupiedCoreAssumption::TimedOut) + .await?; + let included_header = match included_header { + Some(pvd) => pvd.parent_head, + None => return Ok(None), // this implies the para doesn't exist. + }; + + // Fetch the pending header from the relay chain. + let pending_pvd = relay_client + .persisted_validation_data(relay_parent, para_id, OccupiedCoreAssumption::Included) + .await? + .and_then(|x| if x.parent_head != included_header { Some(x.parent_head) } else { None }); + + let included_header = match B::Header::decode(&mut &included_header.0[..]).ok() { + None => return Ok(None), + Some(x) => x, + }; + + let included_hash = included_header.hash(); + // If the included block is not locally known, we can't do anything. + match backend.blockchain().header(included_hash) { + Ok(None) | Err(_) => { + tracing::warn!( + target: PARENT_SEARCH_LOG_TARGET, + %included_hash, + "Failed to get header for included block.", + ); + return Ok(None) + }, + _ => {}, + }; + + Ok(Some((included_header, included_hash, pending_pvd))) +} + +/// Build an ancestry of relay parents that are acceptable. +/// +/// An acceptable relay parent is one that is no more than `ancestry_lookback` + 1 blocks below the +/// relay parent we want to build on. Parachain blocks anchored on relay parents older than that can +/// not be considered potential parents for block building. They have no chance of still getting +/// included, so our newly build parachain block would also not get included. +/// +/// On success, returns a vector of `(header_hash, state_root)` of the relevant relay chain +/// ancestry blocks. +async fn build_relay_parent_ancestry( + ancestry_lookback: usize, + relay_parent: PHash, + relay_client: &impl RelayChainInterface, +) -> Result, RelayChainError> { + let mut ancestry = Vec::with_capacity(ancestry_lookback + 1); + let mut current_rp = relay_parent; + let mut required_session = None; + while ancestry.len() <= ancestry_lookback { + let header = match relay_client.header(RBlockId::hash(current_rp)).await? { + None => break, + Some(h) => h, + }; + + let session = relay_client.session_index_for_child(current_rp).await?; + if let Some(required_session) = required_session { + // Respect the relay-chain rule not to cross session boundaries. + if session != required_session { + break + } + } else { + required_session = Some(session); + } + + ancestry.push((current_rp, *header.state_root())); + current_rp = *header.parent_hash(); + + // don't iterate back into the genesis block. + if header.number == 1 { + break + } + } + Ok(ancestry) +} + +/// Start search for child blocks that can be used as parents. 
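+///
+/// Performs a depth-first walk over the locally imported children of the blocks in
+/// `frontier`, collecting every block whose relay parent lies in `rp_ancestry` (the
+/// included and pending blocks are always accepted), up to `max_depth`.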
+pub fn search_child_branches_for_parents( + mut frontier: Vec>, + maybe_route_to_pending: Option>, + included_header: Block::Header, + pending_hash: Option, + backend: &impl Backend, + max_depth: usize, + ignore_alternative_branches: bool, + rp_ancestry: Vec<(H256, H256)>, + mut potential_parents: Vec>, +) -> Vec> { + let included_hash = included_header.hash(); + let is_hash_in_ancestry = |hash| rp_ancestry.iter().any(|x| x.0 == hash); + let is_root_in_ancestry = |root| rp_ancestry.iter().any(|x| x.1 == root); + + // The distance between pending and included block. Is later used to check if a child + // is aligned with pending when it is between pending and included block. + let pending_distance = maybe_route_to_pending.as_ref().map(|route| route.enacted().len()); + + // If a block is on the path included -> pending, we consider it `aligned_with_pending`. + let is_child_in_path_to_pending = |hash| { + maybe_route_to_pending + .as_ref() + .map_or(true, |route| route.enacted().iter().any(|x| x.hash == hash)) + }; + + tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?included_hash, included_num = ?included_header.number(), ?pending_hash , ?rp_ancestry, "Searching relay chain ancestry."); + while let Some(entry) = frontier.pop() { + // TODO Adjust once we can fetch multiple pending blocks. + // https://github.com/paritytech/polkadot-sdk/issues/3967 + let is_pending = pending_hash.as_ref().map_or(false, |h| &entry.hash == h); + let is_included = included_hash == entry.hash; + + // note: even if the pending block or included block have a relay parent + // outside of the expected part of the relay chain, they are always allowed + // because they have already been posted on chain. + let is_potential = is_pending || is_included || { + let digest = entry.header.digest(); + let is_hash_in_ancestry_check = cumulus_primitives_core::extract_relay_parent(digest) + .map_or(false, is_hash_in_ancestry); + let is_root_in_ancestry_check = + cumulus_primitives_core::rpsr_digest::extract_relay_parent_storage_root(digest) + .map(|(r, _n)| r) + .map_or(false, is_root_in_ancestry); + + is_hash_in_ancestry_check || is_root_in_ancestry_check + }; + + let parent_aligned_with_pending = entry.aligned_with_pending; + let child_depth = entry.depth + 1; + let hash = entry.hash; + + tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, root_in_ancestry = is_potential && !is_pending && !is_included, ?hash, is_pending, is_included, "Checking potential parent."); + if is_potential { + potential_parents.push(entry); + } + + if !is_potential || child_depth > max_depth { + continue + } + + // push children onto search frontier. 
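+		// A child stays aligned with pending if its parent was aligned and it is
+		// either deeper than the pending block, the pending block itself, or on the
+		// included -> pending path.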
+ for child in backend.blockchain().children(hash).ok().into_iter().flatten() { + tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?child, child_depth, ?pending_distance, "Looking at child."); + + let aligned_with_pending = parent_aligned_with_pending && + (pending_distance.map_or(true, |dist| child_depth > dist) || + pending_hash.as_ref().map_or(true, |h| &child == h) || + is_child_in_path_to_pending(child)); + + if ignore_alternative_branches && !aligned_with_pending { + tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?child, "Child is not aligned with pending block."); + continue + } + + let Ok(Some(header)) = backend.blockchain().header(child) else { continue }; + + frontier.push(PotentialParent { + hash: child, + header, + depth: child_depth, + aligned_with_pending, + }); + } + } + + potential_parents +} From 53869a004f9193fca0ea36d39a1697b42419d2b5 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 23 Apr 2024 18:16:33 +0200 Subject: [PATCH 26/71] More code reuse between lookahead and slot-based --- .../consensus/aura/src/collators/lookahead.rs | 94 ++----------- .../consensus/aura/src/collators/mod.rs | 113 +++++++++++++++- .../slot_based/block_builder_task.rs | 128 ++---------------- cumulus/client/consensus/common/src/tests.rs | 1 + 4 files changed, 140 insertions(+), 196 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/lookahead.rs b/cumulus/client/consensus/aura/src/collators/lookahead.rs index bebc34389d75..a0e8455e8c59 100644 --- a/cumulus/client/consensus/aura/src/collators/lookahead.rs +++ b/cumulus/client/consensus/aura/src/collators/lookahead.rs @@ -34,7 +34,7 @@ use codec::{Codec, Encode}; use cumulus_client_collator::service::ServiceInterface as CollatorServiceInterface; use cumulus_client_consensus_common::{ - self as consensus_common, ParachainBlockImportMarker, ParentSearchParams, + self as consensus_common, ParachainBlockImportMarker, }; use cumulus_client_consensus_proposer::ProposerInterface; use cumulus_primitives_aura::AuraUnincludedSegmentApi; @@ -49,7 +49,6 @@ use polkadot_primitives::{CollatorPair, Id as ParaId, OccupiedCoreAssumption}; use futures::prelude::*; use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf}; use sc_consensus::BlockImport; -use sc_consensus_aura::standalone as aura_internal; use sp_api::ProvideRuntimeApi; use sp_application_crypto::AppPublic; use sp_blockchain::HeaderBackend; @@ -58,10 +57,9 @@ use sp_core::crypto::Pair; use sp_inherents::CreateInherentDataProviders; use sp_keystore::KeystorePtr; use sp_runtime::traits::{Block as BlockT, Header as HeaderT, Member}; -use sp_timestamp::Timestamp; use std::{sync::Arc, time::Duration}; -use crate::collator::{self as collator_util, SlotClaim}; +use crate::collator::{self as collator_util}; /// Parameters for [`run`]. pub struct Params { @@ -133,8 +131,6 @@ where // Since we only search for parent blocks which have already been imported, // we can guarantee that all imported blocks respect the unincluded segment // rules specified by the parachain's runtime and thus will never be too deep. 
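// Illustrative sketch (toy types; `Candidate` and `pick_parent` are assumptions of
// this sketch, not part of the patch): both the lookahead collator below and the
// shared `find_parent` helper introduced later follow the same selection rule:
// require the included block at depth 0, then build on the deepest potential parent.
struct Candidate {
	hash: u32,
	depth: usize,
}

fn pick_parent(mut candidates: Vec<Candidate>) -> Option<Candidate> {
	// Without the included block (depth 0) we refuse to build at all.
	candidates.iter().find(|c| c.depth == 0)?;
	// Sort ascending by depth and take the tip of the longest imported chain.
	candidates.sort_by_key(|c| c.depth);
	candidates.pop()
}

fn main() {
	let candidates = vec![
		Candidate { hash: 0xa, depth: 1 },
		Candidate { hash: 0xb, depth: 0 }, // the included block
		Candidate { hash: 0xc, depth: 2 },
	];
	let parent = pick_parent(candidates).expect("included block is present");
	assert_eq!(parent.hash, 0xc);
}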
- const PARENT_SEARCH_DEPTH: usize = 10; - async move { cumulus_client_collator::initialize_collator_subsystems( &mut params.overseer_handle, @@ -214,42 +210,16 @@ where }, }; - let parent_search_params = ParentSearchParams { + let (included_block, initial_parent) = match crate::collators::find_parent( relay_parent, - para_id: params.para_id, - ancestry_lookback: super::async_backing_params(relay_parent, ¶ms.relay_client) - .await - .map(|c| c.allowed_ancestry_len as usize) - .unwrap_or(0), - max_depth: PARENT_SEARCH_DEPTH, - ignore_alternative_branches: true, - }; - - let potential_parents = - cumulus_client_consensus_common::find_potential_parents::( - parent_search_params, - &*params.para_backend, - ¶ms.relay_client, - ) - .await; - - let mut potential_parents = match potential_parents { - Err(e) => { - tracing::error!( - target: crate::LOG_TARGET, - ?relay_parent, - err = ?e, - "Could not fetch potential parents to build upon" - ); - - continue - }, - Ok(x) => x, - }; - - let included_block = match potential_parents.iter().find(|x| x.depth == 0) { - None => continue, // also serves as an `is_empty` check. - Some(b) => b.hash, + params.para_id, + &*params.para_backend, + ¶ms.relay_client, + ) + .await + { + Some(value) => value, + None => continue, }; let para_client = &*params.para_client; @@ -280,7 +250,7 @@ where relay_chain_slot_duration = ?params.relay_chain_slot_duration, "Adjusted relay-chain slot to parachain slot" ); - Some(can_build_upon::<_, _, P>( + Some(super::can_build_upon::<_, _, P>( slot_now, timestamp, block_hash, @@ -290,13 +260,6 @@ where )) }; - // Sort by depth, ascending, to choose the longest chain. - // - // If the longest chain has space, build upon that. Otherwise, don't - // build at all. - potential_parents.sort_by_key(|a| a.depth); - let Some(initial_parent) = potential_parents.pop() else { continue }; - // Build in a loop until not allowed. Note that the authorities can change // at any block, so we need to re-claim our slot every time. let mut parent_hash = initial_parent.hash; @@ -425,36 +388,3 @@ where } } } - -// Checks if we own the slot at the given block and whether there -// is space in the unincluded segment. -async fn can_build_upon( - slot: Slot, - timestamp: Timestamp, - parent_hash: Block::Hash, - included_block: Block::Hash, - client: &Client, - keystore: &KeystorePtr, -) -> Option> -where - Client: ProvideRuntimeApi, - Client::Api: AuraApi + AuraUnincludedSegmentApi, - P: Pair, - P::Public: Codec, - P::Signature: Codec, -{ - let runtime_api = client.runtime_api(); - let authorities = runtime_api.authorities(parent_hash).ok()?; - let author_pub = aura_internal::claim_slot::
<P>
(slot, &authorities, keystore).await?; - - // Here we lean on the property that building on an empty unincluded segment must always - // be legal. Skipping the runtime API query here allows us to seamlessly run this - // collator against chains which have not yet upgraded their runtime. - if parent_hash != included_block { - if !runtime_api.can_build_upon(parent_hash, included_block, slot).ok()? { - return None - } - } - - Some(SlotClaim::unchecked::
<P>
(author_pub, slot, timestamp)) -} diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs index e184e9953039..683d34394464 100644 --- a/cumulus/client/consensus/aura/src/collators/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/mod.rs @@ -22,17 +22,31 @@ use std::collections::VecDeque; -use cumulus_client_consensus_common::load_abridged_host_configuration; +use crate::collator::SlotClaim; +use codec::{Codec}; +use cumulus_client_consensus_common::{ + self as consensus_common, load_abridged_host_configuration, + ParentSearchParams, +}; +use cumulus_primitives_aura::{AuraUnincludedSegmentApi, Slot}; +use cumulus_primitives_core::{relay_chain::Hash as PHash, BlockT}; use cumulus_relay_chain_interface::RelayChainInterface; use polkadot_primitives::{ AsyncBackingParams, CoreIndex, CoreState, Hash as RHash, Id as ParaId, OccupiedCoreAssumption, ValidationCodeHash, }; +use sc_consensus_aura::{standalone as aura_internal, AuraApi}; +use sp_api::ProvideRuntimeApi; +use sp_core::Pair; +use sp_keystore::KeystorePtr; +use sp_timestamp::Timestamp; pub mod basic; pub mod lookahead; pub mod slot_based; +const PARENT_SEARCH_DEPTH: usize = 10; + /// Check the `local_validation_code_hash` against the validation code hash in the relay chain /// state. /// @@ -155,3 +169,100 @@ async fn cores_scheduled_for_para( }) .collect() } + +// Checks if we own the slot at the given block and whether there +// is space in the unincluded segment. +async fn can_build_upon( + slot: Slot, + timestamp: Timestamp, + parent_hash: Block::Hash, + included_block: Block::Hash, + client: &Client, + keystore: &KeystorePtr, +) -> Option> +where + Client: ProvideRuntimeApi, + Client::Api: AuraApi + AuraUnincludedSegmentApi, + P: Pair, + P::Public: Codec, + P::Signature: Codec, +{ + let runtime_api = client.runtime_api(); + let authorities = runtime_api.authorities(parent_hash).ok()?; + let author_pub = aura_internal::claim_slot::
<P>
<P>(slot, &authorities, keystore).await?;
+
+	// Here we lean on the property that building on an empty unincluded segment must always
+	// be legal. Skipping the runtime API query here allows us to seamlessly run this
+	// collator against chains which have not yet upgraded their runtime.
+	if parent_hash != included_block {
+		if !runtime_api.can_build_upon(parent_hash, included_block, slot).ok()? {
+			tracing::debug!(
+				target: crate::LOG_TARGET,
+				?parent_hash,
+				?included_block,
+				?slot,
+				"Cannot build on top of the current block, skipping slot."
+			);
+			return None
+		}
+	}
+
+	Some(SlotClaim::unchecked::<P>
(author_pub, slot, timestamp)) +} + +/// Use [`cumulus_client_consensus_common::find_potential_parents`] to find parachain blocks that +/// we can build on. Once a list of potential parents is retrieved, return the last one of the +/// longest chain. +async fn find_parent( + relay_parent: PHash, + para_id: ParaId, + para_backend: &impl sc_client_api::Backend, + relay_client: &impl RelayChainInterface, +) -> Option<(::Hash, consensus_common::PotentialParent)> +where + Block: BlockT, +{ + let parent_search_params = ParentSearchParams { + relay_parent, + para_id, + ancestry_lookback: crate::collators::async_backing_params(relay_parent, relay_client) + .await + .map_or(0, |params| params.allowed_ancestry_len as usize), + max_depth: PARENT_SEARCH_DEPTH, + ignore_alternative_branches: true, + }; + + let potential_parents = cumulus_client_consensus_common::find_potential_parents::( + parent_search_params, + para_backend, + relay_client, + ) + .await; + + let mut potential_parents = match potential_parents { + Err(e) => { + tracing::error!( + target: crate::LOG_TARGET, + ?relay_parent, + err = ?e, + "Could not fetch potential parents to build upon" + ); + + return None + }, + Ok(x) => x, + }; + + let included_block = match potential_parents.iter().find(|x| x.depth == 0) { + None => return None, // also serves as an `is_empty` check. + Some(b) => b.hash, + }; + potential_parents.sort_by_key(|a| a.depth); + + let parent = match potential_parents.pop() { + None => return None, + Some(p) => p, + }; + + Some((included_block, parent)) +} diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 4c340ac06653..b69e9e9d53b4 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -39,21 +39,16 @@ use codec::{Codec, Encode}; use cumulus_client_collator::service::ServiceInterface as CollatorServiceInterface; -use cumulus_client_consensus_common::{ - self as consensus_common, ParachainBlockImportMarker, ParentSearchParams, -}; +use cumulus_client_consensus_common::{self as consensus_common, ParachainBlockImportMarker}; use cumulus_client_consensus_proposer::ProposerInterface; use cumulus_primitives_aura::AuraUnincludedSegmentApi; -use cumulus_primitives_core::{ - relay_chain::Hash as PHash, CollectCollationInfo, PersistedValidationData, -}; +use cumulus_primitives_core::{CollectCollationInfo, PersistedValidationData}; use cumulus_relay_chain_interface::RelayChainInterface; use polkadot_primitives::{BlockId, Id as ParaId, OccupiedCoreAssumption}; use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf, UsageProvider}; use sc_consensus::BlockImport; -use sc_consensus_aura::standalone as aura_internal; use sc_consensus_slots::time_until_next_slot; use sp_api::ProvideRuntimeApi; use sp_application_crypto::AppPublic; @@ -68,13 +63,11 @@ use std::{sync::Arc, time::Duration}; use super::CollatorMessage; use crate::{ - collator::{self as collator_util, SlotClaim}, + collator::{self as collator_util}, collators::{check_validation_code_or_log, cores_scheduled_for_para}, LOG_TARGET, }; -const PARENT_SEARCH_DEPTH: usize = 10; - /// Parameters for [`run_block_builder`]. pub struct BuilderTaskParams { /// Inherent data providers. Only non-consensus inherent data should be provided, i.e. 
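As an illustration of how the two helpers consolidated into `collators/mod.rs` fit together, a collator loop would first call `find_parent` to obtain the included block and the deepest parent on the longest chain, and then gate block production with `can_build_upon`. The sketch below is hypothetical and not part of the patch series: the loop context, the `P: Pair` generic, and variables such as `para_slot` and `keystore` are assumed from the callers shown elsewhere in this series.

// Hypothetical call site inside a collator loop (sketch only).
let Some((included_block, initial_parent)) =
	find_parent(relay_parent, para_id, &*para_backend, &relay_client).await
else {
	continue // No viable parent for this relay parent.
};

let Some(slot_claim) = can_build_upon::<_, _, P>(
	para_slot.slot,
	para_slot.timestamp,
	initial_parent.hash,
	included_block,
	&*para_client,
	&keystore,
)
.await
else {
	continue // Slot not ours, or the unincluded segment is full.
};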
@@ -132,46 +125,6 @@ impl SlotTimer { } } -// Checks if we own the slot at the given block and whether there -// is space in the unincluded segment. -async fn can_build_upon( - slot: Slot, - timestamp: Timestamp, - parent_hash: Block::Hash, - included_block: Block::Hash, - client: &Client, - keystore: &KeystorePtr, -) -> Option> -where - Client: ProvideRuntimeApi, - Client::Api: AuraApi + AuraUnincludedSegmentApi, - P: Pair, - P::Public: Codec, - P::Signature: Codec, -{ - let runtime_api = client.runtime_api(); - let authorities = runtime_api.authorities(parent_hash).ok()?; - let author_pub = aura_internal::claim_slot::

<P>(slot, &authorities, keystore).await?;
-
-	// Here we lean on the property that building on an empty unincluded segment must always
-	// be legal. Skipping the runtime API query here allows us to seamlessly run this
-	// collator against chains which have not yet upgraded their runtime.
-	if parent_hash != included_block {
-		if !runtime_api.can_build_upon(parent_hash, included_block, slot).ok()? {
-			tracing::debug!(
-				target: crate::LOG_TARGET,
-				?parent_hash,
-				?included_block,
-				?slot,
-				"Cannot build on top of the current block, skipping slot."
-			);
-			return None
-		}
-	}
-
-	Some(SlotClaim::unchecked::<P>
(author_pub, slot, timestamp)) -} - /// Run block-builder. pub async fn run_block_builder( params: BuilderTaskParams, @@ -287,11 +240,17 @@ pub async fn run_block_builder value, - None => continue, - }; + let (included_block, parent) = match crate::collators::find_parent( + relay_parent, + para_id, + &*para_backend, + &relay_client, + ) + .await + { + Some(value) => value, + None => continue, + }; let parent_header = parent.header; let parent_hash = parent.hash; @@ -300,7 +259,7 @@ pub async fn run_block_builder( + let slot_claim = match crate::collators::can_build_upon::<_, _, P>( para_slot.slot, para_slot.timestamp, parent_hash, @@ -394,60 +353,3 @@ pub async fn run_block_builder( - relay_parent: PHash, - para_id: ParaId, - para_backend: &impl sc_client_api::Backend, - relay_client: &impl RelayChainInterface, -) -> Option<(::Hash, consensus_common::PotentialParent)> -where - Block: BlockT, -{ - let parent_search_params = ParentSearchParams { - relay_parent, - para_id, - ancestry_lookback: crate::collators::async_backing_params(relay_parent, relay_client) - .await - .map_or(0, |params| params.allowed_ancestry_len as usize), - max_depth: PARENT_SEARCH_DEPTH, - ignore_alternative_branches: true, - }; - - let potential_parents = cumulus_client_consensus_common::find_potential_parents::( - parent_search_params, - para_backend, - relay_client, - ) - .await; - - let mut potential_parents = match potential_parents { - Err(e) => { - tracing::error!( - target: crate::LOG_TARGET, - ?relay_parent, - err = ?e, - "Could not fetch potential parents to build upon" - ); - - return None - }, - Ok(x) => x, - }; - - let included_block = match potential_parents.iter().find(|x| x.depth == 0) { - None => return None, // also serves as an `is_empty` check. - Some(b) => b.hash, - }; - potential_parents.sort_by_key(|a| a.depth); - - let parent = match potential_parents.pop() { - None => return None, - Some(p) => p, - }; - - Some((included_block, parent)) -} diff --git a/cumulus/client/consensus/common/src/tests.rs b/cumulus/client/consensus/common/src/tests.rs index 9995d5db3df3..cb11a2e969d9 100644 --- a/cumulus/client/consensus/common/src/tests.rs +++ b/cumulus/client/consensus/common/src/tests.rs @@ -37,6 +37,7 @@ use futures_timer::Delay; use polkadot_primitives::HeadData; use sc_client_api::{Backend as _, UsageProvider}; use sc_consensus::{BlockImport, BlockImportParams, ForkChoiceStrategy}; +use sp_blockchain::Backend as BlockchainBackend; use sp_consensus::{BlockOrigin, BlockStatus}; use std::{ collections::{BTreeMap, HashMap}, From c790fb8a62ee4b6328fa398fd99d85a7b83d67b1 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 23 Apr 2024 20:16:09 +0200 Subject: [PATCH 27/71] fmt --- cumulus/client/consensus/aura/src/collators/lookahead.rs | 4 +--- cumulus/client/consensus/aura/src/collators/mod.rs | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/lookahead.rs b/cumulus/client/consensus/aura/src/collators/lookahead.rs index a0e8455e8c59..358b0e9039e4 100644 --- a/cumulus/client/consensus/aura/src/collators/lookahead.rs +++ b/cumulus/client/consensus/aura/src/collators/lookahead.rs @@ -33,9 +33,7 @@ use codec::{Codec, Encode}; use cumulus_client_collator::service::ServiceInterface as CollatorServiceInterface; -use cumulus_client_consensus_common::{ - self as consensus_common, ParachainBlockImportMarker, -}; +use cumulus_client_consensus_common::{self as consensus_common, ParachainBlockImportMarker}; use 
cumulus_client_consensus_proposer::ProposerInterface; use cumulus_primitives_aura::AuraUnincludedSegmentApi; use cumulus_primitives_core::{CollectCollationInfo, PersistedValidationData}; diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs index 683d34394464..23c1b2516e88 100644 --- a/cumulus/client/consensus/aura/src/collators/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/mod.rs @@ -23,10 +23,9 @@ use std::collections::VecDeque; use crate::collator::SlotClaim; -use codec::{Codec}; +use codec::Codec; use cumulus_client_consensus_common::{ - self as consensus_common, load_abridged_host_configuration, - ParentSearchParams, + self as consensus_common, load_abridged_host_configuration, ParentSearchParams, }; use cumulus_primitives_aura::{AuraUnincludedSegmentApi, Slot}; use cumulus_primitives_core::{relay_chain::Hash as PHash, BlockT}; From 169479adda4f1010c042b7bff99fb078f7dca5a9 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 30 Apr 2024 20:12:02 +0200 Subject: [PATCH 28/71] Add slot drift for testing --- Cargo.lock | 9 +++--- .../slot_based/block_builder_task.rs | 30 +++++++++++++++---- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ded9f47cdea7..2f77a6d50772 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21609,11 +21609,10 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "log", "pin-project-lite 0.2.12", "tracing-attributes", @@ -21622,9 +21621,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2 1.0.75", "quote 1.0.35", diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index b69e9e9d53b4..c5c756e6aad3 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -49,7 +49,6 @@ use polkadot_primitives::{BlockId, Id as ParaId, OccupiedCoreAssumption}; use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf, UsageProvider}; use sc_consensus::BlockImport; -use sc_consensus_slots::time_until_next_slot; use sp_api::ProvideRuntimeApi; use sp_application_crypto::AppPublic; use sp_blockchain::HeaderBackend; @@ -109,16 +108,37 @@ struct SlotAndTime { #[derive(Debug)] struct SlotTimer { slot_duration: SlotDuration, + drift: Duration, +} + +/// Returns current duration since unix epoch. +fn duration_now() -> Duration { + use std::time::SystemTime; + let now = SystemTime::now(); + now.duration_since(SystemTime::UNIX_EPOCH).unwrap_or_else(|e| { + panic!("Current time {:?} is before unix epoch. Something is wrong: {:?}", now, e) + }) +} + +/// TODO For testing of slot drift, check if can be moved elsewhere. +/// Returns the duration until the next slot from now. 
+fn time_until_next_slot(slot_duration: Duration, drift: Duration) -> Duration { + let now = duration_now().as_millis() - drift.as_millis(); + + let next_slot = (now + slot_duration.as_millis()) / slot_duration.as_millis(); + let remaining_millis = next_slot * slot_duration.as_millis() - now; + Duration::from_millis(remaining_millis as u64) } impl SlotTimer { - pub fn new(slot_duration: SlotDuration) -> Self { - Self { slot_duration } + pub fn new_with_drift(slot_duration: SlotDuration, drift: Duration) -> Self { + Self { slot_duration, drift } } /// Returns a future that resolves when the next slot arrives. pub async fn wait_until_next_slot(&self) -> SlotAndTime { - let time_until_next_slot = time_until_next_slot(self.slot_duration.as_duration()); + let time_until_next_slot = + time_until_next_slot(self.slot_duration.as_duration(), self.drift); tokio::time::sleep(time_until_next_slot).await; let timestamp = sp_timestamp::Timestamp::current(); SlotAndTime { slot: Slot::from_timestamp(timestamp, self.slot_duration), timestamp } @@ -177,7 +197,7 @@ pub async fn run_block_builder Date: Mon, 6 May 2024 16:25:33 +0200 Subject: [PATCH 29/71] Remove duplicate comment --- .../slot_based/block_builder_task.rs | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index c5c756e6aad3..7b9eab7195f1 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -14,28 +14,6 @@ // You should have received a copy of the GNU General Public License // along with Cumulus. If not, see . -//! A collator for Aura that looks ahead of the most recently included parachain block -//! when determining what to build upon. -//! -//! The block building mechanism consists of two parts: -//! 1. A block-builder task that builds parachain blocks at each of our slot. -//! 2. A collator task that transforms the blocks into a collation and submits them to the relay -//! chain. -//! -//! This collator also builds additional blocks when the maximum backlog is not saturated. -//! The size of the backlog is determined by invoking a runtime API. If that runtime API -//! is not supported, this assumes a maximum backlog size of 1. -//! -//! This takes more advantage of asynchronous backing, though not complete advantage. -//! When the backlog is not saturated, this approach lets the backlog temporarily 'catch up' -//! with periods of higher throughput. When the backlog is saturated, we typically -//! fall back to the limited cadence of a single parachain block per relay-chain block. -//! -//! Despite this, the fact that there is a backlog at all allows us to spend more time -//! building the block, as there is some buffer before it can get posted to the relay-chain. -//! The main limitation is block propagation time - i.e. the new blocks created by an author -//! must be propagated to the next author before their turn. 
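To make the drift arithmetic in `time_until_next_slot` concrete, note that subtracting `drift` from the current time shifts every slot boundary forward by that offset. A worked example follows (illustrative only, not part of the patch; units are milliseconds, as in the code above): with a 6s slot, 1s drift, and a wall clock 15.5s past the epoch, the timer fires 3.5s later, at 19s, which is the undrifted boundary at 18s plus the 1s drift. The final assertion sketches the related core-count rule added later in this series, where the expected core count is the relay slot length divided by the parachain slot length, with a minimum of one.

// Worked example of the slot-drift arithmetic (sketch, not patch code).
let slot = 6_000u128; // slot_duration.as_millis()
let drift = 1_000u128;
let now = 15_500u128 - drift; // 14_500: wall clock shifted back by the drift
let next_slot = (now + slot) / slot; // 20_500 / 6_000 = 3
let remaining = next_slot * slot - now; // 3 * 6_000 - 14_500 = 3_500
assert_eq!(remaining, 3_500);
// A 6s relay slot with 2s parachain slots yields an expected core count of 3.
assert_eq!((6_000u64 / 2_000).max(1), 3);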
- use codec::{Codec, Encode}; use cumulus_client_collator::service::ServiceInterface as CollatorServiceInterface; From 9760e815cc7880353e271ac6d1ae47f465d19a37 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 7 May 2024 15:30:35 +0200 Subject: [PATCH 30/71] Expose slot_drift parameter --- .../aura/src/collators/slot_based/block_builder_task.rs | 5 ++++- .../client/consensus/aura/src/collators/slot_based/mod.rs | 4 ++++ cumulus/test/service/src/lib.rs | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 7b9eab7195f1..0b230194a0d1 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -75,6 +75,8 @@ pub struct BuilderTaskParams>, /// Slot duration of the relay chain pub relay_chain_slot_duration: Duration, + /// Drift every slot by this duration. + pub slot_drift: Duration, } #[derive(Debug)] @@ -165,6 +167,7 @@ pub async fn run_block_builder { pub authoring_duration: Duration, /// Whether we should reinitialize the collator config (i.e. we are transitioning to aura). pub reinitialize: bool, + /// Drift slots by a fixed duration. This can be used to create more preferrable authoring + /// timings. + pub slot_drift: Duration, } /// Run aura-based block building and collation task. @@ -157,6 +160,7 @@ where authoring_duration: params.authoring_duration, collator_sender: tx, relay_chain_slot_duration: params.relay_chain_slot_duration, + slot_drift: params.slot_drift, }; let block_builder_fut = diff --git a/cumulus/test/service/src/lib.rs b/cumulus/test/service/src/lib.rs index 76cee331cd8e..f35e4535196e 100644 --- a/cumulus/test/service/src/lib.rs +++ b/cumulus/test/service/src/lib.rs @@ -487,6 +487,7 @@ where collator_service, authoring_duration: Duration::from_millis(2000), reinitialize: false, + slot_drift: Duration::from_secs(1), }; let (collation_future, block_builer_future) = From 42c01359515d91b48428460d948319a8bf668dfb Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 7 May 2024 17:18:55 +0200 Subject: [PATCH 31/71] Do not assumed fixed slot_duration --- .../consensus/aura/src/collators/mod.rs | 7 -- .../slot_based/block_builder_task.rs | 96 +++++++++++++------ 2 files changed, 65 insertions(+), 38 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs index 23c1b2516e88..fd229db35013 100644 --- a/cumulus/client/consensus/aura/src/collators/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/mod.rs @@ -195,13 +195,6 @@ where // collator against chains which have not yet upgraded their runtime. if parent_hash != included_block { if !runtime_api.can_build_upon(parent_hash, included_block, slot).ok()? { - tracing::debug!( - target: crate::LOG_TARGET, - ?parent_hash, - ?included_block, - ?slot, - "Cannot build on top of the current block, skipping slot." 
- ); return None } } diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 0b230194a0d1..a4a3c5350d55 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -80,15 +80,17 @@ pub struct BuilderTaskParams { + client: Arc, drift: Duration, + phantom: std::marker::PhantomData<(P, Block)>, } /// Returns current duration since unix epoch. @@ -110,18 +112,33 @@ fn time_until_next_slot(slot_duration: Duration, drift: Duration) -> Duration { Duration::from_millis(remaining_millis as u64) } -impl SlotTimer { - pub fn new_with_drift(slot_duration: SlotDuration, drift: Duration) -> Self { - Self { slot_duration, drift } +impl SlotTimer +where + Block: BlockT, + Client: ProvideRuntimeApi + Send + Sync + 'static + UsageProvider, + Client::Api: AuraApi, + + P: Pair, + P::Public: AppPublic + Member + Codec, + P::Signature: TryFrom> + Member + Codec, +{ + pub fn new_with_drift(client: Arc, drift: Duration) -> Self { + Self { client, drift, phantom: Default::default() } } /// Returns a future that resolves when the next slot arrives. - pub async fn wait_until_next_slot(&self) -> SlotAndTime { - let time_until_next_slot = - time_until_next_slot(self.slot_duration.as_duration(), self.drift); + pub async fn wait_until_next_slot(&self) -> SlotInfo { + let slot_duration = match crate::slot_duration(&*self.client) { + Ok(s) => s, + Err(e) => { + tracing::error!(target: crate::LOG_TARGET, ?e, "Failed to fetch slot duration from runtime. Killing collator task."); + todo!(); + }, + }; + let time_until_next_slot = time_until_next_slot(slot_duration.as_duration(), self.drift); tokio::time::sleep(time_until_next_slot).await; let timestamp = sp_timestamp::Timestamp::current(); - SlotAndTime { slot: Slot::from_timestamp(timestamp, self.slot_duration), timestamp } + SlotInfo { slot: Slot::from_timestamp(timestamp, slot_duration), timestamp, slot_duration } } } @@ -170,15 +187,7 @@ pub async fn run_block_builder s, - Err(e) => { - tracing::error!(target: crate::LOG_TARGET, ?e, "Failed to fetch slot duration from runtime. Killing collator task."); - return - }, - }; - - let slot_timer = SlotTimer::new_with_drift(slot_duration, slot_drift); + let slot_timer = SlotTimer::<_, _, P>::new_with_drift(para_client.clone(), slot_drift); let mut collator = { let params = collator_util::Params { @@ -194,18 +203,16 @@ pub async fn run_block_builder::new(params) }; - let Ok(expected_cores) = u64::try_from( - relay_chain_slot_duration.as_millis() / slot_duration.as_duration().as_millis(), - ) else { - tracing::error!(target: LOG_TARGET, ?relay_chain_slot_duration, ?slot_duration, "Unable to calculate expected parachain expected_cores."); - return; - }; - let expected_cores = expected_cores.max(1); - loop { // We wait here until the next slot arrives. 
let para_slot = slot_timer.wait_until_next_slot().await; + let Ok(expected_cores) = + expected_core_count(relay_chain_slot_duration, para_slot.slot_duration) + else { + return + }; + let Ok(relay_parent) = relay_client.best_block_hash().await else { tracing::warn!("Unable to fetch latest relay chain block hash, skipping slot."); continue; @@ -271,13 +278,25 @@ pub async fn run_block_builder slot, - None => continue, + None => { + tracing::debug!( + target: crate::LOG_TARGET, + ?core_index, + slot_info = ?para_slot, + unincluded_segment_len = parent.depth, + relay_parent = %relay_parent, + included = %included_block, + parent = %parent_hash, + "Not building block." + ); + continue + }, }; tracing::debug!( target: crate::LOG_TARGET, ?core_index, - slot = ?para_slot.slot, + slot_info = ?para_slot, unincluded_segment_len = parent.depth, relay_parent = %relay_parent, included = %included_block, @@ -354,3 +373,18 @@ pub async fn run_block_builder Result { + u64::try_from(relay_chain_slot_duration.as_millis() / slot_duration.as_duration().as_millis()) + .map_err(|e| tracing::error!("Unable to claculate expected parachain core count: {e}")) + .map(|expected_core_count| expected_core_count.max(1)) +} From 44bc8e77f35a2f470aed6455216397eb318f1ca2 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 29 May 2024 16:07:27 +0200 Subject: [PATCH 32/71] Apply suggestions from code review Co-authored-by: Davide Galassi Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com> --- cumulus/client/consensus/aura/src/collators/mod.rs | 13 +++---------- .../src/collators/slot_based/block_builder_task.rs | 7 +++---- .../aura/src/collators/slot_based/collation_task.rs | 2 +- cumulus/client/consensus/common/src/lib.rs | 2 +- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs index fd229db35013..f37e50778983 100644 --- a/cumulus/client/consensus/aura/src/collators/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/mod.rs @@ -124,7 +124,7 @@ async fn async_backing_params( // Return all the cores assigned to the para at the provided relay parent. async fn cores_scheduled_for_para( - relay_parent: polkadot_primitives::Hash, + relay_parent: RHash, para_id: ParaId, relay_client: &impl RelayChainInterface, ) -> VecDeque { @@ -153,7 +153,7 @@ async fn cores_scheduled_for_para( .filter_map(|(index, core)| { let core_para_id = match core { CoreState::Scheduled(scheduled_core) => Some(scheduled_core.para_id), - CoreState::Occupied(occupied_core) if max_candidate_depth >= 1 => occupied_core + CoreState::Occupied(occupied_core) if max_candidate_depth > 0 => occupied_core .next_up_on_available .as_ref() .map(|scheduled_core| scheduled_core.para_id), @@ -249,12 +249,5 @@ where None => return None, // also serves as an `is_empty` check. 
Some(b) => b.hash, }; - potential_parents.sort_by_key(|a| a.depth); - - let parent = match potential_parents.pop() { - None => return None, - Some(p) => p, - }; - - Some((included_block, parent)) + potential_parents.into_iter().max_by_key(|a| a.depth).map(|parent| (included_block, parent)) } diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index a4a3c5350d55..8aed2ab0d7a7 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -93,7 +93,7 @@ struct SlotTimer { phantom: std::marker::PhantomData<(P, Block)>, } -/// Returns current duration since unix epoch. +/// Returns current duration since Unix epoch. fn duration_now() -> Duration { use std::time::SystemTime; let now = SystemTime::now(); @@ -117,7 +117,6 @@ where Block: BlockT, Client: ProvideRuntimeApi + Send + Sync + 'static + UsageProvider, Client::Api: AuraApi, - P: Pair, P::Public: AppPublic + Member + Codec, P::Signature: TryFrom> + Member + Codec, @@ -215,7 +214,7 @@ pub async fn run_block_builder Result { u64::try_from(relay_chain_slot_duration.as_millis() / slot_duration.as_duration().as_millis()) - .map_err(|e| tracing::error!("Unable to claculate expected parachain core count: {e}")) + .map_err(|e| tracing::error!("Unable to calculate expected parachain core count: {e}")) .map(|expected_core_count| expected_core_count.max(1)) } diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs index d377cc7ae338..6ce8b467568a 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs @@ -109,7 +109,7 @@ async fn handle_collation_message( tracing::info!( target: LOG_TARGET, - "PoV size {{ header: {}kb, extrinsics: {}kb, storage_proof: {}kb }}", + "PoV size {{ header: {:.2}kB, extrinsics: {:.2}kB, storage_proof: {:.2}kB }}", block_data.header().encode().len() as f64 / 1024f64, block_data.extrinsics().encode().len() as f64 / 1024f64, block_data.storage_proof().encode().len() as f64 / 1024f64, diff --git a/cumulus/client/consensus/common/src/lib.rs b/cumulus/client/consensus/common/src/lib.rs index 08b7f7f59a2d..10ffef0d8aff 100644 --- a/cumulus/client/consensus/common/src/lib.rs +++ b/cumulus/client/consensus/common/src/lib.rs @@ -20,7 +20,7 @@ use polkadot_primitives::{ }; use cumulus_primitives_core::{ - relay_chain::{self}, + relay_chain, AbridgedHostConfiguration, }; use cumulus_relay_chain_interface::{RelayChainError, RelayChainInterface}; From a23d5a987e345f195127da00bd33b6d37243d7d4 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 29 May 2024 16:09:14 +0200 Subject: [PATCH 33/71] Address comments --- .../consensus/aura/src/collators/lookahead.rs | 2 - .../consensus/aura/src/collators/mod.rs | 29 +++++----- .../slot_based/block_builder_task.rs | 54 +++++++++++++----- .../consensus/common/src/parent_search.rs | 55 ++++++++++--------- 4 files changed, 83 insertions(+), 57 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/lookahead.rs b/cumulus/client/consensus/aura/src/collators/lookahead.rs index 358b0e9039e4..41cc285cd377 100644 --- a/cumulus/client/consensus/aura/src/collators/lookahead.rs +++ b/cumulus/client/consensus/aura/src/collators/lookahead.rs @@ -169,8 
+169,6 @@ where while let Some(relay_parent_header) = import_notifications.next().await { let relay_parent = relay_parent_header.hash(); - // TODO: Currently we use just the first core here, but for elastic scaling - // we iterate and build on all of the cores returned. let core_index = if let Some(core_index) = super::cores_scheduled_for_para( relay_parent, params.para_id, diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs index f37e50778983..52e0a3cd88df 100644 --- a/cumulus/client/consensus/aura/src/collators/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/mod.rs @@ -28,11 +28,11 @@ use cumulus_client_consensus_common::{ self as consensus_common, load_abridged_host_configuration, ParentSearchParams, }; use cumulus_primitives_aura::{AuraUnincludedSegmentApi, Slot}; -use cumulus_primitives_core::{relay_chain::Hash as PHash, BlockT}; +use cumulus_primitives_core::{relay_chain::Hash as ParaHash, BlockT}; use cumulus_relay_chain_interface::RelayChainInterface; use polkadot_primitives::{ - AsyncBackingParams, CoreIndex, CoreState, Hash as RHash, Id as ParaId, OccupiedCoreAssumption, - ValidationCodeHash, + AsyncBackingParams, CoreIndex, CoreState, Hash as RelayHash, Id as ParaId, + OccupiedCoreAssumption, ValidationCodeHash, }; use sc_consensus_aura::{standalone as aura_internal, AuraApi}; use sp_api::ProvideRuntimeApi; @@ -54,7 +54,7 @@ async fn check_validation_code_or_log( local_validation_code_hash: &ValidationCodeHash, para_id: ParaId, relay_client: &impl RelayChainInterface, - relay_parent: RHash, + relay_parent: RelayHash, ) { let state_validation_code_hash = match relay_client .validation_code_hash(relay_parent, para_id, OccupiedCoreAssumption::Included) @@ -98,7 +98,7 @@ async fn check_validation_code_or_log( /// Reads async backing parameters from the relay chain storage at the given relay parent. async fn async_backing_params( - relay_parent: RHash, + relay_parent: RelayHash, relay_client: &impl RelayChainInterface, ) -> Option { match load_abridged_host_configuration(relay_parent, relay_client).await { @@ -124,7 +124,7 @@ async fn async_backing_params( // Return all the cores assigned to the para at the provided relay parent. async fn cores_scheduled_for_para( - relay_parent: RHash, + relay_parent: RelayHash, para_id: ParaId, relay_client: &impl RelayChainInterface, ) -> VecDeque { @@ -193,10 +193,10 @@ where // Here we lean on the property that building on an empty unincluded segment must always // be legal. Skipping the runtime API query here allows us to seamlessly run this // collator against chains which have not yet upgraded their runtime. - if parent_hash != included_block { - if !runtime_api.can_build_upon(parent_hash, included_block, slot).ok()? { - return None - } + if parent_hash != included_block && + !runtime_api.can_build_upon(parent_hash, included_block, slot).ok()? + { + return None } Some(SlotClaim::unchecked::

(author_pub, slot, timestamp)) @@ -206,7 +206,7 @@ where /// we can build on. Once a list of potential parents is retrieved, return the last one of the /// longest chain. async fn find_parent( - relay_parent: PHash, + relay_parent: ParaHash, para_id: ParaId, para_backend: &impl sc_client_api::Backend, relay_client: &impl RelayChainInterface, @@ -231,7 +231,7 @@ where ) .await; - let mut potential_parents = match potential_parents { + let potential_parents = match potential_parents { Err(e) => { tracing::error!( target: crate::LOG_TARGET, @@ -249,5 +249,8 @@ where None => return None, // also serves as an `is_empty` check. Some(b) => b.hash, }; - potential_parents.into_iter().max_by_key(|a| a.depth).map(|parent| (included_block, parent)) + potential_parents + .into_iter() + .max_by_key(|a| a.depth) + .map(|parent| (included_block, parent)) } diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 8aed2ab0d7a7..e032f1083e01 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -46,7 +46,17 @@ use crate::{ }; /// Parameters for [`run_block_builder`]. -pub struct BuilderTaskParams { +pub struct BuilderTaskParams< + Block: BlockT, + BI, + CIDP, + Client, + Backend, + RelayClient, + CHP, + Proposer, + CS, +> { /// Inherent data providers. Only non-consensus inherent data should be provided, i.e. /// the timestamp, slot, and paras inherents should be omitted, as they are set by this /// collator. @@ -58,7 +68,7 @@ pub struct BuilderTaskParams, /// A handle to the relay-chain client. - pub relay_client: RClient, + pub relay_client: RelayClient, /// A validation code hash provider, used to get the current validation code hash. pub code_hash_provider: CHP, /// The underlying keystore, which should contain Aura consensus keys. @@ -102,7 +112,6 @@ fn duration_now() -> Duration { }) } -/// TODO For testing of slot drift, check if can be moved elsewhere. /// Returns the duration until the next slot from now. fn time_until_next_slot(slot_duration: Duration, drift: Duration) -> Duration { let now = duration_now().as_millis() - drift.as_millis(); @@ -126,24 +135,37 @@ where } /// Returns a future that resolves when the next slot arrives. - pub async fn wait_until_next_slot(&self) -> SlotInfo { - let slot_duration = match crate::slot_duration(&*self.client) { - Ok(s) => s, - Err(e) => { - tracing::error!(target: crate::LOG_TARGET, ?e, "Failed to fetch slot duration from runtime. Killing collator task."); - todo!(); - }, + pub async fn wait_until_next_slot(&self) -> Result { + let Ok(slot_duration) = crate::slot_duration(&*self.client) else { + tracing::error!(target: crate::LOG_TARGET, "Failed to fetch slot duration from runtime."); + return Err(()) }; + let time_until_next_slot = time_until_next_slot(slot_duration.as_duration(), self.drift); tokio::time::sleep(time_until_next_slot).await; let timestamp = sp_timestamp::Timestamp::current(); - SlotInfo { slot: Slot::from_timestamp(timestamp, slot_duration), timestamp, slot_duration } + Ok(SlotInfo { + slot: Slot::from_timestamp(timestamp, slot_duration), + timestamp, + slot_duration, + }) } } /// Run block-builder. 
-pub async fn run_block_builder( - params: BuilderTaskParams, +pub async fn run_block_builder< + Block, + P, + BI, + CIDP, + Client, + Backend, + RelayClient, + CHP, + Proposer, + CS, +>( + params: BuilderTaskParams, ) where Block: BlockT, Client: ProvideRuntimeApi @@ -158,7 +180,7 @@ pub async fn run_block_builder + CollectCollationInfo + AuraUnincludedSegmentApi, Backend: sc_client_api::Backend + 'static, - RClient: RelayChainInterface + Clone + 'static, + RelayClient: RelayChainInterface + Clone + 'static, CIDP: CreateInherentDataProviders + 'static, CIDP::InherentDataProviders: Send, BI: BlockImport + ParachainBlockImportMarker + Send + Sync + 'static, @@ -204,7 +226,9 @@ pub async fn run_block_builder. use codec::Decode; -use polkadot_primitives::{Hash as PHash, HeadData}; +use polkadot_primitives::Hash as RelayHash; use cumulus_primitives_core::{ relay_chain::{BlockId as RBlockId, OccupiedCoreAssumption}, @@ -27,7 +27,6 @@ use sc_client_api::{Backend, HeaderBackend}; use sp_blockchain::{Backend as BlockchainBackend, TreeRoute}; -use sp_core::H256; use sp_runtime::traits::{Block as BlockT, Header as HeaderT}; const PARENT_SEARCH_LOG_TARGET: &str = "consensus::common::find_potential_parents"; @@ -35,7 +34,7 @@ const PARENT_SEARCH_LOG_TARGET: &str = "consensus::common::find_potential_parent #[derive(Debug)] pub struct ParentSearchParams { /// The relay-parent that is intended to be used. - pub relay_parent: PHash, + pub relay_parent: RelayHash, /// The ID of the parachain. pub para_id: ParaId, /// A limitation on the age of relay parents for parachain blocks that are being @@ -56,7 +55,7 @@ pub struct PotentialParent { pub hash: B::Hash, /// The header of the block. pub header: B::Header, - /// The depth of the block. + /// The depth of the block with respect to the included block. pub depth: usize, /// Whether the block is the included block, is itself pending on-chain, or descends /// from the block pending availability. @@ -96,13 +95,9 @@ pub async fn find_potential_parents( ) -> Result>, RelayChainError> { tracing::trace!("Parent search parameters: {params:?}"); // Get the included block. - let Some((included_header, included_hash, pending_pvd)) = fetch_included_pending_from_relay( - relay_client, - backend, - params.para_id, - params.relay_parent, - ) - .await? + let Some((included_header, included_hash)) = + fetch_included_from_relay_chain(relay_client, backend, params.para_id, params.relay_parent) + .await? else { return Ok(Default::default()) }; @@ -120,8 +115,17 @@ pub async fn find_potential_parents( // Pending header and hash. let maybe_pending = { - // Try to decode the pending header. - let pending_header = pending_pvd.and_then(|p| B::Header::decode(&mut &p.0[..]).ok()); + // Fetch the pending header from the relay chain. We use `OccupiedCoreAssumption::Included` + // so the candidate pending availability gets enacted before being returned to us. + let pending_header = relay_client + .persisted_validation_data( + params.relay_parent, + params.para_id, + OccupiedCoreAssumption::Included, + ) + .await? + .and_then(|p| B::Header::decode(&mut &p.parent_head.0[..]).ok()) + .filter(|x| x.hash() != included_hash); // If the pending block is not locally known, we can't do anything. if let Some(header) = pending_header { @@ -223,13 +227,16 @@ pub async fn find_potential_parents( )) } -/// Fetch the included and pending block from the relay chain. -async fn fetch_included_pending_from_relay( +/// Fetch the included block from the relay chain. 
+async fn fetch_included_from_relay_chain( relay_client: &impl RelayChainInterface, backend: &impl Backend, para_id: ParaId, - relay_parent: PHash, -) -> Result)>, RelayChainError> { + relay_parent: RelayHash, +) -> Result, RelayChainError> { + // Fetch the pending header from the relay chain. We use `OccupiedCoreAssumption::TimedOut` + // so that even if there is a pending candidate, we assume it is timed out and we get the + // included head. let included_header = relay_client .persisted_validation_data(relay_parent, para_id, OccupiedCoreAssumption::TimedOut) .await?; @@ -238,12 +245,6 @@ async fn fetch_included_pending_from_relay( None => return Ok(None), // this implies the para doesn't exist. }; - // Fetch the pending header from the relay chain. - let pending_pvd = relay_client - .persisted_validation_data(relay_parent, para_id, OccupiedCoreAssumption::Included) - .await? - .and_then(|x| if x.parent_head != included_header { Some(x.parent_head) } else { None }); - let included_header = match B::Header::decode(&mut &included_header.0[..]).ok() { None => return Ok(None), Some(x) => x, @@ -263,7 +264,7 @@ async fn fetch_included_pending_from_relay( _ => {}, }; - Ok(Some((included_header, included_hash, pending_pvd))) + Ok(Some((included_header, included_hash))) } /// Build an ancestry of relay parents that are acceptable. @@ -277,9 +278,9 @@ async fn fetch_included_pending_from_relay( /// ancestry blocks. async fn build_relay_parent_ancestry( ancestry_lookback: usize, - relay_parent: PHash, + relay_parent: RelayHash, relay_client: &impl RelayChainInterface, -) -> Result, RelayChainError> { +) -> Result, RelayChainError> { let mut ancestry = Vec::with_capacity(ancestry_lookback + 1); let mut current_rp = relay_parent; let mut required_session = None; @@ -319,7 +320,7 @@ pub fn search_child_branches_for_parents( backend: &impl Backend, max_depth: usize, ignore_alternative_branches: bool, - rp_ancestry: Vec<(H256, H256)>, + rp_ancestry: Vec<(RelayHash, RelayHash)>, mut potential_parents: Vec>, ) -> Vec> { let included_hash = included_header.hash(); From 07478dc6e4bd9eae40dbc2ffba07895d504a52ed Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 29 May 2024 18:24:15 +0200 Subject: [PATCH 34/71] Remove unused parameters from zombienet --- cumulus/zombienet/tests/0008-elastic_authoring.toml | 2 -- .../elastic_scaling/0001-basic-3cores-6s-blocks.toml | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/cumulus/zombienet/tests/0008-elastic_authoring.toml b/cumulus/zombienet/tests/0008-elastic_authoring.toml index 7fd53c416ed9..75368db5bb91 100644 --- a/cumulus/zombienet/tests/0008-elastic_authoring.toml +++ b/cumulus/zombienet/tests/0008-elastic_authoring.toml @@ -7,11 +7,9 @@ timeout = 1000 [relaychain.genesis.runtimeGenesis.patch.configuration.config.scheduler_params] max_validators_per_core = 1 - scheduling_lookahead = 2 num_cores = 4 [relaychain.genesis.runtimeGenesis.patch.configuration.config.approval_voting_params] - needed_approvals = 3 max_approval_coalesce_count = 5 [relaychain] diff --git a/polkadot/zombienet_tests/elastic_scaling/0001-basic-3cores-6s-blocks.toml b/polkadot/zombienet_tests/elastic_scaling/0001-basic-3cores-6s-blocks.toml index 83f5434edddb..611978a33a5f 100644 --- a/polkadot/zombienet_tests/elastic_scaling/0001-basic-3cores-6s-blocks.toml +++ b/polkadot/zombienet_tests/elastic_scaling/0001-basic-3cores-6s-blocks.toml @@ -7,11 +7,9 @@ timeout = 1000 [relaychain.genesis.runtimeGenesis.patch.configuration.config.scheduler_params] 
max_validators_per_core = 1 - scheduling_lookahead = 2 num_cores = 3 [relaychain.genesis.runtimeGenesis.patch.configuration.config.approval_voting_params] - needed_approvals = 3 max_approval_coalesce_count = 5 [relaychain] @@ -48,4 +46,4 @@ addToGenesis = true [types.Header] number = "u64" parent_hash = "Hash" -post_state = "Hash" \ No newline at end of file +post_state = "Hash" From 15c36a9b8406cd132de86039af728d466aedfd07 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 29 May 2024 20:53:15 +0200 Subject: [PATCH 35/71] Introduce experimental CLI option --- cumulus/client/cli/src/lib.rs | 8 +++++++- cumulus/test/service/src/cli.rs | 3 --- cumulus/test/service/src/lib.rs | 7 ++----- cumulus/test/service/src/main.rs | 2 -- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/cumulus/client/cli/src/lib.rs b/cumulus/client/cli/src/lib.rs index a7b2eb19de88..2bf68cdaab34 100644 --- a/cumulus/client/cli/src/lib.rs +++ b/cumulus/client/cli/src/lib.rs @@ -307,6 +307,10 @@ pub struct RunCmd { /// Will use the specified relay chain chainspec. #[arg(long, conflicts_with_all = ["relay_chain_rpc_urls", "collator"])] pub relay_chain_light_client: bool, + + /// EXPERIMENTAL: Use slot-based collator which can handle elastic scaling. Use with care, this flag is unstable and subject to change. + #[arg(long)] + pub experimental_use_slot_based: bool, } impl RunCmd { @@ -329,7 +333,7 @@ impl RunCmd { _ => RelayChainMode::Embedded, }; - CollatorOptions { relay_chain_mode } + CollatorOptions { relay_chain_mode, use_slot_based: self.experimental_use_slot_based } } } @@ -349,6 +353,8 @@ pub enum RelayChainMode { pub struct CollatorOptions { /// How this collator retrieves relay chain information pub relay_chain_mode: RelayChainMode, + /// Use slot based collator + pub use_slot_based: bool, } /// A non-redundant version of the `RunCmd` that sets the `validator` field when the diff --git a/cumulus/test/service/src/cli.rs b/cumulus/test/service/src/cli.rs index 79d557e9db82..58d53fde19fc 100644 --- a/cumulus/test/service/src/cli.rs +++ b/cumulus/test/service/src/cli.rs @@ -45,9 +45,6 @@ pub struct TestCollatorCli { #[arg(long)] pub use_null_consensus: bool, - #[arg(long)] - pub use_slot_authoring: bool, - #[arg(long)] pub disable_block_announcements: bool, diff --git a/cumulus/test/service/src/lib.rs b/cumulus/test/service/src/lib.rs index f35e4535196e..a9dea1d94df8 100644 --- a/cumulus/test/service/src/lib.rs +++ b/cumulus/test/service/src/lib.rs @@ -318,7 +318,6 @@ pub async fn start_node_impl>( consensus: Consensus, collator_options: CollatorOptions, proof_recording_during_import: bool, - slot_based_authoring: bool, ) -> sc_service::error::Result<( TaskManager, Arc, @@ -465,7 +464,7 @@ where let client_for_aura = client.clone(); - if slot_based_authoring { + if collator_options.use_slot_based { tracing::info!(target: LOG_TARGET, "Starting block authoring with slot based authoring."); let params = SlotBasedParams { create_inherent_data_providers: move |_, ()| async move { Ok(()) }, @@ -746,7 +745,7 @@ impl TestNodeBuilder { false, ); - let collator_options = CollatorOptions { relay_chain_mode: self.relay_chain_mode }; + let collator_options = CollatorOptions { relay_chain_mode: self.relay_chain_mode, use_slot_based: false }; relay_chain_config.network.node_name = format!("{} (relay chain)", relay_chain_config.network.node_name); @@ -766,7 +765,6 @@ impl TestNodeBuilder { self.consensus, collator_options, self.record_proof_during_import, - false, ) .await .expect("could not create Cumulus test 
service"), @@ -782,7 +780,6 @@ impl TestNodeBuilder { self.consensus, collator_options, self.record_proof_during_import, - false, ) .await .expect("could not create Cumulus test service"), diff --git a/cumulus/test/service/src/main.rs b/cumulus/test/service/src/main.rs index 96a1155f3abe..90d37173dd59 100644 --- a/cumulus/test/service/src/main.rs +++ b/cumulus/test/service/src/main.rs @@ -118,7 +118,6 @@ fn main() -> Result<(), sc_cli::Error> { consensus, collator_options, true, - cli.use_slot_authoring, ) .await, sc_network::config::NetworkBackendType::Litep2p => @@ -136,7 +135,6 @@ fn main() -> Result<(), sc_cli::Error> { consensus, collator_options, true, - cli.use_slot_authoring, ) .await, } From 65cdc010ea9dbbec1203aef603995dc2d59a6c12 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 29 May 2024 21:43:30 +0200 Subject: [PATCH 36/71] fmt --- cumulus/client/cli/src/lib.rs | 3 ++- cumulus/client/consensus/common/src/lib.rs | 5 +---- cumulus/test/service/src/lib.rs | 3 ++- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/cumulus/client/cli/src/lib.rs b/cumulus/client/cli/src/lib.rs index 2bf68cdaab34..b3040aca5ed9 100644 --- a/cumulus/client/cli/src/lib.rs +++ b/cumulus/client/cli/src/lib.rs @@ -308,7 +308,8 @@ pub struct RunCmd { #[arg(long, conflicts_with_all = ["relay_chain_rpc_urls", "collator"])] pub relay_chain_light_client: bool, - /// EXPERIMENTAL: Use slot-based collator which can handle elastic scaling. Use with care, this flag is unstable and subject to change. + /// EXPERIMENTAL: Use slot-based collator which can handle elastic scaling. Use with care, this + /// flag is unstable and subject to change. #[arg(long)] pub experimental_use_slot_based: bool, } diff --git a/cumulus/client/consensus/common/src/lib.rs b/cumulus/client/consensus/common/src/lib.rs index 10ffef0d8aff..b6bf7cef8a4a 100644 --- a/cumulus/client/consensus/common/src/lib.rs +++ b/cumulus/client/consensus/common/src/lib.rs @@ -19,10 +19,7 @@ use polkadot_primitives::{ Block as PBlock, Hash as PHash, Header as PHeader, PersistedValidationData, ValidationCodeHash, }; -use cumulus_primitives_core::{ - relay_chain, - AbridgedHostConfiguration, -}; +use cumulus_primitives_core::{relay_chain, AbridgedHostConfiguration}; use cumulus_relay_chain_interface::{RelayChainError, RelayChainInterface}; use sc_client_api::Backend; diff --git a/cumulus/test/service/src/lib.rs b/cumulus/test/service/src/lib.rs index ded61ae90fd9..15118ce9aad6 100644 --- a/cumulus/test/service/src/lib.rs +++ b/cumulus/test/service/src/lib.rs @@ -746,7 +746,8 @@ impl TestNodeBuilder { false, ); - let collator_options = CollatorOptions { relay_chain_mode: self.relay_chain_mode, use_slot_based: false }; + let collator_options = + CollatorOptions { relay_chain_mode: self.relay_chain_mode, use_slot_based: false }; relay_chain_config.network.node_name = format!("{} (relay chain)", relay_chain_config.network.node_name); From 5dcea6c3659f78911c14be5e69531eb2a805b97c Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 29 May 2024 22:28:12 +0200 Subject: [PATCH 37/71] Adjust zombienet parameter --- cumulus/zombienet/tests/0008-elastic_authoring.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cumulus/zombienet/tests/0008-elastic_authoring.toml b/cumulus/zombienet/tests/0008-elastic_authoring.toml index 75368db5bb91..80425f74c7b3 100644 --- a/cumulus/zombienet/tests/0008-elastic_authoring.toml +++ b/cumulus/zombienet/tests/0008-elastic_authoring.toml @@ -36,7 +36,7 @@ add_to_genesis = true name = 
"collator-elastic" image = "{{CUMULUS_IMAGE}}" command = "test-parachain" - args = ["-laura=trace,runtime=info,cumulus-consensus=trace,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--use-slot-authoring"] + args = ["-laura=trace,runtime=info,cumulus-consensus=trace,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--experimental-use-slot-based"] # Slot based authoring with 1 core and 6s slot duration [[parachains]] @@ -47,4 +47,4 @@ add_to_genesis = true name = "collator-single-core" image = "{{CUMULUS_IMAGE}}" command = "test-parachain" - args = ["-laura=trace,runtime=info,cumulus-consensus=trace,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--use-slot-authoring"] + args = ["-laura=trace,runtime=info,cumulus-consensus=trace,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--experimental-use-slot-based"] From 27eb643bb6b955869323a4396790fc9318633e27 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Fri, 31 May 2024 15:37:21 +0200 Subject: [PATCH 38/71] Remove +1 on velocity --- cumulus/pallets/aura-ext/src/consensus_hook.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cumulus/pallets/aura-ext/src/consensus_hook.rs b/cumulus/pallets/aura-ext/src/consensus_hook.rs index 40276c035f6d..9e93ea164a6f 100644 --- a/cumulus/pallets/aura-ext/src/consensus_hook.rs +++ b/cumulus/pallets/aura-ext/src/consensus_hook.rs @@ -76,7 +76,7 @@ where ); } - if authored > velocity + 1 { + if authored > velocity { panic!("authored blocks limit is reached for the slot") } let weight = T::DbWeight::get().reads(1); From 99e5741df244178e013e6bbe2caa35462f47b7c4 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Fri, 31 May 2024 18:12:08 +0200 Subject: [PATCH 39/71] Reduce number of relay chain fetches --- .../consensus/aura/src/collators/mod.rs | 6 +- .../slot_based/block_builder_task.rs | 113 ++++++++++++++---- .../consensus/common/src/parent_search.rs | 18 ++- 3 files changed, 100 insertions(+), 37 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs index 52e0a3cd88df..6935a7ddf2c2 100644 --- a/cumulus/client/consensus/aura/src/collators/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/mod.rs @@ -20,8 +20,6 @@ //! included parachain block, as well as the [`lookahead`] collator, which prospectively //! builds on parachain blocks which have not yet been included in the relay chain. 
-use std::collections::VecDeque; - use crate::collator::SlotClaim; use codec::Codec; use cumulus_client_consensus_common::{ @@ -127,7 +125,7 @@ async fn cores_scheduled_for_para( relay_parent: RelayHash, para_id: ParaId, relay_client: &impl RelayChainInterface, -) -> VecDeque { +) -> Vec { // Get `AvailabilityCores` from runtime let cores = match relay_client.availability_cores(relay_parent).await { Ok(cores) => cores, @@ -138,7 +136,7 @@ async fn cores_scheduled_for_para( ?relay_parent, "Failed to query availability cores runtime API", ); - return VecDeque::new() + return Vec::new() }, }; diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index e032f1083e01..e457a7b7b5c8 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -23,7 +23,10 @@ use cumulus_primitives_aura::AuraUnincludedSegmentApi; use cumulus_primitives_core::{CollectCollationInfo, PersistedValidationData}; use cumulus_relay_chain_interface::RelayChainInterface; -use polkadot_primitives::{BlockId, Id as ParaId, OccupiedCoreAssumption}; +use polkadot_primitives::{ + BlockId, CoreIndex, Hash as RelayHash, Header as RelayHeader, Id as ParaId, + OccupiedCoreAssumption, +}; use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf, UsageProvider}; use sc_consensus::BlockImport; @@ -224,6 +227,8 @@ pub async fn run_block_builder< collator_util::Collator::::new(params) }; + let mut relay_chain_fetcher = RelayChainCachingFetcher::new(relay_client.clone(), para_id); + loop { // We wait here until the next slot arrives. let Ok(para_slot) = slot_timer.wait_until_next_slot().await else { @@ -236,12 +241,16 @@ pub async fn run_block_builder< return }; - let Ok(relay_parent) = relay_client.best_block_hash().await else { - tracing::warn!("Unable to fetch latest relay chain block hash, skipping slot."); - continue + let Ok(RelayChainData { + relay_parent_header, + max_pov_size, + relay_parent_hash: relay_parent, + scheduled_cores, + }) = relay_chain_fetcher.get_relay_chain_data().await + else { + continue; }; - let scheduled_cores = cores_scheduled_for_para(relay_parent, para_id, &relay_client).await; if scheduled_cores.is_empty() { tracing::debug!(target: LOG_TARGET, "Parachain not scheduled, skipping slot."); continue; @@ -253,24 +262,6 @@ pub async fn run_block_builder< continue; }; - let Ok(Some(relay_parent_header)) = relay_client.header(BlockId::Hash(relay_parent)).await - else { - tracing::warn!("Unable to fetch latest relay chain block header."); - continue; - }; - - let max_pov_size = match relay_client - .persisted_validation_data(relay_parent, para_id, OccupiedCoreAssumption::Included) - .await - { - Ok(None) => continue, - Ok(Some(pvd)) => pvd.max_pov_size, - Err(err) => { - tracing::error!(target: crate::LOG_TARGET, ?err, "Failed to gather information from relay-client"); - continue - }, - }; - let (included_block, parent) = match crate::collators::find_parent( relay_parent, para_id, @@ -411,3 +402,79 @@ fn expected_core_count( .map_err(|e| tracing::error!("Unable to calculate expected parachain core count: {e}")) .map(|expected_core_count| expected_core_count.max(1)) } + +#[derive(Clone)] +struct RelayChainData { + pub relay_parent_header: RelayHeader, + pub scheduled_cores: Vec, + pub max_pov_size: u32, + pub relay_parent_hash: RelayHash, +} + +struct RelayChainCachingFetcher { + 
relay_client: RI, + para_id: ParaId, + last_seen_hash: Option, + last_data: Option, +} + +impl RelayChainCachingFetcher +where + RI: RelayChainInterface + Clone + 'static, +{ + pub fn new(relay_client: RI, para_id: ParaId) -> Self { + Self { relay_client, para_id, last_seen_hash: None, last_data: None } + } + + pub async fn get_relay_chain_data(&mut self) -> Result { + let Ok(relay_parent) = self.relay_client.best_block_hash().await else { + tracing::warn!(target: crate::LOG_TARGET, "Unable to fetch latest relay chain block hash."); + return Err(()) + }; + + if self.last_seen_hash.is_some_and(|h| h == relay_parent) { + if let Some(data) = self.last_data.as_ref() { + tracing::trace!(target: crate::LOG_TARGET, %relay_parent, "Using cached data for relay parent."); + + return Ok(data.clone()) + } + } + + tracing::trace!(target: crate::LOG_TARGET, %relay_parent, "Relay chain best block changed, fetching new data from relay chain."); + let data = self.update_for_relay_parent(relay_parent).await?; + self.last_seen_hash = Some(relay_parent); + self.last_data = Some(data.clone()); + Ok(data) + } + + async fn update_for_relay_parent(&self, relay_parent: RelayHash) -> Result { + let scheduled_cores = + cores_scheduled_for_para(relay_parent, self.para_id, &self.relay_client).await; + let Ok(Some(relay_parent_header)) = + self.relay_client.header(BlockId::Hash(relay_parent)).await + else { + tracing::warn!(target: crate::LOG_TARGET, "Unable to fetch latest relay chain block header."); + return Err(()) + }; + + let max_pov_size = match self + .relay_client + .persisted_validation_data(relay_parent, self.para_id, OccupiedCoreAssumption::Included) + .await + { + Ok(None) => return Err(()), + Ok(Some(pvd)) => pvd.max_pov_size, + Err(err) => { + tracing::error!(target: crate::LOG_TARGET, ?err, "Failed to gather information from relay-client"); + return Err(()) + }, + }; + + Ok(RelayChainData { + relay_parent_hash: relay_parent, + relay_parent_header, + scheduled_cores, + max_pov_size, + }) + } +} diff --git a/cumulus/client/consensus/common/src/parent_search.rs b/cumulus/client/consensus/common/src/parent_search.rs index e042bd1d8f26..cbe747f5f4cb 100644 --- a/cumulus/client/consensus/common/src/parent_search.rs +++ b/cumulus/client/consensus/common/src/parent_search.rs @@ -149,7 +149,7 @@ pub async fn find_potential_parents( } }; - let maybe_route = maybe_pending + let maybe_route_to_last_pending = maybe_pending .as_ref() .map(|(_, pending)| { sp_blockchain::tree_route(backend.blockchain(), included_hash, *pending) @@ -162,7 +162,7 @@ pub async fn find_potential_parents( let (frontier, potential_parents) = match ( &maybe_pending, params.ignore_alternative_branches, - &maybe_route, + &maybe_route_to_last_pending, ) { (Some((pending_header, pending_hash)), true, Some(ref route_to_pending)) => { let mut potential_parents = only_included; @@ -216,7 +216,7 @@ pub async fn find_potential_parents( Ok(search_child_branches_for_parents( frontier, - maybe_route, + maybe_route_to_last_pending, included_header, maybe_pending.map(|(_, hash)| hash), backend, @@ -314,7 +314,7 @@ async fn build_relay_parent_ancestry( /// Start search for child blocks that can be used as parents. pub fn search_child_branches_for_parents( mut frontier: Vec>, - maybe_route_to_pending: Option>, + maybe_route_to_last_pending: Option>, included_header: Block::Header, pending_hash: Option, backend: &impl Backend, @@ -329,19 +329,17 @@ pub fn search_child_branches_for_parents( // The distance between pending and included block. 
Is later used to check if a child
 // is aligned with pending when it is between pending and included block.
- let pending_distance = maybe_route_to_pending.as_ref().map(|route| route.enacted().len());
+ let pending_distance = maybe_route_to_last_pending.as_ref().map(|route| route.enacted().len());
 
 // If a block is on the path included -> pending, we consider it `aligned_with_pending`.
- let is_child_in_path_to_pending = |hash| {
- maybe_route_to_pending
+ let is_child_pending = |hash| {
+ maybe_route_to_last_pending
 .as_ref()
 .map_or(true, |route| route.enacted().iter().any(|x| x.hash == hash))
 };
 
 tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?included_hash, included_num = ?included_header.number(), ?pending_hash , ?rp_ancestry, "Searching relay chain ancestry.");
 
 while let Some(entry) = frontier.pop() {
- // TODO Adjust once we can fetch multiple pending blocks.
- // https://github.com/paritytech/polkadot-sdk/issues/3967
 let is_pending = pending_hash.as_ref().map_or(false, |h| &entry.hash == h);
 let is_included = included_hash == entry.hash;
 
@@ -380,7 +378,7 @@ pub fn search_child_branches_for_parents(
 let aligned_with_pending = parent_aligned_with_pending &&
 (pending_distance.map_or(true, |dist| child_depth > dist) ||
 pending_hash.as_ref().map_or(true, |h| &child == h) ||
- is_child_in_path_to_pending(child));
+ is_child_pending(child));
 
 if ignore_alternative_branches && !aligned_with_pending {
 tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?child, "Child is not aligned with pending block.");
From 944044df24a55d00daf9d7413bb7d20552a81c3d Mon Sep 17 00:00:00 2001
From: Sebastian Kunert
Date: Tue, 4 Jun 2024 15:59:25 +0200
Subject: [PATCH 40/71] Add comments to `RelayChainCachingFetcher`

---
 .../src/collators/slot_based/block_builder_task.rs | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs
index e457a7b7b5c8..f373ac141d09 100644
--- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs
+++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs
@@ -403,14 +403,21 @@ fn expected_core_count(
 .map(|expected_core_count| expected_core_count.max(1))
 }
 
+/// Contains relay chain data necessary for parachain block building.
 #[derive(Clone)]
 struct RelayChainData {
+ /// Current relay chain parent header.
 pub relay_parent_header: RelayHeader,
+ /// The cores this para is scheduled on in the context of the relay parent.
 pub scheduled_cores: Vec,
+ /// Maximum configured PoV size on the relay chain.
 pub max_pov_size: u32,
+ /// Hash of the current relay chain parent header.
 pub relay_parent_hash: RelayHash,
 }
 
+/// Simple helper to fetch relay chain data and cache it based on the current relay chain best block
+/// hash.
 struct RelayChainCachingFetcher {
 relay_client: RI,
 para_id: ParaId,
@@ -426,6 +433,9 @@ where
 Self { relay_client, para_id, last_seen_hash: None, last_data: None }
 }
 
+ /// Fetch required [`RelayChainData`] from the relay chain.
+ /// If this data has already been fetched for the incoming hash, the cached data is
+ /// reused.
 pub async fn get_relay_chain_data(&mut self) -> Result {
 let Ok(relay_parent) = self.relay_client.best_block_hash().await else {
 tracing::warn!(target: crate::LOG_TARGET, "Unable to fetch latest relay chain block hash.");
@@ -447,6 +457,7 @@ where
 Ok(data)
 }
 
+ /// Fetch fresh data from the relay chain for the given relay parent hash. 
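+ /// Returns `Err(())` if the data could not be fetched from the relay chain.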
async fn update_for_relay_parent(&self, relay_parent: RelayHash) -> Result { let scheduled_cores = cores_scheduled_for_para(relay_parent, self.para_id, &self.relay_client).await; From 41dba0211df1cef1f6e973ba8a3855335aba47ba Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 4 Jun 2024 16:45:04 +0200 Subject: [PATCH 41/71] More comment adjustments --- .../aura/src/collators/slot_based/block_builder_task.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index f373ac141d09..91e203a68478 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -398,9 +398,10 @@ fn expected_core_count( relay_chain_slot_duration: Duration, slot_duration: SlotDuration, ) -> Result { - u64::try_from(relay_chain_slot_duration.as_millis() / slot_duration.as_duration().as_millis()) + let slot_duration_millis = slot_duration.as_millis(); + u64::try_from(relay_chain_slot_duration.as_millis()) .map_err(|e| tracing::error!("Unable to calculate expected parachain core count: {e}")) - .map(|expected_core_count| expected_core_count.max(1)) + .map(|relay_slot_duration| (relay_slot_duration / slot_duration_millis).max(1)) } /// Contains relay chain data necessary for parachain block building. From 0b061f670dbc8684ed26826313d7f5bafcbeff02 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 4 Jun 2024 17:13:50 +0200 Subject: [PATCH 42/71] Fix template build --- templates/parachain/node/src/service.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/templates/parachain/node/src/service.rs b/templates/parachain/node/src/service.rs index 8ca94895406c..1a753e30bfbc 100644 --- a/templates/parachain/node/src/service.rs +++ b/templates/parachain/node/src/service.rs @@ -202,7 +202,6 @@ fn start_consensus( code_hash_provider: move |block_hash| { client.code_at(block_hash).ok().map(|c| ValidationCode::from(c).hash()) }, - sync_oracle, keystore, collator_key, para_id, @@ -213,10 +212,9 @@ fn start_consensus( authoring_duration: Duration::from_millis(2000), reinitialize: false, }; - let fut = - aura::run::( - params, - ); + let fut = aura::run::( + params, + ); task_manager.spawn_essential_handle().spawn("aura", None, fut); Ok(()) @@ -386,6 +384,7 @@ pub async fn start_parachain_node( if validator { start_consensus( client.clone(), + backend, block_import, prometheus_registry.as_ref(), telemetry.as_ref().map(|t| t.handle()), From 8be5db4680b7d4ecf6de7942588b534cc8a65ebd Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 13 Jun 2024 13:52:38 +0200 Subject: [PATCH 43/71] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bastian Köcher --- cumulus/client/consensus/aura/src/collator.rs | 2 +- cumulus/client/consensus/aura/src/collators/mod.rs | 5 +---- .../src/collators/slot_based/block_builder_task.rs | 10 +++++----- .../aura/src/collators/slot_based/collation_task.rs | 6 +++--- cumulus/client/consensus/common/src/parent_search.rs | 6 +++--- 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collator.rs b/cumulus/client/consensus/aura/src/collator.rs index e9f65d5b4a99..dc830e463a4f 100644 --- a/cumulus/client/consensus/aura/src/collator.rs +++ 
b/cumulus/client/consensus/aura/src/collator.rs
@@ -212,7 +212,7 @@ where
 Ok(Some(ParachainCandidate { block, proof: proposal.proof }))
 }
 
- /// Propose, seal, and import a block, packaging it into a collation.
+ /// Propose, seal, and import a block, then package it into a collation.
 ///
 /// Provide the slot to build at as well as any other necessary pre-digest logs,
 /// the inherent data, and the proposal duration and PoV size limits.
diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs
index 6935a7ddf2c2..95e5fb25084c 100644
--- a/cumulus/client/consensus/aura/src/collators/mod.rs
+++ b/cumulus/client/consensus/aura/src/collators/mod.rs
@@ -243,10 +243,7 @@ where
 Ok(x) => x,
 };
 
- let included_block = match potential_parents.iter().find(|x| x.depth == 0) {
- None => return None, // also serves as an `is_empty` check.
- Some(b) => b.hash,
- };
+ let included_block = potential_parents.iter().find(|x| x.depth == 0)?.hash;
 potential_parents
 .into_iter()
 .max_by_key(|a| a.depth)
diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs
index 91e203a68478..69dd52ab19c3 100644
--- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs
+++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs
@@ -262,16 +262,15 @@ pub async fn run_block_builder<
 continue;
 };
 
- let (included_block, parent) = match crate::collators::find_parent(
+ let Some((included_block, parent)) = crate::collators::find_parent(
 relay_parent,
 para_id,
 &*para_backend,
 &relay_client,
 )
- .await
+ .await
 else {
- Some(value) => value,
- None => continue,
+ continue
 };
 
 let parent_header = parent.header;
@@ -384,6 +383,7 @@ pub async fn run_block_builder<
 core_index: *core_index,
 }) {
 tracing::error!(target: crate::LOG_TARGET, ?err, "Unable to send block to collation task.");
+ break
 }
 }
 }
@@ -397,7 +397,7 @@ pub async fn run_block_builder<
 fn expected_core_count(
 relay_chain_slot_duration: Duration,
 slot_duration: SlotDuration,
-) -> Result {
+) -> Option {
 let slot_duration_millis = slot_duration.as_millis();
 u64::try_from(relay_chain_slot_duration.as_millis())
 .map_err(|e| tracing::error!("Unable to calculate expected parachain core count: {e}"))
diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs
index 6ce8b467568a..44e3c6846e29 100644
--- a/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs
+++ b/cumulus/client/consensus/aura/src/collators/slot_based/collation_task.rs
@@ -110,9 +110,9 @@ async fn handle_collation_message(
 tracing::info!(
 target: LOG_TARGET,
 "PoV size {{ header: {:.2}kB, extrinsics: {:.2}kB, storage_proof: {:.2}kB }}",
- block_data.header().encode().len() as f64 / 1024f64,
- block_data.extrinsics().encode().len() as f64 / 1024f64,
- block_data.storage_proof().encode().len() as f64 / 1024f64,
+ block_data.header().encoded_len() as f64 / 1024f64,
+ block_data.extrinsics().encoded_len() as f64 / 1024f64,
+ block_data.storage_proof().encoded_len() as f64 / 1024f64,
 );
 
 if let MaybeCompressedPoV::Compressed(ref pov) = collation.proof_of_validity {
diff --git a/cumulus/client/consensus/common/src/parent_search.rs b/cumulus/client/consensus/common/src/parent_search.rs
index cbe747f5f4cb..441d481aac55 100644
--- a/cumulus/client/consensus/common/src/parent_search.rs
+++ 
b/cumulus/client/consensus/common/src/parent_search.rs @@ -30,6 +30,7 @@ use sp_blockchain::{Backend as BlockchainBackend, TreeRoute}; use sp_runtime::traits::{Block as BlockT, Header as HeaderT}; const PARENT_SEARCH_LOG_TARGET: &str = "consensus::common::find_potential_parents"; + /// Parameters when searching for suitable parents to build on top of. #[derive(Debug)] pub struct ParentSearchParams { @@ -285,9 +286,8 @@ async fn build_relay_parent_ancestry( let mut current_rp = relay_parent; let mut required_session = None; while ancestry.len() <= ancestry_lookback { - let header = match relay_client.header(RBlockId::hash(current_rp)).await? { - None => break, - Some(h) => h, + let Some(header) = relay_client.header(RBlockId::hash(current_rp)).await? else { + break }; let session = relay_client.session_index_for_child(current_rp).await?; From 6c97bd1986253d616e5b717df66dc7c840851e99 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 13 Jun 2024 17:45:45 +0200 Subject: [PATCH 44/71] Merge option in relay chain fetcher --- .../slot_based/block_builder_task.rs | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index fff1f0e284e4..3f0692aa92ea 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -419,8 +419,7 @@ struct RelayChainData { struct RelayChainCachingFetcher { relay_client: RI, para_id: ParaId, - last_seen_hash: Option, - last_data: Option, + last_data: Option<(RelayHash, RelayChainData)>, } impl RelayChainCachingFetcher @@ -428,7 +427,7 @@ where RI: RelayChainInterface + Clone + 'static, { pub fn new(relay_client: RI, para_id: ParaId) -> Self { - Self { relay_client, para_id, last_seen_hash: None, last_data: None } + Self { relay_client, para_id, last_data: None } } /// Fetch required [`RelayChainData`] from the relay chain. @@ -440,19 +439,18 @@ where return Err(()) }; - if self.last_seen_hash.is_some_and(|h| h == relay_parent) { - if let Some(data) = self.last_data.as_ref() { + match &self.last_data { + Some((last_seen_hash, data)) if *last_seen_hash == relay_parent => { tracing::trace!(target: crate::LOG_TARGET, %relay_parent, "Using cached data for relay parent."); - - return Ok(data.clone()) - } + Ok(data.clone()) + }, + _ => { + tracing::trace!(target: crate::LOG_TARGET, %relay_parent, "Relay chain best block changed, fetching new data from relay chain."); + let data = self.update_for_relay_parent(relay_parent).await?; + self.last_data = Some((relay_parent, data.clone())); + Ok(data) + }, } - - tracing::trace!(target: crate::LOG_TARGET, %relay_parent, "Relay chain best block changed, fetching new data from relay chain."); - let data = self.update_for_relay_parent(relay_parent).await?; - self.last_seen_hash = Some(relay_parent); - self.last_data = Some(data.clone()); - Ok(data) } /// Fetch fresh data from the relay chain for the given relay parent hash. 
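Note: merging the two fields into a single `Option<(RelayHash, RelayChainData)>` makes the invalid states (a cached hash without data, or data without the hash it belongs to) unrepresentable. A minimal, self-contained sketch of the resulting pattern; the type and function names here are illustrative only, not part of the actual API:

    /// Caches the last fetched value together with the key it was fetched for.
    struct CachingFetcher<K: PartialEq + Copy, V: Clone> {
        last_data: Option<(K, V)>,
    }

    impl<K: PartialEq + Copy, V: Clone> CachingFetcher<K, V> {
        fn get_or_fetch(
            &mut self,
            key: K,
            fetch: impl FnOnce(K) -> Result<V, ()>,
        ) -> Result<V, ()> {
            match &self.last_data {
                // Key unchanged since the last call: reuse the cached data.
                Some((last_key, data)) if *last_key == key => Ok(data.clone()),
                // New key: fetch fresh data and remember it for next time.
                _ => {
                    let data = fetch(key)?;
                    self.last_data = Some((key, data.clone()));
                    Ok(data)
                },
            }
        }
    }
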
From 0ae68d331dda59fca9c1a735811ef91c426e3a1f Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 13 Jun 2024 17:46:28 +0200 Subject: [PATCH 45/71] Spawn futures inside collator --- .../aura/src/collators/slot_based/mod.rs | 28 +++++++++++++------ cumulus/test/service/src/lib.rs | 14 ++-------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs index e6a86fac61c5..7e5637526c0f 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs @@ -56,7 +56,7 @@ use sp_api::ProvideRuntimeApi; use sp_application_crypto::AppPublic; use sp_blockchain::HeaderBackend; use sp_consensus_aura::AuraApi; -use sp_core::crypto::Pair; +use sp_core::{crypto::Pair, traits::SpawnEssentialNamed}; use sp_inherents::CreateInherentDataProviders; use sp_keystore::KeystorePtr; use sp_runtime::traits::{Block as BlockT, Member}; @@ -69,7 +69,7 @@ mod block_builder_task; mod collation_task; /// Parameters for [`run`]. -pub struct Params { +pub struct Params { /// Inherent data providers. Only non-consensus inherent data should be provided, i.e. /// the timestamp, slot, and paras inherents should be omitted, as they are set by this /// collator. @@ -103,13 +103,14 @@ pub struct Params { /// Drift slots by a fixed duration. This can be used to create more preferrable authoring /// timings. pub slot_drift: Duration, + /// Spawn handle to spawn the block building and collation tasks. + pub spawn_handle: SpawnHandle, } /// Run aura-based block building and collation task. -pub fn run( - params: Params, -) -> (impl futures::Future, impl futures::Future) -where +pub fn run( + params: Params, +) where Block: BlockT, Client: ProvideRuntimeApi + BlockOf @@ -130,9 +131,10 @@ where Proposer: ProposerInterface + Send + Sync + 'static, CS: CollatorServiceInterface + Send + Sync + Clone + 'static, CHP: consensus_common::ValidationCodeHashProvider + Send + 'static, - P: Pair + 'static, + P: Pair + Send + Sync + 'static, P::Public: AppPublic + Member + Codec, P::Signature: TryFrom> + Member + Codec, + SpawnHandle: SpawnEssentialNamed, { let (tx, rx) = tracing_unbounded("mpsc_builder_to_collator", 100); let collator_task_params = collation_task::Params { @@ -166,7 +168,17 @@ where let block_builder_fut = run_block_builder::(block_builder_params); - (collation_task_fut, block_builder_fut) + params.spawn_handle.spawn_essential_blocking( + "collation-task", + Some("parachain-authoring"), + Box::pin(collation_task_fut), + ); + + params.spawn_handle.spawn_essential_blocking( + "parachain-block-builder-task", + Some("parachain-authoring"), + Box::pin(block_builder_fut), + ); } /// Message to be sent from the block builder to the collation task. 
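Because the tasks are spawned as essential here, a crash in either of them now brings the node down instead of stalling block production silently. Roughly, the spawning inside the collator looks like the following sketch (task bodies elided as placeholders; the task names and group label match the diff above):

    use sp_core::traits::SpawnEssentialNamed;

    // Sketch: the collator owns its two long-running tasks and spawns them
    // as essential tasks via the handle passed in through `Params`.
    fn spawn_collator_tasks(handle: impl SpawnEssentialNamed) {
        // Placeholder bodies; in the real code these are the collation task
        // and the block builder task constructed from the collator params.
        let collation_task_fut = async {};
        let block_builder_fut = async {};

        handle.spawn_essential_blocking(
            "collation-task",
            Some("parachain-authoring"),
            Box::pin(collation_task_fut),
        );
        handle.spawn_essential_blocking(
            "parachain-block-builder-task",
            Some("parachain-authoring"),
            Box::pin(block_builder_fut),
        );
    }
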
diff --git a/cumulus/test/service/src/lib.rs b/cumulus/test/service/src/lib.rs index 15118ce9aad6..f004bab17064 100644 --- a/cumulus/test/service/src/lib.rs +++ b/cumulus/test/service/src/lib.rs @@ -488,20 +488,10 @@ where authoring_duration: Duration::from_millis(2000), reinitialize: false, slot_drift: Duration::from_secs(1), + spawn_handle: task_manager.spawn_essential_handle(), }; - let (collation_future, block_builer_future) = - slot_based::run::(params); - task_manager.spawn_essential_handle().spawn( - "collation-task", - None, - collation_future, - ); - task_manager.spawn_essential_handle().spawn( - "block-builder-task", - None, - block_builer_future, - ); + slot_based::run::(params); } else { tracing::info!(target: LOG_TARGET, "Starting block authoring with lookahead collator."); let params = AuraParams { From 767430ec38ba2e3c1af58c3d9b1993e6a4fa706d Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 13 Jun 2024 17:47:25 +0200 Subject: [PATCH 46/71] Do not build if pending parent is not in db --- cumulus/client/consensus/common/src/parent_search.rs | 6 ++---- cumulus/client/consensus/common/src/tests.rs | 11 ++--------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/cumulus/client/consensus/common/src/parent_search.rs b/cumulus/client/consensus/common/src/parent_search.rs index 441d481aac55..b84f21f6f588 100644 --- a/cumulus/client/consensus/common/src/parent_search.rs +++ b/cumulus/client/consensus/common/src/parent_search.rs @@ -140,7 +140,7 @@ pub async fn find_potential_parents( %pending_hash, "Failed to get header for pending block.", ); - return Ok(only_included) + return Ok(Default::default()) }, Ok(Some(_)) => Some((header, pending_hash)), _ => None, @@ -286,9 +286,7 @@ async fn build_relay_parent_ancestry( let mut current_rp = relay_parent; let mut required_session = None; while ancestry.len() <= ancestry_lookback { - let Some(header) = relay_client.header(RBlockId::hash(current_rp)).await? else { - break - }; + let Some(header) = relay_client.header(RBlockId::hash(current_rp)).await? else { break }; let session = relay_client.session_index_for_child(current_rp).await?; if let Some(required_session) = required_session { diff --git a/cumulus/client/consensus/common/src/tests.rs b/cumulus/client/consensus/common/src/tests.rs index 2beaba934264..284fa39ed1e7 100644 --- a/cumulus/client/consensus/common/src/tests.rs +++ b/cumulus/client/consensus/common/src/tests.rs @@ -1244,14 +1244,7 @@ fn find_potential_parents_unknown_pending() { )) .unwrap(); - assert_eq!(potential_parents.len(), 1); - let expected = included_block; - let parent = &potential_parents[0]; - - assert_eq!(parent.hash, expected.hash()); - assert_eq!(&parent.header, expected.header()); - assert_eq!(parent.depth, 0); - assert!(parent.aligned_with_pending); + assert!(potential_parents.is_empty()); } #[test] @@ -1331,7 +1324,7 @@ fn find_potential_parents_unknown_pending_include_alternative_branches() { assert_eq!(expected_parents[1].hash(), potential_parents[1].hash); } -/// Test where there is an additional block between included and pending block. +/// Test where there are multiple pending blocks. 
#[test] fn find_potential_parents_aligned_with_late_pending() { sp_tracing::try_init_simple(); From 1021ee365a5e14af45524aa13a8b3c782489eec2 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Mon, 17 Jun 2024 19:09:40 +0200 Subject: [PATCH 47/71] Remove slot based experimental from common cli options --- cumulus/client/cli/src/lib.rs | 9 +- cumulus/polkadot-parachain/src/cli.rs | 5 + cumulus/polkadot-parachain/src/command.rs | 39 ++++-- cumulus/polkadot-parachain/src/service.rs | 149 +++++++++++++++++----- cumulus/test/service/src/cli.rs | 5 + cumulus/test/service/src/lib.rs | 8 +- cumulus/test/service/src/main.rs | 2 + 7 files changed, 160 insertions(+), 57 deletions(-) diff --git a/cumulus/client/cli/src/lib.rs b/cumulus/client/cli/src/lib.rs index b3040aca5ed9..a7b2eb19de88 100644 --- a/cumulus/client/cli/src/lib.rs +++ b/cumulus/client/cli/src/lib.rs @@ -307,11 +307,6 @@ pub struct RunCmd { /// Will use the specified relay chain chainspec. #[arg(long, conflicts_with_all = ["relay_chain_rpc_urls", "collator"])] pub relay_chain_light_client: bool, - - /// EXPERIMENTAL: Use slot-based collator which can handle elastic scaling. Use with care, this - /// flag is unstable and subject to change. - #[arg(long)] - pub experimental_use_slot_based: bool, } impl RunCmd { @@ -334,7 +329,7 @@ impl RunCmd { _ => RelayChainMode::Embedded, }; - CollatorOptions { relay_chain_mode, use_slot_based: self.experimental_use_slot_based } + CollatorOptions { relay_chain_mode } } } @@ -354,8 +349,6 @@ pub enum RelayChainMode { pub struct CollatorOptions { /// How this collator retrieves relay chain information pub relay_chain_mode: RelayChainMode, - /// Use slot based collator - pub use_slot_based: bool, } /// A non-redundant version of the `RunCmd` that sets the `validator` field when the diff --git a/cumulus/polkadot-parachain/src/cli.rs b/cumulus/polkadot-parachain/src/cli.rs index f7d2fd0f0be3..fdb801b27db7 100644 --- a/cumulus/polkadot-parachain/src/cli.rs +++ b/cumulus/polkadot-parachain/src/cli.rs @@ -80,6 +80,11 @@ pub struct Cli { #[command(flatten)] pub run: cumulus_client_cli::RunCmd, + /// EXPERIMENTAL: Use slot-based collator which can handle elastic scaling. Use with care, this + /// flag is unstable and subject to change. + #[arg(long)] + pub experimental_use_slot_based: bool, + /// Disable automatic hardware benchmarks. /// /// By default these benchmarks are automatically ran at startup and measure diff --git a/cumulus/polkadot-parachain/src/command.rs b/cumulus/polkadot-parachain/src/command.rs index 653ea3281f0f..da4d59d0d021 100644 --- a/cumulus/polkadot-parachain/src/command.rs +++ b/cumulus/polkadot-parachain/src/command.rs @@ -674,6 +674,7 @@ pub fn run() -> Result<()> { polkadot_config, collator_options, id, + cli.experimental_use_slot_based, hwbench, ) .await, @@ -683,6 +684,7 @@ pub fn run() -> Result<()> { polkadot_config, collator_options, id, + cli.experimental_use_slot_based, hwbench, ) .await, @@ -697,24 +699,27 @@ async fn start_node>( polkadot_config: sc_service::Configuration, collator_options: cumulus_client_cli::CollatorOptions, id: ParaId, + use_experimental_slot_based: bool, hwbench: Option, ) -> Result { match config.chain_spec.runtime()? 
{ - Runtime::AssetHubPolkadot => crate::service::start_asset_hub_lookahead_node::< - AssetHubPolkadotRuntimeApi, - AssetHubPolkadotAuraId, - Network, - >(config, polkadot_config, collator_options, id, hwbench) - .await - .map(|r| r.0) - .map_err(Into::into), + Runtime::AssetHubPolkadot => + crate::service::start_asset_hub_async_backing_node::< + AssetHubPolkadotRuntimeApi, + AssetHubPolkadotAuraId, + Network, + >(config, polkadot_config, collator_options, id, use_experimental_slot_based, hwbench) + .await + .map(|r| r.0) + .map_err(Into::into), Runtime::AssetHubRococo | Runtime::AssetHubWestend | Runtime::AssetHubKusama => - crate::service::start_asset_hub_lookahead_node::( + crate::service::start_asset_hub_async_backing_node::( config, polkadot_config, collator_options, id, + use_experimental_slot_based, hwbench, ) .await @@ -722,11 +727,12 @@ async fn start_node>( .map_err(Into::into), Runtime::CollectivesWestend | Runtime::CollectivesPolkadot => - crate::service::start_generic_aura_lookahead_node::( + crate::service::start_generic_aura_async_backing_node::( config, polkadot_config, collator_options, id, + use_experimental_slot_based, hwbench, ) .await @@ -749,6 +755,7 @@ async fn start_node>( polkadot_config, collator_options, id, + use_experimental_slot_based, hwbench, ) .await @@ -766,11 +773,12 @@ async fn start_node>( chain_spec::bridge_hubs::BridgeHubRuntimeType::Rococo | chain_spec::bridge_hubs::BridgeHubRuntimeType::RococoLocal | chain_spec::bridge_hubs::BridgeHubRuntimeType::RococoDevelopment => - crate::service::start_generic_aura_lookahead_node::( + crate::service::start_generic_aura_async_backing_node::( config, polkadot_config, collator_options, id, + use_experimental_slot_based, hwbench, ) .await @@ -789,11 +797,12 @@ async fn start_node>( chain_spec::coretime::CoretimeRuntimeType::Westend | chain_spec::coretime::CoretimeRuntimeType::WestendLocal | chain_spec::coretime::CoretimeRuntimeType::WestendDevelopment => - crate::service::start_generic_aura_lookahead_node::( + crate::service::start_generic_aura_async_backing_node::( config, polkadot_config, collator_options, id, + use_experimental_slot_based, hwbench, ) .await @@ -814,11 +823,12 @@ async fn start_node>( .map_err(Into::into), Runtime::Glutton | Runtime::GluttonWestend => - crate::service::start_basic_lookahead_node::( + crate::service::start_basic_async_backing_node::( config, polkadot_config, collator_options, id, + use_experimental_slot_based, hwbench, ) .await @@ -836,11 +846,12 @@ async fn start_node>( chain_spec::people::PeopleRuntimeType::Westend | chain_spec::people::PeopleRuntimeType::WestendLocal | chain_spec::people::PeopleRuntimeType::WestendDevelopment => - crate::service::start_generic_aura_lookahead_node::( + crate::service::start_generic_aura_async_backing_node::( config, polkadot_config, collator_options, id, + use_experimental_slot_based, hwbench, ) .await diff --git a/cumulus/polkadot-parachain/src/service.rs b/cumulus/polkadot-parachain/src/service.rs index 604379acb727..89c079252cc5 100644 --- a/cumulus/polkadot-parachain/src/service.rs +++ b/cumulus/polkadot-parachain/src/service.rs @@ -17,7 +17,10 @@ use codec::{Codec, Decode}; use cumulus_client_cli::CollatorOptions; use cumulus_client_collator::service::CollatorService; -use cumulus_client_consensus_aura::collators::lookahead::{self as aura, Params as AuraParams}; +use cumulus_client_consensus_aura::collators::{ + lookahead::{self as aura, Params as AuraParams}, + slot_based::{self as slot_based, Params as SlotBasedParams}, +}; use 
cumulus_client_consensus_common::{ ParachainBlockImport as TParachainBlockImport, ParachainCandidate, ParachainConsensus, }; @@ -684,13 +687,19 @@ where /// Uses the lookahead collator to support async backing. /// /// Start an aura powered parachain node. Some system chains use this. -pub async fn start_generic_aura_lookahead_node>( +pub async fn start_generic_aura_async_backing_node>( parachain_config: Configuration, polkadot_config: Configuration, collator_options: CollatorOptions, para_id: ParaId, + use_experimental_slot_based: bool, hwbench: Option, ) -> sc_service::error::Result<(TaskManager, Arc>)> { + let consensus_starter = if use_experimental_slot_based { + start_slot_based_aura_consensus + } else { + start_lookahead_aura_consensus + }; start_node_impl::( parachain_config, polkadot_config, @@ -699,7 +708,7 @@ pub async fn start_generic_aura_lookahead_node> para_id, build_parachain_rpc_extensions::, build_relay_to_aura_import_queue::<_, AuraId>, - start_lookahead_aura_consensus, + consensus_starter, hwbench, ) .await @@ -711,7 +720,7 @@ pub async fn start_generic_aura_lookahead_node> /// /// Uses the lookahead collator to support async backing. #[sc_tracing::logging::prefix_logs_with("Parachain")] -pub async fn start_asset_hub_lookahead_node< +pub async fn start_asset_hub_async_backing_node< RuntimeApi, AuraId: AppCrypto + Send + Codec + Sync, Net, @@ -720,6 +729,7 @@ pub async fn start_asset_hub_lookahead_node< polkadot_config: Configuration, collator_options: CollatorOptions, para_id: ParaId, + use_experimental_slot_based: bool, hwbench: Option, ) -> sc_service::error::Result<(TaskManager, Arc>)> where @@ -814,32 +824,36 @@ where } } - // Move to Aura consensus. - let proposer = Proposer::new(proposer_factory); - - let params = AuraParams { - create_inherent_data_providers: move |_, ()| async move { Ok(()) }, - block_import, - para_client: client.clone(), - para_backend: backend, - relay_client: relay_chain_interface2, - code_hash_provider: move |block_hash| { - client.code_at(block_hash).ok().map(|c| ValidationCode::from(c).hash()) - }, - keystore, - collator_key, - para_id, - overseer_handle, - relay_chain_slot_duration, - proposer, - collator_service, - authoring_duration: Duration::from_millis(1500), - reinitialize: true, /* we need to always re-initialize for asset-hub moving - * to aura */ + if use_experimental_slot_based { + panic!(); + } else { + // Move to Aura consensus. + let proposer = Proposer::new(proposer_factory); + + let params = AuraParams { + create_inherent_data_providers: move |_, ()| async move { Ok(()) }, + block_import, + para_client: client.clone(), + para_backend: backend, + relay_client: relay_chain_interface2, + code_hash_provider: move |block_hash| { + client.code_at(block_hash).ok().map(|c| ValidationCode::from(c).hash()) + }, + keystore, + collator_key, + para_id, + overseer_handle, + relay_chain_slot_duration, + proposer, + collator_service, + authoring_duration: Duration::from_millis(1500), + reinitialize: true, /* we need to always re-initialize for asset-hub + * moving to aura */ + }; + + aura::run::::Pair, _, _, _, _, _, _, _, _>(params) + .await }; - - aura::run::::Pair, _, _, _, _, _, _, _, _>(params) - .await }); let spawner = task_manager.spawn_essential_handle(); @@ -980,17 +994,82 @@ fn start_lookahead_aura_consensus( Ok(()) } +/// Start consensus using the lookahead aura collator. 
+fn start_slot_based_aura_consensus( + client: Arc>, + block_import: ParachainBlockImport, + prometheus_registry: Option<&Registry>, + telemetry: Option, + task_manager: &TaskManager, + relay_chain_interface: Arc, + transaction_pool: Arc>>, + keystore: KeystorePtr, + relay_chain_slot_duration: Duration, + para_id: ParaId, + collator_key: CollatorPair, + _overseer_handle: OverseerHandle, + announce_block: Arc>) + Send + Sync>, + backend: Arc, +) -> Result<(), sc_service::Error> { + log::info!("Starting block authoring with slot based authoring."); + let proposer_factory = sc_basic_authorship::ProposerFactory::with_proof_recording( + task_manager.spawn_handle(), + client.clone(), + transaction_pool, + prometheus_registry, + telemetry.clone(), + ); + + let proposer = Proposer::new(proposer_factory); + let collator_service = CollatorService::new( + client.clone(), + Arc::new(task_manager.spawn_handle()), + announce_block, + client.clone(), + ); + + let client_for_aura = client.clone(); + let params = SlotBasedParams { + create_inherent_data_providers: move |_, ()| async move { Ok(()) }, + block_import, + para_client: client.clone(), + para_backend: backend.clone(), + relay_client: relay_chain_interface, + code_hash_provider: move |block_hash| { + client_for_aura.code_at(block_hash).ok().map(|c| ValidationCode::from(c).hash()) + }, + keystore, + collator_key, + para_id, + relay_chain_slot_duration, + proposer, + collator_service, + authoring_duration: Duration::from_millis(2000), + reinitialize: false, + slot_drift: Duration::from_secs(1), + spawn_handle: task_manager.spawn_essential_handle(), + }; + + slot_based::run::::Pair, _, _, _, _, _, _, _, _, _>(params); + Ok(()) +} /// Start an aura powered parachain node which uses the lookahead collator to support async backing. /// This node is basic in the sense that its runtime api doesn't include common contents such as /// transaction payment. Used for aura glutton. 
-pub async fn start_basic_lookahead_node>( +pub async fn start_basic_async_backing_node>( parachain_config: Configuration, polkadot_config: Configuration, collator_options: CollatorOptions, para_id: ParaId, + use_experimental_slot_based: bool, hwbench: Option, ) -> sc_service::error::Result<(TaskManager, Arc>)> { + let consensus_starter = if use_experimental_slot_based { + start_slot_based_aura_consensus + } else { + start_lookahead_aura_consensus + }; start_node_impl::( parachain_config, polkadot_config, @@ -999,7 +1078,7 @@ pub async fn start_basic_lookahead_node>( para_id, |_, _, _, _| Ok(RpcModule::new(())), build_relay_to_aura_import_queue::<_, AuraId>, - start_lookahead_aura_consensus, + consensus_starter, hwbench, ) .await @@ -1011,8 +1090,14 @@ pub async fn start_contracts_rococo_node>( polkadot_config: Configuration, collator_options: CollatorOptions, para_id: ParaId, + use_experimental_slot_based: bool, hwbench: Option, ) -> sc_service::error::Result<(TaskManager, Arc>)> { + let consensus_starter = if use_experimental_slot_based { + start_slot_based_aura_consensus + } else { + start_lookahead_aura_consensus + }; start_node_impl::( parachain_config, polkadot_config, @@ -1021,7 +1106,7 @@ pub async fn start_contracts_rococo_node>( para_id, build_contracts_rpc_extensions, build_aura_import_queue, - start_lookahead_aura_consensus, + consensus_starter, hwbench, ) .await diff --git a/cumulus/test/service/src/cli.rs b/cumulus/test/service/src/cli.rs index 58d53fde19fc..e9c075555f59 100644 --- a/cumulus/test/service/src/cli.rs +++ b/cumulus/test/service/src/cli.rs @@ -50,6 +50,11 @@ pub struct TestCollatorCli { #[arg(long)] pub fail_pov_recovery: bool, + + /// EXPERIMENTAL: Use slot-based collator which can handle elastic scaling. Use with care, this + /// flag is unstable and subject to change. 
+ #[arg(long)] + pub experimental_use_slot_based: bool, } #[derive(Debug, clap::Subcommand)] diff --git a/cumulus/test/service/src/lib.rs b/cumulus/test/service/src/lib.rs index f004bab17064..bd400a542d38 100644 --- a/cumulus/test/service/src/lib.rs +++ b/cumulus/test/service/src/lib.rs @@ -319,6 +319,7 @@ pub async fn start_node_impl>( consensus: Consensus, collator_options: CollatorOptions, proof_recording_during_import: bool, + use_slot_based_collator: bool, ) -> sc_service::error::Result<( TaskManager, Arc, @@ -465,7 +466,7 @@ where let client_for_aura = client.clone(); - if collator_options.use_slot_based { + if use_slot_based_collator { tracing::info!(target: LOG_TARGET, "Starting block authoring with slot based authoring."); let params = SlotBasedParams { create_inherent_data_providers: move |_, ()| async move { Ok(()) }, @@ -736,8 +737,7 @@ impl TestNodeBuilder { false, ); - let collator_options = - CollatorOptions { relay_chain_mode: self.relay_chain_mode, use_slot_based: false }; + let collator_options = CollatorOptions { relay_chain_mode: self.relay_chain_mode }; relay_chain_config.network.node_name = format!("{} (relay chain)", relay_chain_config.network.node_name); @@ -757,6 +757,7 @@ impl TestNodeBuilder { self.consensus, collator_options, self.record_proof_during_import, + false, ) .await .expect("could not create Cumulus test service"), @@ -772,6 +773,7 @@ impl TestNodeBuilder { self.consensus, collator_options, self.record_proof_during_import, + false, ) .await .expect("could not create Cumulus test service"), diff --git a/cumulus/test/service/src/main.rs b/cumulus/test/service/src/main.rs index 90d37173dd59..9357978b769a 100644 --- a/cumulus/test/service/src/main.rs +++ b/cumulus/test/service/src/main.rs @@ -118,6 +118,7 @@ fn main() -> Result<(), sc_cli::Error> { consensus, collator_options, true, + cli.experimental_use_slot_based, ) .await, sc_network::config::NetworkBackendType::Litep2p => @@ -135,6 +136,7 @@ fn main() -> Result<(), sc_cli::Error> { consensus, collator_options, true, + cli.experimental_use_slot_based, ) .await, } From c109088d9d2c55d0f5a84b0c9992edd9e3a4e444 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 18 Jun 2024 18:18:15 +0200 Subject: [PATCH 48/71] Fix lookahead collator filling the pipeline --- cumulus/pallets/aura-ext/src/consensus_hook.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cumulus/pallets/aura-ext/src/consensus_hook.rs b/cumulus/pallets/aura-ext/src/consensus_hook.rs index 9e93ea164a6f..560d477b2a85 100644 --- a/cumulus/pallets/aura-ext/src/consensus_hook.rs +++ b/cumulus/pallets/aura-ext/src/consensus_hook.rs @@ -76,7 +76,8 @@ where ); } - if authored > velocity { + // We need to allow authoring multiple blocks in the same slot. 
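+ // When the parachain uses a slot duration lower than the relay chain's
+ // (slot based collator), `slot` differs from the relay-derived slot and
+ // more than `velocity` blocks per relay parent are expected, so the
+ // check only applies when both slots match.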
+ if slot != para_slot_from_relay && authored > velocity { panic!("authored blocks limit is reached for the slot") } let weight = T::DbWeight::get().reads(1); From 8c364e84701213d56b1d0d6d0fd7e47e71dbe41a Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Tue, 18 Jun 2024 20:29:53 +0200 Subject: [PATCH 49/71] Add PoV-recovery test, make sure no inherent errors are thrown --- .../tests/0003-full_node_catching_up.zndsl | 3 ++ .../0006-rpc_collator_builds_blocks.zndsl | 4 ++ .../tests/0008-elastic_authoring.zndsl | 4 ++ .../tests/0009-elastic_pov_recovery.toml | 48 +++++++++++++++++++ .../tests/0009-elastic_pov_recovery.zndsl | 17 +++++++ 5 files changed, 76 insertions(+) create mode 100644 cumulus/zombienet/tests/0009-elastic_pov_recovery.toml create mode 100644 cumulus/zombienet/tests/0009-elastic_pov_recovery.zndsl diff --git a/cumulus/zombienet/tests/0003-full_node_catching_up.zndsl b/cumulus/zombienet/tests/0003-full_node_catching_up.zndsl index 49b6d9e94fd1..e1e8442f3050 100644 --- a/cumulus/zombienet/tests/0003-full_node_catching_up.zndsl +++ b/cumulus/zombienet/tests/0003-full_node_catching_up.zndsl @@ -6,3 +6,6 @@ alice: parachain 2000 is registered within 225 seconds dave: reports block height is at least 7 within 250 seconds eve: reports block height is at least 7 within 250 seconds ferdie: reports block height is at least 7 within 250 seconds + +# We want to make sure that none of the consensus hook checks fail, even if the chain makes progress +charlie: count of log lines containing "set_validation_data inherent needs to be present in every block" is 0 within 10 seconds diff --git a/cumulus/zombienet/tests/0006-rpc_collator_builds_blocks.zndsl b/cumulus/zombienet/tests/0006-rpc_collator_builds_blocks.zndsl index 7da8416d0161..b14c15ed5e5b 100644 --- a/cumulus/zombienet/tests/0006-rpc_collator_builds_blocks.zndsl +++ b/cumulus/zombienet/tests/0006-rpc_collator_builds_blocks.zndsl @@ -13,3 +13,7 @@ two: restart after 1 seconds three: restart after 20 seconds dave: is up dave: reports block height is at least 30 within 200 seconds + +# We want to make sure that none of the consensus hook checks fail, even if the chain makes progress +dave: count of log lines containing "set_validation_data inherent needs to be present in every block" is 0 within 10 seconds +eve: count of log lines containing "set_validation_data inherent needs to be present in every block" is 0 within 10 seconds diff --git a/cumulus/zombienet/tests/0008-elastic_authoring.zndsl b/cumulus/zombienet/tests/0008-elastic_authoring.zndsl index bfc756530c82..a06ffd24fefd 100644 --- a/cumulus/zombienet/tests/0008-elastic_authoring.zndsl +++ b/cumulus/zombienet/tests/0008-elastic_authoring.zndsl @@ -13,3 +13,7 @@ alice: js-script ./assign-core.js with "2100,1" return is 0 within 600 seconds collator-single-core: reports block height is at least 20 within 225 seconds collator-elastic: reports block height is at least 40 within 225 seconds + +# We want to make sure that none of the consensus hook checks fail, even if the chain makes progress +collator-elastic: count of log lines containing "set_validation_data inherent needs to be present in every block" is 0 within 10 seconds +collator-single-core: count of log lines containing "set_validation_data inherent needs to be present in every block" is 0 within 10 seconds diff --git a/cumulus/zombienet/tests/0009-elastic_pov_recovery.toml b/cumulus/zombienet/tests/0009-elastic_pov_recovery.toml new file mode 100644 index 000000000000..cd12d09c5b44 --- /dev/null +++ 
b/cumulus/zombienet/tests/0009-elastic_pov_recovery.toml
@@ -0,0 +1,48 @@
+[settings]
+timeout = 1000
+
+[relaychain.genesis.runtimeGenesis.patch.configuration.config.async_backing_params]
+  max_candidate_depth = 6
+  allowed_ancestry_len = 3
+
+[relaychain.genesis.runtimeGenesis.patch.configuration.config.scheduler_params]
+  max_validators_per_core = 1
+  num_cores = 4
+
+[relaychain.genesis.runtimeGenesis.patch.configuration.config.approval_voting_params]
+  max_approval_coalesce_count = 5
+
+[relaychain]
+default_image = "{{ZOMBIENET_INTEGRATION_TEST_IMAGE}}"
+chain = "rococo-local"
+command = "polkadot"
+
+  [[relaychain.nodes]]
+  name = "alice"
+  args = ["" ]
+
+  [[relaychain.node_groups]]
+  name = "validator"
+  args = ["-lruntime=debug,parachain=trace", "--reserved-only", "--reserved-nodes {{'alice'|zombie('multiAddress')}}"]
+  count = 8
+
+# Slot based authoring with 3 cores and 2s slot duration
+[[parachains]]
+id = 2100
+chain = "elastic-scaling"
+add_to_genesis = true
+
+  # Slot based authoring with 3 cores and 2s slot duration
+  [[parachains.collators]]
+  name = "collator-elastic"
+  image = "{{CUMULUS_IMAGE}}"
+  command = "test-parachain"
+  args = ["--disable-block-announcements", "-laura=trace,runtime=info,cumulus-consensus=trace,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--experimental-use-slot-based"]
+
+  # run 'recovery-target' as a parachain full node
+  [[parachains.collators]]
+  name = "recovery-target"
+  validator = false # full node
+  image = "{{COL_IMAGE}}"
+  command = "test-parachain"
+  args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug,cumulus-consensus=debug", "--disable-block-announcements", "--bootnodes {{'collator-elastic'|zombie('multiAddress')}}", "--in-peers 0", "--out-peers 0", "--", "--reserved-only", "--reserved-nodes {{'alice'|zombie('multiAddress')}}"]
diff --git a/cumulus/zombienet/tests/0009-elastic_pov_recovery.zndsl b/cumulus/zombienet/tests/0009-elastic_pov_recovery.zndsl
new file mode 100644
index 000000000000..ff2dc38e647f
--- /dev/null
+++ b/cumulus/zombienet/tests/0009-elastic_pov_recovery.zndsl
@@ -0,0 +1,17 @@
+Description: Elastic scaling PoV recovery test
+Network: ./0009-elastic_pov_recovery.toml
+Creds: config
+
+alice: is up
+collator-elastic: is up
+
+# wait 20 blocks and register parachain
+alice: reports block height is at least 20 within 250 seconds
+alice: js-script ./register-para.js with "2100" within 240 seconds
+alice: parachain 2100 is registered within 300 seconds
+
+# check block production
+collator-elastic: reports block height is at least 40 within 225 seconds
+collator-elastic: count of log lines containing "set_validation_data inherent needs to be present in every block" is 0 within 10 seconds
+
+recovery-target: count of log lines containing "Importing block retrieved using pov_recovery" is greater than 35 within 10 seconds
From 8e30d38b8dab6ad86b8e15a89c719379f91d1358 Mon Sep 17 00:00:00 2001
From: Sebastian Kunert
Date: Wed, 19 Jun 2024 08:46:09 +0200
Subject: [PATCH 50/71] Move collator to futures again

---
 .../slot_based/block_builder_task.rs          | 364 +++++++++---------
 .../aura/src/collators/slot_based/mod.rs      |  28 +-
 cumulus/polkadot-parachain/Cargo.toml         |   2 +-
 cumulus/polkadot-parachain/src/service.rs     |  68 +++-
 cumulus/test/service/src/lib.rs               |  14 +-
 5 files changed, 262 insertions(+), 214 deletions(-)

diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs 
b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 3f0692aa92ea..c6145c870704 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -28,6 +28,7 @@ use polkadot_primitives::{ OccupiedCoreAssumption, }; +use futures::prelude::*; use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf, UsageProvider}; use sc_consensus::BlockImport; use sp_api::ProvideRuntimeApi; @@ -103,7 +104,7 @@ struct SlotInfo { struct SlotTimer { client: Arc, drift: Duration, - phantom: std::marker::PhantomData<(P, Block)>, + _marker: std::marker::PhantomData<(Block, Box)>, } /// Returns current duration since Unix epoch. @@ -134,7 +135,7 @@ where P::Signature: TryFrom> + Member + Codec, { pub fn new_with_drift(client: Arc, drift: Duration) -> Self { - Self { client, drift, phantom: Default::default() } + Self { client, drift, _marker: Default::default() } } /// Returns a future that resolves when the next slot arrives. @@ -156,20 +157,10 @@ where } /// Run block-builder. -pub async fn run_block_builder< - Block, - P, - BI, - CIDP, - Client, - Backend, - RelayClient, - CHP, - Proposer, - CS, ->( +pub fn run_block_builder( params: BuilderTaskParams, -) where +) -> impl Future + Send + 'static +where Block: BlockT, Client: ProvideRuntimeApi + UsageProvider @@ -194,192 +185,199 @@ pub async fn run_block_builder< P::Public: AppPublic + Member + Codec, P::Signature: TryFrom> + Member + Codec, { - let BuilderTaskParams { - relay_client, - create_inherent_data_providers, - para_client, - keystore, - block_import, - para_id, - proposer, - collator_service, - collator_sender, - code_hash_provider, - authoring_duration, - para_backend, - relay_chain_slot_duration, - slot_drift, - } = params; - - let slot_timer = SlotTimer::<_, _, P>::new_with_drift(para_client.clone(), slot_drift); - - let mut collator = { - let params = collator_util::Params { + async move { + let BuilderTaskParams { + relay_client, create_inherent_data_providers, + para_client, + keystore, block_import, - relay_client: relay_client.clone(), - keystore: keystore.clone(), para_id, proposer, collator_service, + collator_sender, + code_hash_provider, + authoring_duration, + para_backend, + relay_chain_slot_duration, + slot_drift, + } = params; + + let slot_timer = SlotTimer::<_, _, P>::new_with_drift(para_client.clone(), slot_drift); + + let mut collator = { + let params = collator_util::Params { + create_inherent_data_providers, + block_import, + relay_client: relay_client.clone(), + keystore: keystore.clone(), + para_id, + proposer, + collator_service, + }; + + collator_util::Collator::::new(params) }; - collator_util::Collator::::new(params) - }; - - let mut relay_chain_fetcher = RelayChainCachingFetcher::new(relay_client.clone(), para_id); - - loop { - // We wait here until the next slot arrives. 
- let Ok(para_slot) = slot_timer.wait_until_next_slot().await else { - return; - }; - - let Some(expected_cores) = - expected_core_count(relay_chain_slot_duration, para_slot.slot_duration) - else { - return - }; - - let Ok(RelayChainData { - relay_parent_header, - max_pov_size, - relay_parent_hash: relay_parent, - scheduled_cores, - }) = relay_chain_fetcher.get_relay_chain_data().await - else { - continue; - }; - - if scheduled_cores.is_empty() { - tracing::debug!(target: LOG_TARGET, "Parachain not scheduled, skipping slot."); - continue; - } - - let core_index_in_scheduled: u64 = *para_slot.slot % expected_cores; - let Some(core_index) = scheduled_cores.get(core_index_in_scheduled as usize) else { - tracing::debug!(target: LOG_TARGET, core_index_in_scheduled, core_len = scheduled_cores.len(), "Para is scheduled, but not enough cores available."); - continue; - }; - - let Some((included_block, parent)) = - crate::collators::find_parent(relay_parent, para_id, &*para_backend, &relay_client) - .await - else { - continue - }; - - let parent_header = parent.header; - let parent_hash = parent.hash; - - // We mainly call this to inform users at genesis if there is a mismatch with the - // on-chain data. - collator.collator_service().check_block_status(parent_hash, &parent_header); - - let slot_claim = match crate::collators::can_build_upon::<_, _, P>( - para_slot.slot, - para_slot.timestamp, - parent_hash, - included_block, - &*para_client, - &keystore, - ) - .await - { - Some(slot) => slot, - None => { - tracing::debug!( - target: crate::LOG_TARGET, - ?core_index, - slot_info = ?para_slot, - unincluded_segment_len = parent.depth, - relay_parent = %relay_parent, - included = %included_block, - parent = %parent_hash, - "Not building block." - ); + let mut relay_chain_fetcher = RelayChainCachingFetcher::new(relay_client.clone(), para_id); + + loop { + // We wait here until the next slot arrives. + let Ok(para_slot) = slot_timer.wait_until_next_slot().await else { + return; + }; + + let Some(expected_cores) = + expected_core_count(relay_chain_slot_duration, para_slot.slot_duration) + else { + return + }; + + let Ok(RelayChainData { + relay_parent_header, + max_pov_size, + relay_parent_hash: relay_parent, + scheduled_cores, + }) = relay_chain_fetcher.get_relay_chain_data().await + else { + continue; + }; + + if scheduled_cores.is_empty() { + tracing::debug!(target: LOG_TARGET, "Parachain not scheduled, skipping slot."); + continue; + } + + let core_index_in_scheduled: u64 = *para_slot.slot % expected_cores; + let Some(core_index) = scheduled_cores.get(core_index_in_scheduled as usize) else { + tracing::debug!(target: LOG_TARGET, core_index_in_scheduled, core_len = scheduled_cores.len(), "Para is scheduled, but not enough cores available."); + continue; + }; + + let Some((included_block, parent)) = + crate::collators::find_parent(relay_parent, para_id, &*para_backend, &relay_client) + .await + else { continue - }, - }; + }; - tracing::debug!( - target: crate::LOG_TARGET, - ?core_index, - slot_info = ?para_slot, - unincluded_segment_len = parent.depth, - relay_parent = %relay_parent, - included = %included_block, - parent = %parent_hash, - "Building block." 
- ); - - let validation_data = PersistedValidationData { - parent_head: parent_header.encode().into(), - relay_parent_number: *relay_parent_header.number(), - relay_parent_storage_root: *relay_parent_header.state_root(), - max_pov_size, - }; + let parent_header = parent.header; + let parent_hash = parent.hash; - let (parachain_inherent_data, other_inherent_data) = match collator - .create_inherent_data( - relay_parent, - &validation_data, + // We mainly call this to inform users at genesis if there is a mismatch with the + // on-chain data. + collator.collator_service().check_block_status(parent_hash, &parent_header); + + let slot_claim = match crate::collators::can_build_upon::<_, _, P>( + para_slot.slot, + para_slot.timestamp, parent_hash, - slot_claim.timestamp(), + included_block, + &*para_client, + &keystore, ) .await - { - Err(err) => { - tracing::error!(target: crate::LOG_TARGET, ?err); - break - }, - Ok(x) => x, - }; - - let validation_code_hash = match code_hash_provider.code_hash_at(parent_hash) { - None => { - tracing::error!(target: crate::LOG_TARGET, ?parent_hash, "Could not fetch validation code hash"); - break - }, - Some(v) => v, - }; - - check_validation_code_or_log(&validation_code_hash, para_id, &relay_client, relay_parent) + { + Some(slot) => slot, + None => { + tracing::debug!( + target: crate::LOG_TARGET, + ?core_index, + slot_info = ?para_slot, + unincluded_segment_len = parent.depth, + relay_parent = %relay_parent, + included = %included_block, + parent = %parent_hash, + "Not building block." + ); + continue + }, + }; + + tracing::debug!( + target: crate::LOG_TARGET, + ?core_index, + slot_info = ?para_slot, + unincluded_segment_len = parent.depth, + relay_parent = %relay_parent, + included = %included_block, + parent = %parent_hash, + "Building block." + ); + + let validation_data = PersistedValidationData { + parent_head: parent_header.encode().into(), + relay_parent_number: *relay_parent_header.number(), + relay_parent_storage_root: *relay_parent_header.state_root(), + max_pov_size, + }; + + let (parachain_inherent_data, other_inherent_data) = match collator + .create_inherent_data( + relay_parent, + &validation_data, + parent_hash, + slot_claim.timestamp(), + ) + .await + { + Err(err) => { + tracing::error!(target: crate::LOG_TARGET, ?err); + break + }, + Ok(x) => x, + }; + + let validation_code_hash = match code_hash_provider.code_hash_at(parent_hash) { + None => { + tracing::error!(target: crate::LOG_TARGET, ?parent_hash, "Could not fetch validation code hash"); + break + }, + Some(v) => v, + }; + + check_validation_code_or_log( + &validation_code_hash, + para_id, + &relay_client, + relay_parent, + ) .await; - let Ok(Some(candidate)) = collator - .build_block_and_import( - &parent_header, - &slot_claim, - None, - (parachain_inherent_data, other_inherent_data), - authoring_duration, - // Set the block limit to 50% of the maximum PoV size. - // - // TODO: If we got benchmarking that includes the proof size, - // we should be able to use the maximum pov size. - (validation_data.max_pov_size / 2) as usize, - ) - .await - else { - tracing::error!(target: crate::LOG_TARGET, "Unable to build block at slot."); - continue; - }; + let Ok(Some(candidate)) = collator + .build_block_and_import( + &parent_header, + &slot_claim, + None, + (parachain_inherent_data, other_inherent_data), + authoring_duration, + // Set the block limit to 50% of the maximum PoV size. 
+ // + // TODO: If we got benchmarking that includes the proof size, + // we should be able to use the maximum pov size. + (validation_data.max_pov_size / 2) as usize, + ) + .await + else { + tracing::error!(target: crate::LOG_TARGET, "Unable to build block at slot."); + continue; + }; - let new_block_hash = candidate.block.header().hash(); + let new_block_hash = candidate.block.header().hash(); - // Announce the newly built block to our peers. - collator.collator_service().announce_block(new_block_hash, None); + // Announce the newly built block to our peers. + collator.collator_service().announce_block(new_block_hash, None); - if let Err(err) = collator_sender.unbounded_send(CollatorMessage { - relay_parent, - parent_header, - parachain_candidate: candidate, - validation_code_hash, - core_index: *core_index, - }) { - tracing::error!(target: crate::LOG_TARGET, ?err, "Unable to send block to collation task."); - break + if let Err(err) = collator_sender.unbounded_send(CollatorMessage { + relay_parent, + parent_header, + parachain_candidate: candidate, + validation_code_hash, + core_index: *core_index, + }) { + tracing::error!(target: crate::LOG_TARGET, ?err, "Unable to send block to collation task."); + break + } } } } diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs index 7e5637526c0f..e6a86fac61c5 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs @@ -56,7 +56,7 @@ use sp_api::ProvideRuntimeApi; use sp_application_crypto::AppPublic; use sp_blockchain::HeaderBackend; use sp_consensus_aura::AuraApi; -use sp_core::{crypto::Pair, traits::SpawnEssentialNamed}; +use sp_core::crypto::Pair; use sp_inherents::CreateInherentDataProviders; use sp_keystore::KeystorePtr; use sp_runtime::traits::{Block as BlockT, Member}; @@ -69,7 +69,7 @@ mod block_builder_task; mod collation_task; /// Parameters for [`run`]. -pub struct Params { +pub struct Params { /// Inherent data providers. Only non-consensus inherent data should be provided, i.e. /// the timestamp, slot, and paras inherents should be omitted, as they are set by this /// collator. @@ -103,14 +103,13 @@ pub struct Params( - params: Params, -) where +pub fn run( + params: Params, +) -> (impl futures::Future, impl futures::Future) +where Block: BlockT, Client: ProvideRuntimeApi + BlockOf @@ -131,10 +130,9 @@ pub fn run + Send + Sync + 'static, CS: CollatorServiceInterface + Send + Sync + Clone + 'static, CHP: consensus_common::ValidationCodeHashProvider + Send + 'static, - P: Pair + Send + Sync + 'static, + P: Pair + 'static, P::Public: AppPublic + Member + Codec, P::Signature: TryFrom> + Member + Codec, - SpawnHandle: SpawnEssentialNamed, { let (tx, rx) = tracing_unbounded("mpsc_builder_to_collator", 100); let collator_task_params = collation_task::Params { @@ -168,17 +166,7 @@ pub fn run(block_builder_params); - params.spawn_handle.spawn_essential_blocking( - "collation-task", - Some("parachain-authoring"), - Box::pin(collation_task_fut), - ); - - params.spawn_handle.spawn_essential_blocking( - "parachain-block-builder-task", - Some("parachain-authoring"), - Box::pin(block_builder_fut), - ); + (collation_task_fut, block_builder_fut) } /// Message to be sent from the block builder to the collation task. 
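Reverting to returned futures gives each embedder control over task naming and placement again. A sketch of the caller side, assuming `params`, a `task_manager`, and an `AuthorityPair` type set up as in the service code in the hunks that follow; the turbofish mirrors the one used there:

    // Sketch only: spawning the futures returned by `slot_based::run`.
    let (collation_task_fut, block_builder_fut) =
        slot_based::run::<Block, AuthorityPair, _, _, _, _, _, _, _, _>(params);

    task_manager.spawn_essential_handle().spawn(
        "collation-task",
        Some("parachain-block-authoring"),
        collation_task_fut,
    );
    task_manager.spawn_essential_handle().spawn(
        "block-builder-task",
        Some("parachain-block-authoring"),
        block_builder_fut,
    );
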
diff --git a/cumulus/polkadot-parachain/Cargo.toml b/cumulus/polkadot-parachain/Cargo.toml index 4ad8caf21dbc..f027895dd068 100644 --- a/cumulus/polkadot-parachain/Cargo.toml +++ b/cumulus/polkadot-parachain/Cargo.toml @@ -111,6 +111,7 @@ cumulus-primitives-aura = { path = "../primitives/aura" } cumulus-primitives-core = { path = "../primitives/core" } cumulus-relay-chain-interface = { path = "../client/relay-chain-interface" } color-print = "0.3.4" +tokio = { version = "1.32.0", features = ["macros", "parking_lot", "time"] } [build-dependencies] substrate-build-script-utils = { path = "../../substrate/utils/build-script-utils" } @@ -120,7 +121,6 @@ assert_cmd = "2.0" nix = { version = "0.28.0", features = ["signal"] } tempfile = "3.8.0" wait-timeout = "0.2" -tokio = { version = "1.32.0", features = ["macros", "parking_lot", "time"] } [features] default = [] diff --git a/cumulus/polkadot-parachain/src/service.rs b/cumulus/polkadot-parachain/src/service.rs index 89c079252cc5..6af4dab081be 100644 --- a/cumulus/polkadot-parachain/src/service.rs +++ b/cumulus/polkadot-parachain/src/service.rs @@ -747,6 +747,7 @@ where + cumulus_primitives_aura::AuraUnincludedSegmentApi, <::Pair as Pair>::Signature: TryFrom> + std::hash::Hash + sp_runtime::traits::Member + Codec, + ::Pair: Send + Sync, Net: NetworkBackend, { start_node_impl::( @@ -790,6 +791,7 @@ where telemetry.clone(), ); + let essential_spawner = task_manager.spawn_essential_handle(); let collation_future = Box::pin(async move { // Start collating with the `shell` runtime while waiting for an upgrade to an Aura // compatible runtime. @@ -824,12 +826,53 @@ where } } + // Move to Aura consensus. + let proposer = Proposer::new(proposer_factory); if use_experimental_slot_based { - panic!(); - } else { - // Move to Aura consensus. 
- let proposer = Proposer::new(proposer_factory); + log::info!("Starting block authoring with slot based authoring."); + let client_for_aura = client.clone(); + let params = SlotBasedParams { + create_inherent_data_providers: move |_, ()| async move { Ok(()) }, + block_import, + para_client: client.clone(), + para_backend: backend.clone(), + relay_client: relay_chain_interface, + code_hash_provider: move |block_hash| { + client_for_aura + .code_at(block_hash) + .ok() + .map(|c| ValidationCode::from(c).hash()) + }, + keystore, + collator_key, + para_id, + relay_chain_slot_duration, + proposer, + collator_service, + authoring_duration: Duration::from_millis(2000), + reinitialize: false, + slot_drift: Duration::from_secs(1), + }; + let (collation_future, block_builer_future) = slot_based::run::< + Block, + ::Pair, + _, + _, + _, + _, + _, + _, + _, + _, + >(params); + essential_spawner.spawn_essential( + "block-builder-task", + Some("parachain-block-authoring"), + Box::pin(collation_future), + ); + block_builer_future.await; + } else { let params = AuraParams { create_inherent_data_providers: move |_, ()| async move { Ok(()) }, block_import, @@ -857,7 +900,11 @@ where }); let spawner = task_manager.spawn_essential_handle(); - spawner.spawn_essential("cumulus-asset-hub-collator", None, collation_future); + spawner.spawn_essential( + "cumulus-asset-hub-collator", + Some("parachain-block-authoring"), + collation_future, + ); Ok(()) }, @@ -1047,10 +1094,15 @@ fn start_slot_based_aura_consensus( authoring_duration: Duration::from_millis(2000), reinitialize: false, slot_drift: Duration::from_secs(1), - spawn_handle: task_manager.spawn_essential_handle(), }; - - slot_based::run::::Pair, _, _, _, _, _, _, _, _, _>(params); + let (collation_future, block_builer_future) = + slot_based::run::::Pair, _, _, _, _, _, _, _, _>(params); + task_manager + .spawn_essential_handle() + .spawn("collation-task", None, collation_future); + task_manager + .spawn_essential_handle() + .spawn("block-builder-task", None, block_builer_future); Ok(()) } diff --git a/cumulus/test/service/src/lib.rs b/cumulus/test/service/src/lib.rs index bd400a542d38..6ae170103a29 100644 --- a/cumulus/test/service/src/lib.rs +++ b/cumulus/test/service/src/lib.rs @@ -489,10 +489,20 @@ where authoring_duration: Duration::from_millis(2000), reinitialize: false, slot_drift: Duration::from_secs(1), - spawn_handle: task_manager.spawn_essential_handle(), }; - slot_based::run::(params); + let (collation_future, block_builer_future) = + slot_based::run::(params); + task_manager.spawn_essential_handle().spawn( + "collation-task", + None, + collation_future, + ); + task_manager.spawn_essential_handle().spawn( + "block-builder-task", + None, + block_builer_future, + ); } else { tracing::info!(target: LOG_TARGET, "Starting block authoring with lookahead collator."); let params = AuraParams { From f3233db8ca61bfbed0f2760420ee44a2d8ba5120 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 19 Jun 2024 14:29:28 +0200 Subject: [PATCH 51/71] Move lost comment --- .../consensus/aura/src/collators/lookahead.rs | 6 ----- .../consensus/aura/src/collators/mod.rs | 6 +++++ cumulus/polkadot-parachain/src/service.rs | 24 ++++++++++++------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/lookahead.rs b/cumulus/client/consensus/aura/src/collators/lookahead.rs index 41cc285cd377..a0bf1a323e04 100644 --- a/cumulus/client/consensus/aura/src/collators/lookahead.rs +++ 
b/cumulus/client/consensus/aura/src/collators/lookahead.rs @@ -123,12 +123,6 @@ where P::Public: AppPublic + Member + Codec, P::Signature: TryFrom> + Member + Codec, { - // This is an arbitrary value which is likely guaranteed to exceed any reasonable - // limit, as it would correspond to 10 non-included blocks. - // - // Since we only search for parent blocks which have already been imported, - // we can guarantee that all imported blocks respect the unincluded segment - // rules specified by the parachain's runtime and thus will never be too deep. async move { cumulus_client_collator::initialize_collator_subsystems( &mut params.overseer_handle, diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs index 95e5fb25084c..3ef4707098de 100644 --- a/cumulus/client/consensus/aura/src/collators/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/mod.rs @@ -42,6 +42,12 @@ pub mod basic; pub mod lookahead; pub mod slot_based; +// This is an arbitrary value which is likely guaranteed to exceed any reasonable +// limit, as it would correspond to 10 non-included blocks. +// +// Since we only search for parent blocks which have already been imported, +// we can guarantee that all imported blocks respect the unincluded segment +// rules specified by the parachain's runtime and thus will never be too deep. const PARENT_SEARCH_DEPTH: usize = 10; /// Check the `local_validation_code_hash` against the validation code hash in the relay chain diff --git a/cumulus/polkadot-parachain/src/service.rs b/cumulus/polkadot-parachain/src/service.rs index 6af4dab081be..a264cde683f6 100644 --- a/cumulus/polkadot-parachain/src/service.rs +++ b/cumulus/polkadot-parachain/src/service.rs @@ -854,7 +854,7 @@ where slot_drift: Duration::from_secs(1), }; - let (collation_future, block_builer_future) = slot_based::run::< + let (collation_future, block_builder_future) = slot_based::run::< Block, ::Pair, _, @@ -871,7 +871,7 @@ where Some("parachain-block-authoring"), Box::pin(collation_future), ); - block_builer_future.await; + block_builder_future.await; } else { let params = AuraParams { create_inherent_data_providers: move |_, ()| async move { Ok(()) }, @@ -1095,14 +1095,20 @@ fn start_slot_based_aura_consensus( reinitialize: false, slot_drift: Duration::from_secs(1), }; - let (collation_future, block_builer_future) = + + let (collation_future, block_builder_future) = slot_based::run::::Pair, _, _, _, _, _, _, _, _>(params); - task_manager - .spawn_essential_handle() - .spawn("collation-task", None, collation_future); - task_manager - .spawn_essential_handle() - .spawn("block-builder-task", None, block_builer_future); + + task_manager.spawn_essential_handle().spawn( + "collation-task", + Some("parachain-block-authoring"), + collation_future, + ); + task_manager.spawn_essential_handle().spawn( + "block-builder-task", + Some("parachain-block-authoring"), + block_builder_future, + ); Ok(()) } From da96123a780687f3600ac57e00c1400d60d3c7bc Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 19 Jun 2024 16:20:31 +0200 Subject: [PATCH 52/71] Improve pov-recovery test --- cumulus/test/service/src/lib.rs | 13 ++++++------- .../zombienet/tests/0009-elastic_pov_recovery.toml | 2 +- .../zombienet/tests/0009-elastic_pov_recovery.zndsl | 6 ++++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/cumulus/test/service/src/lib.rs b/cumulus/test/service/src/lib.rs index 6ae170103a29..51cdebbaf54e 100644 --- a/cumulus/test/service/src/lib.rs +++ 
b/cumulus/test/service/src/lib.rs @@ -48,7 +48,7 @@ use cumulus_client_cli::{CollatorOptions, RelayChainMode}; use cumulus_client_consensus_common::{ ParachainBlockImport as TParachainBlockImport, ParachainCandidate, ParachainConsensus, }; -use cumulus_client_pov_recovery::RecoveryHandle; +use cumulus_client_pov_recovery::{RecoveryDelayRange, RecoveryHandle}; #[allow(deprecated)] use cumulus_client_service::old_consensus; use cumulus_client_service::{ @@ -413,7 +413,6 @@ where } else { Box::new(overseer_handle.clone()) }; - let is_collator = collator_key.is_some(); let relay_chain_slot_duration = Duration::from_secs(6); start_relay_chain_tasks(StartRelayChainTasksParams { @@ -422,11 +421,11 @@ where para_id, relay_chain_interface: relay_chain_interface.clone(), task_manager: &mut task_manager, - da_recovery_profile: if is_collator { - DARecoveryProfile::Collator - } else { - DARecoveryProfile::FullNode - }, + // Increase speed of recovery for testing purposes. + da_recovery_profile: DARecoveryProfile::Other(RecoveryDelayRange { + min: Duration::from_secs(1), + max: Duration::from_secs(5), + }), import_queue: import_queue_service, relay_chain_slot_duration, recovery_handle, diff --git a/cumulus/zombienet/tests/0009-elastic_pov_recovery.toml b/cumulus/zombienet/tests/0009-elastic_pov_recovery.toml index cd12d09c5b44..bee4f9074272 100644 --- a/cumulus/zombienet/tests/0009-elastic_pov_recovery.toml +++ b/cumulus/zombienet/tests/0009-elastic_pov_recovery.toml @@ -45,4 +45,4 @@ add_to_genesis = true validator = false # full node image = "{{COL_IMAGE}}" command = "test-parachain" - args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug,cumulus-consensus=debug", "--disable-block-announcements", "--bootnodes {{'bob'|zombie('multiAddress')}}", "--in-peers 0", "--out-peers 0", "--", "--reserved-only", "--reserved-nodes {{'alice'|zombie('multiAddress')}}"] + args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug,cumulus-consensus=debug", "--disable-block-announcements", "--bootnodes {{'collator-elastic'|zombie('multiAddress')}}", "--in-peers 0", "--out-peers 0", "--", "--reserved-only", "--reserved-nodes {{'alice'|zombie('multiAddress')}}"] diff --git a/cumulus/zombienet/tests/0009-elastic_pov_recovery.zndsl b/cumulus/zombienet/tests/0009-elastic_pov_recovery.zndsl index ff2dc38e647f..3a805078112c 100644 --- a/cumulus/zombienet/tests/0009-elastic_pov_recovery.zndsl +++ b/cumulus/zombienet/tests/0009-elastic_pov_recovery.zndsl @@ -7,8 +7,10 @@ collator-elastic: is up # wait 20 blocks and register parachain alice: reports block height is at least 20 within 250 seconds -alice: js-script ./register-para.js with "2000" within 240 seconds -alice: parachain 2000 is registered within 300 seconds + +# configure relay chain +alice: js-script ./assign-core.js with "2100,0" return is 0 within 600 seconds +alice: js-script ./assign-core.js with "2100,1" return is 0 within 600 seconds # check block production collator-elastic: reports block height is at least 40 within 225 seconds From 05c38129ef00fa182e57f523deeb799c24c38fa1 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 19 Jun 2024 16:46:27 +0200 Subject: [PATCH 53/71] Increase velocity to 4 --- cumulus/test/runtime/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cumulus/test/runtime/src/lib.rs b/cumulus/test/runtime/src/lib.rs index 14b3bc39225a..042dccf8685f 100644 --- a/cumulus/test/runtime/src/lib.rs +++ b/cumulus/test/runtime/src/lib.rs @@ 
-96,7 +96,7 @@ const BLOCK_PROCESSING_VELOCITY: u32 = 1;
 #[cfg(feature = "elastic-scaling")]
 const UNINCLUDED_SEGMENT_CAPACITY: u32 = 6;
 #[cfg(feature = "elastic-scaling")]
-const BLOCK_PROCESSING_VELOCITY: u32 = 3;
+const BLOCK_PROCESSING_VELOCITY: u32 = 4;

 #[cfg(not(feature = "elastic-scaling"))]
 pub const MILLISECS_PER_BLOCK: u64 = 6000;

From 938a7367f039c8cf0ad1053a07cee0ece684f159 Mon Sep 17 00:00:00 2001
From: Sebastian Kunert
Date: Wed, 19 Jun 2024 16:59:20 +0200
Subject: [PATCH 54/71] Add test to pipeline

---
 .gitlab/pipeline/zombienet/cumulus.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/.gitlab/pipeline/zombienet/cumulus.yml b/.gitlab/pipeline/zombienet/cumulus.yml
index 1f44ffc88fab..6e2b53fae619 100644
--- a/.gitlab/pipeline/zombienet/cumulus.yml
+++ b/.gitlab/pipeline/zombienet/cumulus.yml
@@ -161,3 +161,15 @@ zombienet-cumulus-0008-elastic_authoring:
       --local-dir="${LOCAL_DIR}"
       --concurrency=1
       --test="0008-elastic_authoring.zndsl"
+
+zombienet-cumulus-0009-elastic_pov_recovery:
+  extends:
+    - .zombienet-cumulus-common
+    - .zombienet-refs
+    - .zombienet-before-script
+    - .zombienet-after-script
+  script:
+    - /home/nonroot/zombie-net/scripts/ci/run-test-local-env-manager.sh
+      --local-dir="${LOCAL_DIR}"
+      --concurrency=1
+      --test="0009-elastic_pov_recovery.zndsl"

From fa3f070822ff83214fe8caeca0a0b44ce65f3cd3 Mon Sep 17 00:00:00 2001
From: Sebastian Kunert
Date: Wed, 19 Jun 2024 16:59:31 +0200
Subject: [PATCH 55/71] Add prdoc

---
 prdoc/pr_4097.prdoc | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 prdoc/pr_4097.prdoc

diff --git a/prdoc/pr_4097.prdoc b/prdoc/pr_4097.prdoc
new file mode 100644
index 000000000000..e9b6c264ec68
--- /dev/null
+++ b/prdoc/pr_4097.prdoc
@@ -0,0 +1,19 @@
+# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0
+# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json
+
+title: Introduce experimental slot-based collator
+
+doc:
+  - audience: Node Operator
+    description: |
+      Introduces an experimental collator that is fit for elastic scaling.
+      It can be activated on the `test-parachain` and `polkadot-parachain` binaries via the
+      `--experimental-use-slot-based` flag. The current implementation has MVP status and is
+      purely for testing. Behaviour can change at any time and should not be relied upon in
+      environments with any stability requirements.
+ +crates: + - name: cumulus-client-consensus-aura + bump: minor + - name: cumulus-client-consensus-common + bump: minor From edfe4744da8850da8407151bb4113675fa1a3a83 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 19 Jun 2024 17:16:42 +0200 Subject: [PATCH 56/71] Abort block builder task if we can not send to collation task --- .../aura/src/collators/slot_based/block_builder_task.rs | 2 +- cumulus/test/runtime/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index c6145c870704..07687ddd2550 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -376,7 +376,7 @@ where core_index: *core_index, }) { tracing::error!(target: crate::LOG_TARGET, ?err, "Unable to send block to collation task."); - break + return } } } diff --git a/cumulus/test/runtime/src/lib.rs b/cumulus/test/runtime/src/lib.rs index 042dccf8685f..4424b17f1499 100644 --- a/cumulus/test/runtime/src/lib.rs +++ b/cumulus/test/runtime/src/lib.rs @@ -94,7 +94,7 @@ const UNINCLUDED_SEGMENT_CAPACITY: u32 = 4; const BLOCK_PROCESSING_VELOCITY: u32 = 1; #[cfg(feature = "elastic-scaling")] -const UNINCLUDED_SEGMENT_CAPACITY: u32 = 6; +const UNINCLUDED_SEGMENT_CAPACITY: u32 = 7; #[cfg(feature = "elastic-scaling")] const BLOCK_PROCESSING_VELOCITY: u32 = 4; From b1490ebf5b20b00641b60b542b90595fdc7eed57 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 19 Jun 2024 17:20:08 +0200 Subject: [PATCH 57/71] Fixes --- Cargo.lock | 15 --------------- cumulus/client/consensus/aura/Cargo.toml | 2 -- .../collators/slot_based/block_builder_task.rs | 1 + cumulus/client/parachain-inherent/Cargo.toml | 4 ---- .../client/relay-chain-minimal-node/Cargo.toml | 8 -------- .../client/relay-chain-rpc-interface/Cargo.toml | 1 - .../client/relay-chain-rpc-interface/src/lib.rs | 2 +- cumulus/polkadot-parachain/src/service.rs | 2 -- 8 files changed, 2 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f9d76409637f..4f38c40e94b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3683,9 +3683,7 @@ dependencies = [ "sc-client-api", "sc-consensus", "sc-consensus-aura", - "sc-consensus-babe", "sc-consensus-slots", - "sc-service", "sc-telemetry", "sc-utils", "schnellru", @@ -3823,15 +3821,11 @@ dependencies = [ "cumulus-test-relay-sproof-builder", "parity-scale-codec", "sc-client-api", - "scale-info", - "sp-api", "sp-crypto-hashing", "sp-inherents", "sp-runtime", "sp-state-machine", - "sp-std 14.0.0", "sp-storage 19.0.0", - "sp-trie", "tracing", ] @@ -4250,15 +4244,8 @@ dependencies = [ "cumulus-relay-chain-interface", "cumulus-relay-chain-rpc-interface", "futures", - "parking_lot 0.12.1", - "polkadot-availability-recovery", - "polkadot-collator-protocol", "polkadot-core-primitives", "polkadot-network-bridge", - "polkadot-node-collation-generation", - "polkadot-node-core-chain-api", - "polkadot-node-core-prospective-parachains", - "polkadot-node-core-runtime-api", "polkadot-node-network-protocol", "polkadot-node-subsystem-util", "polkadot-overseer", @@ -4277,7 +4264,6 @@ dependencies = [ "sp-consensus-babe", "sp-runtime", "substrate-prometheus-endpoint", - "tokio", "tracing", ] @@ -4304,7 +4290,6 @@ dependencies = [ "serde_json", "smoldot", "smoldot-light", - "sp-api", "sp-authority-discovery", "sp-consensus-babe", "sp-core", diff 
--git a/cumulus/client/consensus/aura/Cargo.toml b/cumulus/client/consensus/aura/Cargo.toml index a59ed9d42bb4..d43f029f9a43 100644 --- a/cumulus/client/consensus/aura/Cargo.toml +++ b/cumulus/client/consensus/aura/Cargo.toml @@ -21,7 +21,6 @@ tokio = { version = "1.36.0", features = ["sync"] } sc-client-api = { path = "../../../../substrate/client/api" } sc-consensus = { path = "../../../../substrate/client/consensus/common" } sc-consensus-aura = { path = "../../../../substrate/client/consensus/aura" } -sc-consensus-babe = { path = "../../../../substrate/client/consensus/babe" } sc-consensus-slots = { path = "../../../../substrate/client/consensus/slots" } sc-telemetry = { path = "../../../../substrate/client/telemetry" } sp-api = { path = "../../../../substrate/primitives/api" } @@ -36,7 +35,6 @@ sp-keystore = { path = "../../../../substrate/primitives/keystore" } sp-runtime = { path = "../../../../substrate/primitives/runtime" } sp-timestamp = { path = "../../../../substrate/primitives/timestamp" } sp-state-machine = { path = "../../../../substrate/primitives/state-machine" } -sc-service = { path = "../../../../substrate/client/service" } prometheus-endpoint = { package = "substrate-prometheus-endpoint", path = "../../../../substrate/utils/prometheus" } # Cumulus diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 07687ddd2550..c17739c1b06e 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -186,6 +186,7 @@ where P::Signature: TryFrom> + Member + Codec, { async move { + tracing::info!(target: LOG_TARGET, "Starting slot-based block-builder task."); let BuilderTaskParams { relay_client, create_inherent_data_providers, diff --git a/cumulus/client/parachain-inherent/Cargo.toml b/cumulus/client/parachain-inherent/Cargo.toml index 85619e840345..9481281ef4d1 100644 --- a/cumulus/client/parachain-inherent/Cargo.toml +++ b/cumulus/client/parachain-inherent/Cargo.toml @@ -9,19 +9,15 @@ license = "Apache-2.0" [dependencies] async-trait = "0.1.79" codec = { package = "parity-scale-codec", version = "3.6.12", features = ["derive"] } -scale-info = { version = "2.11.1", features = ["derive"] } tracing = { version = "0.1.37" } # Substrate sc-client-api = { path = "../../../substrate/client/api" } -sp-api = { path = "../../../substrate/primitives/api" } sp-crypto-hashing = { path = "../../../substrate/primitives/crypto/hashing" } sp-inherents = { path = "../../../substrate/primitives/inherents" } sp-runtime = { path = "../../../substrate/primitives/runtime" } sp-state-machine = { path = "../../../substrate/primitives/state-machine" } -sp-std = { path = "../../../substrate/primitives/std" } sp-storage = { path = "../../../substrate/primitives/storage" } -sp-trie = { path = "../../../substrate/primitives/trie" } # Cumulus cumulus-primitives-core = { path = "../../primitives/core" } diff --git a/cumulus/client/relay-chain-minimal-node/Cargo.toml b/cumulus/client/relay-chain-minimal-node/Cargo.toml index 0b541092a3de..e2c7426b28b4 100644 --- a/cumulus/client/relay-chain-minimal-node/Cargo.toml +++ b/cumulus/client/relay-chain-minimal-node/Cargo.toml @@ -17,13 +17,7 @@ polkadot-overseer = { path = "../../../polkadot/node/overseer" } polkadot-node-subsystem-util = { path = "../../../polkadot/node/subsystem-util" } polkadot-node-network-protocol = { path = 
"../../../polkadot/node/network/protocol" } -polkadot-availability-recovery = { path = "../../../polkadot/node/network/availability-recovery" } -polkadot-collator-protocol = { path = "../../../polkadot/node/network/collator-protocol" } polkadot-network-bridge = { path = "../../../polkadot/node/network/bridge" } -polkadot-node-collation-generation = { path = "../../../polkadot/node/collation-generation" } -polkadot-node-core-runtime-api = { path = "../../../polkadot/node/core/runtime-api" } -polkadot-node-core-chain-api = { path = "../../../polkadot/node/core/chain-api" } -polkadot-node-core-prospective-parachains = { path = "../../../polkadot/node/core/prospective-parachains" } polkadot-service = { path = "../../../polkadot/node/service" } # substrate deps @@ -40,7 +34,6 @@ sp-consensus-babe = { path = "../../../substrate/primitives/consensus/babe" } sp-consensus = { path = "../../../substrate/primitives/consensus/common" } sp-runtime = { path = "../../../substrate/primitives/runtime" } sp-blockchain = { path = "../../../substrate/primitives/blockchain" } -tokio = { version = "1.32.0", features = ["macros"] } # cumulus deps cumulus-relay-chain-interface = { path = "../relay-chain-interface" } @@ -51,4 +44,3 @@ array-bytes = "6.2.2" tracing = "0.1.37" async-trait = "0.1.79" futures = "0.3.28" -parking_lot = "0.12.1" diff --git a/cumulus/client/relay-chain-rpc-interface/Cargo.toml b/cumulus/client/relay-chain-rpc-interface/Cargo.toml index ea6bc2ede4c0..b7e4ee11669f 100644 --- a/cumulus/client/relay-chain-rpc-interface/Cargo.toml +++ b/cumulus/client/relay-chain-rpc-interface/Cargo.toml @@ -15,7 +15,6 @@ polkadot-overseer = { path = "../../../polkadot/node/overseer" } cumulus-primitives-core = { path = "../../primitives/core" } cumulus-relay-chain-interface = { path = "../relay-chain-interface" } -sp-api = { path = "../../../substrate/primitives/api" } sp-core = { path = "../../../substrate/primitives/core" } sp-consensus-babe = { path = "../../../substrate/primitives/consensus/babe" } sp-authority-discovery = { path = "../../../substrate/primitives/authority-discovery" } diff --git a/cumulus/client/relay-chain-rpc-interface/src/lib.rs b/cumulus/client/relay-chain-rpc-interface/src/lib.rs index 18d3346bc99f..692a1fb537a8 100644 --- a/cumulus/client/relay-chain-rpc-interface/src/lib.rs +++ b/cumulus/client/relay-chain-rpc-interface/src/lib.rs @@ -252,7 +252,7 @@ impl RelayChainInterface for RelayChainRpcInterface { async fn version(&self, relay_parent: RelayHash) -> RelayChainResult { self.rpc_client.runtime_version(relay_parent).await } - + async fn availability_cores( &self, relay_parent: RelayHash, diff --git a/cumulus/polkadot-parachain/src/service.rs b/cumulus/polkadot-parachain/src/service.rs index a264cde683f6..3021fdaa6ed2 100644 --- a/cumulus/polkadot-parachain/src/service.rs +++ b/cumulus/polkadot-parachain/src/service.rs @@ -829,7 +829,6 @@ where // Move to Aura consensus. 
let proposer = Proposer::new(proposer_factory); if use_experimental_slot_based { - log::info!("Starting block authoring with slot based authoring."); let client_for_aura = client.clone(); let params = SlotBasedParams { create_inherent_data_providers: move |_, ()| async move { Ok(()) }, @@ -1058,7 +1057,6 @@ fn start_slot_based_aura_consensus( announce_block: Arc>) + Send + Sync>, backend: Arc, ) -> Result<(), sc_service::Error> { - log::info!("Starting block authoring with slot based authoring."); let proposer_factory = sc_basic_authorship::ProposerFactory::with_proof_recording( task_manager.spawn_handle(), client.clone(), From 9d149896e837557b450bf928f2cc9a823cac2b3a Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 19 Jun 2024 18:01:12 +0200 Subject: [PATCH 58/71] Comment adjustments --- .../aura/src/collators/slot_based/block_builder_task.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index c17739c1b06e..1f40b35eac05 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -90,6 +90,8 @@ pub struct BuilderTaskParams< /// Slot duration of the relay chain pub relay_chain_slot_duration: Duration, /// Drift every slot by this duration. + /// This can be used to shift slots in relation to relay chain slots, + /// making it less likely to to encounter unlucky notification arrival timings. pub slot_drift: Duration, } From 96d885aa87d1694b08412bad0f8f5301fd08ad7c Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 20 Jun 2024 09:50:48 +0200 Subject: [PATCH 59/71] Fix pov-recovery test --- cumulus/client/pov-recovery/src/tests.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cumulus/client/pov-recovery/src/tests.rs b/cumulus/client/pov-recovery/src/tests.rs index 75bf308ef27a..6f274ed18b6b 100644 --- a/cumulus/client/pov-recovery/src/tests.rs +++ b/cumulus/client/pov-recovery/src/tests.rs @@ -17,7 +17,9 @@ use super::*; use assert_matches::assert_matches; use codec::{Decode, Encode}; -use cumulus_primitives_core::relay_chain::{BlockId, CandidateCommitments, CandidateDescriptor}; +use cumulus_primitives_core::relay_chain::{ + BlockId, CandidateCommitments, CandidateDescriptor, CoreState, +}; use cumulus_relay_chain_interface::{ InboundDownwardMessage, InboundHrmpMessage, OccupiedCoreAssumption, PHash, PHeader, PersistedValidationData, StorageValue, ValidationCodeHash, ValidatorId, @@ -478,6 +480,13 @@ impl RelayChainInterface for Relaychain { async fn header(&self, _: BlockId) -> RelayChainResult> { unimplemented!("Not needed for test"); } + + async fn availability_cores( + &self, + _: PHash, + ) -> RelayChainResult>>> { + unimplemented!("Not needed for test"); + } } fn make_candidate_chain(candidate_number_range: Range) -> Vec { From eb614494e6ecc9c672269ea0a00528d618c14a45 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 20 Jun 2024 10:23:41 +0200 Subject: [PATCH 60/71] Fix prdoc semver --- prdoc/pr_4097.prdoc | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/prdoc/pr_4097.prdoc b/prdoc/pr_4097.prdoc index e9b6c264ec68..7d4cf2348fc8 100644 --- a/prdoc/pr_4097.prdoc +++ b/prdoc/pr_4097.prdoc @@ -17,3 +17,23 @@ crates: bump: minor - name: cumulus-client-consensus-common bump: minor + - name: cumulus-pallet-aura-ext: + bump: minor + - name: 
cumulus-relay-chain-interface: + bump: major + - name: sc-consensus-slots: + bump: minor + - name: sc-basic-authorship: + bump: minor + - name: cumulus-client-network: + validate: false + - name: cumulus-relay-chain-inprocess-interface: + bump: minor + - name: sc-consensus-aura: + bump: minor + - name: cumulus-relay-chain-rpc-interface: + bump: minor + - name: polkadot-parachain-bin: + bump: minor + - name: polkadot: + validate: false From cb160533908ff13bfc352aac15bd7535f3419190 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 20 Jun 2024 10:36:24 +0200 Subject: [PATCH 61/71] Add slot based collator for default runtimes or chainspecs --- cumulus/polkadot-parachain/src/command.rs | 1 + cumulus/polkadot-parachain/src/service.rs | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cumulus/polkadot-parachain/src/command.rs b/cumulus/polkadot-parachain/src/command.rs index ffc7bd1b77a4..1a40cec3367d 100644 --- a/cumulus/polkadot-parachain/src/command.rs +++ b/cumulus/polkadot-parachain/src/command.rs @@ -808,6 +808,7 @@ async fn start_node>( polkadot_config, collator_options, id, + use_experimental_slot_based, hwbench, ) .await diff --git a/cumulus/polkadot-parachain/src/service.rs b/cumulus/polkadot-parachain/src/service.rs index 4232486352aa..da0ce0fc070f 100644 --- a/cumulus/polkadot-parachain/src/service.rs +++ b/cumulus/polkadot-parachain/src/service.rs @@ -413,8 +413,14 @@ pub async fn start_rococo_parachain_node>( polkadot_config: Configuration, collator_options: CollatorOptions, para_id: ParaId, + use_experimental_slot_based: bool, hwbench: Option, ) -> sc_service::error::Result<(TaskManager, Arc>)> { + let consensus_starter = if use_experimental_slot_based { + start_slot_based_aura_consensus + } else { + start_lookahead_aura_consensus + }; start_node_impl::( parachain_config, polkadot_config, @@ -423,7 +429,7 @@ pub async fn start_rococo_parachain_node>( para_id, build_parachain_rpc_extensions::, build_aura_import_queue, - start_lookahead_aura_consensus, + consensus_starter, hwbench, ) .await From 48267c2669e47a696f044d56cc0d3d0e03f042f6 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 20 Jun 2024 10:43:11 +0200 Subject: [PATCH 62/71] PRDoc one more time --- prdoc/pr_4097.prdoc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/prdoc/pr_4097.prdoc b/prdoc/pr_4097.prdoc index 7d4cf2348fc8..995a8a1540fb 100644 --- a/prdoc/pr_4097.prdoc +++ b/prdoc/pr_4097.prdoc @@ -17,23 +17,23 @@ crates: bump: minor - name: cumulus-client-consensus-common bump: minor - - name: cumulus-pallet-aura-ext: + - name: cumulus-pallet-aura-ext bump: minor - - name: cumulus-relay-chain-interface: + - name: cumulus-relay-chain-interface bump: major - - name: sc-consensus-slots: + - name: sc-consensus-slots bump: minor - - name: sc-basic-authorship: + - name: sc-basic-authorship bump: minor - - name: cumulus-client-network: + - name: cumulus-client-network validate: false - - name: cumulus-relay-chain-inprocess-interface: + - name: cumulus-relay-chain-inprocess-interface bump: minor - - name: sc-consensus-aura: + - name: sc-consensus-aura bump: minor - - name: cumulus-relay-chain-rpc-interface: + - name: cumulus-relay-chain-rpc-interface bump: minor - - name: polkadot-parachain-bin: + - name: polkadot-parachain-bin bump: minor - - name: polkadot: + - name: polkadot validate: false From d595c7bf02b1a0e9bc587acc2c453cbfcd4ba96e Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Fri, 21 Jun 2024 16:23:20 +0200 Subject: [PATCH 63/71] Fix 
collator image Co-authored-by: Javier Viola <363911+pepoviola@users.noreply.github.com> --- cumulus/zombienet/tests/0008-elastic_authoring.toml | 4 ++-- cumulus/zombienet/tests/0009-elastic_pov_recovery.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cumulus/zombienet/tests/0008-elastic_authoring.toml b/cumulus/zombienet/tests/0008-elastic_authoring.toml index 80425f74c7b3..f2e2010a9e45 100644 --- a/cumulus/zombienet/tests/0008-elastic_authoring.toml +++ b/cumulus/zombienet/tests/0008-elastic_authoring.toml @@ -34,7 +34,7 @@ add_to_genesis = true [[parachains.collators]] name = "collator-elastic" - image = "{{CUMULUS_IMAGE}}" + image = "{{COL_IMAGE}}" command = "test-parachain" args = ["-laura=trace,runtime=info,cumulus-consensus=trace,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--experimental-use-slot-based"] @@ -45,6 +45,6 @@ add_to_genesis = true [[parachains.collators]] name = "collator-single-core" - image = "{{CUMULUS_IMAGE}}" + image = "{{COL_IMAGE}}" command = "test-parachain" args = ["-laura=trace,runtime=info,cumulus-consensus=trace,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--experimental-use-slot-based"] diff --git a/cumulus/zombienet/tests/0009-elastic_pov_recovery.toml b/cumulus/zombienet/tests/0009-elastic_pov_recovery.toml index bee4f9074272..9b296e8a8b36 100644 --- a/cumulus/zombienet/tests/0009-elastic_pov_recovery.toml +++ b/cumulus/zombienet/tests/0009-elastic_pov_recovery.toml @@ -35,7 +35,7 @@ add_to_genesis = true # Slot based authoring with 3 cores and 2s slot duration [[parachains.collators]] name = "collator-elastic" - image = "{{CUMULUS_IMAGE}}" + image = "{{COL_IMAGE}}" command = "test-parachain" args = ["--disable-block-announcements", "-laura=trace,runtime=info,cumulus-consensus=trace,consensus::common=trace,parachain::collation-generation=trace,parachain::collator-protocol=trace,parachain=debug", "--force-authoring", "--experimental-use-slot-based"] From 62d5b9dfa9ef3b16fdcf5799c2a11bcabf0459fe Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 4 Jul 2024 15:58:09 +0200 Subject: [PATCH 64/71] post-merge adjustments --- Cargo.lock | 12 ++----- cumulus/polkadot-parachain/src/command.rs | 1 + cumulus/polkadot-parachain/src/service.rs | 41 ++++++++++++++--------- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7b08e96f4bbe..f3808b7eaa53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3772,6 +3772,7 @@ dependencies = [ "sc-consensus-babe", "sc-consensus-slots", "sc-telemetry", + "sc-utils", "schnellru", "sp-api", "sp-application-crypto", @@ -3786,6 +3787,7 @@ dependencies = [ "sp-state-machine", "sp-timestamp", "substrate-prometheus-endpoint", + "tokio", "tracing", ] @@ -3906,7 +3908,6 @@ dependencies = [ "cumulus-test-relay-sproof-builder", "parity-scale-codec", "sc-client-api", - "scale-info", "sp-api", "sp-crypto-hashing", "sp-inherents", @@ -4333,15 +4334,8 @@ dependencies = [ "cumulus-relay-chain-interface", "cumulus-relay-chain-rpc-interface", "futures", - "parking_lot 0.12.3", - "polkadot-availability-recovery", - "polkadot-collator-protocol", "polkadot-core-primitives", "polkadot-network-bridge", - "polkadot-node-collation-generation", - "polkadot-node-core-chain-api", - "polkadot-node-core-prospective-parachains", - "polkadot-node-core-runtime-api", "polkadot-node-network-protocol", 
"polkadot-node-subsystem-util", "polkadot-overseer", @@ -4535,7 +4529,6 @@ dependencies = [ "polkadot-test-service", "portpicker", "rand", - "rococo-parachain-runtime", "sc-basic-authorship", "sc-block-builder", "sc-chain-spec", @@ -4560,7 +4553,6 @@ dependencies = [ "sp-blockchain", "sp-consensus", "sp-consensus-aura", - "sp-consensus-grandpa", "sp-core", "sp-io", "sp-keyring", diff --git a/cumulus/polkadot-parachain/src/command.rs b/cumulus/polkadot-parachain/src/command.rs index 0fad71556624..323216f300d8 100644 --- a/cumulus/polkadot-parachain/src/command.rs +++ b/cumulus/polkadot-parachain/src/command.rs @@ -868,6 +868,7 @@ async fn start_node>( polkadot_config, collator_options, id, + use_experimental_slot_based, hwbench, ) .await diff --git a/cumulus/polkadot-parachain/src/service.rs b/cumulus/polkadot-parachain/src/service.rs index 1a373d0ee7e7..cbf3e6b91f7e 100644 --- a/cumulus/polkadot-parachain/src/service.rs +++ b/cumulus/polkadot-parachain/src/service.rs @@ -412,9 +412,9 @@ pub async fn start_rococo_parachain_node>( hwbench: Option, ) -> sc_service::error::Result<(TaskManager, Arc>)> { let consensus_starter = if use_experimental_slot_based { - start_slot_based_aura_consensus + start_slot_based_aura_consensus::<_, AuraId> } else { - start_lookahead_aura_consensus + start_lookahead_aura_consensus::<_, AuraId> }; start_node_impl::( parachain_config, @@ -595,9 +595,9 @@ pub async fn start_generic_aura_async_backing_node, ) -> sc_service::error::Result<(TaskManager, Arc>)> { let consensus_starter = if use_experimental_slot_based { - start_slot_based_aura_consensus + start_slot_based_aura_consensus::<_, AuraId> } else { - start_lookahead_aura_consensus + start_lookahead_aura_consensus::<_, AuraId> }; start_node_impl::( parachain_config, @@ -635,15 +635,21 @@ where AuraId: AuraIdT + Sync, Net: NetworkBackend, { + let consensus_starter = if use_experimental_slot_based { + start_slot_based_aura_consensus::<_, AuraId> + } else { + start_lookahead_aura_consensus::<_, AuraId> + }; + start_node_impl::( parachain_config, polkadot_config, collator_options, CollatorSybilResistance::Resistant, // Aura para_id, - build_parachain_rpc_extensions::, + build_parachain_rpc_extensions, build_relay_to_aura_import_queue::<_, AuraId>, - start_lookahead_aura_consensus::, + consensus_starter, hwbench, ) .await @@ -819,14 +825,14 @@ where Ok(()) } /// Start consensus using the lookahead aura collator. 
-fn start_slot_based_aura_consensus( - client: Arc>, - block_import: ParachainBlockImport, +fn start_slot_based_aura_consensus( + client: Arc>, + block_import: ParachainBlockImport, prometheus_registry: Option<&Registry>, telemetry: Option, task_manager: &TaskManager, relay_chain_interface: Arc, - transaction_pool: Arc>>, + transaction_pool: Arc>>, keystore: KeystorePtr, relay_chain_slot_duration: Duration, para_id: ParaId, @@ -834,7 +840,12 @@ fn start_slot_based_aura_consensus( _overseer_handle: OverseerHandle, announce_block: Arc>) + Send + Sync>, backend: Arc, -) -> Result<(), sc_service::Error> { +) -> Result<(), sc_service::Error> +where + RuntimeApi: ConstructNodeRuntimeApi>, + RuntimeApi::RuntimeApi: AuraRuntimeApi, + AuraId: AuraIdT + Sync, +{ let proposer_factory = sc_basic_authorship::ProposerFactory::with_proof_recording( task_manager.spawn_handle(), client.clone(), @@ -900,9 +911,9 @@ pub async fn start_basic_async_backing_node>( hwbench: Option, ) -> sc_service::error::Result<(TaskManager, Arc>)> { let consensus_starter = if use_experimental_slot_based { - start_slot_based_aura_consensus + start_slot_based_aura_consensus::<_, AuraId> } else { - start_lookahead_aura_consensus + start_lookahead_aura_consensus::<_, AuraId> }; start_node_impl::( parachain_config, @@ -928,9 +939,9 @@ pub async fn start_contracts_rococo_node>( hwbench: Option, ) -> sc_service::error::Result<(TaskManager, Arc>)> { let consensus_starter = if use_experimental_slot_based { - start_slot_based_aura_consensus + start_slot_based_aura_consensus::<_, AuraId> } else { - start_lookahead_aura_consensus + start_lookahead_aura_consensus::<_, AuraId> }; start_node_impl::( parachain_config, From 7bc28ff632a1346c87d9476bcde754aa8c25624d Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Mon, 1 Jul 2024 16:00:54 +0200 Subject: [PATCH 65/71] Review comments --- .../aura/src/collators/slot_based/mod.rs | 18 +++++------------- .../consensus/common/src/parent_search.rs | 15 ++++++++++++--- cumulus/polkadot-parachain/Cargo.toml | 2 +- cumulus/polkadot-parachain/src/cli.rs | 5 +++-- cumulus/test/service/src/cli.rs | 5 +++-- 5 files changed, 24 insertions(+), 21 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs index e6a86fac61c5..0fe49d58d25b 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/mod.rs @@ -22,19 +22,11 @@ //! 2. A collator task that transforms the blocks into a collation and submits them to the relay //! chain. //! -//! This collator also builds additional blocks when the maximum backlog is not saturated. -//! The size of the backlog is determined by invoking a runtime API. If that runtime API -//! is not supported, this assumes a maximum backlog size of 1. -//! -//! This takes more advantage of asynchronous backing, though not complete advantage. -//! When the backlog is not saturated, this approach lets the backlog temporarily 'catch up' -//! with periods of higher throughput. When the backlog is saturated, we typically -//! fall back to the limited cadence of a single parachain block per relay-chain block. -//! -//! Despite this, the fact that there is a backlog at all allows us to spend more time -//! building the block, as there is some buffer before it can get posted to the relay-chain. -//! The main limitation is block propagation time - i.e. the new blocks created by an author -//! 
must be propagated to the next author before their turn. +//! Blocks are built on every parachain slot if there is a core scheduled on the relay chain. At the +//! beginning of each block building loop, we determine how many blocks we expect to build per relay +//! chain block. The collator implementation then expects that we have that many cores scheduled +//! during the relay chain block. After the block is built, the block builder task sends it to +//! the collation task which compresses it and submits it to the collation-generation subsystem. use codec::Codec; use consensus_common::ParachainCandidate; diff --git a/cumulus/client/consensus/common/src/parent_search.rs b/cumulus/client/consensus/common/src/parent_search.rs index b84f21f6f588..f9bc1471495e 100644 --- a/cumulus/client/consensus/common/src/parent_search.rs +++ b/cumulus/client/consensus/common/src/parent_search.rs @@ -117,7 +117,7 @@ pub async fn find_potential_parents( // Pending header and hash. let maybe_pending = { // Fetch the pending header from the relay chain. We use `OccupiedCoreAssumption::Included` - // so the candidate pending availability gets enacted before being returned to us. + // so the candidate pending availability gets enacted before being returned to us. let pending_header = relay_client .persisted_validation_data( params.relay_parent, @@ -254,7 +254,7 @@ async fn fetch_included_from_relay_chain( let included_hash = included_header.hash(); // If the included block is not locally known, we can't do anything. match backend.blockchain().header(included_hash) { - Ok(None) | Err(_) => { + Ok(None) => { tracing::warn!( target: PARENT_SEARCH_LOG_TARGET, %included_hash, @@ -262,6 +262,15 @@ async fn fetch_included_from_relay_chain( ); return Ok(None) }, + Err(e) => { + tracing::warn!( + target: PARENT_SEARCH_LOG_TARGET, + %included_hash, + %e, + "Failed to get header for included block.", + ); + return Ok(None) + }, _ => {}, }; @@ -360,7 +369,7 @@ pub fn search_child_branches_for_parents( let child_depth = entry.depth + 1; let hash = entry.hash; - tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, root_in_ancestry = is_potential && !is_pending && !is_included, ?hash, is_pending, is_included, "Checking potential parent."); + tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?hash, is_potential, is_pending, is_included, "Checking potential parent."); if is_potential { potential_parents.push(entry); } diff --git a/cumulus/polkadot-parachain/Cargo.toml b/cumulus/polkadot-parachain/Cargo.toml index f38b7bde51e4..7085211dad26 100644 --- a/cumulus/polkadot-parachain/Cargo.toml +++ b/cumulus/polkadot-parachain/Cargo.toml @@ -24,7 +24,6 @@ hex-literal = { workspace = true, default-features = true } log = { workspace = true, default-features = true } serde = { features = ["derive"], workspace = true, default-features = true } serde_json = { workspace = true, default-features = true } -tokio = { version = "1.32.0", features = ["macros", "parking_lot", "time"] } # Local rococo-parachain-runtime = { workspace = true } @@ -121,6 +120,7 @@ substrate-build-script-utils = { workspace = true, default-features = true } assert_cmd = { workspace = true } nix = { features = ["signal"], workspace = true } tempfile = { workspace = true } +tokio = { version = "1.32.0", features = ["macros", "parking_lot", "time"] } wait-timeout = { workspace = true } [features] diff --git a/cumulus/polkadot-parachain/src/cli.rs b/cumulus/polkadot-parachain/src/cli.rs index 88e1b6478081..7c01e34f9a03 100644 --- a/cumulus/polkadot-parachain/src/cli.rs +++ 
b/cumulus/polkadot-parachain/src/cli.rs @@ -73,8 +73,9 @@ pub struct Cli { #[command(flatten)] pub run: cumulus_client_cli::RunCmd, - /// EXPERIMENTAL: Use slot-based collator which can handle elastic scaling. Use with care, this - /// flag is unstable and subject to change. + /// EXPERIMENTAL: Use slot-based collator which can handle elastic scaling. + /// + /// Use with care, this flag is unstable and subject to change. #[arg(long)] pub experimental_use_slot_based: bool, diff --git a/cumulus/test/service/src/cli.rs b/cumulus/test/service/src/cli.rs index e9c075555f59..37ca27542cbf 100644 --- a/cumulus/test/service/src/cli.rs +++ b/cumulus/test/service/src/cli.rs @@ -51,8 +51,9 @@ pub struct TestCollatorCli { #[arg(long)] pub fail_pov_recovery: bool, - /// EXPERIMENTAL: Use slot-based collator which can handle elastic scaling. Use with care, this - /// flag is unstable and subject to change. + /// EXPERIMENTAL: Use slot-based collator which can handle elastic scaling. + /// + /// Use with care, this flag is unstable and subject to change. #[arg(long)] pub experimental_use_slot_based: bool, } From d986ee77f032ede79af69a96e4b0739108ac9b11 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 4 Jul 2024 23:03:18 +0200 Subject: [PATCH 66/71] Apply suggestions from code review Co-authored-by: Davide Galassi --- .../src/collators/slot_based/block_builder_task.rs | 9 ++++++--- cumulus/client/consensus/common/src/parent_search.rs | 11 +++++++++-- cumulus/polkadot-parachain/src/service.rs | 1 + 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 1f40b35eac05..4ac8f8ebb66a 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -90,8 +90,11 @@ pub struct BuilderTaskParams< /// Slot duration of the relay chain pub relay_chain_slot_duration: Duration, /// Drift every slot by this duration. - /// This can be used to shift slots in relation to relay chain slots, - /// making it less likely to to encounter unlucky notification arrival timings. + /// This is a time quantity that is subtracted from the actual timestamp when computing + /// the time left to enter a new slot. In practice, this *left-shifts* the clock time with the intent + /// to keep our "clock" slightly behind the relay chain one and thus reducing the likelihood of + /// encountering unfavorable notification arrival timings (i.e. we don't want to wait for relay + /// chain notifications because we woke up too early). pub slot_drift: Duration, } @@ -114,7 +117,7 @@ fn duration_now() -> Duration { use std::time::SystemTime; let now = SystemTime::now(); now.duration_since(SystemTime::UNIX_EPOCH).unwrap_or_else(|e| { - panic!("Current time {:?} is before unix epoch. Something is wrong: {:?}", now, e) + panic!("Current time {:?} is before Unix epoch. Something is wrong: {:?}", now, e) }) } diff --git a/cumulus/client/consensus/common/src/parent_search.rs b/cumulus/client/consensus/common/src/parent_search.rs index f9bc1471495e..ade00c5b5bef 100644 --- a/cumulus/client/consensus/common/src/parent_search.rs +++ b/cumulus/client/consensus/common/src/parent_search.rs @@ -116,7 +116,7 @@ pub async fn find_potential_parents( // Pending header and hash. let maybe_pending = { - // Fetch the pending header from the relay chain. 
We use `OccupiedCoreAssumption::Included` + // Fetch the most recent pending header from the relay chain. We use `OccupiedCoreAssumption::Included` // so the candidate pending availability gets enacted before being returned to us. let pending_header = relay_client .persisted_validation_data( @@ -345,7 +345,14 @@ pub fn search_child_branches_for_parents( .map_or(true, |route| route.enacted().iter().any(|x| x.hash == hash)) }; - tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?included_hash, included_num = ?included_header.number(), ?pending_hash , ?rp_ancestry, "Searching relay chain ancestry."); + tracing::trace!( + target: PARENT_SEARCH_LOG_TARGET, + ?included_hash, + included_num = ?included_header.number(), + ?pending_hash , + ?rp_ancestry, + "Searching relay chain ancestry." + ); while let Some(entry) = frontier.pop() { let is_pending = pending_hash.as_ref().map_or(false, |h| &entry.hash == h); let is_included = included_hash == entry.hash; diff --git a/cumulus/polkadot-parachain/src/service.rs b/cumulus/polkadot-parachain/src/service.rs index cbf3e6b91f7e..0f2aed8ee4d8 100644 --- a/cumulus/polkadot-parachain/src/service.rs +++ b/cumulus/polkadot-parachain/src/service.rs @@ -824,6 +824,7 @@ where Ok(()) } + /// Start consensus using the lookahead aura collator. fn start_slot_based_aura_consensus( client: Arc>, From dd36c44684872718e326c3e6bc8d05d4daa3c556 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 4 Jul 2024 23:37:06 +0200 Subject: [PATCH 67/71] Remove unnecessary pending condition in parent search Co-authored-by: Davide Galassi --- cumulus/client/consensus/common/src/parent_search.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cumulus/client/consensus/common/src/parent_search.rs b/cumulus/client/consensus/common/src/parent_search.rs index ade00c5b5bef..24ecf4cde1a7 100644 --- a/cumulus/client/consensus/common/src/parent_search.rs +++ b/cumulus/client/consensus/common/src/parent_search.rs @@ -298,13 +298,9 @@ async fn build_relay_parent_ancestry( let Some(header) = relay_client.header(RBlockId::hash(current_rp)).await? else { break }; let session = relay_client.session_index_for_child(current_rp).await?; - if let Some(required_session) = required_session { + if required_session.get_or_insert(session) != &session { // Respect the relay-chain rule not to cross session boundaries. - if session != required_session { - break - } - } else { - required_session = Some(session); + break; } ancestry.push((current_rp, *header.state_root())); From 91c6539b742dc61e4b2892a419ac114fb77288d3 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 4 Jul 2024 23:37:47 +0200 Subject: [PATCH 68/71] Adjust comments and tracing log --- .../client/consensus/common/src/parent_search.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cumulus/client/consensus/common/src/parent_search.rs b/cumulus/client/consensus/common/src/parent_search.rs index 24ecf4cde1a7..2faf2fe40b1c 100644 --- a/cumulus/client/consensus/common/src/parent_search.rs +++ b/cumulus/client/consensus/common/src/parent_search.rs @@ -116,8 +116,9 @@ pub async fn find_potential_parents( // Pending header and hash. let maybe_pending = { - // Fetch the most recent pending header from the relay chain. We use `OccupiedCoreAssumption::Included` - // so the candidate pending availability gets enacted before being returned to us. + // Fetch the most recent pending header from the relay chain. 
We use + // `OccupiedCoreAssumption::Included` so the candidate pending availability gets enacted + // before being returned to us. let pending_header = relay_client .persisted_validation_data( params.relay_parent, @@ -372,7 +373,15 @@ pub fn search_child_branches_for_parents( let child_depth = entry.depth + 1; let hash = entry.hash; - tracing::trace!(target: PARENT_SEARCH_LOG_TARGET, ?hash, is_potential, is_pending, is_included, "Checking potential parent."); + tracing::trace!( + target: PARENT_SEARCH_LOG_TARGET, + ?hash, + is_potential, + is_pending, + is_included, + "Checking potential parent." + ); + if is_potential { potential_parents.push(entry); } From 8e0b80da7109427084f52935e64ff29ab8e4596a Mon Sep 17 00:00:00 2001 From: command-bot <> Date: Thu, 4 Jul 2024 21:48:59 +0000 Subject: [PATCH 69/71] ".git/.scripts/commands/fmt/fmt.sh" --- .../aura/src/collators/slot_based/block_builder_task.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs index 4ac8f8ebb66a..1fbc0689da86 100644 --- a/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs +++ b/cumulus/client/consensus/aura/src/collators/slot_based/block_builder_task.rs @@ -91,10 +91,10 @@ pub struct BuilderTaskParams< pub relay_chain_slot_duration: Duration, /// Drift every slot by this duration. /// This is a time quantity that is subtracted from the actual timestamp when computing - /// the time left to enter a new slot. In practice, this *left-shifts* the clock time with the intent - /// to keep our "clock" slightly behind the relay chain one and thus reducing the likelihood of - /// encountering unfavorable notification arrival timings (i.e. we don't want to wait for relay - /// chain notifications because we woke up too early). + /// the time left to enter a new slot. In practice, this *left-shifts* the clock time with the + /// intent to keep our "clock" slightly behind the relay chain one and thus reducing the + /// likelihood of encountering unfavorable notification arrival timings (i.e. we don't want to + /// wait for relay chain notifications because we woke up too early). 
pub slot_drift: Duration, } From 09222f745a4efa7102508365ddd78ae1062924f4 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Fri, 5 Jul 2024 10:15:15 +0200 Subject: [PATCH 70/71] make semver happy --- prdoc/pr_4097.prdoc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/prdoc/pr_4097.prdoc b/prdoc/pr_4097.prdoc index 995a8a1540fb..2804a9571c79 100644 --- a/prdoc/pr_4097.prdoc +++ b/prdoc/pr_4097.prdoc @@ -14,26 +14,32 @@ doc: crates: - name: cumulus-client-consensus-aura - bump: minor + bump: major - name: cumulus-client-consensus-common bump: minor + - name: cumulus-client-pov-recovery + bump: none + validate: false - name: cumulus-pallet-aura-ext - bump: minor + bump: patch - name: cumulus-relay-chain-interface bump: major + validate: false - name: sc-consensus-slots bump: minor - name: sc-basic-authorship - bump: minor + bump: patch - name: cumulus-client-network + bump: none validate: false - name: cumulus-relay-chain-inprocess-interface bump: minor - name: sc-consensus-aura - bump: minor + bump: patch - name: cumulus-relay-chain-rpc-interface bump: minor - name: polkadot-parachain-bin - bump: minor + bump: patch - name: polkadot + bump: none validate: false From 938d6e69922321016097a7620e858dc72145522f Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Fri, 5 Jul 2024 10:29:07 +0200 Subject: [PATCH 71/71] Reviewer comment: Remove duplicate check for pending --- cumulus/client/consensus/common/src/parent_search.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/cumulus/client/consensus/common/src/parent_search.rs b/cumulus/client/consensus/common/src/parent_search.rs index 2faf2fe40b1c..c371ec62f845 100644 --- a/cumulus/client/consensus/common/src/parent_search.rs +++ b/cumulus/client/consensus/common/src/parent_search.rs @@ -396,7 +396,6 @@ pub fn search_child_branches_for_parents( let aligned_with_pending = parent_aligned_with_pending && (pending_distance.map_or(true, |dist| child_depth > dist) || - pending_hash.as_ref().map_or(true, |h| &child == h) || is_child_pending(child)); if ignore_alternative_branches && !aligned_with_pending {
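To spell out the reviewer's point: per the commit, the dropped hash comparison duplicated what the `is_child_pending` closure already covers, since a child equal to the pending block is classified as pending by that closure, so removing the clause does not change behaviour. A simplified restatement of the resulting predicate, written as a free-standing function with assumed parameter types for illustration only:

```rust
/// Sketch of the alignment check in `search_child_branches_for_parents` after
/// this change. `child_is_pending` stands for the result of the
/// `is_child_pending` closure for the child hash; types are assumed.
fn aligned_with_pending(
    parent_aligned_with_pending: bool,
    pending_distance: Option<usize>,
    child_depth: usize,
    child_is_pending: bool,
) -> bool {
    // A child stays aligned when its parent is aligned and it is either deeper
    // than the pending block (i.e. already past it) or lies on the pending
    // branch itself; the latter subsumes the removed `child == pending_hash` test.
    parent_aligned_with_pending &&
        (pending_distance.map_or(true, |dist| child_depth > dist) || child_is_pending)
}
```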