Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

collation-generation + collator-protocol: collate on multiple assigned cores #3795

Merged
merged 31 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
8f5d259
DistributeCollation includes CoreIndex
sandreim Mar 19, 2024
dc84796
SubmitCollationParams includes CoreIndex
sandreim Mar 19, 2024
665ef06
collator-protocol updates
sandreim Mar 19, 2024
78ee8b2
collation-generation pull support for chained collations
sandreim Mar 19, 2024
a61d64a
adjust validator buffer capacity for elastic scaling
sandreim Mar 20, 2024
1cbb090
fmt
sandreim Mar 20, 2024
493a7a2
Per core index tracking of multiple collations
sandreim Mar 20, 2024
8497413
fix tests
sandreim Mar 20, 2024
0dbc63a
enable collator-protocol elastic scaling extension for adder collator
sandreim Mar 22, 2024
3906d34
Use backing state to get candidates pending availability
sandreim Mar 22, 2024
eb7d842
Merge branch 'master' of github.com:paritytech/polkadot-sdk into sand…
sandreim Mar 22, 2024
d344a2d
use ok_or
sandreim Mar 22, 2024
16e1cb4
subsystem-util request_para_backing_state
sandreim Mar 22, 2024
91f9c40
Use the exposed claim queue
sandreim Mar 22, 2024
154f3dd
refactor
sandreim Mar 22, 2024
60ec79f
remove type annotation
sandreim Mar 22, 2024
a6c4afe
remove more merge damage
sandreim Mar 22, 2024
62c13dc
make tests compile
sandreim Mar 22, 2024
ddf057e
happy clippy is good clippy
sandreim Mar 22, 2024
13b149f
lookahead use core_index in SubmitCollationParams
sandreim Mar 25, 2024
64a53f4
taplo fix
sandreim Mar 25, 2024
732440b
enable elastic scaling in undying collator (also enables in zombienet…
sandreim Mar 25, 2024
f80897d
Collation generation test fixes and new tests
sandreim Mar 25, 2024
7d8e6b5
Merge branch 'master' of github.com:paritytech/polkadot-sdk into sand…
sandreim Mar 25, 2024
83af4e1
bring test-runtime up to date
sandreim Mar 25, 2024
9721b69
review feedback
sandreim Mar 25, 2024
4eb804e
await await
sandreim Mar 25, 2024
8b70bc2
review feedback
sandreim Mar 27, 2024
16607dd
prdoc
sandreim Mar 27, 2024
ba80669
Merge branch 'master' of github.com:paritytech/polkadot-sdk into sand…
sandreim Mar 27, 2024
69a63cc
nice new syntax
sandreim Mar 27, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions cumulus/client/consensus/aura/src/collators/lookahead.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ use polkadot_node_subsystem::messages::{
CollationGenerationMessage, RuntimeApiMessage, RuntimeApiRequest,
};
use polkadot_overseer::Handle as OverseerHandle;
use polkadot_primitives::{CollatorPair, Id as ParaId, OccupiedCoreAssumption};
use polkadot_primitives::{CollatorPair, CoreIndex, Id as ParaId, OccupiedCoreAssumption};

use futures::{channel::oneshot, prelude::*};
use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf};
Expand Down Expand Up @@ -184,7 +184,15 @@ where
while let Some(relay_parent_header) = import_notifications.next().await {
let relay_parent = relay_parent_header.hash();

if !is_para_scheduled(relay_parent, params.para_id, &mut params.overseer_handle).await {
let core_index = if let Some(core_index) = fist_core_scheduled_for_para(
sandreim marked this conversation as resolved.
Show resolved Hide resolved
relay_parent,
params.para_id,
&mut params.overseer_handle,
)
.await
{
core_index
} else {
tracing::trace!(
target: crate::LOG_TARGET,
?relay_parent,
Expand All @@ -193,7 +201,7 @@ where
);

continue
}
};

let max_pov_size = match params
.relay_client
Expand Down Expand Up @@ -396,6 +404,7 @@ where
parent_head: parent_header.encode().into(),
validation_code_hash,
result_sender: None,
core_index,
},
),
"SubmitCollation",
Expand Down Expand Up @@ -480,14 +489,12 @@ async fn max_ancestry_lookback(
}
}

// Checks if there exists a scheduled core for the para at the provided relay parent.
//
// Falls back to `false` in case of an error.
async fn is_para_scheduled(
// Checks the first `CoreIndex` assigned to the para at the provided relay parent.
sandreim marked this conversation as resolved.
Show resolved Hide resolved
async fn fist_core_scheduled_for_para(
relay_parent: PHash,
para_id: ParaId,
overseer_handle: &mut OverseerHandle,
) -> bool {
) -> Option<CoreIndex> {
let (tx, rx) = oneshot::channel();
let request = RuntimeApiRequest::AvailabilityCores(tx);
overseer_handle
Expand All @@ -503,17 +510,20 @@ async fn is_para_scheduled(
?relay_parent,
"Failed to query availability cores runtime API",
);
return false
return None
},
Err(oneshot::Canceled) => {
tracing::error!(
target: crate::LOG_TARGET,
?relay_parent,
"Sender for availability cores runtime request dropped",
);
return false
return None
},
};

cores.iter().any(|core| core.para_id() == Some(para_id))
cores
.iter()
.position(|core| core.para_id() == Some(para_id))
.map(|index| CoreIndex(index as _))
}
2 changes: 2 additions & 0 deletions polkadot/node/collation-generation/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ pub enum Error {
Util(#[from] polkadot_node_subsystem_util::Error),
#[error(transparent)]
Erasure(#[from] polkadot_erasure_coding::Error),
#[error("Parachain backing state not available in runtime.")]
MissingParaBackingState,
}

pub type Result<T> = std::result::Result<T, Error>;
182 changes: 108 additions & 74 deletions polkadot/node/collation-generation/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ use polkadot_node_subsystem::{
};
use polkadot_node_subsystem_util::{
has_required_runtime, request_async_backing_params, request_availability_cores,
request_claim_queue, request_persisted_validation_data, request_validation_code,
request_validation_code_hash, request_validators,
request_claim_queue, request_para_backing_state, request_persisted_validation_data,
request_validation_code, request_validation_code_hash, request_validators,
};
use polkadot_primitives::{
collator_signature_payload, CandidateCommitments, CandidateDescriptor, CandidateReceipt,
Expand Down Expand Up @@ -212,38 +212,41 @@ async fn handle_new_activations<Context>(
if config.collator.is_none() {
return Ok(())
}
let para_id = config.para_id;

let _overall_timer = metrics.time_new_activations();

for relay_parent in activated {
let _relay_parent_timer = metrics.time_new_activations_relay_parent();

let (availability_cores, validators, async_backing_params) = join!(
let (availability_cores, validators, async_backing_params, para_backing_state) = join!(
request_availability_cores(relay_parent, ctx.sender()).await,
request_validators(relay_parent, ctx.sender()).await,
request_async_backing_params(relay_parent, ctx.sender()).await,
request_para_backing_state(relay_parent, config.para_id, ctx.sender()).await,
sandreim marked this conversation as resolved.
Show resolved Hide resolved
);

let availability_cores = availability_cores??;
let n_validators = validators??.len();
let async_backing_params = async_backing_params?.ok();
let n_validators = validators??.len();
let para_backing_state =
para_backing_state??.ok_or(crate::error::Error::MissingParaBackingState)?;

let maybe_claim_queue = fetch_claim_queue(ctx.sender(), relay_parent).await?;

for (core_idx, core) in availability_cores.into_iter().enumerate() {
let _availability_core_timer = metrics.time_new_activations_availability_core();
// The loop below will fill in cores that the para is allowed to build on.
let mut cores_to_build_on = Vec::new();

let (scheduled_core, assumption) = match core {
CoreState::Scheduled(scheduled_core) =>
(scheduled_core, OccupiedCoreAssumption::Free),
for (core_idx, core) in availability_cores.into_iter().enumerate() {
let scheduled_core = match core {
CoreState::Scheduled(scheduled_core) => scheduled_core,
CoreState::Occupied(occupied_core) => match async_backing_params {
Some(params) if params.max_candidate_depth >= 1 => {
// maximum candidate depth when building on top of a block
// pending availability is necessarily 1 - the depth of the
// pending block is 0 so the child has depth 1.

// TODO [now]: this assumes that next up == current.
// in practice we should only set `OccupiedCoreAssumption::Included`
// when the candidate occupying the core is also of the same para.
// Use claim queue if available, or fallback to `next_up_on_available`
let res = match maybe_claim_queue {
Some(ref claim_queue) => {
// read what's in the claim queue for this core
Expand All @@ -257,8 +260,7 @@ async fn handle_new_activations<Context>(
// `next_up_on_available`
occupied_core.next_up_on_available
},
}
.map(|scheduled| (scheduled, OccupiedCoreAssumption::Included));
};

match res {
Some(res) => res,
Expand All @@ -279,7 +281,7 @@ async fn handle_new_activations<Context>(
gum::trace!(
target: LOG_TARGET,
core_idx = %core_idx,
"core is free. Keep going.",
"core is not assigned to any para. Keep going.",
);
continue
},
Expand All @@ -297,61 +299,82 @@ async fn handle_new_activations<Context>(
continue
}

// we get validation data and validation code synchronously for each core instead of
// within the subtask loop, because we have only a single mutable handle to the
// context, so the work can't really be distributed

let validation_data = match request_persisted_validation_data(
relay_parent,
scheduled_core.para_id,
assumption,
ctx.sender(),
)
.await
.await??
{
Some(v) => v,
None => {
gum::trace!(
target: LOG_TARGET,
core_idx = %core_idx,
relay_parent = ?relay_parent,
our_para = %config.para_id,
their_para = %scheduled_core.para_id,
"validation data is not available",
);
continue
},
};
// Accumulate cores for building collation(s) outside the loop.
cores_to_build_on.push(CoreIndex(core_idx as u32));
sandreim marked this conversation as resolved.
Show resolved Hide resolved
}

let validation_code_hash = match obtain_validation_code_hash_with_assumption(
relay_parent,
scheduled_core.para_id,
assumption,
ctx.sender(),
)
.await?
{
Some(v) => v,
None => {
gum::trace!(
target: LOG_TARGET,
core_idx = %core_idx,
relay_parent = ?relay_parent,
our_para = %config.para_id,
their_para = %scheduled_core.para_id,
"validation code hash is not found.",
);
continue
},
};
// Skip to next relay parent if there is no core assigned to us.
if cores_to_build_on.is_empty() {
continue
}

let task_config = config.clone();
let metrics = metrics.clone();
let mut task_sender = ctx.sender().clone();
ctx.spawn(
"collation-builder",
Box::pin(async move {
// We are being very optimistic here, but one of the cores could pend availability for some more
// blocks, or even time out.
// For timeout assumption the collator can't really know because it doesn't receive bitfield
// gossip.
let assumption = if para_backing_state.pending_availability.is_empty() {
OccupiedCoreAssumption::Free
} else {
OccupiedCoreAssumption::Included
};

gum::debug!(
target: LOG_TARGET,
relay_parent = ?relay_parent,
our_para = %config.para_id,
?assumption,
"Occupied core(s) assumption",
);

let mut validation_data = match request_persisted_validation_data(
relay_parent,
config.para_id,
assumption,
ctx.sender(),
)
.await
.await??
{
Some(v) => v,
None => {
gum::debug!(
target: LOG_TARGET,
relay_parent = ?relay_parent,
our_para = %config.para_id,
"validation data is not available",
);
continue
},
};

let validation_code_hash = match obtain_validation_code_hash_with_assumption(
relay_parent,
config.para_id,
assumption,
ctx.sender(),
)
.await?
{
Some(v) => v,
None => {
gum::debug!(
target: LOG_TARGET,
relay_parent = ?relay_parent,
our_para = %config.para_id,
"validation code hash is not found.",
);
continue
},
};

let task_config = config.clone();
let metrics = metrics.clone();
let mut task_sender = ctx.sender().clone();

ctx.spawn(
"chained-collation-builder",
Box::pin(async move {
for core_index in cores_to_build_on {
let collator_fn = match task_config.collator.as_ref() {
Some(x) => x,
None => return,
Expand All @@ -363,31 +386,37 @@ async fn handle_new_activations<Context>(
None => {
gum::debug!(
target: LOG_TARGET,
para_id = %scheduled_core.para_id,
?para_id,
"collator returned no collation on collate",
);
return
},
};

let parent_head = collation.head_data.clone();
construct_and_distribute_receipt(
PreparedCollation {
collation,
para_id: scheduled_core.para_id,
para_id,
relay_parent,
validation_data,
validation_data: validation_data.clone(),
validation_code_hash,
n_validators,
core_index,
},
task_config.key.clone(),
&mut task_sender,
result_sender,
&metrics,
)
.await;
}),
)?;
}

// Chain the collations. All else stays the same as we build the chained
// collation on same relay parent.
validation_data.parent_head = parent_head;
}
}),
)?;
}

Ok(())
Expand All @@ -408,6 +437,7 @@ async fn handle_submit_collation<Context>(
parent_head,
validation_code_hash,
result_sender,
core_index,
} = params;

let validators = request_validators(relay_parent, ctx.sender()).await.await??;
Expand Down Expand Up @@ -444,6 +474,7 @@ async fn handle_submit_collation<Context>(
validation_data,
validation_code_hash,
n_validators,
core_index,
};

construct_and_distribute_receipt(
Expand All @@ -465,6 +496,7 @@ struct PreparedCollation {
validation_data: PersistedValidationData,
validation_code_hash: ValidationCodeHash,
n_validators: usize,
core_index: CoreIndex,
}

/// Takes a prepared collation, along with its context, and produces a candidate receipt
Expand All @@ -483,6 +515,7 @@ async fn construct_and_distribute_receipt(
validation_data,
validation_code_hash,
n_validators,
core_index,
} = collation;

let persisted_validation_data_hash = validation_data.hash();
Expand Down Expand Up @@ -578,6 +611,7 @@ async fn construct_and_distribute_receipt(
pov,
parent_head_data,
result_sender,
core_index,
})
.await;
}
Expand Down
Loading
Loading