From 232a817a73fa842ca4b3be419bc775c85204901e Mon Sep 17 00:00:00 2001 From: Joaquin Carletti <56092489+ColoCarletti@users.noreply.github.com> Date: Mon, 19 Aug 2024 09:43:20 -0300 Subject: [PATCH] feat(prover_cli): Stuck status (#2441) This PR makes the `status batch` command display witness generator and prover jobs that are stuck at some point in the proving process, and adds the corresponding tests. - [x] PR title corresponds to the body of PR (we generate changelog entries from PRs). - [x] Tests for the changes have been added / updated. - [ ] Documentation comments have been added / updated. - [x] Code has been formatted via `zk fmt` and `zk lint`. --------- Co-authored-by: Joaquin Carletti Co-authored-by: Ivan Litteri <67517699+ilitteri@users.noreply.github.com> Co-authored-by: Ivan Litteri Co-authored-by: ilitteri Co-authored-by: EmilLuta --- core/lib/basic_types/src/prover_dal.rs | 55 ++++++ .../prover_cli/src/commands/status/batch.rs | 130 +++++++------ .../prover_cli/src/commands/status/utils.rs | 172 ++++++++++++++---- prover/crates/bin/prover_cli/tests/batch.rs | 158 +++++++++++++++- .../crates/lib/prover_dal/src/cli_test_dal.rs | 42 +++++ 5 files changed, 461 insertions(+), 96 deletions(-) diff --git a/core/lib/basic_types/src/prover_dal.rs b/core/lib/basic_types/src/prover_dal.rs index 52de0eae919c..7eb671448608 100644 --- a/core/lib/basic_types/src/prover_dal.rs +++ b/core/lib/basic_types/src/prover_dal.rs @@ -262,6 +262,11 @@ pub struct ProverJobFriInfo { pub picked_by: Option, } +pub trait Stallable { + fn get_status(&self) -> WitnessJobStatus; + fn get_attempts(&self) -> u32; +} + #[derive(Debug, Clone)] pub struct BasicWitnessGeneratorJobInfo { pub l1_batch_number: L1BatchNumber, @@ -277,6 +282,16 @@ pub struct BasicWitnessGeneratorJobInfo { pub picked_by: Option, } +impl Stallable for BasicWitnessGeneratorJobInfo { + fn get_status(&self) -> WitnessJobStatus { + self.status.clone() + } + + fn get_attempts(&self) -> u32 { + self.attempts + } +} + 
#[derive(Debug, Clone)] pub struct LeafWitnessGeneratorJobInfo { pub id: u32, @@ -295,6 +310,16 @@ pub struct LeafWitnessGeneratorJobInfo { pub picked_by: Option, } +impl Stallable for LeafWitnessGeneratorJobInfo { + fn get_status(&self) -> WitnessJobStatus { + self.status.clone() + } + + fn get_attempts(&self) -> u32 { + self.attempts + } +} + #[derive(Debug, Clone)] pub struct NodeWitnessGeneratorJobInfo { pub id: u32, @@ -314,6 +339,16 @@ pub struct NodeWitnessGeneratorJobInfo { pub picked_by: Option, } +impl Stallable for NodeWitnessGeneratorJobInfo { + fn get_status(&self) -> WitnessJobStatus { + self.status.clone() + } + + fn get_attempts(&self) -> u32 { + self.attempts + } +} + #[derive(Debug, Clone)] pub struct RecursionTipWitnessGeneratorJobInfo { pub l1_batch_number: L1BatchNumber, @@ -329,6 +364,16 @@ pub struct RecursionTipWitnessGeneratorJobInfo { pub picked_by: Option, } +impl Stallable for RecursionTipWitnessGeneratorJobInfo { + fn get_status(&self) -> WitnessJobStatus { + self.status.clone() + } + + fn get_attempts(&self) -> u32 { + self.attempts + } +} + #[derive(Debug, Clone)] pub struct SchedulerWitnessGeneratorJobInfo { pub l1_batch_number: L1BatchNumber, @@ -344,6 +389,16 @@ pub struct SchedulerWitnessGeneratorJobInfo { pub picked_by: Option, } +impl Stallable for SchedulerWitnessGeneratorJobInfo { + fn get_status(&self) -> WitnessJobStatus { + self.status.clone() + } + + fn get_attempts(&self) -> u32 { + self.attempts + } +} + #[derive(Debug, EnumString, Display, Clone)] pub enum ProofCompressionJobStatus { #[strum(serialize = "queued")] diff --git a/prover/crates/bin/prover_cli/src/commands/status/batch.rs b/prover/crates/bin/prover_cli/src/commands/status/batch.rs index 84a8e7184a65..797695b02278 100644 --- a/prover/crates/bin/prover_cli/src/commands/status/batch.rs +++ b/prover/crates/bin/prover_cli/src/commands/status/batch.rs @@ -4,6 +4,8 @@ use anyhow::Context as _; use 
circuit_definitions::zkevm_circuits::scheduler::aux::BaseLayerCircuitType; use clap::Args as ClapArgs; use colored::*; +use zksync_config::configs::FriProverConfig; +use zksync_env_config::FromEnv; use zksync_prover_dal::{Connection, ConnectionPool, Prover, ProverDal}; use zksync_types::{ basic_fri_types::AggregationRound, @@ -16,8 +18,11 @@ use zksync_types::{ L1BatchNumber, }; -use super::utils::{BatchData, StageInfo, Status}; -use crate::cli::ProverCLIConfig; +use super::utils::{get_prover_job_status, BatchData, StageInfo, Status}; +use crate::{ + cli::ProverCLIConfig, + commands::status::utils::{get_prover_jobs_status_from_vec, get_witness_generator_job_status}, +}; #[derive(ClapArgs)] pub struct Args { @@ -36,7 +41,7 @@ pub(crate) async fn run(args: Args, config: ProverCLIConfig) -> anyhow::Result<( format!("Batch {} Status", batch_data.batch_number).bold() ); - if let Status::Custom(msg) = batch_data.compressor.witness_generator_jobs_status() { + if let Status::Custom(msg) = batch_data.compressor.witness_generator_jobs_status(10) { if msg.contains("Sent to server") { println!("> Proof sent to server ✅"); continue; @@ -45,7 +50,7 @@ pub(crate) async fn run(args: Args, config: ProverCLIConfig) -> anyhow::Result<( let basic_witness_generator_status = batch_data .basic_witness_generator - .witness_generator_jobs_status(); + .witness_generator_jobs_status(10); if matches!(basic_witness_generator_status, Status::JobsNotFound) { println!("> No batch found. 
🚫"); continue; @@ -205,25 +210,21 @@ fn display_batch_status(batch_data: BatchData) { } fn display_status_for_stage(stage_info: StageInfo) { + let max_attempts = FriProverConfig::from_env() + .expect("Fail to read prover config.") + .max_attempts; display_aggregation_round(&stage_info); - match stage_info.witness_generator_jobs_status() { + let status = stage_info.witness_generator_jobs_status(max_attempts); + match status { Status::Custom(msg) => { println!("{}: {} \n", stage_info.to_string().bold(), msg); } Status::Queued | Status::WaitingForProofs | Status::Stuck | Status::JobsNotFound => { - println!( - "{}: {}", - stage_info.to_string().bold(), - stage_info.witness_generator_jobs_status() - ) + println!("{}: {}", stage_info.to_string().bold(), status) } Status::InProgress | Status::Successful => { - println!( - "{}: {}", - stage_info.to_string().bold(), - stage_info.witness_generator_jobs_status() - ); - if let Some(job_status) = stage_info.prover_jobs_status() { + println!("{}: {}", stage_info.to_string().bold(), status); + if let Some(job_status) = stage_info.prover_jobs_status(max_attempts) { println!("> {}: {}", "Prover Jobs".to_owned().bold(), job_status); } } @@ -240,53 +241,51 @@ fn display_batch_info(batch_data: BatchData) { } fn display_info_for_stage(stage_info: StageInfo) { + let max_attempts = FriProverConfig::from_env() + .expect("Fail to read prover config.") + .max_attempts; display_aggregation_round(&stage_info); - match stage_info.witness_generator_jobs_status() { + let status = stage_info.witness_generator_jobs_status(max_attempts); + match status { Status::Custom(msg) => { println!("{}: {}", stage_info.to_string().bold(), msg); } - Status::Queued | Status::WaitingForProofs | Status::Stuck | Status::JobsNotFound => { - println!( - " > {}: {}", - stage_info.to_string().bold(), - stage_info.witness_generator_jobs_status() - ) + Status::Queued | Status::WaitingForProofs | Status::JobsNotFound => { + println!(" > {}: {}", 
stage_info.to_string().bold(), status) } - Status::InProgress => { - println!( - "v {}: {}", - stage_info.to_string().bold(), - stage_info.witness_generator_jobs_status() - ); + Status::InProgress | Status::Stuck => { + println!("v {}: {}", stage_info.to_string().bold(), status); match stage_info { StageInfo::BasicWitnessGenerator { prover_jobs_info, .. } => { - display_prover_jobs_info(prover_jobs_info); + display_prover_jobs_info(prover_jobs_info, max_attempts); } StageInfo::LeafWitnessGenerator { witness_generator_jobs_info, prover_jobs_info, } => { - display_leaf_witness_generator_jobs_info(witness_generator_jobs_info); - display_prover_jobs_info(prover_jobs_info); + display_leaf_witness_generator_jobs_info( + witness_generator_jobs_info, + max_attempts, + ); + display_prover_jobs_info(prover_jobs_info, max_attempts); } StageInfo::NodeWitnessGenerator { witness_generator_jobs_info, prover_jobs_info, } => { - display_node_witness_generator_jobs_info(witness_generator_jobs_info); - display_prover_jobs_info(prover_jobs_info); + display_node_witness_generator_jobs_info( + witness_generator_jobs_info, + max_attempts, + ); + display_prover_jobs_info(prover_jobs_info, max_attempts); } _ => (), } } Status::Successful => { - println!( - "> {}: {}", - stage_info.to_string().bold(), - stage_info.witness_generator_jobs_status() - ); + println!("> {}: {}", stage_info.to_string().bold(), status); match stage_info { StageInfo::BasicWitnessGenerator { prover_jobs_info, .. @@ -296,7 +295,7 @@ fn display_info_for_stage(stage_info: StageInfo) { } | StageInfo::NodeWitnessGenerator { prover_jobs_info, .. 
- } => display_prover_jobs_info(prover_jobs_info), + } => display_prover_jobs_info(prover_jobs_info, max_attempts), _ => (), } } @@ -304,11 +303,12 @@ fn display_info_for_stage(stage_info: StageInfo) { } fn display_leaf_witness_generator_jobs_info( - mut leaf_witness_generators_jobs_info: Vec, + mut jobs_info: Vec, + max_attempts: u32, ) { - leaf_witness_generators_jobs_info.sort_by_key(|job| job.circuit_id); + jobs_info.sort_by_key(|job| job.circuit_id); - leaf_witness_generators_jobs_info.iter().for_each(|job| { + jobs_info.iter().for_each(|job| { println!( " > {}: {}", format!( @@ -316,17 +316,18 @@ fn display_leaf_witness_generator_jobs_info( BaseLayerCircuitType::from_numeric_value(job.circuit_id as u8) ) .bold(), - Status::from(job.status.clone()) + get_witness_generator_job_status(job, max_attempts) ) }); } fn display_node_witness_generator_jobs_info( - mut node_witness_generators_jobs_info: Vec, + mut jobs_info: Vec, + max_attempts: u32, ) { - node_witness_generators_jobs_info.sort_by_key(|job| job.circuit_id); + jobs_info.sort_by_key(|job| job.circuit_id); - node_witness_generators_jobs_info.iter().for_each(|job| { + jobs_info.iter().for_each(|job| { println!( " > {}: {}", format!( @@ -334,17 +335,18 @@ fn display_node_witness_generator_jobs_info( BaseLayerCircuitType::from_numeric_value(job.circuit_id as u8) ) .bold(), - Status::from(job.status.clone()) + get_witness_generator_job_status(job, max_attempts) ) }); } -fn display_prover_jobs_info(prover_jobs_info: Vec) { - let prover_jobs_status = Status::from(prover_jobs_info.clone()); +fn display_prover_jobs_info(prover_jobs_info: Vec, max_attempts: u32) { + let prover_jobs_status = get_prover_jobs_status_from_vec(&prover_jobs_info, max_attempts); - if matches!(prover_jobs_status, Status::Successful) - || matches!(prover_jobs_status, Status::JobsNotFound) - { + if matches!( + prover_jobs_status, + Status::Successful | Status::JobsNotFound + ) { println!( "> {}: {prover_jobs_status}", "Prover 
Jobs".to_owned().bold() @@ -366,7 +368,7 @@ fn display_prover_jobs_info(prover_jobs_info: Vec) { }); for (circuit_id, prover_jobs_info) in jobs_by_circuit_id { - let status = Status::from(prover_jobs_info.clone()); + let status = get_prover_jobs_status_from_vec(&prover_jobs_info, max_attempts); println!( " > {}: {}", format!( @@ -376,8 +378,10 @@ fn display_prover_jobs_info(prover_jobs_info: Vec) { .bold(), status ); - if matches!(status, Status::InProgress) { - display_job_status_count(prover_jobs_info); + match status { + Status::InProgress => display_job_status_count(prover_jobs_info), + Status::Stuck => display_stuck_jobs(prover_jobs_info, max_attempts), + _ => (), } } } @@ -400,6 +404,20 @@ fn display_job_status_count(jobs: Vec) { println!(" - Failed: {}", jobs_counts.failed); } +fn display_stuck_jobs(jobs: Vec, max_attempts: u32) { + jobs.iter().for_each(|job| { + if matches!( + get_prover_job_status(job.clone(), max_attempts), + Status::Stuck + ) { + println!( + " - Prover Job: {} stuck after {} attempts", + job.id, job.attempts + ); + } + }) +} + fn display_aggregation_round(stage_info: &StageInfo) { if let Some(aggregation_round) = stage_info.aggregation_round() { println!( diff --git a/prover/crates/bin/prover_cli/src/commands/status/utils.rs b/prover/crates/bin/prover_cli/src/commands/status/utils.rs index 31726e749209..eee5c08b96fc 100644 --- a/prover/crates/bin/prover_cli/src/commands/status/utils.rs +++ b/prover/crates/bin/prover_cli/src/commands/status/utils.rs @@ -6,7 +6,8 @@ use zksync_types::{ prover_dal::{ BasicWitnessGeneratorJobInfo, LeafWitnessGeneratorJobInfo, NodeWitnessGeneratorJobInfo, ProofCompressionJobInfo, ProofCompressionJobStatus, ProverJobFriInfo, ProverJobStatus, - RecursionTipWitnessGeneratorJobInfo, SchedulerWitnessGeneratorJobInfo, WitnessJobStatus, + RecursionTipWitnessGeneratorJobInfo, SchedulerWitnessGeneratorJobInfo, Stallable, + WitnessJobStatus, }, L1BatchNumber, }; @@ -55,6 +56,20 @@ pub enum Status { JobsNotFound, } 
+impl From for Status { + fn from(status: ProverJobStatus) -> Self { + match status { + ProverJobStatus::Queued => Status::Queued, + ProverJobStatus::InProgress(_) => Status::InProgress, + ProverJobStatus::Successful(_) => Status::Successful, + ProverJobStatus::Failed(_) => Status::Custom("Failed".to_owned()), + ProverJobStatus::Skipped => Status::Custom("Skipped ⏩".to_owned()), + ProverJobStatus::Ignored => Status::Custom("Ignored".to_owned()), + ProverJobStatus::InGPUProof => Status::Custom("In GPU Proof".to_owned()), + } + } +} + impl From for Status { fn from(status: WitnessJobStatus) -> Self { match status { @@ -151,31 +166,6 @@ impl From for Status { } } -impl From> for Status { - fn from(jobs_vector: Vec) -> Self { - if jobs_vector.is_empty() { - Status::JobsNotFound - } else if jobs_vector - .iter() - .all(|job| matches!(job.status, ProverJobStatus::InGPUProof)) - { - Status::Custom("In GPU Proof ⚡️".to_owned()) - } else if jobs_vector - .iter() - .all(|job| matches!(job.status, ProverJobStatus::Queued)) - { - Status::Queued - } else if jobs_vector - .iter() - .all(|job| matches!(job.status, ProverJobStatus::Successful(_))) - { - Status::Successful - } else { - Status::InProgress - } - } -} - #[allow(clippy::large_enum_variant)] #[derive(EnumString, Clone, Display)] pub enum StageInfo { @@ -214,7 +204,7 @@ impl StageInfo { } } - pub fn prover_jobs_status(&self) -> Option { + pub fn prover_jobs_status(&self, max_attempts: u32) -> Option { match self.clone() { StageInfo::BasicWitnessGenerator { prover_jobs_info, .. @@ -224,38 +214,144 @@ impl StageInfo { } | StageInfo::NodeWitnessGenerator { prover_jobs_info, .. 
- } => Some(Status::from(prover_jobs_info)), + } => Some(get_prover_jobs_status_from_vec( + &prover_jobs_info, + max_attempts, + )), StageInfo::RecursionTipWitnessGenerator(_) | StageInfo::SchedulerWitnessGenerator(_) | StageInfo::Compressor(_) => None, } } - pub fn witness_generator_jobs_status(&self) -> Status { + pub fn witness_generator_jobs_status(&self, max_attempts: u32) -> Status { match self.clone() { StageInfo::BasicWitnessGenerator { witness_generator_job_info, .. } => witness_generator_job_info - .map(|witness_generator_job_info| Status::from(witness_generator_job_info.status)) + .map(|witness_generator_job_info| { + get_witness_generator_job_status(&witness_generator_job_info, max_attempts) + }) .unwrap_or_default(), StageInfo::LeafWitnessGenerator { witness_generator_jobs_info, .. - } => Status::from(witness_generator_jobs_info), + } => { + get_witness_generator_job_status_from_vec(witness_generator_jobs_info, max_attempts) + } StageInfo::NodeWitnessGenerator { witness_generator_jobs_info, .. 
- } => Status::from(witness_generator_jobs_info), - StageInfo::RecursionTipWitnessGenerator(status) => status - .map(|job| Status::from(job.status)) - .unwrap_or_default(), - StageInfo::SchedulerWitnessGenerator(status) => status - .map(|job| Status::from(job.status)) - .unwrap_or_default(), + } => { + get_witness_generator_job_status_from_vec(witness_generator_jobs_info, max_attempts) + } + StageInfo::RecursionTipWitnessGenerator(witness_generator_job_info) => { + witness_generator_job_info + .map(|witness_generator_job_info| { + get_witness_generator_job_status(&witness_generator_job_info, max_attempts) + }) + .unwrap_or_default() + } + StageInfo::SchedulerWitnessGenerator(witness_generator_job_info) => { + witness_generator_job_info + .map(|witness_generator_job_info| { + get_witness_generator_job_status(&witness_generator_job_info, max_attempts) + }) + .unwrap_or_default() + } StageInfo::Compressor(status) => status .map(|job| Status::from(job.status)) .unwrap_or_default(), } } } + +pub fn get_witness_generator_job_status(data: &impl Stallable, max_attempts: u32) -> Status { + let status = data.get_status(); + if matches!( + status, + WitnessJobStatus::Failed(_) | WitnessJobStatus::InProgress, + ) && data.get_attempts() >= max_attempts + { + return Status::Stuck; + } + Status::from(status) +} + +pub fn get_witness_generator_job_status_from_vec( + prover_jobs: Vec, + max_attempts: u32, +) -> Status { + if prover_jobs.is_empty() { + Status::JobsNotFound + } else if prover_jobs + .iter() + .all(|job| matches!(job.get_status(), WitnessJobStatus::WaitingForProofs)) + { + Status::WaitingForProofs + } else if prover_jobs.iter().any(|job| { + matches!( + job.get_status(), + WitnessJobStatus::Failed(_) | WitnessJobStatus::InProgress, + ) && job.get_attempts() >= max_attempts + }) { + Status::Stuck + } else if prover_jobs.iter().all(|job| { + matches!(job.get_status(), WitnessJobStatus::Queued) + || matches!(job.get_status(), WitnessJobStatus::WaitingForProofs) + }) { + 
Status::Queued + } else if prover_jobs + .iter() + .all(|job| matches!(job.get_status(), WitnessJobStatus::Successful(_))) + { + Status::Successful + } else { + Status::InProgress + } +} + +pub fn get_prover_job_status(prover_jobs: ProverJobFriInfo, max_attempts: u32) -> Status { + if matches!( + prover_jobs.status, + ProverJobStatus::Failed(_) | ProverJobStatus::InProgress(_), + ) && prover_jobs.attempts as u32 >= max_attempts + { + return Status::Stuck; + } + Status::from(prover_jobs.status) +} + +pub fn get_prover_jobs_status_from_vec( + prover_jobs: &[ProverJobFriInfo], + max_attempts: u32, +) -> Status { + if prover_jobs.is_empty() { + Status::JobsNotFound + } else if prover_jobs.iter().any(|job| { + matches!( + job.status, + ProverJobStatus::Failed(_) | ProverJobStatus::InProgress(_), + ) && job.attempts as u32 >= max_attempts + }) { + Status::Stuck + } else if prover_jobs + .iter() + .all(|job| matches!(job.status, ProverJobStatus::InGPUProof)) + { + Status::Custom("In GPU Proof ⚡️".to_owned()) + } else if prover_jobs + .iter() + .all(|job| matches!(job.status, ProverJobStatus::Queued)) + { + Status::Queued + } else if prover_jobs + .iter() + .all(|job| matches!(job.status, ProverJobStatus::Successful(_))) + { + Status::Successful + } else { + Status::InProgress + } +} diff --git a/prover/crates/bin/prover_cli/tests/batch.rs b/prover/crates/bin/prover_cli/tests/batch.rs index 9e9060fe8837..bfd944ec29be 100644 --- a/prover/crates/bin/prover_cli/tests/batch.rs +++ b/prover/crates/bin/prover_cli/tests/batch.rs @@ -8,8 +8,9 @@ use zksync_types::{ basic_fri_types::AggregationRound, protocol_version::{L1VerifierConfig, ProtocolSemanticVersion}, prover_dal::{ - ProofCompressionJobStatus, ProverJobStatus, ProverJobStatusInProgress, - ProverJobStatusSuccessful, WitnessJobStatus, WitnessJobStatusSuccessful, + ProofCompressionJobStatus, ProverJobStatus, ProverJobStatusFailed, + ProverJobStatusInProgress, ProverJobStatusSuccessful, WitnessJobStatus, + 
WitnessJobStatusSuccessful, }, L1BatchNumber, }; @@ -179,6 +180,41 @@ async fn insert_prover_job( .await; } +async fn update_attempts_prover_job( + status: ProverJobStatus, + attempts: u8, + circuit_id: BaseLayerCircuitType, + aggregation_round: AggregationRound, + batch_number: L1BatchNumber, + sequence_number: usize, + connection: &mut Connection<'_, Prover>, +) { + connection + .cli_test_dal() + .update_attempts_prover_job( + status, + attempts, + circuit_id as u8, + aggregation_round as i64, + batch_number, + sequence_number, + ) + .await; +} + +async fn update_attempts_lwg( + status: ProverJobStatus, + attempts: u8, + circuit_id: BaseLayerCircuitType, + batch_number: L1BatchNumber, + connection: &mut Connection<'_, Prover>, +) { + connection + .cli_test_dal() + .update_attempts_lwg(status, attempts, circuit_id as u8, batch_number) + .await; +} + async fn insert_bwg_job( status: FriWitnessJobStatus, batch_number: L1BatchNumber, @@ -1338,3 +1374,121 @@ v Scheduler: In Progress ⌛️ COMPLETE_BATCH_STATUS_STDOUT.into(), ); } + +#[tokio::test] +async fn pli_status_stuck_job() { + let connection_pool = ConnectionPool::::prover_test_pool().await; + let mut connection = connection_pool.connection().await.unwrap(); + + connection + .fri_protocol_versions_dal() + .save_prover_protocol_version( + ProtocolSemanticVersion::default(), + L1VerifierConfig::default(), + ) + .await; + + let batch_0 = L1BatchNumber(0); + + let scenario = Scenario::new(batch_0) + .add_bwg(FriWitnessJobStatus::Successful) + .add_agg_0_prover_job( + ProverJobStatus::Successful(ProverJobStatusSuccessful::default()), + BaseLayerCircuitType::VM, + 1, + ) + .add_agg_0_prover_job(ProverJobStatus::Queued, BaseLayerCircuitType::VM, 2) + .add_lwg(WitnessJobStatus::WaitingForProofs, BaseLayerCircuitType::VM) + .add_nwg(WitnessJobStatus::WaitingForProofs, BaseLayerCircuitType::VM) + .add_rt(WitnessJobStatus::WaitingForProofs) + .add_scheduler(WitnessJobStatus::WaitingForProofs); + load_scenario(scenario, &mut 
connection).await; + + update_attempts_prover_job( + ProverJobStatus::Failed(ProverJobStatusFailed::default()), + 10, + BaseLayerCircuitType::VM, + AggregationRound::BasicCircuits, + batch_0, + 2, + &mut connection, + ) + .await; + + status_verbose_batch_0_expects( + connection_pool.database_url().expose_str(), + "== Batch 0 Status == + +-- Aggregation Round 0 -- +> Basic Witness Generator: Successful ✅ +v Prover Jobs: Stuck ⛔️ + > VM: Stuck ⛔️ + - Prover Job: 2 stuck after 10 attempts + +-- Aggregation Round 1 -- + > Leaf Witness Generator: Waiting for Proof ⏱️ + +-- Aggregation Round 2 -- + > Node Witness Generator: Waiting for Proof ⏱️ + +-- Aggregation Round 3 -- + > Recursion Tip: Waiting for Proof ⏱️ + +-- Aggregation Round 4 -- + > Scheduler: Waiting for Proof ⏱️ + +-- Proof Compression -- + > Compressor: Jobs not found 🚫 +" + .into(), + ); + + let scenario = Scenario::new(batch_0) + .add_agg_0_prover_job( + ProverJobStatus::Successful(ProverJobStatusSuccessful::default()), + BaseLayerCircuitType::VM, + 2, + ) + .add_lwg(WitnessJobStatus::InProgress, BaseLayerCircuitType::VM) + .add_agg_1_prover_job(ProverJobStatus::Queued, BaseLayerCircuitType::VM, 1) + .add_agg_1_prover_job(ProverJobStatus::Queued, BaseLayerCircuitType::VM, 2); + load_scenario(scenario, &mut connection).await; + + update_attempts_lwg( + ProverJobStatus::Failed(ProverJobStatusFailed::default()), + 10, + BaseLayerCircuitType::VM, + batch_0, + &mut connection, + ) + .await; + + status_verbose_batch_0_expects( + connection_pool.database_url().expose_str(), + "== Batch 0 Status == + +-- Aggregation Round 0 -- +> Basic Witness Generator: Successful ✅ +> Prover Jobs: Successful ✅ + +-- Aggregation Round 1 -- +v Leaf Witness Generator: Stuck ⛔️ + > VM: Stuck ⛔️ +v Prover Jobs: Queued 📥 + > VM: Queued 📥 + +-- Aggregation Round 2 -- + > Node Witness Generator: Waiting for Proof ⏱️ + +-- Aggregation Round 3 -- + > Recursion Tip: Waiting for Proof ⏱️ + +-- Aggregation Round 4 -- + > Scheduler: Waiting 
for Proof ⏱️ + +-- Proof Compression -- + > Compressor: Jobs not found 🚫 +" + .into(), + ); +} diff --git a/prover/crates/lib/prover_dal/src/cli_test_dal.rs b/prover/crates/lib/prover_dal/src/cli_test_dal.rs index 474c84c53fd5..069fa9c6a41c 100644 --- a/prover/crates/lib/prover_dal/src/cli_test_dal.rs +++ b/prover/crates/lib/prover_dal/src/cli_test_dal.rs @@ -170,4 +170,46 @@ impl CliTestDal<'_, '_> { .await .unwrap(); } + + pub async fn update_attempts_prover_job( + &mut self, + status: ProverJobStatus, + attempts: u8, + circuit_id: u8, + aggregation_round: i64, + batch_number: L1BatchNumber, + sequence_number: usize, + ) { + sqlx::query(&format!( + "UPDATE prover_jobs_fri + SET status = '{}', attempts = {} + WHERE l1_batch_number = {} + AND sequence_number = {} + AND aggregation_round = {} + AND circuit_id = {}", + status, attempts, batch_number.0, sequence_number, aggregation_round, circuit_id, + )) + .execute(self.storage.conn()) + .await + .unwrap(); + } + + pub async fn update_attempts_lwg( + &mut self, + status: ProverJobStatus, + attempts: u8, + circuit_id: u8, + batch_number: L1BatchNumber, + ) { + sqlx::query(&format!( + "UPDATE leaf_aggregation_witness_jobs_fri + SET status = '{}', attempts = {} + WHERE l1_batch_number = {} + AND circuit_id = {}", + status, attempts, batch_number.0, circuit_id, + )) + .execute(self.storage.conn()) + .await + .unwrap(); + } }