From e32b08425ce95b91086fa29b14b5049205d3d00b Mon Sep 17 00:00:00 2001 From: antoniolocascio Date: Fri, 5 Jul 2024 13:34:25 +0200 Subject: [PATCH 1/3] feat(prover): store time taken by prover with ms precision --- core/lib/db_connection/src/utils.rs | 26 ++++++++++++++----- prover/crates/bin/prover_fri/src/utils.rs | 5 ++-- .../lib/prover_dal/src/fri_prover_dal.rs | 4 +-- prover/crates/lib/prover_dal/src/lib.rs | 2 +- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/core/lib/db_connection/src/utils.rs b/core/lib/db_connection/src/utils.rs index 80cf0a5cbb35..532a355d916c 100644 --- a/core/lib/db_connection/src/utils.rs +++ b/core/lib/db_connection/src/utils.rs @@ -13,14 +13,26 @@ const MICROSECONDS_IN_A_SECOND: i64 = 1_000_000; const MICROSECONDS_IN_A_MINUTE: i64 = MICROSECONDS_IN_A_SECOND * 60; const MICROSECONDS_IN_AN_HOUR: i64 = MICROSECONDS_IN_A_MINUTE * 60; +fn duration_to_naive_time_common(duration: Duration, use_ms: bool) -> NaiveTime { + let total_ms = duration.as_millis(); + let total_seconds = (total_ms / 1_000) as u32; + let hours = total_seconds / 3_600; + let minutes = (total_seconds / 60) % 60; + let seconds = total_seconds % 60; + let ms = (total_ms % 1_000) as u32; + + if use_ms { + NaiveTime::from_hms_milli_opt(hours, minutes, seconds, ms).unwrap() + } else { + NaiveTime::from_hms_opt(hours, minutes, seconds).unwrap() + } +} pub fn duration_to_naive_time(duration: Duration) -> NaiveTime { - let total_seconds = duration.as_secs() as u32; - NaiveTime::from_hms_opt( - total_seconds / 3600, - (total_seconds / 60) % 60, - total_seconds % 60, - ) - .unwrap() + duration_to_naive_time_common(duration, false) +} + +pub fn duration_to_naive_time_ms(duration: Duration) -> NaiveTime { + duration_to_naive_time_common(duration, true) } pub const fn pg_interval_from_duration(processing_timeout: Duration) -> PgInterval { diff --git a/prover/crates/bin/prover_fri/src/utils.rs b/prover/crates/bin/prover_fri/src/utils.rs index 15a2a6c18bb2..e53e0810d693 100644 --- a/prover/crates/bin/prover_fri/src/utils.rs +++ b/prover/crates/bin/prover_fri/src/utils.rs @@ -67,10 +67,11 @@ pub async fn save_proof( connection: &mut Connection<'_, Prover>, protocol_version: ProtocolSemanticVersion, ) { + let time_taken = started_at.elapsed(); tracing::info!( "Successfully proven job: {}, total time taken: {:?}", job_id, - started_at.elapsed() + time_taken, ); let proof = artifacts.proof_wrapper; @@ -101,7 +102,7 @@ pub async fn save_proof( let mut transaction = connection.start_transaction().await.unwrap(); transaction .fri_prover_jobs_dal() - .save_proof(job_id, started_at.elapsed(), &blob_url) + .save_proof(job_id, time_taken, &blob_url) .await; if is_scheduler_proof { transaction diff --git a/prover/crates/lib/prover_dal/src/fri_prover_dal.rs b/prover/crates/lib/prover_dal/src/fri_prover_dal.rs index f6efc6afa6ad..943b52fd6e12 100644 --- a/prover/crates/lib/prover_dal/src/fri_prover_dal.rs +++ b/prover/crates/lib/prover_dal/src/fri_prover_dal.rs @@ -13,7 +13,7 @@ use zksync_db_connection::{ connection::Connection, instrument::InstrumentExt, metrics::MethodLatency, }; -use crate::{duration_to_naive_time, pg_interval_from_duration, Prover}; +use crate::{duration_to_naive_time_ms, pg_interval_from_duration, Prover}; #[derive(Debug)] pub struct FriProverDal<'a, 'c> { @@ -265,7 +265,7 @@ impl FriProverDal<'_, '_> { prover_jobs_fri.depth, prover_jobs_fri.is_node_final_proof "#, - duration_to_naive_time(time_taken), + duration_to_naive_time_ms(time_taken), blob_url, i64::from(id) ) diff --git a/prover/crates/lib/prover_dal/src/lib.rs b/prover/crates/lib/prover_dal/src/lib.rs index bb552b899e90..3df1e4098e9b 100644 --- a/prover/crates/lib/prover_dal/src/lib.rs +++ b/prover/crates/lib/prover_dal/src/lib.rs @@ -2,7 +2,7 @@ use zksync_db_connection::connection::DbMarker; pub use zksync_db_connection::{ connection::Connection, connection_pool::ConnectionPool, - utils::{duration_to_naive_time, pg_interval_from_duration}, + utils::{duration_to_naive_time, duration_to_naive_time_ms, pg_interval_from_duration}, }; use crate::{ From aac728f358512fc7333e6cc178b4c494b757841b Mon Sep 17 00:00:00 2001 From: antoniolocascio Date: Wed, 10 Jul 2024 13:35:56 +0200 Subject: [PATCH 2/3] feat(prover): store time taken by wvg --- .../witness_vector_generator/src/generator.rs | 11 ++++++++++- ...2f37e379c9a7ff62fa84eeaa8df7749803466c.json | 15 +++++++++++++++ ...0d34a5baece02812f8c950fc84d37eeebd33a4.json | 8 +++++++- .../20240708090440_add_wvg_time_taken.down.sql | 5 +++++ .../20240708090440_add_wvg_time_taken.up.sql | 5 +++++ .../lib/prover_dal/src/fri_prover_dal.rs | 18 ++++++++++++++++++ 6 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 prover/crates/lib/prover_dal/.sqlx/query-ada952c0172767d1ba41ab9b812f37e379c9a7ff62fa84eeaa8df7749803466c.json create mode 100644 prover/crates/lib/prover_dal/migrations/20240708090440_add_wvg_time_taken.down.sql create mode 100644 prover/crates/lib/prover_dal/migrations/20240708090440_add_wvg_time_taken.up.sql diff --git a/prover/crates/bin/witness_vector_generator/src/generator.rs b/prover/crates/bin/witness_vector_generator/src/generator.rs index 5574f0f1578d..5ab5537358ff 100644 --- a/prover/crates/bin/witness_vector_generator/src/generator.rs +++ b/prover/crates/bin/witness_vector_generator/src/generator.rs @@ -145,15 +145,24 @@ impl JobProcessor for WitnessVectorGenerator { METRICS.gpu_witness_vector_generation_time[&circuit_type].observe(started_at.elapsed()); + let wvg_time_taken = started_at.elapsed(); tracing::info!( "Finished witness vector generation for job: {job_id} in zone: {:?} took: {:?}", self.zone, - started_at.elapsed() + wvg_time_taken ); let serialized: Vec = bincode::serialize(&artifacts).expect("Failed to serialize witness vector artifacts"); + self.pool + .connection() + .await + .unwrap() + .fri_prover_jobs_dal() + .save_wvg_time_taken(job_id, wvg_time_taken) + .await; + let now = Instant::now(); let mut attempts = 0; diff --git a/prover/crates/lib/prover_dal/.sqlx/query-ada952c0172767d1ba41ab9b812f37e379c9a7ff62fa84eeaa8df7749803466c.json b/prover/crates/lib/prover_dal/.sqlx/query-ada952c0172767d1ba41ab9b812f37e379c9a7ff62fa84eeaa8df7749803466c.json new file mode 100644 index 000000000000..3ced718c7500 --- /dev/null +++ b/prover/crates/lib/prover_dal/.sqlx/query-ada952c0172767d1ba41ab9b812f37e379c9a7ff62fa84eeaa8df7749803466c.json @@ -0,0 +1,15 @@ +{ + "db_name": "PostgreSQL", + "query": "\n UPDATE prover_jobs_fri\n SET\n updated_at = NOW(),\n wvg_time_taken = $1\n WHERE\n id = $2\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Time", + "Int8" + ] + }, + "nullable": [] + }, + "hash": "ada952c0172767d1ba41ab9b812f37e379c9a7ff62fa84eeaa8df7749803466c" +} diff --git a/prover/crates/lib/prover_dal/.sqlx/query-c2c140d136df5303d7b3a66ccd0d34a5baece02812f8c950fc84d37eeebd33a4.json b/prover/crates/lib/prover_dal/.sqlx/query-c2c140d136df5303d7b3a66ccd0d34a5baece02812f8c950fc84d37eeebd33a4.json index 25a49e191f6e..90d8dcd47f8f 100644 --- a/prover/crates/lib/prover_dal/.sqlx/query-c2c140d136df5303d7b3a66ccd0d34a5baece02812f8c950fc84d37eeebd33a4.json +++ b/prover/crates/lib/prover_dal/.sqlx/query-c2c140d136df5303d7b3a66ccd0d34a5baece02812f8c950fc84d37eeebd33a4.json @@ -97,6 +97,11 @@ "ordinal": 18, "name": "protocol_version_patch", "type_info": "Int4" + }, + { + "ordinal": 20, + "name": "wvg_time_taken", + "type_info": "Time" } ], "parameters": { @@ -124,7 +129,8 @@ true, true, true, - false + false, + true ] }, "hash": "c2c140d136df5303d7b3a66ccd0d34a5baece02812f8c950fc84d37eeebd33a4" diff --git a/prover/crates/lib/prover_dal/migrations/20240708090440_add_wvg_time_taken.down.sql b/prover/crates/lib/prover_dal/migrations/20240708090440_add_wvg_time_taken.down.sql new file mode 100644 index 000000000000..55c7b1bd8f84 --- /dev/null +++ b/prover/crates/lib/prover_dal/migrations/20240708090440_add_wvg_time_taken.down.sql @@ -0,0 +1,5 @@ +ALTER TABLE prover_jobs_fri + DROP COLUMN IF EXISTS wvg_time_taken; + +ALTER TABLE prover_jobs_fri_archive + DROP COLUMN IF EXISTS wvg_time_taken; diff --git a/prover/crates/lib/prover_dal/migrations/20240708090440_add_wvg_time_taken.up.sql b/prover/crates/lib/prover_dal/migrations/20240708090440_add_wvg_time_taken.up.sql new file mode 100644 index 000000000000..0530f66873b9 --- /dev/null +++ b/prover/crates/lib/prover_dal/migrations/20240708090440_add_wvg_time_taken.up.sql @@ -0,0 +1,5 @@ +ALTER TABLE prover_jobs_fri + ADD COLUMN IF NOT EXISTS wvg_time_taken TIME; + +ALTER TABLE prover_jobs_fri_archive + ADD COLUMN IF NOT EXISTS wvg_time_taken TIME; diff --git a/prover/crates/lib/prover_dal/src/fri_prover_dal.rs b/prover/crates/lib/prover_dal/src/fri_prover_dal.rs index 943b52fd6e12..37f65bf68718 100644 --- a/prover/crates/lib/prover_dal/src/fri_prover_dal.rs +++ b/prover/crates/lib/prover_dal/src/fri_prover_dal.rs @@ -288,6 +288,24 @@ impl FriProverDal<'_, '_> { .unwrap() } + pub async fn save_wvg_time_taken(&mut self, id: u32, wvg_time_taken: Duration) { + sqlx::query!( + r#" + UPDATE prover_jobs_fri + SET + updated_at = NOW(), + wvg_time_taken = $1 + WHERE + id = $2 + "#, + duration_to_naive_time_ms(wvg_time_taken), + i64::from(id) + ) + .execute(self.storage.conn()) + .await + .unwrap(); + } + pub async fn requeue_stuck_jobs( &mut self, processing_timeout: Duration, From ce5c718842b6cb7ec382715708997d562adfed08 Mon Sep 17 00:00:00 2001 From: antoniolocascio Date: Thu, 25 Jul 2024 15:00:22 +0200 Subject: [PATCH 3/3] fixup! feat(prover): store time taken by prover with ms precision --- core/lib/db_connection/src/utils.rs | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/core/lib/db_connection/src/utils.rs b/core/lib/db_connection/src/utils.rs index 532a355d916c..02840afaf1d1 100644 --- a/core/lib/db_connection/src/utils.rs +++ b/core/lib/db_connection/src/utils.rs @@ -13,26 +13,20 @@ const MICROSECONDS_IN_A_SECOND: i64 = 1_000_000; const MICROSECONDS_IN_A_MINUTE: i64 = MICROSECONDS_IN_A_SECOND * 60; const MICROSECONDS_IN_AN_HOUR: i64 = MICROSECONDS_IN_A_MINUTE * 60; -fn duration_to_naive_time_common(duration: Duration, use_ms: bool) -> NaiveTime { - let total_ms = duration.as_millis(); - let total_seconds = (total_ms / 1_000) as u32; - let hours = total_seconds / 3_600; - let minutes = (total_seconds / 60) % 60; - let seconds = total_seconds % 60; - let ms = (total_ms % 1_000) as u32; - - if use_ms { - NaiveTime::from_hms_milli_opt(hours, minutes, seconds, ms).unwrap() - } else { - NaiveTime::from_hms_opt(hours, minutes, seconds).unwrap() - } -} pub fn duration_to_naive_time(duration: Duration) -> NaiveTime { - duration_to_naive_time_common(duration, false) + let total_seconds = duration.as_secs() as u32; + NaiveTime::from_hms_opt( + total_seconds / 3600, + (total_seconds / 60) % 60, + total_seconds % 60, + ) + .expect("failed to convert Duration to NaiveTime") } pub fn duration_to_naive_time_ms(duration: Duration) -> NaiveTime { - duration_to_naive_time_common(duration, true) + let total_ms = duration.as_millis(); + let ms = (total_ms % 1_000) as u64; + duration_to_naive_time(duration) + Duration::from_millis(ms) } pub const fn pg_interval_from_duration(processing_timeout: Duration) -> PgInterval {