From 0722ae192a0df075d6c74c1cdc56edcc06e48fae Mon Sep 17 00:00:00 2001 From: Harald Hoyer Date: Wed, 13 Nov 2024 13:22:11 +0100 Subject: [PATCH 1/2] feat: add more metrics for the tee_prover Especially add a `sealed_at` column to the `l1_batches` table to get the desired duration metric of sealed -> proven. Signed-off-by: Harald Hoyer --- core/bin/zksync_tee_prover/src/tee_prover.rs | 8 ++++- core/lib/basic_types/src/tee_types.rs | 2 +- ...de79388b62926ff83d954b4d93dedfe2503a.json} | 4 +-- ...5d8370dd90de4425d699446f3c9b0c77d8de5.json | 22 ++++++++++++++ .../20241108051505_sealed_at.down.sql | 2 ++ .../20241108051505_sealed_at.up.sql | 2 ++ core/lib/dal/src/blocks_dal.rs | 25 ++++++++++++++++ core/node/proof_data_handler/Cargo.toml | 1 + core/node/proof_data_handler/src/metrics.rs | 23 +++++++++++++- .../src/tee_request_processor.rs | 30 +++++++++++++++---- 10 files changed, 108 insertions(+), 11 deletions(-) rename core/lib/dal/.sqlx/{query-746d8b62d576b4b9596458aa865e0294e53eb37c1a2dbcc3044b8311200d549a.json => query-6069d168d5c4b5131b50500302cdde79388b62926ff83d954b4d93dedfe2503a.json} (85%) create mode 100644 core/lib/dal/.sqlx/query-73fb2eb75353d57898ff9810d3f5d8370dd90de4425d699446f3c9b0c77d8de5.json create mode 100644 core/lib/dal/migrations/20241108051505_sealed_at.down.sql create mode 100644 core/lib/dal/migrations/20241108051505_sealed_at.up.sql diff --git a/core/bin/zksync_tee_prover/src/tee_prover.rs b/core/bin/zksync_tee_prover/src/tee_prover.rs index 5d22d1e7c630..da431ac5b332 100644 --- a/core/bin/zksync_tee_prover/src/tee_prover.rs +++ b/core/bin/zksync_tee_prover/src/tee_prover.rs @@ -80,7 +80,13 @@ impl TeeProver { let msg_to_sign = Message::from_slice(root_hash_bytes) .map_err(|e| TeeProverError::Verification(e.into()))?; let signature = self.config.signing_key.sign_ecdsa(msg_to_sign); - observer.observe(); + let duration = observer.observe(); + tracing::info!( + proof_generation_time = %duration.as_secs_f64(), + l1_batch_number = %batch_number, + l1_root_hash = %verification_result.value_hash, + "L1 batch verified", + ); Ok((signature, batch_number, verification_result.value_hash)) } _ => Err(TeeProverError::Verification(anyhow::anyhow!( diff --git a/core/lib/basic_types/src/tee_types.rs b/core/lib/basic_types/src/tee_types.rs index d49f2f183885..44a3ba02c1d2 100644 --- a/core/lib/basic_types/src/tee_types.rs +++ b/core/lib/basic_types/src/tee_types.rs @@ -2,7 +2,7 @@ use std::fmt; use serde::{Deserialize, Serialize}; -#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] #[non_exhaustive] pub enum TeeType { diff --git a/core/lib/dal/.sqlx/query-746d8b62d576b4b9596458aa865e0294e53eb37c1a2dbcc3044b8311200d549a.json b/core/lib/dal/.sqlx/query-6069d168d5c4b5131b50500302cdde79388b62926ff83d954b4d93dedfe2503a.json similarity index 85% rename from core/lib/dal/.sqlx/query-746d8b62d576b4b9596458aa865e0294e53eb37c1a2dbcc3044b8311200d549a.json rename to core/lib/dal/.sqlx/query-6069d168d5c4b5131b50500302cdde79388b62926ff83d954b4d93dedfe2503a.json index 306f193861f1..98d228726d48 100644 --- a/core/lib/dal/.sqlx/query-746d8b62d576b4b9596458aa865e0294e53eb37c1a2dbcc3044b8311200d549a.json +++ b/core/lib/dal/.sqlx/query-6069d168d5c4b5131b50500302cdde79388b62926ff83d954b4d93dedfe2503a.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n UPDATE l1_batches\n SET\n l1_tx_count = $2,\n l2_tx_count = $3,\n l2_to_l1_messages = $4,\n bloom = $5,\n priority_ops_onchain_data = $6,\n predicted_commit_gas_cost = $7,\n predicted_prove_gas_cost = $8,\n predicted_execute_gas_cost = $9,\n initial_bootloader_heap_content = $10,\n used_contract_hashes = $11,\n bootloader_code_hash = $12,\n default_aa_code_hash = $13,\n evm_emulator_code_hash = $14,\n protocol_version = $15,\n system_logs = $16,\n storage_refunds = $17,\n pubdata_costs = $18,\n pubdata_input = $19,\n predicted_circuits_by_type = $20,\n updated_at = NOW(),\n is_sealed = TRUE\n WHERE\n number = $1\n ", + "query": "\n UPDATE l1_batches\n SET\n l1_tx_count = $2,\n l2_tx_count = $3,\n l2_to_l1_messages = $4,\n bloom = $5,\n priority_ops_onchain_data = $6,\n predicted_commit_gas_cost = $7,\n predicted_prove_gas_cost = $8,\n predicted_execute_gas_cost = $9,\n initial_bootloader_heap_content = $10,\n used_contract_hashes = $11,\n bootloader_code_hash = $12,\n default_aa_code_hash = $13,\n evm_emulator_code_hash = $14,\n protocol_version = $15,\n system_logs = $16,\n storage_refunds = $17,\n pubdata_costs = $18,\n pubdata_input = $19,\n predicted_circuits_by_type = $20,\n updated_at = NOW(),\n sealed_at = NOW(),\n is_sealed = TRUE\n WHERE\n number = $1\n ", "describe": { "columns": [], "parameters": { @@ -29,5 +29,5 @@ }, "nullable": [] }, - "hash": "746d8b62d576b4b9596458aa865e0294e53eb37c1a2dbcc3044b8311200d549a" + "hash": "6069d168d5c4b5131b50500302cdde79388b62926ff83d954b4d93dedfe2503a" } diff --git a/core/lib/dal/.sqlx/query-73fb2eb75353d57898ff9810d3f5d8370dd90de4425d699446f3c9b0c77d8de5.json b/core/lib/dal/.sqlx/query-73fb2eb75353d57898ff9810d3f5d8370dd90de4425d699446f3c9b0c77d8de5.json new file mode 100644 index 000000000000..a95f9b04de51 --- /dev/null +++ b/core/lib/dal/.sqlx/query-73fb2eb75353d57898ff9810d3f5d8370dd90de4425d699446f3c9b0c77d8de5.json @@ -0,0 +1,22 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT\n sealed_at\n FROM\n l1_batches\n WHERE\n is_sealed\n AND number = $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "sealed_at", + "type_info": "Timestamp" + } + ], + "parameters": { + "Left": [ + "Int8" + ] + }, + "nullable": [ + true + ] + }, + "hash": "73fb2eb75353d57898ff9810d3f5d8370dd90de4425d699446f3c9b0c77d8de5" +} diff --git a/core/lib/dal/migrations/20241108051505_sealed_at.down.sql b/core/lib/dal/migrations/20241108051505_sealed_at.down.sql new file mode 100644 index 000000000000..6e20a782e7a4 --- /dev/null +++ b/core/lib/dal/migrations/20241108051505_sealed_at.down.sql @@ -0,0 +1,2 @@ +-- Add down migration script here +ALTER TABLE l1_batches DROP COLUMN IF EXISTS sealed_at; diff --git a/core/lib/dal/migrations/20241108051505_sealed_at.up.sql b/core/lib/dal/migrations/20241108051505_sealed_at.up.sql new file mode 100644 index 000000000000..e442fd6bf309 --- /dev/null +++ b/core/lib/dal/migrations/20241108051505_sealed_at.up.sql @@ -0,0 +1,2 @@ +-- add a sealed_at column for metrics +ALTER TABLE l1_batches ADD COLUMN sealed_at TIMESTAMP; diff --git a/core/lib/dal/src/blocks_dal.rs b/core/lib/dal/src/blocks_dal.rs index f71dc68ce757..e4b8664169c9 100644 --- a/core/lib/dal/src/blocks_dal.rs +++ b/core/lib/dal/src/blocks_dal.rs @@ -7,6 +7,7 @@ use std::{ use anyhow::Context as _; use bigdecimal::{BigDecimal, FromPrimitive, ToPrimitive}; +use chrono::NaiveDateTime; use zksync_db_connection::{ connection::Connection, error::{DalResult, SqlxContext}, @@ -735,6 +736,7 @@ impl BlocksDal<'_, '_> { pubdata_input = $19, predicted_circuits_by_type = $20, updated_at = NOW(), + sealed_at = NOW(), is_sealed = TRUE WHERE number = $1 @@ -2324,6 +2326,29 @@ impl BlocksDal<'_, '_> { .flatten()) } + pub async fn get_sealed_at( + &mut self, + l1_batch_number: L1BatchNumber, + ) -> DalResult> { + Ok(sqlx::query!( + r#" + SELECT + sealed_at + FROM + l1_batches + WHERE + is_sealed + AND number = $1 + "#, + i64::from(l1_batch_number.0) + ) + .instrument("get_sealed_at") + .with_arg("l1_batch_number", &l1_batch_number) + .fetch_optional(self.storage) + .await? + .and_then(|row| row.sealed_at)) + } + pub async fn set_protocol_version_for_pending_l2_blocks( &mut self, id: ProtocolVersionId, diff --git a/core/node/proof_data_handler/Cargo.toml b/core/node/proof_data_handler/Cargo.toml index e2ddc972a2f5..f6b8bc5c78a7 100644 --- a/core/node/proof_data_handler/Cargo.toml +++ b/core/node/proof_data_handler/Cargo.toml @@ -11,6 +11,7 @@ keywords.workspace = true categories.workspace = true [dependencies] +chrono.workspace = true vise.workspace = true zksync_config.workspace = true zksync_dal.workspace = true diff --git a/core/node/proof_data_handler/src/metrics.rs b/core/node/proof_data_handler/src/metrics.rs index edccda90dc24..233db15aa0dd 100644 --- a/core/node/proof_data_handler/src/metrics.rs +++ b/core/node/proof_data_handler/src/metrics.rs @@ -1,6 +1,9 @@ -use vise::{Histogram, Metrics}; +use std::{fmt, time::Duration}; + +use vise::{EncodeLabelSet, EncodeLabelValue, Family, Histogram, Metrics, Unit}; use zksync_object_store::bincode; use zksync_prover_interface::inputs::WitnessInputData; +use zksync_types::tee_types::TeeType; const BYTES_IN_MEGABYTE: u64 = 1024 * 1024; @@ -14,6 +17,24 @@ pub(super) struct ProofDataHandlerMetrics { pub eip_4844_blob_size_in_mb: Histogram, #[metrics(buckets = vise::Buckets::exponential(1.0..=2_048.0, 2.0))] pub total_blob_size_in_mb: Histogram, + #[metrics(buckets = vise::Buckets::LATENCIES, unit = Unit::Seconds)] + pub tee_proof_roundtrip_time: Family>, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, EncodeLabelSet, EncodeLabelValue)] +#[metrics(label = "tee_type")] +pub(crate) struct MetricsTeeType(pub TeeType); + +impl fmt::Display for MetricsTeeType { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(formatter) + } +} + +impl From for MetricsTeeType { + fn from(value: TeeType) -> Self { + Self(value) + } } impl ProofDataHandlerMetrics { diff --git a/core/node/proof_data_handler/src/tee_request_processor.rs b/core/node/proof_data_handler/src/tee_request_processor.rs index 2c2a56300097..df323ad593fe 100644 --- a/core/node/proof_data_handler/src/tee_request_processor.rs +++ b/core/node/proof_data_handler/src/tee_request_processor.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use axum::{extract::Path, Json}; +use chrono::Utc; use zksync_config::configs::ProofDataHandlerConfig; use zksync_dal::{ConnectionPool, Core, CoreDal}; use zksync_object_store::{ObjectStore, ObjectStoreError}; @@ -16,7 +17,7 @@ use zksync_prover_interface::{ use zksync_types::{tee_types::TeeType, L1BatchNumber, L2ChainId}; use zksync_vm_executor::storage::L1BatchParamsProvider; -use crate::errors::RequestProcessorError; +use crate::{errors::RequestProcessorError, metrics::METRICS}; #[derive(Clone)] pub(crate) struct TeeRequestProcessor { @@ -193,11 +194,6 @@ impl TeeRequestProcessor { let mut connection = self.pool.connection_tagged("tee_request_processor").await?; let mut dal = connection.tee_proof_generation_dal(); - tracing::info!( - "Received proof {:?} for batch number: {:?}", - proof, - l1_batch_number - ); dal.save_proof_artifacts_metadata( l1_batch_number, proof.0.tee_type, @@ -207,6 +203,28 @@ impl TeeRequestProcessor { ) .await?; + let sealed_at = connection + .blocks_dal() + .get_sealed_at(l1_batch_number) + .await?; + + let duration = + sealed_at.and_then(|sealed_at| (Utc::now() - sealed_at.and_utc()).to_std().ok()); + + let duration_secs_f64 = if let Some(duration) = duration { + METRICS.tee_proof_roundtrip_time[&proof.0.tee_type.into()].observe(duration); + duration.as_secs_f64() + } else { + f64::NAN + }; + + tracing::info!( + l1_batch_number = %l1_batch_number, + sealed_to_proven_in_secs = %duration_secs_f64, + "Received proof {:?}", + proof + ); + Ok(Json(SubmitProofResponse::Success)) } From e87acfec24ace2fc50cc6d3db3a96774607ed8c3 Mon Sep 17 00:00:00 2001 From: Harald Hoyer Date: Mon, 18 Nov 2024 13:01:40 +0100 Subject: [PATCH 2/2] fixup! feat: add more metrics for the tee_prover Signed-off-by: Harald Hoyer --- core/bin/zksync_tee_prover/src/tee_prover.rs | 4 ++-- ...f5ae0d087ed816920e8e75a163b87a19e39db86227.json} | 4 ++-- core/lib/dal/src/blocks_dal.rs | 13 ++++++------- .../proof_data_handler/src/tee_request_processor.rs | 7 +++---- 4 files changed, 13 insertions(+), 15 deletions(-) rename core/lib/dal/.sqlx/{query-73fb2eb75353d57898ff9810d3f5d8370dd90de4425d699446f3c9b0c77d8de5.json => query-6ec93ebdd58bdc0259d98ef5ae0d087ed816920e8e75a163b87a19e39db86227.json} (72%) diff --git a/core/bin/zksync_tee_prover/src/tee_prover.rs b/core/bin/zksync_tee_prover/src/tee_prover.rs index da431ac5b332..58f3d45969ca 100644 --- a/core/bin/zksync_tee_prover/src/tee_prover.rs +++ b/core/bin/zksync_tee_prover/src/tee_prover.rs @@ -82,9 +82,9 @@ impl TeeProver { let signature = self.config.signing_key.sign_ecdsa(msg_to_sign); let duration = observer.observe(); tracing::info!( - proof_generation_time = %duration.as_secs_f64(), + proof_generation_time = duration.as_secs_f64(), l1_batch_number = %batch_number, - l1_root_hash = %verification_result.value_hash, + l1_root_hash = ?verification_result.value_hash, "L1 batch verified", ); Ok((signature, batch_number, verification_result.value_hash)) diff --git a/core/lib/dal/.sqlx/query-73fb2eb75353d57898ff9810d3f5d8370dd90de4425d699446f3c9b0c77d8de5.json b/core/lib/dal/.sqlx/query-6ec93ebdd58bdc0259d98ef5ae0d087ed816920e8e75a163b87a19e39db86227.json similarity index 72% rename from core/lib/dal/.sqlx/query-73fb2eb75353d57898ff9810d3f5d8370dd90de4425d699446f3c9b0c77d8de5.json rename to core/lib/dal/.sqlx/query-6ec93ebdd58bdc0259d98ef5ae0d087ed816920e8e75a163b87a19e39db86227.json index a95f9b04de51..bdc6b007e9df 100644 --- a/core/lib/dal/.sqlx/query-73fb2eb75353d57898ff9810d3f5d8370dd90de4425d699446f3c9b0c77d8de5.json +++ b/core/lib/dal/.sqlx/query-6ec93ebdd58bdc0259d98ef5ae0d087ed816920e8e75a163b87a19e39db86227.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT\n sealed_at\n FROM\n l1_batches\n WHERE\n is_sealed\n AND number = $1\n ", + "query": "\n SELECT\n sealed_at\n FROM\n l1_batches\n WHERE\n number = $1\n ", "describe": { "columns": [ { @@ -18,5 +18,5 @@ true ] }, - "hash": "73fb2eb75353d57898ff9810d3f5d8370dd90de4425d699446f3c9b0c77d8de5" + "hash": "6ec93ebdd58bdc0259d98ef5ae0d087ed816920e8e75a163b87a19e39db86227" } diff --git a/core/lib/dal/src/blocks_dal.rs b/core/lib/dal/src/blocks_dal.rs index 307d9717ad5a..0cb607a304e2 100644 --- a/core/lib/dal/src/blocks_dal.rs +++ b/core/lib/dal/src/blocks_dal.rs @@ -7,7 +7,7 @@ use std::{ use anyhow::Context as _; use bigdecimal::{BigDecimal, FromPrimitive, ToPrimitive}; -use chrono::NaiveDateTime; +use sqlx::types::chrono::{DateTime, Utc}; use zksync_db_connection::{ connection::Connection, error::{DalResult, SqlxContext}, @@ -2396,10 +2396,10 @@ impl BlocksDal<'_, '_> { .flatten()) } - pub async fn get_sealed_at( + pub async fn get_batch_sealed_at( &mut self, l1_batch_number: L1BatchNumber, - ) -> DalResult> { + ) -> DalResult>> { Ok(sqlx::query!( r#" SELECT @@ -2407,16 +2407,15 @@ impl BlocksDal<'_, '_> { FROM l1_batches WHERE - is_sealed - AND number = $1 + number = $1 "#, i64::from(l1_batch_number.0) ) - .instrument("get_sealed_at") + .instrument("get_batch_sealed_at") .with_arg("l1_batch_number", &l1_batch_number) .fetch_optional(self.storage) .await? - .and_then(|row| row.sealed_at)) + .and_then(|row| row.sealed_at.map(|d| d.and_utc()))) } pub async fn set_protocol_version_for_pending_l2_blocks( diff --git a/core/node/proof_data_handler/src/tee_request_processor.rs b/core/node/proof_data_handler/src/tee_request_processor.rs index becc6f1052ba..ee5be844b981 100644 --- a/core/node/proof_data_handler/src/tee_request_processor.rs +++ b/core/node/proof_data_handler/src/tee_request_processor.rs @@ -206,11 +206,10 @@ impl TeeRequestProcessor { let sealed_at = connection .blocks_dal() - .get_sealed_at(l1_batch_number) + .get_batch_sealed_at(l1_batch_number) .await?; - let duration = - sealed_at.and_then(|sealed_at| (Utc::now() - sealed_at.and_utc()).to_std().ok()); + let duration = sealed_at.and_then(|sealed_at| (Utc::now() - sealed_at).to_std().ok()); let duration_secs_f64 = if let Some(duration) = duration { METRICS.tee_proof_roundtrip_time[&proof.0.tee_type.into()].observe(duration); @@ -221,7 +220,7 @@ impl TeeRequestProcessor { tracing::info!( l1_batch_number = %l1_batch_number, - sealed_to_proven_in_secs = %duration_secs_f64, + sealed_to_proven_in_secs = duration_secs_f64, "Received proof {:?}", proof );