Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(tee): add Prometheus metrics to the TEE Prover #2386

Merged
merged 10 commits into from
Jul 5, 2024
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions core/bin/zksync_tee_prover/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ thiserror.workspace = true
tokio = { workspace = true, features = ["full"] }
tracing.workspace = true
url.workspace = true
vise.workspace = true
zksync_basic_types.workspace = true
zksync_config.workspace = true
zksync_env_config.workspace = true
Expand Down
6 changes: 5 additions & 1 deletion core/bin/zksync_tee_prover/src/api_client.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::time::Instant;

use reqwest::Client;
use secp256k1::{ecdsa::Signature, PublicKey};
use serde::{de::DeserializeOwned, Serialize};
Expand All @@ -13,7 +15,7 @@ use zksync_prover_interface::{
};
use zksync_types::{tee_types::TeeType, L1BatchNumber};

use crate::error::TeeProverError;
use crate::{error::TeeProverError, metrics::METRICS};

/// Implementation of the API client for the proof data handler, run by
/// [`zksync_proof_data_handler::run_server`].
Expand Down Expand Up @@ -97,11 +99,13 @@ impl TeeApiClient {
proof: root_hash.as_bytes().into(),
tee_type,
}));
let started_at = Instant::now();
self.post::<_, SubmitTeeProofResponse, _>(
format!("/tee/submit_proofs/{batch_number}").as_str(),
request,
)
.await?;
METRICS.proof_submitting_time.observe(started_at.elapsed());
pbeza marked this conversation as resolved.
Show resolved Hide resolved
tracing::info!(
"Proof submitted successfully for batch number {}",
batch_number
Expand Down
16 changes: 14 additions & 2 deletions core/bin/zksync_tee_prover/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
use anyhow::Context as _;
use config::TeeProverConfig;
use tee_prover::TeeProverLayer;
use zksync_config::configs::ObservabilityConfig;
use zksync_config::configs::{ObservabilityConfig, PrometheusConfig};
use zksync_env_config::FromEnv;
use zksync_node_framework::{
implementations::layers::sigint::SigintHandlerLayer, service::ZkStackServiceBuilder,
implementations::layers::{
prometheus_exporter::PrometheusExporterLayer, sigint::SigintHandlerLayer,
},
service::ZkStackServiceBuilder,
};
use zksync_vlog::prometheus::PrometheusExporterConfig;

mod api_client;
mod config;
mod error;
mod metrics;
mod tee_prover;

/// This application serves as a TEE verifier, a.k.a. a TEE prover.
Expand Down Expand Up @@ -41,8 +46,15 @@ fn main() -> anyhow::Result<()> {
let tee_prover_config = TeeProverConfig::from_env()?;
let attestation_quote_bytes = std::fs::read(tee_prover_config.attestation_quote_file_path)?;

let prometheus_config = PrometheusConfig::from_env()?;
let exporter_config = PrometheusExporterConfig::push(
prometheus_config.gateway_endpoint(),
prometheus_config.push_interval(),
);

ZkStackServiceBuilder::new()
.add_layer(SigintHandlerLayer)
.add_layer(PrometheusExporterLayer(exporter_config))
.add_layer(TeeProverLayer::new(
tee_prover_config.api_url,
tee_prover_config.signing_key,
Expand Down
21 changes: 21 additions & 0 deletions core/bin/zksync_tee_prover/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//! Metrics for the TEE Prover.

use std::time::Duration;

use vise::{Buckets, Gauge, Histogram, Metrics, Unit};

#[derive(Debug, Metrics)]
#[metrics(prefix = "tee_prover")]
pub(crate) struct TeeProverMetrics {
#[metrics(buckets = Buckets::LATENCIES, unit = Unit::Seconds)]
pub job_waiting_time: Histogram<Duration>,
#[metrics(buckets = Buckets::LATENCIES, unit = Unit::Seconds)]
pub proof_generation_time: Histogram<Duration>,
#[metrics(buckets = Buckets::LATENCIES, unit = Unit::Seconds)]
pub proof_submitting_time: Histogram<Duration>,
pub network_errors_counter: Gauge<u64>,
pub last_batch_number_processed: Gauge<u64>,
}

/// Global [`TeeProverMetrics`] instance shared by the API client and the prover task.
// NOTE(review): `#[vise::register]` presumably adds this instance to the registry scraped or
// pushed by the Prometheus exporter — confirm against the `vise` documentation.
#[vise::register]
pub(super) static METRICS: vise::Global<TeeProverMetrics> = vise::Global::new();
28 changes: 22 additions & 6 deletions core/bin/zksync_tee_prover/src/tee_prover.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::time::Duration;
use std::time::{Duration, Instant};

use secp256k1::{ecdsa::Signature, Message, PublicKey, Secp256k1, SecretKey};
use url::Url;
Expand All @@ -12,7 +12,7 @@ use zksync_prover_interface::inputs::TeeVerifierInput;
use zksync_tee_verifier::Verify;
use zksync_types::{tee_types::TeeType, L1BatchNumber};

use crate::{api_client::TeeApiClient, error::TeeProverError};
use crate::{api_client::TeeApiClient, error::TeeProverError, metrics::METRICS};

/// Wiring layer for `TeeProver`
///
Expand Down Expand Up @@ -83,12 +83,14 @@ impl TeeProver {
) -> Result<(Signature, L1BatchNumber, H256), TeeProverError> {
match tvi {
TeeVerifierInput::V1(tvi) => {
let started_at = Instant::now();
let verification_result = tvi.verify().map_err(TeeProverError::Verification)?;
let root_hash_bytes = verification_result.value_hash.as_bytes();
let batch_number = verification_result.batch_number;
let msg_to_sign = Message::from_slice(root_hash_bytes)
.map_err(|e| TeeProverError::Verification(e.into()))?;
let signature = self.signing_key.sign_ecdsa(msg_to_sign);
METRICS.proof_generation_time.observe(started_at.elapsed());
Ok((signature, batch_number, verification_result.value_hash))
}
_ => Err(TeeProverError::Verification(anyhow::anyhow!(
Expand All @@ -97,7 +99,7 @@ impl TeeProver {
}
}

async fn step(&self) -> Result<(), TeeProverError> {
async fn step(&self) -> Result<Option<L1BatchNumber>, TeeProverError> {
match self.api_client.get_job().await? {
Some(job) => {
let (signature, batch_number, root_hash) = self.verify(*job)?;
Expand All @@ -110,10 +112,13 @@ impl TeeProver {
self.tee_type,
)
.await?;
Ok(Some(batch_number))
}
None => {
tracing::trace!("There are currently no pending batches to be proven");
Ok(None)
}
None => tracing::trace!("There are currently no pending batches to be proven"),
}
Ok(())
}
}

Expand Down Expand Up @@ -156,6 +161,7 @@ impl Task for TeeProver {

let mut retries = 1;
let mut backoff = self.config.initial_retry_backoff;
let mut job_wait_started_at = Instant::now();

loop {
if *stop_receiver.0.borrow() {
Expand All @@ -164,11 +170,21 @@ impl Task for TeeProver {
}
let result = self.step().await;
match result {
Ok(()) => {
Ok(batch_number) => {
retries = 1;
backoff = self.config.initial_retry_backoff;
if let Some(batch_number) = batch_number {
METRICS
.job_waiting_time
.observe(job_wait_started_at.elapsed());
METRICS
.last_batch_number_processed
.set(batch_number.0 as u64);
job_wait_started_at = Instant::now();
}
}
Err(err) => {
METRICS.network_errors_counter.inc_by(1);
if !err.is_transient() || retries > self.config.max_retries {
return Err(err.into());
}
Expand Down
2 changes: 1 addition & 1 deletion core/node/tee_verifier_input_producer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ impl JobProcessor for TeeVerifierInputProducer {
.commit()
.await
.context("failed to commit DB transaction for TeeVerifierInputProducer")?;
METRICS.block_number_processed.set(job_id.0 as i64);
METRICS.block_number_processed.set(job_id.0 as u64);
Ok(())
}

Expand Down
2 changes: 1 addition & 1 deletion core/node/tee_verifier_input_producer/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub(crate) struct TeeVerifierInputProducerMetrics {
pub process_batch_time: Histogram<Duration>,
#[metrics(buckets = Buckets::LATENCIES, unit = Unit::Seconds)]
pub upload_input_time: Histogram<Duration>,
pub block_number_processed: Gauge,
pub block_number_processed: Gauge<u64>,
}

#[vise::register]
Expand Down
Loading