Skip to content

Commit

Permalink
use prom server registry for load generator & adjust buckets (#4581)
Browse files Browse the repository at this point in the history
* update/add buckets

* remove 40 and 50
  • Loading branch information
longbowlu authored Sep 13, 2022
1 parent 66113ea commit 09e052a
Show file tree
Hide file tree
Showing 10 changed files with 69 additions and 53 deletions.
4 changes: 3 additions & 1 deletion crates/sui-benchmark/src/drivers/bench_driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ pub struct BenchMetrics {
pub latency_s: HistogramVec,
}

const LATENCY_SEC_BUCKETS: &[f64] = &[0.01, 0.1, 1., 2., 3., 5., 10., 20., 30., 60., 180.];
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.01, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl BenchMetrics {
fn new(registry: &Registry) -> Self {
Expand Down
4 changes: 3 additions & 1 deletion crates/sui-core/src/authority.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ const POSITIVE_INT_BUCKETS: &[f64] = &[
1., 2., 5., 10., 20., 50., 100., 200., 500., 1000., 2000., 5000., 10000., 20000., 50000.,
];

const LATENCY_SEC_BUCKETS: &[f64] = &[0.001, 0.01, 0.1, 1., 2., 3., 5., 10., 20., 30., 60., 180.];
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl AuthorityMetrics {
pub fn new(registry: &prometheus::Registry) -> AuthorityMetrics {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::{

use parking_lot::Mutex;
use prometheus::{
register_histogram_with_registry, register_int_counter_with_registry,
linear_buckets, register_histogram_with_registry, register_int_counter_with_registry,
register_int_gauge_with_registry, Histogram, IntCounter, IntGauge, Registry,
};
use sui_types::{
Expand Down Expand Up @@ -122,6 +122,9 @@ impl CheckpointMetrics {
checkpoint_frequency: register_histogram_with_registry!(
"checkpoint_frequency",
"Number of seconds elapsed between two consecutive checkpoint certificates",
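// Buckets are in seconds: start at 60s (1 min) and step by 180s (3 min) for 20 buckets,
// i.e. 60, 240, ..., 3480 seconds = [1, 4, ..., 58] minutes.
// Safe to unwrap: `linear_buckets` only errors on a zero count or a non-positive width.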
linear_buckets(60., 180., 20).unwrap(),
registry,
)
.unwrap(),
Expand Down
9 changes: 9 additions & 0 deletions crates/sui-core/src/authority_active/gossip/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ pub struct GossipMetrics {
pub follower_stream_duration: Histogram,
}

/// Histogram bucket upper bounds, in seconds, for the
/// `gossip_wait_for_finality_latency_sec` metric: 1 ms up to 90 s.
const WAIT_FOR_FINALITY_LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];
/// Histogram bucket upper bounds, in seconds, for the `follower_stream_duration`
/// metric: 0.1 s up to 300 s (5 min).
const FOLLOWER_STREAM_DURATION_SEC_BUCKETS: &[f64] = &[
0.1, 1., 5., 10., 20., 30., 40., 50., 60., 90., 120., 180., 240., 300.,
];

impl GossipMetrics {
pub fn new(registry: &Registry) -> Self {
Self {
Expand Down Expand Up @@ -87,6 +94,7 @@ impl GossipMetrics {
wait_for_finality_latency_sec: register_histogram_with_registry!(
"gossip_wait_for_finality_latency_sec",
"Latency histogram for gossip/node sync process to wait for txs to become final, in seconds",
WAIT_FOR_FINALITY_LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
Expand All @@ -105,6 +113,7 @@ impl GossipMetrics {
follower_stream_duration: register_histogram_with_registry!(
"follower_stream_duration",
"Latency histogram of the duration of the follower streams to peers, in seconds",
FOLLOWER_STREAM_DURATION_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
Expand Down
76 changes: 28 additions & 48 deletions crates/sui-core/src/authority_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,107 +134,82 @@ impl AuthorityAPI for NetworkAuthorityClient {
&self,
transaction: Transaction,
) -> Result<TransactionInfoResponse, SuiError> {
let timer = self
let _timer = self
.metrics
.handle_transaction_request_latency
.start_timer();

let response = self
.client()
self.client()
.transaction(transaction)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

/// Execute a certificate.
async fn handle_certificate(
&self,
certificate: CertifiedTransaction,
) -> Result<TransactionInfoResponse, SuiError> {
let timer = self
let _timer = self
.metrics
.handle_certificate_request_latency
.start_timer();

let response = self
.client()
self.client()
.handle_certificate(certificate)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

async fn handle_account_info_request(
&self,
request: AccountInfoRequest,
) -> Result<AccountInfoResponse, SuiError> {
let timer = self
let _timer = self
.metrics
.handle_account_info_request_latency
.start_timer();

let response = self
.client()
self.client()
.account_info(request)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

async fn handle_object_info_request(
&self,
request: ObjectInfoRequest,
) -> Result<ObjectInfoResponse, SuiError> {
let timer = self
let _timer = self
.metrics
.handle_object_info_request_latency
.start_timer();

let response = self
.client()
self.client()
.object_info(request)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

/// Handle transaction information requests for this authority.
async fn handle_transaction_info_request(
&self,
request: TransactionInfoRequest,
) -> Result<TransactionInfoResponse, SuiError> {
let timer = self
let _timer = self
.metrics
.handle_transaction_info_request_latency
.start_timer();

let response = self
.client()
self.client()
.transaction_info(request)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

/// Handle Batch information requests for this authority.
Expand All @@ -257,18 +232,13 @@ impl AuthorityAPI for NetworkAuthorityClient {
&self,
request: CheckpointRequest,
) -> Result<CheckpointResponse, SuiError> {
let timer = self.metrics.handle_checkpoint_request_latency.start_timer();
let _timer = self.metrics.handle_checkpoint_request_latency.start_timer();

let response = self
.client()
self.client()
.checkpoint(request)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

async fn handle_epoch(&self, request: EpochRequest) -> Result<EpochResponse, SuiError> {
Expand Down Expand Up @@ -488,42 +458,52 @@ pub struct NetworkAuthorityClientMetrics {
pub handle_checkpoint_request_latency: Histogram,
}

/// Histogram bucket upper bounds, in seconds, shared by every
/// `handle_*_request_latency` histogram registered in
/// `NetworkAuthorityClientMetrics::new`: 1 ms up to 90 s.
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl NetworkAuthorityClientMetrics {
pub fn new(registry: &prometheus::Registry) -> Self {
Self {
handle_transaction_request_latency: register_histogram_with_registry!(
"handle_transaction_request_latency",
"Latency of handle transaction request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
handle_certificate_request_latency: register_histogram_with_registry!(
"handle_certificate_request_latency",
"Latency of handle certificate request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
handle_account_info_request_latency: register_histogram_with_registry!(
"handle_account_info_request_latency",
"Latency of handle account info request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
handle_object_info_request_latency: register_histogram_with_registry!(
"handle_object_info_request_latency",
"Latency of handle object info request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
handle_transaction_info_request_latency: register_histogram_with_registry!(
"handle_transaction_info_request_latency",
"Latency of handle transaction info request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
handle_checkpoint_request_latency: register_histogram_with_registry!(
"handle_checkpoint_request_latency",
"Latency of handle checkpoint request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
Expand Down
4 changes: 3 additions & 1 deletion crates/sui-core/src/authority_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,9 @@ pub struct ValidatorServiceMetrics {
pub handle_certificate_non_consensus_latency: Histogram,
}

const LATENCY_SEC_BUCKETS: &[f64] = &[0.001, 0.01, 0.1, 1., 2., 3., 5., 10., 20., 30., 60., 180.];
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl ValidatorServiceMetrics {
pub fn new(registry: &Registry) -> Self {
Expand Down
4 changes: 3 additions & 1 deletion crates/sui-core/src/safe_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ pub struct SafeClientMetrics {
latency: HistogramVec,
}

const LATENCY_SEC_BUCKETS: &[f64] = &[0.001, 0.01, 0.1, 1., 2., 3., 5., 10., 20., 30., 60., 180.];
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl SafeClientMetrics {
pub fn new(registry: &prometheus::Registry) -> Self {
Expand Down
4 changes: 4 additions & 0 deletions crates/sui-faucet/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ pub struct FaucetMetrics {
pub(crate) current_requests_in_flight: IntGauge,
pub(crate) process_latency: Histogram,
}
/// Histogram bucket upper bounds, in seconds, for the faucet's `process_latency`
/// metric: 1 ms up to 90 s.
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl FaucetMetrics {
pub fn new(registry: &Registry) -> Self {
Expand All @@ -40,6 +43,7 @@ impl FaucetMetrics {
process_latency: register_histogram_with_registry!(
"process_latency",
"Latency of processing a Faucet request",
LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
Expand Down
5 changes: 5 additions & 0 deletions crates/sui-json-rpc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ pub struct JsonRpcMetrics {
errors_by_route: IntCounterVec,
}

/// Histogram bucket upper bounds, in seconds, for the per-route
/// `req_latency_by_route` metric: 1 ms up to 90 s.
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl JsonRpcMetrics {
pub fn new(registry: &prometheus::Registry) -> Self {
Self {
Expand All @@ -211,6 +215,7 @@ impl JsonRpcMetrics {
"req_latency_by_route",
"Latency of a request by route",
&["route"],
LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
Expand Down
7 changes: 7 additions & 0 deletions crates/sui-quorum-driver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ pub struct QuorumDriverMetrics {
pub(crate) current_requests_in_flight: IntGauge,
}

/// Histogram bucket upper bounds, in seconds, shared by the three
/// `quorum_driver_latency_sec_*` histograms: 10 ms up to 90 s, with 2 s steps
/// between 2 s and 10 s.
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.01, 0.05, 0.1, 0.25, 0.5, 1., 2., 4., 6., 8., 10., 20., 30., 60., 90.,
];

impl QuorumDriverMetrics {
pub fn new(registry: &Registry) -> Self {
Self {
Expand Down Expand Up @@ -65,18 +69,21 @@ impl QuorumDriverMetrics {
latency_sec_immediate_return: register_histogram_with_registry!(
"quorum_driver_latency_sec_immediate_return",
"Latency of processing an immdediate_return execution request, in sec",
LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
latency_sec_wait_for_tx_cert: register_histogram_with_registry!(
"quorum_driver_latency_sec_wait_for_tx_cert",
"Latency of processing an wait_for_tx_cert execution request, in sec",
LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
latency_sec_wait_for_effects_cert: register_histogram_with_registry!(
"quorum_driver_latency_sec_wait_for_effects_cert",
"Latency of processing an wait_for_effects_cert execution request, in sec",
LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
Expand Down

0 comments on commit 09e052a

Please sign in to comment.