Skip to content

Commit

Permalink
Remove expensive counters (#10188)
Browse files Browse the repository at this point in the history
  • Loading branch information
sitalkedia authored Oct 3, 2023
1 parent 593ad78 commit 059e4f8
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 49 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 0 additions & 11 deletions aptos-move/block-executor/src/counters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,17 +148,6 @@ pub static TASK_EXECUTE_SECONDS: Lazy<Histogram> = Lazy::new(|| {
.unwrap()
});

/// Histogram of the time spent fetching the next task from the scheduler
/// (started via `start_timer()` in `Scheduler::next_task`).
pub static GET_NEXT_TASK_SECONDS: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
// metric name
"aptos_execution_get_next_task_seconds",
// metric description
"The time spent in seconds for getting next task from the scheduler",
// buckets: 30 exponential buckets doubling from 1 microsecond
exponential_buckets(/*start=*/ 1e-6, /*factor=*/ 2.0, /*count=*/ 30).unwrap(),
)
.unwrap()
});

pub static DEPENDENCY_WAIT_SECONDS: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"aptos_execution_dependency_wait",
Expand Down
2 changes: 0 additions & 2 deletions aptos-move/block-executor/src/scheduler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// Parts of the project are originally copyright © Meta Platforms, Inc.
// SPDX-License-Identifier: Apache-2.0

use crate::counters::GET_NEXT_TASK_SECONDS;
use aptos_infallible::Mutex;
use aptos_mvhashmap::types::{Incarnation, TxnIndex};
use crossbeam::utils::CachePadded;
Expand Down Expand Up @@ -343,7 +342,6 @@ impl Scheduler {

/// Return the next task for the thread.
pub fn next_task(&self, committing: bool) -> SchedulerTask {
let _timer = GET_NEXT_TASK_SECONDS.start_timer();
loop {
if self.done() {
// No more tasks.
Expand Down
1 change: 1 addition & 0 deletions storage/schemadb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ aptos-logger = { workspace = true }
aptos-metrics-core = { workspace = true }
once_cell = { workspace = true }
proptest = { workspace = true, optional = true }
rand = { workspace = true }
rocksdb = { workspace = true }

[dev-dependencies]
Expand Down
46 changes: 30 additions & 16 deletions storage/schemadb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ pub mod iterator;
use crate::{
metrics::{
APTOS_SCHEMADB_BATCH_COMMIT_BYTES, APTOS_SCHEMADB_BATCH_COMMIT_LATENCY_SECONDS,
APTOS_SCHEMADB_BATCH_PUT_LATENCY_SECONDS, APTOS_SCHEMADB_DELETES, APTOS_SCHEMADB_GET_BYTES,
APTOS_SCHEMADB_DELETES_SAMPLED, APTOS_SCHEMADB_GET_BYTES,
APTOS_SCHEMADB_GET_LATENCY_SECONDS, APTOS_SCHEMADB_ITER_BYTES,
APTOS_SCHEMADB_ITER_LATENCY_SECONDS, APTOS_SCHEMADB_PUT_BYTES,
APTOS_SCHEMADB_ITER_LATENCY_SECONDS, APTOS_SCHEMADB_PUT_BYTES_SAMPLED,
APTOS_SCHEMADB_SEEK_LATENCY_SECONDS,
},
schema::{KeyCodec, Schema, SeekKeyCodec, ValueCodec},
Expand All @@ -33,6 +33,7 @@ use anyhow::{format_err, Result};
use aptos_infallible::Mutex;
use aptos_logger::prelude::*;
use iterator::{ScanDirection, SchemaIterator};
use rand::Rng;
/// Type alias to `rocksdb::ReadOptions`. See [`rocksdb doc`](https://github.com/pingcap/rust-rocksdb/blob/master/src/rocksdb_options.rs)
pub use rocksdb::{
BlockBasedOptions, Cache, ColumnFamilyDescriptor, DBCompressionType, Options, ReadOptions,
Expand Down Expand Up @@ -71,9 +72,6 @@ impl SchemaBatch {

/// Adds an insert/update operation to the batch.
pub fn put<S: Schema>(&self, key: &S::Key, value: &S::Value) -> Result<()> {
let _timer = APTOS_SCHEMADB_BATCH_PUT_LATENCY_SECONDS
.with_label_values(&["unknown"])
.start_timer();
let key = <S::Key as KeyCodec<S>>::encode_key(key)?;
let value = <S::Value as ValueCodec<S>>::encode_value(value)?;
self.rows
Expand Down Expand Up @@ -224,10 +222,21 @@ impl DB {

/// Writes a group of records wrapped in a [`SchemaBatch`].
pub fn write_schemas(&self, batch: SchemaBatch) -> Result<()> {
// Decides whether this batch's per-row counters should be recorded.
//
// Returns true with probability `sampling_percentage` / 100.
//
// The random value is drawn uniformly from [0, 100) (the upper bound of
// `gen_range` is exclusive), so the comparison must be strict `<`: with
// the previous `<=` the check fired on `sampling_percentage + 1` of the
// 100 possible outcomes, i.e. a configured 1% rate actually sampled ~2%.
fn should_sample(sampling_percentage: usize) -> bool {
    // Uniform integer in [0, 100).
    let random_value = rand::thread_rng().gen_range(0, 100);

    // Sample when the value falls inside the first `sampling_percentage` buckets.
    random_value < sampling_percentage
}

let _timer = APTOS_SCHEMADB_BATCH_COMMIT_LATENCY_SECONDS
.with_label_values(&[&self.name])
.start_timer();
let rows_locked = batch.rows.lock();
let sampling_rate_pct = 1;
let sampled_kv_bytes = should_sample(sampling_rate_pct);

let mut db_batch = rocksdb::WriteBatch::default();
for (cf_name, rows) in rows_locked.iter() {
Expand All @@ -244,20 +253,25 @@ impl DB {
self.inner.write_opt(db_batch, &default_write_options())?;

// Bump counters only after DB write succeeds.
for (cf_name, rows) in rows_locked.iter() {
for write_op in rows {
match write_op {
WriteOp::Value { key, value } => {
APTOS_SCHEMADB_PUT_BYTES
.with_label_values(&[cf_name])
.observe((key.len() + value.len()) as f64);
},
WriteOp::Deletion { key: _ } => {
APTOS_SCHEMADB_DELETES.with_label_values(&[cf_name]).inc();
},
if sampled_kv_bytes {
for (cf_name, rows) in rows_locked.iter() {
for write_op in rows {
match write_op {
WriteOp::Value { key, value } => {
APTOS_SCHEMADB_PUT_BYTES_SAMPLED
.with_label_values(&[cf_name])
.observe((key.len() + value.len()) as f64);
},
WriteOp::Deletion { key: _ } => {
APTOS_SCHEMADB_DELETES_SAMPLED
.with_label_values(&[cf_name])
.inc();
},
}
}
}
}

APTOS_SCHEMADB_BATCH_COMMIT_BYTES
.with_label_values(&[&self.name])
.observe(serialized_size as f64);
Expand Down
27 changes: 8 additions & 19 deletions storage/schemadb/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,34 +96,23 @@ pub static APTOS_SCHEMADB_BATCH_COMMIT_BYTES: Lazy<HistogramVec> = Lazy::new(||
.unwrap()
});

pub static APTOS_SCHEMADB_PUT_BYTES: Lazy<HistogramVec> = Lazy::new(|| {
pub static APTOS_SCHEMADB_PUT_BYTES_SAMPLED: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
// metric name
"aptos_schemadb_put_bytes",
"aptos_schemadb_put_bytes_sampled",
// metric description
"Aptos schemadb put call puts data size in bytes",
"Aptos schemadb put call puts data size in bytes (sampled)",
// metric labels (dimensions)
&["cf_name"]
)
.unwrap()
});

/// Counter of delete operations committed through `SchemaBatch`, labelled by column family.
pub static APTOS_SCHEMADB_DELETES: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
// metric name
"aptos_storage_deletes",
// metric description
"Aptos storage delete calls",
// metric labels (dimensions)
&["cf_name"]
)
.unwrap()
});

pub static APTOS_SCHEMADB_BATCH_PUT_LATENCY_SECONDS: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
// metric name
"aptos_schemadb_batch_put_latency_seconds",
// metric description
"Aptos schemadb schema batch put latency in seconds",
// metric labels (dimensions)
&["db_name"],
exponential_buckets(/*start=*/ 1e-3, /*factor=*/ 2.0, /*count=*/ 20).unwrap(),
/// Sampled counter of delete operations, labelled by column family.
/// Only bumped for batches selected by the write-path sampler, so it
/// undercounts by design; scale by the sampling rate when interpreting.
pub static APTOS_SCHEMADB_DELETES_SAMPLED: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
// metric name
"aptos_storage_deletes_sampled",
// metric description
"Aptos storage delete calls (sampled)",
// metric labels (dimensions)
&["cf_name"]
)
.unwrap()
});
2 changes: 1 addition & 1 deletion testsuite/single_node_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class RunGroupConfig:
RunGroupConfig(expected_tps=22700, key=RunGroupKey("no-op"), included_in=Flow.LAND_BLOCKING),
RunGroupConfig(expected_tps=3200, key=RunGroupKey("no-op", module_working_set_size=1000), included_in=Flow.LAND_BLOCKING),
RunGroupConfig(expected_tps=15000, key=RunGroupKey("coin-transfer"), included_in=Flow.LAND_BLOCKING | Flow.REPRESENTATIVE),
RunGroupConfig(expected_tps=26300, key=RunGroupKey("coin-transfer", executor_type="native"), included_in=Flow.LAND_BLOCKING),
RunGroupConfig(expected_tps=29000, key=RunGroupKey("coin-transfer", executor_type="native"), included_in=Flow.LAND_BLOCKING),
RunGroupConfig(expected_tps=12700, key=RunGroupKey("account-generation"), included_in=Flow.LAND_BLOCKING | Flow.REPRESENTATIVE),
RunGroupConfig(expected_tps=26500, key=RunGroupKey("account-generation", executor_type="native"), included_in=Flow.CONTINUOUS),
RunGroupConfig(expected_tps=20000, key=RunGroupKey("account-resource32-b"), included_in=Flow.LAND_BLOCKING),
Expand Down

0 comments on commit 059e4f8

Please sign in to comment.