
Commit

fixing construct_query_with_extra_labels, and updating land_blocking checks
igor-aptos committed Aug 2, 2023
1 parent 61d6c7a commit 00f6842
Showing 4 changed files with 66 additions and 70 deletions.
18 changes: 11 additions & 7 deletions testsuite/forge-cli/src/main.rs
@@ -6,6 +6,7 @@ use anyhow::{format_err, Context, Result};
use aptos_config::config::{ChainHealthBackoffValues, ConsensusConfig, PipelineBackpressureValues};
use aptos_forge::{
args::TransactionTypeArg,
prometheus_metrics::LatencyBreakdownSlice,
success_criteria::{
LatencyBreakdownThreshold, LatencyType, MetricsThreshold, StateProgressThreshold,
SuccessCriteria, SystemMetricsThreshold,
@@ -231,7 +232,7 @@ static SYSTEM_12_CORES_10GB_THRESHOLD: Lazy<SystemMetricsThreshold> = Lazy::new(
SystemMetricsThreshold::new(
// Check that we don't use more than 12 CPU cores for 30% of the time.
MetricsThreshold::new(12.0, 30),
// Check that we don't use more than 5 GB of memory for 30% of the time.
// Check that we don't use more than 10 GB of memory for 30% of the time.
MetricsThreshold::new_gb(10.0, 30),
)
});
@@ -1523,16 +1524,19 @@ fn realistic_env_max_load_test(
(duration.as_secs() / 10).max(60),
)
.add_system_metrics_threshold(SystemMetricsThreshold::new(
// Check that we don't use more than 12 CPU cores for 30% of the time.
// Check that we don't use more than 14 CPU cores for 30% of the time.
MetricsThreshold::new(14.0, max_cpu_threshold),
// Check that we don't use more than 10 GB of memory for 30% of the time.
MetricsThreshold::new_gb(10.0, 30),
))
.add_latency_threshold(3.0, LatencyType::P50)
.add_latency_threshold(5.0, LatencyType::P90)
.add_latency_breakdown_threshold(LatencyBreakdownThreshold::new_strict(
0.3, 0.25, 0.8, 0.6,
))
.add_latency_threshold(3.4, LatencyType::P50)
.add_latency_threshold(4.5, LatencyType::P90)
.add_latency_breakdown_threshold(LatencyBreakdownThreshold::new_strict(vec![
(LatencyBreakdownSlice::QsBatchToPos, 0.3),
(LatencyBreakdownSlice::QsPosToProposal, 0.25),
(LatencyBreakdownSlice::ConsensusProposalToOrdered, 0.8),
(LatencyBreakdownSlice::ConsensusOrderedToCommit, 0.6),
]))
.add_chain_progress(StateProgressThreshold {
max_no_progress_secs: 10.0,
max_round_gap: 4,
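A note on the (value, percentage) arguments used in the thresholds above: per the inline comments, the first argument is the resource ceiling and the second is the share of samples that may exceed it. The sketch below is an inference from those comments only, not the actual aptos-forge MetricsThreshold code, and breaches_threshold is a hypothetical helper.

// Illustrative stand-in for the (limit, tolerated-breach-pct) convention, e.g.
// MetricsThreshold::new(14.0, max_cpu_threshold) or MetricsThreshold::new_gb(10.0, 30).
fn breaches_threshold(samples: &[f64], max: f64, tolerated_breach_pct: usize) -> bool {
    if samples.is_empty() {
        return false;
    }
    // Count samples above the ceiling and compare the breach rate to the allowance.
    let over = samples.iter().filter(|s| **s > max).count();
    over * 100 > samples.len() * tolerated_breach_pct
}

fn main() {
    // 2 of 10 CPU samples exceed 14 cores -> 20% breach rate, within a 30% allowance.
    let cpu = [10.0, 12.0, 15.0, 13.0, 16.0, 11.0, 12.0, 13.0, 12.0, 11.0];
    assert!(!breaches_threshold(&cpu, 14.0, 30));
}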
85 changes: 47 additions & 38 deletions testsuite/forge/src/backend/k8s/prometheus.rs
@@ -105,28 +105,36 @@ pub fn construct_query_with_extra_labels(
labels_map: &BTreeMap<String, String>,
) -> String {
// edit the query string to insert swarm metadata
let mut new_query = query.to_string();
let mut label_start_idx = query.find('{').unwrap_or(query.len());
if label_start_idx == query.len() {
// add a new curly and insert after it
new_query.insert_str(query.len(), "{}");
label_start_idx += 1;
} else {
// add a comma prefix to the existing labels and insert before it
label_start_idx += 1;
new_query.insert(label_start_idx, ',');
}
let mut new_query = "".to_string();

let mut labels_strs = vec![];
for (k, v) in labels_map {
labels_strs.push(format!(r#"{}="{}""#, k, v));
}

let labels = labels_strs.join(",");

// assume no collisions in Forge namespace
new_query.insert_str(label_start_idx, &labels);
new_query
let parts: Vec<&str> = query.split_inclusive('{').collect();
if parts.len() == 1 {
// no labels in query
format!("{}{{{}}}", query, labels)
} else {
let mut parts_iter = parts.into_iter();
let prev = parts_iter.next();
new_query.push_str(prev.unwrap());

for part in parts_iter {
if part.starts_with('}') {
// assume no collisions in Forge namespace
new_query.push_str(&labels);
} else {
// assume no collisions in Forge namespace
new_query.push_str(&labels);
new_query.push(',');
}
new_query.push_str(part);
}
new_query
}
}

pub async fn query_with_metadata(
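The rewritten construct_query_with_extra_labels leans on str::split_inclusive('{'): every produced part except possibly the last ends with the '{' it was split on, so a following part that starts with '}' means the selector was empty and the labels can be inserted without a trailing comma. A small standalone sketch of that behavior, using only the standard library:

fn main() {
    // Each part keeps its trailing '{'; the final part is whatever remains.
    let parts: Vec<&str> = r#"up{job="x"} / count{}"#.split_inclusive('{').collect();
    assert_eq!(parts, vec!["up{", r#"job="x"} / count{"#, "}"]);

    // A part starting with '}' (the last one here) marks an empty selector, so the
    // labels go in as-is; otherwise a ',' separates them from the existing labels.
    assert!(parts[2].starts_with('}'));
    assert!(!parts[1].starts_with('}'));
}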
Expand Down Expand Up @@ -169,16 +177,14 @@ pub async fn query_range_with_metadata(
new_query
)
})?;
let range = r.as_range()
.ok_or_else(|| {
anyhow!(
"Failed to get range from prometheus response. start={}, end={}, query={}",
start_time,
end_time,
new_query
)
})?;
info!("For Query {} got range {:?}", new_query, range);
let range = r.as_range().ok_or_else(|| {
anyhow!(
"Failed to get range from prometheus response. start={}, end={}, query={}",
start_time,
end_time,
new_query
)
})?;
if range.len() != 1 {
bail!(
"Expected only one range vector from prometheus, recieved {} ({:?}). start={}, end={}, query={}",
@@ -191,14 +197,7 @@
}
Ok(range
.first()
.ok_or_else(|| {
anyhow!(
"Empty range vector returned from prometheus. start={}, end={}, query={}",
start_time,
end_time,
new_query
)
})?
.unwrap() // safe because we checked length above
.samples()
.to_vec())
}
@@ -324,22 +323,32 @@ mod tests {

#[test]
fn test_create_query() {
// test when no existing labels
let original_query = "aptos_connections";
let mut labels_map = BTreeMap::new();
labels_map.insert("a".to_string(), "a".to_string());
labels_map.insert("some_label".to_string(), "blabla".to_string());

// test when no existing labels
let original_query = "aptos_connections";
let expected_query = r#"aptos_connections{a="a",some_label="blabla"}"#;
let new_query = construct_query_with_extra_labels(original_query, &labels_map);
assert_eq!(expected_query, new_query);

// test when empty labels
let original_query = "aptos_connections{}";
let expected_query = r#"aptos_connections{a="a",some_label="blabla"}"#;
let new_query = construct_query_with_extra_labels(original_query, &labels_map);
assert_eq!(expected_query, new_query);

// test when existing labels
let original_query = r#"aptos_connections{abc="123",def="456"}"#;
let mut labels_map = BTreeMap::new();
labels_map.insert("a".to_string(), "a".to_string());
labels_map.insert("some_label".to_string(), "blabla".to_string());
let expected_query = r#"aptos_connections{a="a",some_label="blabla",abc="123",def="456"}"#;
let new_query = construct_query_with_extra_labels(original_query, &labels_map);
assert_eq!(expected_query, new_query);

// test when multiple queries
let original_query = r#"aptos_connections{abc="123",def="456"} - aptos_disconnects{abc="123"} / aptos_count{}"#;
let expected_query = r#"aptos_connections{a="a",some_label="blabla",abc="123",def="456"} - aptos_disconnects{a="a",some_label="blabla",abc="123"} / aptos_count{a="a",some_label="blabla"}"#;
let new_query = construct_query_with_extra_labels(original_query, &labels_map);
assert_eq!(expected_query, new_query);
}
}
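As a usage sketch, the fixed function also handles a selector nested inside a larger PromQL expression; this assumes construct_query_with_extra_labels is in scope, and the namespace label and value are illustrative, not the actual metadata Forge injects:

use std::collections::BTreeMap;

fn main() {
    let mut labels_map = BTreeMap::new();
    labels_map.insert("namespace".to_string(), "forge-123".to_string());

    // The swarm label is inserted at the front of the existing label set.
    let query = r#"rate(aptos_connections{role="validator"}[1m])"#;
    let new_query = construct_query_with_extra_labels(query, &labels_map);
    assert_eq!(
        new_query,
        r#"rate(aptos_connections{namespace="forge-123",role="validator"}[1m])"#
    );
}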
31 changes: 7 additions & 24 deletions testsuite/forge/src/success_criteria.rs
@@ -105,30 +105,13 @@ pub struct LatencyBreakdownThreshold {
}

impl LatencyBreakdownThreshold {
pub fn new_strict(
qs_batch_to_pos_threshold: f64,
qs_pos_to_proposal_threshold: f64,
consensus_proposal_to_ordered_threshold: f64,
consensus_ordered_to_commit_threshold: f64,
) -> Self {
let mut thresholds = BTreeMap::new();
thresholds.insert(
LatencyBreakdownSlice::QsBatchToPos,
MetricsThreshold::new(qs_batch_to_pos_threshold, 0),
);
thresholds.insert(
LatencyBreakdownSlice::QsPosToProposal,
MetricsThreshold::new(qs_pos_to_proposal_threshold, 0),
);
thresholds.insert(
LatencyBreakdownSlice::ConsensusProposalToOrdered,
MetricsThreshold::new(consensus_proposal_to_ordered_threshold, 0),
);
thresholds.insert(
LatencyBreakdownSlice::ConsensusOrderedToCommit,
MetricsThreshold::new(consensus_ordered_to_commit_threshold, 0),
);
Self { thresholds }
pub fn new_strict(thresholds: Vec<(LatencyBreakdownSlice, f64)>) -> Self {
Self {
thresholds: thresholds
.into_iter()
.map(|(k, v)| (k, MetricsThreshold::new(v, 0)))
.collect(),
}
}

pub fn ensure_threshold(&self, metrics: &LatencyBreakdown) -> anyhow::Result<()> {
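The refactored constructor replaces four positional f64 parameters with a list of (slice, seconds) pairs collected into the thresholds map, each with a 0% tolerated breach. A simplified, self-contained sketch of that collect pattern, using stand-in types rather than the real LatencyBreakdownSlice and MetricsThreshold:

use std::collections::BTreeMap;

// Stand-ins for the forge types, only to illustrate the into_iter().map().collect()
// pattern used by the new constructor; the real types live in aptos-forge.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum Slice {
    QsBatchToPos,
    QsPosToProposal,
}

#[derive(Debug, PartialEq)]
struct Threshold {
    max: f64,
    // The real constructor passes 0 here, i.e. no tolerated breaches.
    max_breach_pct: usize,
}

fn new_strict(thresholds: Vec<(Slice, f64)>) -> BTreeMap<Slice, Threshold> {
    thresholds
        .into_iter()
        .map(|(k, v)| (k, Threshold { max: v, max_breach_pct: 0 }))
        .collect()
}

fn main() {
    let t = new_strict(vec![(Slice::QsBatchToPos, 0.3), (Slice::QsPosToProposal, 0.25)]);
    assert_eq!(t[&Slice::QsBatchToPos], Threshold { max: 0.3, max_breach_pct: 0 });
}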
2 changes: 1 addition & 1 deletion testsuite/testcases/src/lib.rs
@@ -339,7 +339,7 @@ impl dyn NetworkLoadTest {
phase_timing[i].end_unixtime_s,
))?;
info!(
"latency_breakdown: from {} to {}: {:?}",
"Latency breakdown: from {} to {}: {:?}",
phase_timing[i].start_unixtime_s, phase_timing[i].end_unixtime_s, latency_breakdown
);
stats_by_phase_filtered.push(LoadTestPhaseStats {
