Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[forge] better sts errors #4582

Merged
merged 2 commits into from
Sep 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-account-creation-test
FORGE_CLUSTER_NAME: aptos-forge-big-1
FORGE_RUNNER_DURATION_SECS: 900
FORGE_TEST_SUITE: account_creation
POST_TO_SLACK: true
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-changing-working-quorum-test
FORGE_CLUSTER_NAME: aptos-forge-big-1
FORGE_RUNNER_DURATION_SECS: 1200
FORGE_TEST_SUITE: changing_working_quorum_test
POST_TO_SLACK: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-changing-working-quorum-test-high-load
FORGE_CLUSTER_NAME: aptos-forge-big-1
FORGE_RUNNER_DURATION_SECS: 900
FORGE_TEST_SUITE: changing_working_quorum_test_high_load
POST_TO_SLACK: true
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/continuous-e2e-compat-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-compat
FORGE_CLUSTER_NAME: aptos-forge-big-1
# Run for 5 minutes
FORGE_RUNNER_DURATION_SECS: 300
# This will upgrade from testnet branch to the latest main
FORGE_TEST_SUITE: compat
IMAGE_TAG: testnet
POST_TO_SLACK: true
POST_TO_SLACK: true
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-consensus-stress-test
FORGE_CLUSTER_NAME: aptos-forge-big-1
FORGE_RUNNER_DURATION_SECS: 2400
FORGE_TEST_SUITE: consensus_stress_test
POST_TO_SLACK: true
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,11 @@ on:
jobs:
### Please remember to use different namespace for different tests
# Performance test in an optimal setting
run-different-node-speed-and-reliability-test:
run-forge-different-node-speed-and-reliability-test:
uses: ./.github/workflows/run-forge.yaml
secrets: inherit
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you are already changing this file, would you mind renaming this test to match the others? It does not start with `run-forge-` like all the other jobs (sorry).

run-different-node-speed-and-reliability-test => run-forge-different-node-speed-and-reliability-test:

with:
FORGE_NAMESPACE: forge-different-node-speed-and-reliability-test
FORGE_CLUSTER_NAME: aptos-forge-big-1
FORGE_RUNNER_DURATION_SECS: 900
FORGE_TEST_SUITE: different_node_speed_and_reliability_test
POST_TO_SLACK: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-fullnode-reboot-stress
FORGE_CLUSTER_NAME: aptos-forge-big-1
# Run for 30 minutes
FORGE_RUNNER_DURATION_SECS: 1800
FORGE_TEST_SUITE: fullnode_reboot_stress_test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-graceful-overload-test
FORGE_CLUSTER_NAME: aptos-forge-big-1
FORGE_RUNNER_DURATION_SECS: 1800
FORGE_TEST_SUITE: graceful_overload
POST_TO_SLACK: true
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-three-region
FORGE_CLUSTER_NAME: aptos-forge-big-1
# Run for 30 minutes
FORGE_RUNNER_DURATION_SECS: 1800
# Pre release has chaos applied
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-network-partition
FORGE_CLUSTER_NAME: aptos-forge-big-1
# Run for 15 minutes
FORGE_RUNNER_DURATION_SECS: 900
FORGE_TEST_SUITE: network_partition
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/continuous-e2e-nft-mint-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-nft-mint-test
FORGE_CLUSTER_NAME: aptos-forge-big-1
FORGE_RUNNER_DURATION_SECS: 900
FORGE_TEST_SUITE: nft_mint
POST_TO_SLACK: true
1 change: 0 additions & 1 deletion .github/workflows/continuous-e2e-performance-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-performance
FORGE_CLUSTER_NAME: aptos-forge-big-1
# Run for 2 hours
FORGE_RUNNER_DURATION_SECS: 7200
# Land blocking is performance test
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/continuous-e2e-single-vfn-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-continuous-e2e-single-vfn
FORGE_CLUSTER_NAME: aptos-forge-big-1
# Run for 8 minutes
FORGE_RUNNER_DURATION_SECS: 480
FORGE_TEST_SUITE: single_vfn_perf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-state-sync-perf-fullnode-apply
FORGE_CLUSTER_NAME: aptos-forge-big-1
# Run for 40 minutes
FORGE_RUNNER_DURATION_SECS: 2400
FORGE_TEST_SUITE: state_sync_perf_fullnodes_apply_outputs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-state-sync-perf-fullnode-execute
FORGE_CLUSTER_NAME: aptos-forge-big-1
# Run for 40 minutes
FORGE_RUNNER_DURATION_SECS: 2400
FORGE_TEST_SUITE: state_sync_perf_fullnodes_execute_transactions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-state-sync-perf-validator
FORGE_CLUSTER_NAME: aptos-forge-big-1
# Run for 40 minutes
FORGE_RUNNER_DURATION_SECS: 2400
FORGE_TEST_SUITE: state_sync_perf_validators
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-validator-reboot-stress
FORGE_CLUSTER_NAME: aptos-forge-big-1
# Run for 40 minutes
FORGE_RUNNER_DURATION_SECS: 2400
FORGE_TEST_SUITE: validator_reboot_stress_test
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/twin-validator-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ jobs:
secrets: inherit
with:
FORGE_NAMESPACE: forge-twin-validator
FORGE_CLUSTER_NAME: aptos-forge-big-1
FORGE_RUNNER_DURATION_SECS: 900
FORGE_TEST_SUITE: twin_validator_test
POST_TO_SLACK: true
16 changes: 7 additions & 9 deletions testsuite/forge/src/backend/k8s/stateful_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ async fn check_stateful_set_status(
.map_err(|e| WorkloadScalingError::RetryableError(e.to_string()))?
.status
{
if let Some(container_statuses) = status.container_statuses {
if let Some(ref container_statuses) = status.container_statuses {
if let Some(container_status) = container_statuses.last() {
if let Some(state) = &container_status.state {
if let Some(waiting) = &state.waiting {
Expand Down Expand Up @@ -161,8 +161,8 @@ async fn check_stateful_set_status(
info!("Pod {} at phase {}", &pod_name, phase)
}
Err(WorkloadScalingError::RetryableError(format!(
"Retry due to pod {} status",
&pod_name
"Retry due to pod {} status {:?}",
&pod_name, status
)))
} else {
Err(WorkloadScalingError::FinalError(format!(
Expand Down Expand Up @@ -292,17 +292,15 @@ pub async fn check_for_container_restart(
let pod_api: Api<Pod> = Api::namespaced(kube_client.clone(), kube_namespace);
Box::pin(async move {
// Get the StatefulSet's Pod status
if let Some(status) = pod_api
.get_status(format!("{}-0", sts_name).as_str())
.await?
.status
{
let pod_name = format!("{}-0", sts_name);
if let Some(status) = pod_api.get_status(&pod_name).await?.status {
if let Some(container_statuses) = status.container_statuses {
for container_status in container_statuses {
if container_status.restart_count > 0 {
bail!(
"Container {} restarted {} times ",
"Container {} in pod {} restarted {} times ",
container_status.name,
&pod_name,
container_status.restart_count
);
}
Expand Down