From def03055c99f430293999da18d320e5c33e1760f Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Wed, 23 Aug 2023 17:08:45 -0500 Subject: [PATCH 1/2] Update tests to support fatal errors DWS v0.0.11 has the concept of fatal errors. This updates the tests to use fatal errors, and to show how drivers should handle them. Signed-off-by: Dean Roehrich --- .../src/features/test_dws_states.feature | 53 +++++---- .../src/features/test_environment.feature | 6 +- testsuite/integration/src/pytest.ini | 8 +- testsuite/integration/src/tests/conftest.py | 8 +- .../tests/dws_bb_plugin/test_dws_states.py | 108 ++++++++++++++---- .../src/tests/dws_bb_plugin/workflow.py | 8 +- testsuite/integration/src/tests/slurmctld.py | 64 +++++++---- 7 files changed, 173 insertions(+), 82 deletions(-) diff --git a/testsuite/integration/src/features/test_dws_states.feature b/testsuite/integration/src/features/test_dws_states.feature index 20439c1..92b921b 100644 --- a/testsuite/integration/src/features/test_dws_states.feature +++ b/testsuite/integration/src/features/test_dws_states.feature @@ -1,5 +1,5 @@ # -# Copyright 2022 Hewlett Packard Enterprise Development LP +# Copyright 2022-2023 Hewlett Packard Enterprise Development LP # Other additional copyright holders may be indicated within. # # The entirety of this work is licensed under the Apache License, @@ -17,6 +17,7 @@ # limitations under the License. # +@dws_states Feature: Data Workflow Services State Progression Verify that the DWS-Slurm Burst Buffer Plugin progresses through Data Workflow Services states @@ -35,8 +36,8 @@ Feature: Data Workflow Services State Progression /bin/hostname When the job is run - And a Workflow is created for the job - Then the Workflow and job progress to the Proposal state + Then a Workflow has been created for the job + And the Workflow and job progress to the Proposal state And the Workflow and job progress to the Setup state And the Workflow and job progress to the DataIn state And the Workflow and job progress to the PreRun state @@ -49,21 +50,21 @@ Feature: Data Workflow Services State Progression # constraint, the dws-test-driver replaces underscores ("_") in the message value with # spaces. This ensures that the dws-slurm-plugin can handle whitespace in error messages # It also makes it easier to check that the error is included in scontrol output. - Scenario Outline: The DWS-BB Plugin can handle DWS driver errors before being canceled + Scenario Outline: The DWS-BB Plugin can handle fatal driver errors before being canceled Given a job script: #!/bin/bash - #DW action=error message=TEST_ERROR + #DW action=error message=TEST_FATAL_ERROR severity=Fatal #DW Teardown action=wait /bin/hostname When the job is run - And a Workflow is created for the job - And the Workflow and job report errors at the state + Then a Workflow has been created for the job + And the Workflow and job report fatal errors at the state And the job is canceled - Then the Workflow and job progress to the Teardown state - And the job's system comment contains the following: - TEST ERROR + And the Workflow and job progress to the Teardown state + And the job's final system comment contains the following: + TEST FATAL ERROR Examples: # *** HEADER *** @@ -79,19 +80,23 @@ Feature: Data Workflow Services State Progression # "--hurry" flag to transition to the Teardown state. If # "Flags=TeardownFailure" is set in burst_buffer.conf, then all states will # transition to Teardown without needing to be canceled - Scenario Outline: The DWS-BB Plugin can handle DWS driver errors + Scenario Outline: The DWS-BB Plugin can handle fatal driver errors for PreRun Given a job script: #!/bin/bash - #DW action=error message=TEST_ERROR + #DW action=error message=TEST_FATAL_ERROR severity=Fatal #DW Teardown action=wait /bin/hostname When the job is run - And a Workflow is created for the job - Then the Workflow and job progress to the Teardown state - And the job's system comment contains the following: - TEST ERROR + Then a Workflow has been created for the job + And the Workflow reports a fatal error in the state + And the Workflow and job progress to the Teardown state + # Slurm moved it from PreRun/Error to Teardown without canceling + # the job. So the driver (this test) must cancel it. + And the job is canceled + And the job's final system comment contains the following: + TEST FATAL ERROR Examples: # *** HEADER *** @@ -99,14 +104,20 @@ Feature: Data Workflow Services State Progression # *** VALUES *** | PreRun | - Scenario: The DWS-BB Plugin can handle DWS driver errors during Teardown + Scenario: The DWS-BB Plugin can handle fatal driver errors during Teardown Given a job script: #!/bin/bash - #DW Teardown action=error message=TEST_ERROR + #DW Teardown action=error message=TEST_FATAL_ERROR severity=Fatal /bin/hostname When the job is run - Then the job's system comment contains the following: - TEST ERROR - And the workflow still exists + Then a Workflow has been created for the job + And the Workflow reports a fatal error in the Teardown state + And the job's intermediate system comment contains the following: + TEST FATAL ERROR + # Eventually the driver (this test) must work through the Teardown + # issues and complete that step. Slurm has already marked the job + # as completed and is now looping over slurm_bb_job_teardown() in + # burst_buffer.lua. + And the Workflow error is cleared from the Teardown state diff --git a/testsuite/integration/src/features/test_environment.feature b/testsuite/integration/src/features/test_environment.feature index d12e9e4..dfbd412 100644 --- a/testsuite/integration/src/features/test_environment.feature +++ b/testsuite/integration/src/features/test_environment.feature @@ -1,5 +1,5 @@ # -# Copyright 2022 Hewlett Packard Enterprise Development LP +# Copyright 2022-2023 Hewlett Packard Enterprise Development LP # Other additional copyright holders may be indicated within. # # The entirety of this work is licensed under the Apache License, @@ -16,6 +16,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +@environment Feature: Integration test environment Verify the integration test environment has been setup correctly @@ -39,4 +41,4 @@ Feature: Integration test environment Scenario: Kubernetes and slurm are connected Given the kubernetes cluster kube-system UID When the kube-system UID is queried from slurmctld - Then the UIDs match and the cluster is the same \ No newline at end of file + Then the UIDs match and the cluster is the same diff --git a/testsuite/integration/src/pytest.ini b/testsuite/integration/src/pytest.ini index 1899dfb..5399438 100644 --- a/testsuite/integration/src/pytest.ini +++ b/testsuite/integration/src/pytest.ini @@ -1,5 +1,5 @@ # -# Copyright 2022 Hewlett Packard Enterprise Development LP +# Copyright 2022-2023 Hewlett Packard Enterprise Development LP # Other additional copyright holders may be indicated within. # # The entirety of this work is licensed under the Apache License, @@ -18,4 +18,8 @@ # [pytest] -bdd_features_base_dir = features \ No newline at end of file +bdd_features_base_dir = features +markers = + environment + dws_states + diff --git a/testsuite/integration/src/tests/conftest.py b/testsuite/integration/src/tests/conftest.py index 507fbab..255d323 100644 --- a/testsuite/integration/src/tests/conftest.py +++ b/testsuite/integration/src/tests/conftest.py @@ -18,19 +18,18 @@ # import os -import pytest import secrets import warnings +import pytest -from .slurmctld import Slurmctld from kubernetes import client, config from pytest_bdd import ( given, - scenarios, parsers, then, when, ) +from .slurmctld import Slurmctld @pytest.fixture def k8s(): @@ -54,7 +53,7 @@ def pytest_bdd_apply_tag(tag, function): def _(script): """a job script: