diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f65c9c1..6a0ff97 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,6 +29,7 @@ jobs: publish-unit-test: uses: ./.github/workflows/publish-unit-test.yml needs: unit-test + if: always() integration-test: uses: ./.github/workflows/integration-test.yml needs: unit-test diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 5dfba16..f95f45d 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -78,6 +78,18 @@ jobs: cache-from: type=gha,scope=dws-${{ env.BRANCH }} cache-to: type=gha,mode=max,scope=dws-${{ env.BRANCH }} + # Pre-build dws-test-driver image with docker cache. Expect 2 minutes + # for a full build + - name: Build dws-test-driver + uses: docker/build-push-action@v3 + with: + context: testsuite/submodules/dws-test-driver + push: false + load: true + tags: local/dws-test-driver-operator:test + cache-from: type=gha,scope=dws-test-driver-${{ env.BRANCH }} + cache-to: type=gha,mode=max,scope=dws-test-driver-${{ env.BRANCH }} + - name: Integration Test run: cd testsuite/integration && make setup test reports @@ -89,6 +101,6 @@ jobs: # The "Integration Test" step should never fail, otherwise the test # reports will not be available. This step will check the integration - # test container to see if a failure occured. + # test container to see if a failure occurred. 
- name: Check Integration Test - run: test $(docker inspect integration-test --format="{{.State.ExitCode}}") -eq 0 \ No newline at end of file + run: test $(docker inspect integration-test --format="{{.State.ExitCode}}") -eq 0 diff --git a/.gitignore b/.gitignore index 748588c..c9e709e 100644 --- a/.gitignore +++ b/.gitignore @@ -47,4 +47,13 @@ testsuite/integration/jobs/slurm*.out # Kind files testsuite/integration/kubeconfig -testsuite/integration/kind-config.yaml \ No newline at end of file +testsuite/integration/kind-config.yaml + +# Python files +**/__pycache__/* + +# integration test reports +testsuite/integration/reports/ + +# IDE cache +.vscode \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 6c4c99f..36f39db 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,3 +6,6 @@ path = testsuite/submodules/slurm-docker-cluster url = git@github.com:DataWorkflowServices/slurm-docker-cluster.git branch = master +[submodule "testsuite/submodules/dws-test-driver"] + path = testsuite/submodules/dws-test-driver + url = git@github.com:DataWorkflowServices/dws-test-driver.git diff --git a/src/burst_buffer/burst_buffer.lua b/src/burst_buffer/burst_buffer.lua index c897ffd..31db12e 100644 --- a/src/burst_buffer/burst_buffer.lua +++ b/src/burst_buffer/burst_buffer.lua @@ -163,7 +163,7 @@ function DWS:initialize(wlmID, jobID, userID, groupID, dw_directives, labels) else dwd_block = "dwDirectives:\n" for k, v in ipairs(dw_directives) do - dwd_block = dwd_block .. " - " .. dw_directives[k] .. "\n" + dwd_block = dwd_block .. " - \"" .. dw_directives[k] .. "\"\n" end yaml = string.gsub(yaml, "DWDIRECTIVES", dwd_block) end @@ -181,6 +181,7 @@ function DWS:save(fname) local msg = "unable to open " .. fname return false, msg end + slurm.log_info(self.yaml) f:write(self.yaml) local rc = {f:close()} -- Success or failure is a boolean in rc[1]. 
diff --git a/testsuite/integration/Dockerfile b/testsuite/integration/Dockerfile index 3040c48..673ae38 100644 --- a/testsuite/integration/Dockerfile +++ b/testsuite/integration/Dockerfile @@ -14,8 +14,6 @@ RUN yum update -y && \ install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl && \ pip install -r requirements.txt -COPY src /tests - RUN mkdir /reports FROM testbase AS test @@ -27,5 +25,5 @@ CMD [ \ "--junitxml=/reports/integration.junit.xml", \ "--html=/reports/integration.html", \ "--gherkin-terminal-reporter", \ - "-v" \ + "-v", "." \ ] \ No newline at end of file diff --git a/testsuite/integration/Makefile b/testsuite/integration/Makefile index a07b15a..92a6b36 100644 --- a/testsuite/integration/Makefile +++ b/testsuite/integration/Makefile @@ -39,19 +39,46 @@ setup-dws: @{\ set -e ; \ cd ../submodules/dws ; \ - docker buildx build -t local/dws-operator:test --cache-from="/tmp/.buildx-cache" --load . ; \ + docker buildx build -t local/dws-operator:test --load . ; \ IMAGE_TAG_BASE=local/dws-operator VERSION=test KIND_CLUSTER=dws make kind-push deploy ; \ kubectl wait deployment --timeout=60s -n dws-operator-system dws-operator-controller-manager --for condition=Available=True ; \ } +.PHONY: setup-dws-test-driver +setup-dws-test-driver: + @{\ + set -e ; \ + cd ../submodules/dws-test-driver ; \ + docker buildx build -t local/dws-test-driver-operator:test --load . 
; \ + IMAGE_TAG_BASE=local/dws-test-driver-operator VERSION=test KIND_CLUSTER=dws make kind-push deploy ; \ + kubectl wait deployment --timeout=60s -n dws-test-operator-system dws-test-driver-controller-manager --for condition=Available=True ; \ + } + .PHONY: setup -setup: setup-kind setup-dws +setup: setup-kind setup-dws setup-dws-test-driver .PHONY: test test: docker compose build docker compose up +.PHONY: debug +debug: + echo "***** SLURMCTLD LOGS *****" + docker logs slurmctld + echo + echo "***** SLURM COMPUTE LOGS *****" + docker logs c1 + echo + echo "***** KIND NODE *****" + kubectl describe node dws-control-plane dws-worker + echo + echo "***** DWS DEPLOYMENT *****" + kubectl describe deployment -n dws-operator-system dws-operator-controller-manager + echo + echo "***** DWS LOGS *****" + kubectl logs -n dws-operator-system deployment/dws-operator-controller-manager + .PHONY: reports reports: mkdir reports diff --git a/testsuite/integration/docker-compose.yml b/testsuite/integration/docker-compose.yml index f270e12..463e1f5 100644 --- a/testsuite/integration/docker-compose.yml +++ b/testsuite/integration/docker-compose.yml @@ -35,6 +35,7 @@ services: target: /jobs - ./kubeconfig:/root/.kube/config - /var/run/docker.sock:/var/run/docker.sock + - ./src/:/tests networks: default: diff --git a/testsuite/integration/kind/kind.sh b/testsuite/integration/kind/kind.sh index 87e0681..bf21e4d 100755 --- a/testsuite/integration/kind/kind.sh +++ b/testsuite/integration/kind/kind.sh @@ -58,6 +58,9 @@ install_dependencies () { # Make sure the current context is set to dws kubectl config use-context kind-dws + # Create the slurm namespace. This will be the default location of dws-slurm-bb-plugin workflows + kubectl create namespace slurm + # Install the cert-manager for the DWS webhook. 
kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.7.0/cert-manager.yaml @@ -70,8 +73,9 @@ prep_kubeconfig () { set -e cp ~/.kube/config kubeconfig yq -i e '(.clusters | map(select(.name=="kind-dws")))[0].cluster.server |= "https://dws-control-plane:6443"' kubeconfig - yq -i e '.current-context |= "kind-dws"' kubeconfig chmod a+r kubeconfig + KUBECONFIG=kubeconfig kubectl config use-context kind-dws + KUBECONFIG=kubeconfig kubectl config set-context --current --namespace=slurm } teardown () { diff --git a/testsuite/integration/requirements.txt b/testsuite/integration/requirements.txt index ffda462..3411ce5 100644 --- a/testsuite/integration/requirements.txt +++ b/testsuite/integration/requirements.txt @@ -2,4 +2,5 @@ pytest == 7 pytest-bdd == 6 pytest-html == 3 docker == 6 -kubernetes >= 25.3 \ No newline at end of file +kubernetes >= 25.3 +tenacity == 8 \ No newline at end of file diff --git a/testsuite/integration/slurm/docker-compose.yml b/testsuite/integration/slurm/docker-compose.yml index 6e5fee4..62c2265 100644 --- a/testsuite/integration/slurm/docker-compose.yml +++ b/testsuite/integration/slurm/docker-compose.yml @@ -20,19 +20,38 @@ version: "2.2" services: - mysql: - image: mysql:8 - hostname: mysql - container_name: mysql - environment: - MYSQL_RANDOM_ROOT_PASSWORD: "yes" - MYSQL_DATABASE: slurm_acct_db - MYSQL_USER: slurm - MYSQL_PASSWORD: password - volumes: - - var_lib_mysql:/var/lib/mysql + # mysql: + # image: mysql:8 + # hostname: mysql + # container_name: mysql + # environment: + # MYSQL_RANDOM_ROOT_PASSWORD: "yes" + # MYSQL_DATABASE: slurm_acct_db + # MYSQL_USER: slurm + # MYSQL_PASSWORD: password + # volumes: + # - var_lib_mysql:/var/lib/mysql + + # slurmdbd: + # image: slurm-bb:${TAG:-test} + # build: + # context: ../../submodules/slurm-docker-cluster + # args: + # SLURM_TAG: ${SLURM_TAG:-slurm-22-05-4-1} + # cache_from: + # - "/tmp/.buildx-cache" + # command: ["slurmdbd"] + # container_name: slurmdbd + # hostname: 
slurmdbd + # volumes: + # - etc_munge:/etc/munge + # - var_log_slurm:/var/log/slurm + # expose: + # - "6819" + # depends_on: + # - mysql - slurmdbd: + slurmctld: image: slurm-bb:${TAG:-test} build: context: ../../submodules/slurm-docker-cluster @@ -40,19 +59,6 @@ services: SLURM_TAG: ${SLURM_TAG:-slurm-22-05-4-1} cache_from: - "/tmp/.buildx-cache" - command: ["slurmdbd"] - container_name: slurmdbd - hostname: slurmdbd - volumes: - - etc_munge:/etc/munge - - var_log_slurm:/var/log/slurm - expose: - - "6819" - depends_on: - - mysql - - slurmctld: - image: slurm-bb:${TAG:-test} command: ["slurmctld"] container_name: slurmctld hostname: slurmctld @@ -71,14 +77,15 @@ services: - ../kubeconfig:/home/slurm/.kube/config expose: - "6817" - depends_on: - - "slurmdbd" + # depends_on: + # - "slurmdbd" c1: image: slurm-bb:${TAG:-test} command: ["slurmd"] hostname: c1 container_name: c1 + cpus: 2 volumes: - etc_munge:/etc/munge - type: bind @@ -90,21 +97,21 @@ services: depends_on: - "slurmctld" - c2: - image: slurm-bb:${TAG:-test} - command: ["slurmd"] - hostname: c2 - container_name: c2 - volumes: - - etc_munge:/etc/munge - - type: bind - source: ./jobs - target: /jobs - - var_log_slurm:/var/log/slurm - expose: - - "6818" - depends_on: - - "slurmctld" + # c2: + # image: slurm-bb:${TAG:-test} + # command: ["slurmd"] + # hostname: c2 + # container_name: c2 + # volumes: + # - etc_munge:/etc/munge + # - type: bind + # source: ./jobs + # target: /jobs + # - var_log_slurm:/var/log/slurm + # expose: + # - "6818" + # depends_on: + # - "slurmctld" volumes: etc_munge: diff --git a/testsuite/integration/slurm/jobs/integration-test-bb.sh b/testsuite/integration/slurm/jobs/integration-test-bb.sh new file mode 100644 index 0000000..caad89d --- /dev/null +++ b/testsuite/integration/slurm/jobs/integration-test-bb.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +# +# Copyright 2022 Hewlett Packard Enterprise Development LP +# Other additional copyright holders may be indicated within. 
+# +# The entirety of this work is licensed under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +#SBATCH --output=/jobs/slurm-%j.out +#DW Proposal action=complete +#DW DataIn action=wait +/bin/hostname +srun -l /bin/hostname +srun -l /bin/pwd diff --git a/testsuite/integration/src/features/test_dws_states.feature b/testsuite/integration/src/features/test_dws_states.feature new file mode 100644 index 0000000..1bb26e4 --- /dev/null +++ b/testsuite/integration/src/features/test_dws_states.feature @@ -0,0 +1,117 @@ +# +# Copyright 2022 Hewlett Packard Enterprise Development LP +# Other additional copyright holders may be indicated within. +# +# The entirety of this work is licensed under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +Feature: Data Workflow Services State Progression + Verify that the DWS-Slurm Burst Buffer Plugin progresses through Data + Workflow Services states + + Scenario: The DWS-BB Plugin progresses through DWS states + Given a job script: + #!/bin/bash + + #DW Proposal action=wait + #DW Setup action=wait + #DW DataIn action=wait + #DW PreRun action=wait + #DW PostRun action=wait + #DW DataOut action=wait + #DW Teardown action=wait + /bin/hostname + + When the job is run + And a Workflow is created for the job + #Then the job's temporary Workflow is not found + Then the Workflow and job progress to the Proposal state + And the Workflow and job progress to the Setup state + And the Workflow and job progress to the DataIn state + And the Workflow and job progress to the PreRun state + And the Workflow and job progress to the PostRun state + And the Workflow and job progress to the DataOut state + And the Workflow and job progress to the Teardown state + And the job is COMPLETED + + @todo + Scenario: The DWS-BB Plugin can handle DWS driver errors + Given a job script: + #!/bin/bash + + #DW <state> action=error message=TEST_ERROR + #DW Teardown action=wait + /bin/hostname + + When the job is run + And a Workflow is created for the job + Then the Workflow and job progress to the Teardown state + And the job shows an error with message "TEST ERROR" + + Examples: + # *** HEADER *** + | state | + # *** VALUES *** + | Proposal | + | Setup | + | DataIn | + | PreRun | + | PostRun | + | DataOut | + + @todo + Scenario: The DWS-BB Plugin can handle a DWS driver error during Teardown + Given a job script: + #!/bin/bash + + #DW Teardown action=error message=TEST_ERROR + /bin/hostname + + When the job is run + And a Workflow is created for the job + Then the job shows an error with message "TEST ERROR" + + @todo + Scenario: The DWS-BB Plugin can cancel jobs + Given a job script: + #!/bin/bash + + #DW <state> action=wait + #DW Teardown action=wait + /bin/hostname + + When the job is run + And a
Workflow is created for the job + And the Workflow and job progress to the <state> state + And the job is canceled with the hurry flag set to <hurry_flag> + Then the Workflow and job progress to the Teardown state + And the Workflow's hurry flag is set to <hurry_flag> + + Examples: + # *** HEADER *** + | state | hurry_flag | + # *** VALUES *** + | Proposal | false | + | Setup | false | + | DataIn | false | + | PreRun | false | + | PostRun | false | + | DataOut | false | + | Proposal | true | + | Setup | true | + | DataIn | true | + | PreRun | true | + | PostRun | true | + | DataOut | true | \ No newline at end of file diff --git a/testsuite/integration/src/features/test_environment.feature b/testsuite/integration/src/features/test_environment.feature index bb00c3a..af09b04 100644 --- a/testsuite/integration/src/features/test_environment.feature +++ b/testsuite/integration/src/features/test_environment.feature @@ -24,19 +24,18 @@ Feature: Integration test environment When kubernetes cluster nodes are queried Then one or more kubernetes nodes are available - # Scenario: Using DataWorkflowServices - # When the DataWorkflowServices deployment is queried - # Then the DataWorkflowServices deployment is found + Scenario: The DataWorkflowServices deployment exists + When the DataWorkflowServices deployment is queried + Then the DataWorkflowServices deployment is found - # Scenario: Using Slurm - # Given a simple job script - # """ - # /bin/hostname - # srun -l /bin/hostname - # srun -l /bin/pwd - # """ - # When the job is run - # Then the job completes successfully + Scenario: Slurm is usable + Given a job script: + #!/bin/bash + /bin/hostname + srun -l /bin/hostname + srun -l /bin/pwd + When the job is run + Then the job is COMPLETED Scenario: Kubernetes and slurm are connected Given the kubernetes cluster kube-system UID diff --git a/testsuite/integration/src/tests/__init__.py b/testsuite/integration/src/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git
a/testsuite/integration/src/tests/conftest.py b/testsuite/integration/src/tests/conftest.py new file mode 100644 index 0000000..689c247 --- /dev/null +++ b/testsuite/integration/src/tests/conftest.py @@ -0,0 +1,98 @@ +# +# Copyright 2022 Hewlett Packard Enterprise Development LP +# Other additional copyright holders may be indicated within. +# +# The entirety of this work is licensed under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import pytest +import secrets +import warnings + +from .slurmctld import Slurmctld +from kubernetes import client, config +from pytest_bdd import ( + given, + scenarios, + parsers, + then, + when, +) + +@pytest.fixture +def k8s(): + config.load_kube_config() + return client + +@pytest.fixture +def slurmctld(): + return Slurmctld() + +def pytest_bdd_apply_tag(tag, function): + if tag == 'todo': + marker = pytest.mark.skip(reason="Not implemented yet") + marker(function) + return True + else: + # Fall back to the default behavior of pytest-bdd + return None + +@given(parsers.parse('a job script:\n{script}'), target_fixture="script_path") +def _(script): + """a simple job script: