From cce1a9bbc55df864a3cb46bef301fff30dd25a70 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Sun, 8 Oct 2023 21:31:04 +0100 Subject: [PATCH 01/23] fix: use `exit-nopipe` with consistent `shell` usage Temporarily disabled the `set -e` option around the docker logs command to handle the broken pipe error gracefully. Handle more complex scenarios in our `Result of ${{ inputs.test_id }} test` job --- .github/workflows/continous-delivery.yml | 4 +- .github/workflows/deploy-gcp-tests.yml | 56 ++++++++++++++---------- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index 1d1c6efba11..1255ab2d796 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -138,7 +138,7 @@ jobs: docker run --detach --name default-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} # show the logs, even if the job times out docker logs --tail all --follow default-conf-tests | \ - tee --output-error=exit /dev/stderr | \ + tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ 'net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter' docker stop default-conf-tests @@ -178,7 +178,7 @@ jobs: docker run --env "NETWORK=Testnet" --detach --name testnet-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} # show the logs, even if the job times out docker logs --tail all --follow testnet-conf-tests | \ - tee --output-error=exit /dev/stderr | \ + tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ -e 'net.*=.*Test.*estimated progress to chain tip.*Genesis' \ -e 'net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter' diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index b1242ce4c4d..dcd1beab18c 100644 --- 
a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -183,14 +183,14 @@ jobs: # Format the mounted disk if the test doesn't use a cached state. - name: Format ${{ inputs.test_id }} volume + shell: /usr/bin/bash -exo pipefail {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --zone ${{ vars.GCP_ZONE }} \ --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command \ - "\ + --command=" \ while sudo lsof /dev/sdb; do \ echo 'Waiting for /dev/sdb to be free...'; \ sleep 10; \ @@ -200,14 +200,14 @@ jobs: # Launch the test without any cached state - name: Launch ${{ inputs.test_id }} test + shell: /usr/bin/bash -exo pipefail {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --zone ${{ vars.GCP_ZONE }} \ --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command \ - "\ + --command=" \ sudo docker run \ --name ${{ inputs.test_id }} \ --tty \ @@ -405,14 +405,14 @@ jobs: # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially. # TODO: we should find a better logic for this use cases if: ${{ (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }} + shell: /usr/bin/bash -exo pipefail {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --zone ${{ vars.GCP_ZONE }} \ --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command \ - "\ + --command=" \ sudo docker run \ --name ${{ inputs.test_id }} \ --tty \ @@ -455,14 +455,14 @@ jobs: # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially. 
# TODO: we should find a better logic for this use cases if: ${{ (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }} + shell: /usr/bin/bash -exo pipefail {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --zone ${{ vars.GCP_ZONE }} \ --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command \ - "\ + --command=" \ sudo docker run \ --name ${{ inputs.test_id }} \ --tty \ @@ -538,20 +538,20 @@ jobs: # # Errors in the tests are caught by the final test status job. - name: Check startup logs for ${{ inputs.test_id }} + shell: /usr/bin/bash -exo pipefail {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --zone ${{ vars.GCP_ZONE }} \ --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command \ - "\ + --command=" \ sudo docker logs \ --tail all \ --follow \ ${{ inputs.test_id }} | \ head -700 | \ - tee --output-error=exit /dev/stderr | \ + tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ -e 'Zcash network: ${{ inputs.network }}' \ " @@ -567,34 +567,40 @@ jobs: # with that status. # (`docker wait` can also wait for multiple containers, but we only ever wait for a single container.) 
- name: Result of ${{ inputs.test_id }} test + shell: /usr/bin/bash -exo pipefail {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --zone ${{ vars.GCP_ZONE }} \ --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command=' \ - set -e; - set -o pipefail; + --command=" \ trap '' PIPE; + # Temporarily disable 'set -e' to handle the broken pipe error gracefully + set +e; sudo docker logs \ --tail all \ --follow \ ${{ inputs.test_id }} | \ - tee --output-error=exit /dev/stderr | \ + tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ - "test result: .*ok.* [1-9][0-9]* passed.*finished in"; \ + 'test result: .*ok.* [1-9][0-9]* passed.*finished in'; + LOGS_EXIT_STATUS=$?; + set -e; - EXIT_STATUS=$( \ - sudo docker wait ${{ inputs.test_id }} || \ - sudo docker inspect --format "{{.State.ExitCode}}" ${{ inputs.test_id }} || \ - echo "missing container, or missing exit status for container" \ - ); \ + EXIT_STATUS=$(sudo docker wait ${{ inputs.test_id }} || echo 'Error retrieving exit status'); + echo 'sudo docker exit status: '$EXIT_STATUS; - echo "sudo docker exit status: $EXIT_STATUS"; \ - exit "$EXIT_STATUS" \ - ' + # If grep found the pattern, exit with the Docker container's exit status + if [ $LOGS_EXIT_STATUS -eq 0 ]; then + exit $EXIT_STATUS; + fi + + # Handle other potential errors here + echo 'An error occurred while processing the logs.'; + exit 1; \ + " # create a state image from the instance's state disk, if requested by the caller create-state-image: @@ -707,6 +713,7 @@ jobs: # Passes the versions to subsequent steps using the $INITIAL_DISK_DB_VERSION, # $RUNNING_DB_VERSION, and $DB_VERSION_SUMMARY env variables. 
- name: Get database versions from logs + shell: /usr/bin/bash -exo pipefail {0} run: | INITIAL_DISK_DB_VERSION="" RUNNING_DB_VERSION="" @@ -796,6 +803,7 @@ jobs: # # Passes the sync height to subsequent steps using the $SYNC_HEIGHT env variable. - name: Get sync height from logs + shell: /usr/bin/bash -exo pipefail {0} run: | SYNC_HEIGHT="" From 3a5cb9857e5d77e5ffc48920d2a1892406ff1aed Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Sun, 8 Oct 2023 22:31:17 +0100 Subject: [PATCH 02/23] fix: Use single quotes for the outer command --- .github/workflows/deploy-gcp-tests.yml | 50 +++++++++++++------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index dcd1beab18c..a98f15cd19b 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -190,13 +190,13 @@ jobs: --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command=" \ + --command=' \ while sudo lsof /dev/sdb; do \ - echo 'Waiting for /dev/sdb to be free...'; \ + echo "Waiting for /dev/sdb to be free..."; \ sleep 10; \ done; \ sudo mkfs.ext4 -v /dev/sdb \ - " + ' # Launch the test without any cached state - name: Launch ${{ inputs.test_id }} test @@ -207,7 +207,7 @@ jobs: --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command=" \ + --command=' \ sudo docker run \ --name ${{ inputs.test_id }} \ --tty \ @@ -215,7 +215,7 @@ jobs: ${{ inputs.test_variables }} \ --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ - " + ' # set up and launch the test, if it uses cached state # each test runs one of the *-with/without-cached-state job series, and skips the 
other @@ -412,7 +412,7 @@ jobs: --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command=" \ + --command=' \ sudo docker run \ --name ${{ inputs.test_id }} \ --tty \ @@ -420,7 +420,7 @@ jobs: ${{ inputs.test_variables }} \ --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ - " + ' # Launch the test with the previously created Lightwalletd and Zebra cached state. # Each test runs one of the "Launch test" steps, and skips the other. @@ -462,7 +462,7 @@ jobs: --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command=" \ + --command=' \ sudo docker run \ --name ${{ inputs.test_id }} \ --tty \ @@ -471,7 +471,7 @@ jobs: --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \ ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ - " + ' # Show all the test logs, then follow the logs of the test we just launched, until it finishes. # Then check the result of the test. 
@@ -545,7 +545,7 @@ jobs: --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command=" \ + --command=' \ sudo docker logs \ --tail all \ --follow \ @@ -553,8 +553,8 @@ jobs: head -700 | \ tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ - -e 'Zcash network: ${{ inputs.network }}' \ - " + -e "Zcash network: ${{ inputs.network }}" \ + ' # Check that the container executed at least 1 Rust test harness test, and that all tests passed. # Then wait for the container to finish, and exit with the test's exit status. @@ -574,10 +574,10 @@ jobs: --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command=" \ - trap '' PIPE; + --command=' \ + trap "" PIPE; - # Temporarily disable 'set -e' to handle the broken pipe error gracefully + # Temporarily disable "set -e" to handle the broken pipe error gracefully set +e; sudo docker logs \ --tail all \ @@ -585,22 +585,22 @@ jobs: ${{ inputs.test_id }} | \ tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ - 'test result: .*ok.* [1-9][0-9]* passed.*finished in'; + "test result: .*ok.* [1-9][0-9]* passed.*finished in"; LOGS_EXIT_STATUS=$?; set -e; - EXIT_STATUS=$(sudo docker wait ${{ inputs.test_id }} || echo 'Error retrieving exit status'); - echo 'sudo docker exit status: '$EXIT_STATUS; + EXIT_STATUS=$(sudo docker wait ${{ inputs.test_id }} || echo "Error retrieving exit status"); + echo "sudo docker exit status: $EXIT_STATUS"; - # If grep found the pattern, exit with the Docker container's exit status + # If grep found the pattern, exit with the Docker container"s exit status if [ $LOGS_EXIT_STATUS -eq 0 ]; then exit $EXIT_STATUS; fi # Handle other potential errors here - echo 'An error occurred while processing the logs.'; + echo "An error occurred while processing the logs."; exit 1; \ - " + ' # 
create a state image from the instance's state disk, if requested by the caller create-state-image: @@ -725,9 +725,9 @@ jobs: --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command=" \ + --command=' \ sudo docker logs ${{ inputs.test_id }} | head -1000 \ - ") + ') # either a semantic version or "creating new database" INITIAL_DISK_DB_VERSION=$( \ @@ -813,9 +813,9 @@ jobs: --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ - --command=" \ + --command=' \ sudo docker logs ${{ inputs.test_id }} --tail 200 \ - ") + ') SYNC_HEIGHT=$( \ echo "$DOCKER_LOGS" | \ From 01da33c25ed5f5f76a3caf285ae35c328b6351b6 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Sun, 8 Oct 2023 23:04:51 +0100 Subject: [PATCH 03/23] fix: use same approach for CD --- .github/workflows/continous-delivery.yml | 79 ++++++++++++++---------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index 1255ab2d796..6434a762707 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -29,7 +29,7 @@ on: type: boolean default: false - # Temporarily disabled to reduce network load, see #6894. + # TODO: Temporarily disabled to reduce network load, see #6894. 
#push: # branches: # - main @@ -132,29 +132,37 @@ jobs: # Make sure Zebra can sync at least one full checkpoint on mainnet - name: Run tests using the default config + shell: /usr/bin/bash -exo pipefail {0} run: | - set -ex docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --detach --name default-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} + # show the logs, even if the job times out - docker logs --tail all --follow default-conf-tests | \ + # Temporarily disable "set -e" to handle the broken pipe error gracefully + set +e; + docker logs \ + --tail all \ + --follow \ + default-conf-tests | \ tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ - 'net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter' - docker stop default-conf-tests + "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter"; + LOGS_EXIT_STATUS=$?; + set -e; + # get the exit status from docker - EXIT_STATUS=$( \ - docker wait default-conf-tests || \ - docker inspect --format "{{.State.ExitCode}}" default-conf-tests || \ - echo "missing container, or missing exit status for container" \ - ) - docker logs default-conf-tests - echo "docker exit status: $EXIT_STATUS" - if [[ "$EXIT_STATUS" = "137" ]]; then - echo "ignoring expected signal status" - exit 0 + docker stop default-conf-tests + EXIT_STATUS=$(docker wait default-conf-tests || echo "Error retrieving exit status"); + echo "docker exit status: $EXIT_STATUS"; + + # If grep found the pattern, exit with the Docker container"s exit status + if [ $LOGS_EXIT_STATUS -eq 0 ]; then + exit $EXIT_STATUS; fi - exit "$EXIT_STATUS" + + # Handle other potential errors here + echo "An error occurred while processing the logs."; + exit 1 # Test reconfiguring the docker image for testnet. 
test-configuration-file-testnet: @@ -172,30 +180,37 @@ jobs: # Make sure Zebra can sync the genesis block on testnet - name: Run tests using a testnet config + shell: /usr/bin/bash -exo pipefail {0} run: | - set -ex docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --env "NETWORK=Testnet" --detach --name testnet-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} + # show the logs, even if the job times out - docker logs --tail all --follow testnet-conf-tests | \ + # Temporarily disable "set -e" to handle the broken pipe error gracefully + set +e; + docker logs \ + --tail all \ + --follow \ + testnet-conf-tests | \ tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ - -e 'net.*=.*Test.*estimated progress to chain tip.*Genesis' \ - -e 'net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter' - docker stop testnet-conf-tests + "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter"; + LOGS_EXIT_STATUS=$?; + set -e; + # get the exit status from docker - EXIT_STATUS=$( \ - docker wait testnet-conf-tests || \ - docker inspect --format "{{.State.ExitCode}}" testnet-conf-tests || \ - echo "missing container, or missing exit status for container" \ - ) - docker logs testnet-conf-tests - echo "docker exit status: $EXIT_STATUS" - if [[ "$EXIT_STATUS" = "137" ]]; then - echo "ignoring expected signal status" - exit 0 + docker stop testnet-conf-tests + EXIT_STATUS=$(docker wait testnet-conf-tests || echo "Error retrieving exit status"); + echo "docker exit status: $EXIT_STATUS"; + + # If grep found the pattern, exit with the Docker container"s exit status + if [ $LOGS_EXIT_STATUS -eq 0 ]; then + exit $EXIT_STATUS; fi - exit "$EXIT_STATUS" + + # Handle other potential errors here + echo "An error occurred while processing the logs."; + exit 1 # Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet, # with one node in the configured 
GCP region. From 8c45ee5db1bb931fa0dc85aa5df9b4ae6a1383a6 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Sun, 8 Oct 2023 23:22:55 +0100 Subject: [PATCH 04/23] test: check launch failure logs --- .github/workflows/deploy-gcp-tests.yml | 43 ++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index a98f15cd19b..910add66f2b 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -200,6 +200,7 @@ jobs: # Launch the test without any cached state - name: Launch ${{ inputs.test_id }} test + id: launch-test shell: /usr/bin/bash -exo pipefail {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ @@ -217,6 +218,20 @@ jobs: ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ ' + # Show dmesg logs if previous job failed + - name: Show dmesg logs if previous job failed + if: ${{ failure() }} + shell: /usr/bin/bash -exo pipefail {0} + run: | + gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ + --zone ${{ vars.GCP_ZONE }} \ + --ssh-flag="-o ServerAliveInterval=5" \ + --ssh-flag="-o ConnectionAttempts=20" \ + --ssh-flag="-o ConnectTimeout=5" \ + --command=' \ + sudo dmesg \ + ' + # set up and launch the test, if it uses cached state # each test runs one of the *-with/without-cached-state job series, and skips the other launch-with-cached-state: @@ -422,6 +437,20 @@ jobs: ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ ' + # Show dmesg logs if previous job failed + - name: Show dmesg logs if previous job failed + if: ${{ failure() }} + shell: /usr/bin/bash -exo pipefail {0} + run: | + gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ + --zone ${{ vars.GCP_ZONE }} \ + --ssh-flag="-o ServerAliveInterval=5" \ + 
--ssh-flag="-o ConnectionAttempts=20" \ + --ssh-flag="-o ConnectTimeout=5" \ + --command=' \ + sudo dmesg \ + ' + # Launch the test with the previously created Lightwalletd and Zebra cached state. # Each test runs one of the "Launch test" steps, and skips the other. # @@ -473,6 +502,20 @@ jobs: ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ ' + # Show dmesg logs if previous job failed + - name: Show dmesg logs if previous job failed + if: ${{ failure() }} + shell: /usr/bin/bash -exo pipefail {0} + run: | + gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ + --zone ${{ vars.GCP_ZONE }} \ + --ssh-flag="-o ServerAliveInterval=5" \ + --ssh-flag="-o ConnectionAttempts=20" \ + --ssh-flag="-o ConnectTimeout=5" \ + --command=' \ + sudo dmesg \ + ' + # Show all the test logs, then follow the logs of the test we just launched, until it finishes. # Then check the result of the test. # From e50e1c14bc64c62a5e363b05302c89247bf35a8f Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 00:04:47 +0100 Subject: [PATCH 05/23] fix: revert CD changes --- .github/workflows/continous-delivery.yml | 80 ++++++++++-------------- 1 file changed, 33 insertions(+), 47 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index 6434a762707..a0fa28d0fc0 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -132,37 +132,29 @@ jobs: # Make sure Zebra can sync at least one full checkpoint on mainnet - name: Run tests using the default config - shell: /usr/bin/bash -exo pipefail {0} run: | + set -ex docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --detach --name default-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} - # show the logs, even if the job times out - # Temporarily disable "set -e" to handle the broken 
pipe error gracefully - set +e; - docker logs \ - --tail all \ - --follow \ - default-conf-tests | \ - tee --output-error=exit-nopipe /dev/stderr | \ + docker logs --tail all --follow default-conf-tests | \ + tee --output-error=exit /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ - "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter"; - LOGS_EXIT_STATUS=$?; - set -e; - - # get the exit status from docker + 'net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter' docker stop default-conf-tests - EXIT_STATUS=$(docker wait default-conf-tests || echo "Error retrieving exit status"); - echo "docker exit status: $EXIT_STATUS"; - - # If grep found the pattern, exit with the Docker container"s exit status - if [ $LOGS_EXIT_STATUS -eq 0 ]; then - exit $EXIT_STATUS; + # get the exit status from docker + EXIT_STATUS=$( \ + docker wait default-conf-tests || \ + docker inspect --format "{{.State.ExitCode}}" default-conf-tests || \ + echo "missing container, or missing exit status for container" \ + ) + docker logs default-conf-tests + echo "docker exit status: $EXIT_STATUS" + if [[ "$EXIT_STATUS" = "137" ]]; then + echo "ignoring expected signal status" + exit 0 fi - - # Handle other potential errors here - echo "An error occurred while processing the logs."; - exit 1 + exit "$EXIT_STATUS" # Test reconfiguring the docker image for testnet. 
test-configuration-file-testnet: @@ -182,35 +174,29 @@ jobs: - name: Run tests using a testnet config shell: /usr/bin/bash -exo pipefail {0} run: | + set -ex docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --env "NETWORK=Testnet" --detach --name testnet-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} - # show the logs, even if the job times out - # Temporarily disable "set -e" to handle the broken pipe error gracefully - set +e; - docker logs \ - --tail all \ - --follow \ - testnet-conf-tests | \ - tee --output-error=exit-nopipe /dev/stderr | \ + docker logs --tail all --follow testnet-conf-tests | \ + tee --output-error=exit /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ - "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter"; - LOGS_EXIT_STATUS=$?; - set -e; - - # get the exit status from docker + -e 'net.*=.*Test.*estimated progress to chain tip.*Genesis' \ + -e 'net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter' docker stop testnet-conf-tests - EXIT_STATUS=$(docker wait testnet-conf-tests || echo "Error retrieving exit status"); - echo "docker exit status: $EXIT_STATUS"; - - # If grep found the pattern, exit with the Docker container"s exit status - if [ $LOGS_EXIT_STATUS -eq 0 ]; then - exit $EXIT_STATUS; + # get the exit status from docker + EXIT_STATUS=$( \ + docker wait testnet-conf-tests || \ + docker inspect --format "{{.State.ExitCode}}" testnet-conf-tests || \ + echo "missing container, or missing exit status for container" \ + ) + docker logs testnet-conf-tests + echo "docker exit status: $EXIT_STATUS" + if [[ "$EXIT_STATUS" = "137" ]]; then + echo "ignoring expected signal status" + exit 0 fi - - # Handle other potential errors here - echo "An error occurred while processing the logs."; - exit 1 + exit "$EXIT_STATUS" # Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet, # with one node in the configured GCP region. 
From f5c86bc1ee34125e821eb96a0626552e62b3f521 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 00:06:31 +0100 Subject: [PATCH 06/23] fix: do not try to increase the disk size and wait mounting --- .github/workflows/deploy-gcp-tests.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 910add66f2b..cf2471da4b7 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -166,7 +166,7 @@ jobs: id: create-instance run: | gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ - --boot-disk-size 300GB \ + --boot-disk-size 10GB \ --boot-disk-type pd-ssd \ --image-project=cos-cloud \ --image-family=cos-stable \ @@ -180,6 +180,7 @@ jobs: --labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \ --tags ${{ inputs.app_name }} \ --zone ${{ vars.GCP_ZONE }} + sleep 90 # Format the mounted disk if the test doesn't use a cached state. - name: Format ${{ inputs.test_id }} volume @@ -382,7 +383,7 @@ jobs: id: create-instance run: | gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ - --boot-disk-size 300GB \ + --boot-disk-size 10GB \ --boot-disk-type pd-ssd \ --image-project=cos-cloud \ --image-family=cos-stable \ @@ -396,7 +397,7 @@ jobs: --labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \ --tags ${{ inputs.app_name }} \ --zone ${{ vars.GCP_ZONE }} - sleep 60 + sleep 90 # Launch the test with the previously created Zebra-only cached state. # Each test runs one of the "Launch test" steps, and skips the other. 
From 3cd241e55fd1f4d7d82ee59cad07d987c9c51a25 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 00:32:19 +0100 Subject: [PATCH 07/23] fix: increase GB a bit more --- .github/workflows/deploy-gcp-tests.yml | 4 ++-- .github/workflows/zcashd-manual-deploy.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index cf2471da4b7..283e789c2a8 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -166,7 +166,7 @@ jobs: id: create-instance run: | gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ - --boot-disk-size 10GB \ + --boot-disk-size 50GB \ --boot-disk-type pd-ssd \ --image-project=cos-cloud \ --image-family=cos-stable \ @@ -383,7 +383,7 @@ jobs: id: create-instance run: | gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ - --boot-disk-size 10GB \ + --boot-disk-size 50GB \ --boot-disk-type pd-ssd \ --image-project=cos-cloud \ --image-family=cos-stable \ diff --git a/.github/workflows/zcashd-manual-deploy.yml b/.github/workflows/zcashd-manual-deploy.yml index 7b2c703e79c..b81baf4de6b 100644 --- a/.github/workflows/zcashd-manual-deploy.yml +++ b/.github/workflows/zcashd-manual-deploy.yml @@ -58,7 +58,7 @@ jobs: - name: Create instance template run: | gcloud compute instance-templates create-with-container zcashd-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ - --boot-disk-size 10GB \ + --boot-disk-size 50GB \ --boot-disk-type=pd-ssd \ --image-project=cos-cloud \ --image-family=cos-stable \ From 0ece5112dd894b760c05e0651df53d4b2f10c4a4 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 00:34:48 +0100 Subject: [PATCH 08/23] fix: do not fail on pipe failure --- .github/workflows/continous-delivery.yml | 6 ++++-- 1 
file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index a0fa28d0fc0..566ca6c3dd2 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -134,11 +134,12 @@ jobs: - name: Run tests using the default config run: | set -ex + trap "" PIPE; docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --detach --name default-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} # show the logs, even if the job times out docker logs --tail all --follow default-conf-tests | \ - tee --output-error=exit /dev/stderr | \ + tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ 'net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter' docker stop default-conf-tests @@ -175,11 +176,12 @@ jobs: shell: /usr/bin/bash -exo pipefail {0} run: | set -ex + trap "" PIPE; docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --env "NETWORK=Testnet" --detach --name testnet-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} # show the logs, even if the job times out docker logs --tail all --follow testnet-conf-tests | \ - tee --output-error=exit /dev/stderr | \ + tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ -e 'net.*=.*Test.*estimated progress to chain tip.*Genesis' \ -e 'net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter' From 92bed9519bbe2aa688ee23cec4757b79e731607a Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 01:15:02 +0100 Subject: [PATCH 09/23] fix: use plain `tee /dev/stderr` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If this does not work try `(tee … || true)` --- .github/workflows/continous-delivery.yml | 11 ++++------- 1 file 
changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index 566ca6c3dd2..8bf46dfa8ae 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -132,14 +132,13 @@ jobs: # Make sure Zebra can sync at least one full checkpoint on mainnet - name: Run tests using the default config + shell: /usr/bin/bash -x {0} run: | - set -ex - trap "" PIPE; docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --detach --name default-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} # show the logs, even if the job times out docker logs --tail all --follow default-conf-tests | \ - tee --output-error=exit-nopipe /dev/stderr | \ + tee /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ 'net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter' docker stop default-conf-tests @@ -173,15 +172,13 @@ jobs: # Make sure Zebra can sync the genesis block on testnet - name: Run tests using a testnet config - shell: /usr/bin/bash -exo pipefail {0} + shell: /usr/bin/bash -x {0} run: | - set -ex - trap "" PIPE; docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --env "NETWORK=Testnet" --detach --name testnet-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} # show the logs, even if the job times out docker logs --tail all --follow testnet-conf-tests | \ - tee --output-error=exit-nopipe /dev/stderr | \ + tee /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ -e 'net.*=.*Test.*estimated progress to chain tip.*Genesis' \ -e 'net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter' From e22166b97de3a4ae380ab306660d401648420898 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 01:27:01 +0100 Subject: [PATCH 10/23] fix: `tee` not stoping on cd config tests --- 
.github/workflows/continous-delivery.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index 8bf46dfa8ae..128be56b58e 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -138,7 +138,7 @@ jobs: docker run --detach --name default-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} # show the logs, even if the job times out docker logs --tail all --follow default-conf-tests | \ - tee /dev/stderr | \ + (tee /dev/stderr || true) | \ grep --max-count=1 --extended-regexp --color=always \ 'net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter' docker stop default-conf-tests @@ -178,7 +178,7 @@ jobs: docker run --env "NETWORK=Testnet" --detach --name testnet-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} # show the logs, even if the job times out docker logs --tail all --follow testnet-conf-tests | \ - tee /dev/stderr | \ + (tee /dev/stderr || true) | \ grep --max-count=1 --extended-regexp --color=always \ -e 'net.*=.*Test.*estimated progress to chain tip.*Genesis' \ -e 'net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter' From a6ff4d68889e989172e6e8aab643dabfa1c1bc82 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 01:45:32 +0100 Subject: [PATCH 11/23] fix: match logic with GCP tests --- .github/workflows/continous-delivery.yml | 84 ++++++++++++++---------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index 128be56b58e..f45f4451fa3 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -132,29 +132,36 @@ jobs: # Make sure Zebra can sync at least one full checkpoint on mainnet - name: Run tests using the default config - shell: /usr/bin/bash -x {0} run: | + set 
-ex docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --detach --name default-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} - # show the logs, even if the job times out - docker logs --tail all --follow default-conf-tests | \ - (tee /dev/stderr || true) | \ + + # Temporarily disable "set -e" to handle the broken pipe error gracefully + set +e; + sudo docker logs \ + --tail all \ + --follow \ + default-conf-tests | \ + tee --output-error=exit /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ - 'net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter' + -e "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter"; + LOGS_EXIT_STATUS=$?; + set -e; + docker stop default-conf-tests - # get the exit status from docker - EXIT_STATUS=$( \ - docker wait default-conf-tests || \ - docker inspect --format "{{.State.ExitCode}}" default-conf-tests || \ - echo "missing container, or missing exit status for container" \ - ) - docker logs default-conf-tests - echo "docker exit status: $EXIT_STATUS" - if [[ "$EXIT_STATUS" = "137" ]]; then - echo "ignoring expected signal status" - exit 0 + + EXIT_STATUS=$(sudo docker wait default-conf-tests || echo "Error retrieving exit status"); + echo "sudo docker exit status: $EXIT_STATUS"; + + # If grep found the pattern, exit with the Docker container"s exit status + if [ $LOGS_EXIT_STATUS -eq 0 ]; then + exit $EXIT_STATUS; fi - exit "$EXIT_STATUS" + + # Handle other potential errors here + echo "An error occurred while processing the logs."; + exit 1; # Test reconfiguring the docker image for testnet. 
test-configuration-file-testnet: @@ -174,28 +181,37 @@ jobs: - name: Run tests using a testnet config shell: /usr/bin/bash -x {0} run: | + set -ex docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --env "NETWORK=Testnet" --detach --name testnet-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} - # show the logs, even if the job times out - docker logs --tail all --follow testnet-conf-tests | \ - (tee /dev/stderr || true) | \ + + # Temporarily disable "set -e" to handle the broken pipe error gracefully + set +e; + sudo docker logs \ + --tail all \ + --follow \ + testnet-conf-tests | \ + tee --output-error=exit /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ - -e 'net.*=.*Test.*estimated progress to chain tip.*Genesis' \ - -e 'net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter' + -e "net.*=.*Test.*estimated progress to chain tip.*Genesis' \ + -e "net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter'; + LOGS_EXIT_STATUS=$?; + set -e; + docker stop testnet-conf-tests - # get the exit status from docker - EXIT_STATUS=$( \ - docker wait testnet-conf-tests || \ - docker inspect --format "{{.State.ExitCode}}" testnet-conf-tests || \ - echo "missing container, or missing exit status for container" \ - ) - docker logs testnet-conf-tests - echo "docker exit status: $EXIT_STATUS" - if [[ "$EXIT_STATUS" = "137" ]]; then - echo "ignoring expected signal status" - exit 0 + + EXIT_STATUS=$(sudo docker wait testnet-conf-tests || echo "Error retrieving exit status"); + echo "sudo docker exit status: $EXIT_STATUS"; + + # If grep found the pattern, exit with the Docker container"s exit status + if [ $LOGS_EXIT_STATUS -eq 0 ]; then + exit $EXIT_STATUS; fi - exit "$EXIT_STATUS" + + # Handle other potential errors here + echo "An error occurred while processing the logs."; + exit 1; + # Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet, # with one node in the 
configured GCP region. From 674a5b3d9706b5cb8211e6037b4cf1e972cc9f0f Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 01:59:14 +0100 Subject: [PATCH 12/23] fix(cd): handle pipe and other errors correctly --- .github/workflows/continous-delivery.yml | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index f45f4451fa3..c68664e0ebc 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -132,14 +132,14 @@ jobs: # Make sure Zebra can sync at least one full checkpoint on mainnet - name: Run tests using the default config + shell: /usr/bin/bash -exo pipefail {0} run: | - set -ex docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --detach --name default-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} # Temporarily disable "set -e" to handle the broken pipe error gracefully set +e; - sudo docker logs \ + docker logs \ --tail all \ --follow \ default-conf-tests | \ @@ -151,8 +151,8 @@ jobs: docker stop default-conf-tests - EXIT_STATUS=$(sudo docker wait default-conf-tests || echo "Error retrieving exit status"); - echo "sudo docker exit status: $EXIT_STATUS"; + EXIT_STATUS=$(docker wait default-conf-tests || echo "Error retrieving exit status"); + echo "docker exit status: $EXIT_STATUS"; # If grep found the pattern, exit with the Docker container"s exit status if [ $LOGS_EXIT_STATUS -eq 0 ]; then @@ -179,7 +179,7 @@ jobs: # Make sure Zebra can sync the genesis block on testnet - name: Run tests using a testnet config - shell: /usr/bin/bash -x {0} + shell: /usr/bin/bash -exo pipefail {0} run: | set -ex docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} @@ -187,21 +187,21 @@ jobs: # Temporarily disable "set -e" to handle the broken pipe error gracefully set +e; - sudo docker logs \ + 
docker logs \ --tail all \ --follow \ testnet-conf-tests | \ tee --output-error=exit /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ - -e "net.*=.*Test.*estimated progress to chain tip.*Genesis' \ - -e "net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter'; + -e "net.*=.*Test.*estimated progress to chain tip.*Genesis" \ + -e "net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter"; LOGS_EXIT_STATUS=$?; set -e; docker stop testnet-conf-tests - EXIT_STATUS=$(sudo docker wait testnet-conf-tests || echo "Error retrieving exit status"); - echo "sudo docker exit status: $EXIT_STATUS"; + EXIT_STATUS=$(docker wait testnet-conf-tests || echo "Error retrieving exit status"); + echo "docker exit status: $EXIT_STATUS"; # If grep found the pattern, exit with the Docker container"s exit status if [ $LOGS_EXIT_STATUS -eq 0 ]; then @@ -212,7 +212,6 @@ jobs: echo "An error occurred while processing the logs."; exit 1; - # Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet, # with one node in the configured GCP region. 
# From 0450a0da192e349f4f5586109dcd42900fc2d575 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 02:12:19 +0100 Subject: [PATCH 13/23] try `tee --output-error=exit-nopipe` --- .github/workflows/continous-delivery.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index c68664e0ebc..58507cb14df 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -143,7 +143,7 @@ jobs: --tail all \ --follow \ default-conf-tests | \ - tee --output-error=exit /dev/stderr | \ + tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ -e "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter"; LOGS_EXIT_STATUS=$?; @@ -181,7 +181,6 @@ jobs: - name: Run tests using a testnet config shell: /usr/bin/bash -exo pipefail {0} run: | - set -ex docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --env "NETWORK=Testnet" --detach --name testnet-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} @@ -191,7 +190,7 @@ jobs: --tail all \ --follow \ testnet-conf-tests | \ - tee --output-error=exit /dev/stderr | \ + tee --output-error=exit-nopipe /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ -e "net.*=.*Test.*estimated progress to chain tip.*Genesis" \ -e "net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter"; From b1a99bcbd59af83752da6afdb913e09d86af131a Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 02:25:20 +0100 Subject: [PATCH 14/23] fix: TRAP without pipefail --- .github/workflows/continous-delivery.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index 58507cb14df..7f3fbd2c47f 100644 --- 
a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -132,18 +132,18 @@ jobs: # Make sure Zebra can sync at least one full checkpoint on mainnet - name: Run tests using the default config - shell: /usr/bin/bash -exo pipefail {0} + shell: /usr/bin/bash -ex {0} run: | docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --detach --name default-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} - + trap "" PIPE; # Temporarily disable "set -e" to handle the broken pipe error gracefully set +e; docker logs \ --tail all \ --follow \ default-conf-tests | \ - tee --output-error=exit-nopipe /dev/stderr | \ + tee --output-error=warn /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ -e "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter"; LOGS_EXIT_STATUS=$?; @@ -179,18 +179,18 @@ jobs: # Make sure Zebra can sync the genesis block on testnet - name: Run tests using a testnet config - shell: /usr/bin/bash -exo pipefail {0} + shell: /usr/bin/bash -ex {0} run: | docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --env "NETWORK=Testnet" --detach --name testnet-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} - + trap "" PIPE; # Temporarily disable "set -e" to handle the broken pipe error gracefully set +e; docker logs \ --tail all \ --follow \ testnet-conf-tests | \ - tee --output-error=exit-nopipe /dev/stderr | \ + tee --output-error=warn /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ -e "net.*=.*Test.*estimated progress to chain tip.*Genesis" \ -e "net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter"; From 048e487c0ed0177894ccb9bbd17198e3359752b9 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 02:35:59 +0100 Subject: [PATCH 15/23] test: pipefail with exit and trap --- .github/workflows/continous-delivery.yml | 
8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index 7f3fbd2c47f..8c0db81fb87 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -132,7 +132,7 @@ jobs: # Make sure Zebra can sync at least one full checkpoint on mainnet - name: Run tests using the default config - shell: /usr/bin/bash -ex {0} + shell: /usr/bin/bash -exo pipefail {0} run: | docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --detach --name default-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} @@ -143,7 +143,7 @@ jobs: --tail all \ --follow \ default-conf-tests | \ - tee --output-error=warn /dev/stderr | \ + tee --output-error=exit /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ -e "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter"; LOGS_EXIT_STATUS=$?; @@ -179,7 +179,7 @@ jobs: # Make sure Zebra can sync the genesis block on testnet - name: Run tests using a testnet config - shell: /usr/bin/bash -ex {0} + shell: /usr/bin/bash -exo pipefail {0} run: | docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --env "NETWORK=Testnet" --detach --name testnet-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} @@ -190,7 +190,7 @@ jobs: --tail all \ --follow \ testnet-conf-tests | \ - tee --output-error=warn /dev/stderr | \ + tee --output-error=exit /dev/stderr | \ grep --max-count=1 --extended-regexp --color=always \ -e "net.*=.*Test.*estimated progress to chain tip.*Genesis" \ -e "net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter"; From 406c1e0ad3366f5bd81e6f909448b19159676979 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 02:56:54 +0100 Subject: [PATCH 16/23] fix: use a subshell --- .github/workflows/continous-delivery.yml | 55 
++++++++++++------------ 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index 8c0db81fb87..814148e1bca 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -136,25 +136,26 @@ jobs: run: | docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --detach --name default-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} - trap "" PIPE; - # Temporarily disable "set -e" to handle the broken pipe error gracefully - set +e; - docker logs \ - --tail all \ - --follow \ - default-conf-tests | \ - tee --output-error=exit /dev/stderr | \ - grep --max-count=1 --extended-regexp --color=always \ - -e "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter"; - LOGS_EXIT_STATUS=$?; - set -e; + + # Use a subshell to handle the broken pipe error gracefully + ( + trap "" PIPE; + docker logs \ + --tail all \ + --follow \ + default-conf-tests | \ + tee --output-error=exit /dev/stderr | \ + grep --max-count=1 --extended-regexp --color=always \ + -e "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter" + ) || true + LOGS_EXIT_STATUS=$? 
docker stop default-conf-tests EXIT_STATUS=$(docker wait default-conf-tests || echo "Error retrieving exit status"); echo "docker exit status: $EXIT_STATUS"; - # If grep found the pattern, exit with the Docker container"s exit status + # If grep found the pattern, exit with the Docker container exit status if [ $LOGS_EXIT_STATUS -eq 0 ]; then exit $EXIT_STATUS; fi @@ -183,26 +184,26 @@ jobs: run: | docker pull ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} docker run --env "NETWORK=Testnet" --detach --name testnet-conf-tests -t ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} - trap "" PIPE; - # Temporarily disable "set -e" to handle the broken pipe error gracefully - set +e; - docker logs \ - --tail all \ - --follow \ - testnet-conf-tests | \ - tee --output-error=exit /dev/stderr | \ - grep --max-count=1 --extended-regexp --color=always \ - -e "net.*=.*Test.*estimated progress to chain tip.*Genesis" \ - -e "net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter"; - LOGS_EXIT_STATUS=$?; - set -e; + # Use a subshell to handle the broken pipe error gracefully + ( + trap "" PIPE; + docker logs \ + --tail all \ + --follow \ + testnet-conf-tests | \ + tee --output-error=exit /dev/stderr | \ + grep --max-count=1 --extended-regexp --color=always \ + -e "net.*=.*Test.*estimated progress to chain tip.*Genesis" \ + -e "net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter"; + ) || true + LOGS_EXIT_STATUS=$? 
docker stop testnet-conf-tests EXIT_STATUS=$(docker wait testnet-conf-tests || echo "Error retrieving exit status"); echo "docker exit status: $EXIT_STATUS"; - # If grep found the pattern, exit with the Docker container"s exit status + # If grep found the pattern, exit with the Docker container exit status if [ $LOGS_EXIT_STATUS -eq 0 ]; then exit $EXIT_STATUS; fi From 0264324500db32d417127ad88720e0f82b00f39e Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 07:37:05 +0100 Subject: [PATCH 17/23] fix(ci): wait for mounting and show system logs if fail --- .github/workflows/deploy-gcp-tests.yml | 31 +++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 283e789c2a8..b1f60d931fe 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -399,6 +399,23 @@ jobs: --zone ${{ vars.GCP_ZONE }} sleep 90 + # Wait for the /dev/sdb to be ready and not in use by another process + - name: Wait for ${{ inputs.test_id }} volume + shell: /usr/bin/bash -exo pipefail {0} + run: | + gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ + --zone ${{ vars.GCP_ZONE }} \ + --ssh-flag="-o ServerAliveInterval=5" \ + --ssh-flag="-o ConnectionAttempts=20" \ + --ssh-flag="-o ConnectTimeout=5" \ + --command=' \ + set -ex + while sudo lsof /dev/sdb; do \ + echo "Waiting for /dev/sdb to be free..."; \ + sleep 10; \ + done; \ + ' + # Launch the test with the previously created Zebra-only cached state. # Each test runs one of the "Launch test" steps, and skips the other. 
# @@ -439,8 +456,8 @@ jobs: ' # Show dmesg logs if previous job failed - - name: Show dmesg logs if previous job failed - if: ${{ failure() }} + - name: Show debug logs if previous job failed + if: ${{ failure() && (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }} shell: /usr/bin/bash -exo pipefail {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ @@ -449,7 +466,9 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=' \ - sudo dmesg \ + sudo lsof /dev/sdb; + sudo dmesg; + sudo journalctl -b \ ' # Launch the test with the previously created Lightwalletd and Zebra cached state. @@ -505,7 +524,7 @@ jobs: # Show dmesg logs if previous job failed - name: Show dmesg logs if previous job failed - if: ${{ failure() }} + if: ${{ failure() && (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }} shell: /usr/bin/bash -exo pipefail {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ @@ -514,7 +533,9 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=' \ - sudo dmesg \ + sudo lsof /dev/sdb; + sudo dmesg; + sudo journalctl -b \ ' # Show all the test logs, then follow the logs of the test we just launched, until it finishes. 
From 877ab476291b2622351eccdb3d44598a37a159de Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 10:27:13 +0100 Subject: [PATCH 18/23] fix(ci): GCP is not always mounting disks in the same order --- .github/workflows/deploy-gcp-tests.yml | 67 ++++++++++++-------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index b1f60d931fe..6120b349d73 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -192,11 +192,11 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=' \ - while sudo lsof /dev/sdb; do \ - echo "Waiting for /dev/sdb to be free..."; \ - sleep 10; \ - done; \ - sudo mkfs.ext4 -v /dev/sdb \ + set -ex; + # Extract the correct disk name based on the device-name + DISK_IDENTIFIER="google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}"; + export DISK_NAME=$(ls -l /dev/disk/by-id | awk -v id="$DISK_IDENTIFIER" "$9 == id {gsub(\"../../\", \"\"); print $11}"); + sudo mkfs.ext4 -v /dev/$DISK_NAME \ ' # Launch the test without any cached state @@ -215,12 +215,12 @@ jobs: --tty \ --detach \ ${{ inputs.test_variables }} \ - --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ + --mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ ' - # Show dmesg logs if previous job failed - - name: Show dmesg logs if previous job failed + # Show debug logs if previous job failed + - name: Show debug logs if previous job failed if: ${{ failure() }} shell: /usr/bin/bash -exo pipefail {0} run: | @@ -230,7 +230,10 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ 
--command=' \ - sudo dmesg \ + lsblk; + sudo lsof /dev/sdb; + sudo dmesg; + sudo journalctl -b \ ' # set up and launch the test, if it uses cached state @@ -397,24 +400,6 @@ jobs: --labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \ --tags ${{ inputs.app_name }} \ --zone ${{ vars.GCP_ZONE }} - sleep 90 - - # Wait for the /dev/sdb to be ready and not in use by another process - - name: Wait for ${{ inputs.test_id }} volume - shell: /usr/bin/bash -exo pipefail {0} - run: | - gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ - --zone ${{ vars.GCP_ZONE }} \ - --ssh-flag="-o ServerAliveInterval=5" \ - --ssh-flag="-o ConnectionAttempts=20" \ - --ssh-flag="-o ConnectTimeout=5" \ - --command=' \ - set -ex - while sudo lsof /dev/sdb; do \ - echo "Waiting for /dev/sdb to be free..."; \ - sleep 10; \ - done; \ - ' # Launch the test with the previously created Zebra-only cached state. # Each test runs one of the "Launch test" steps, and skips the other. 
@@ -446,16 +431,21 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=' \ + set -ex; + # Extract the correct disk name based on the device-name + DISK_IDENTIFIER="google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}"; + export DISK_NAME=$(ls -l /dev/disk/by-id | awk -v id="$DISK_IDENTIFIER" "$9 == id {gsub(\"../../\", \"\"); print $11}"); + sudo docker run \ --name ${{ inputs.test_id }} \ --tty \ --detach \ ${{ inputs.test_variables }} \ - --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ + --mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ ' - # Show dmesg logs if previous job failed + # Show debug logs if previous job failed - name: Show debug logs if previous job failed if: ${{ failure() && (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }} shell: /usr/bin/bash -exo pipefail {0} @@ -466,7 +456,8 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=' \ - sudo lsof /dev/sdb; + lsblk; + sudo lsof /dev/$DISK_NAME; sudo dmesg; sudo journalctl -b \ ' @@ -512,18 +503,23 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=' \ + set -ex; + # Extract the correct disk name based on the device-name + DISK_IDENTIFIER="google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}"; + export DISK_NAME=$(ls -l /dev/disk/by-id | awk -v id="$DISK_IDENTIFIER" "$9 == id {gsub(\"../../\", \"\"); print $11}"); + sudo docker run \ --name ${{ inputs.test_id }} \ --tty \ --detach \ ${{ inputs.test_variables }} \ - --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path 
}}/${{ inputs.zebra_state_dir }} \ - --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \ + --mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ + --mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \ ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ ' - # Show dmesg logs if previous job failed - - name: Show dmesg logs if previous job failed + # Show debug logs if previous job failed + - name: Show debug logs if previous job failed if: ${{ failure() && (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }} shell: /usr/bin/bash -exo pipefail {0} run: | @@ -533,7 +529,8 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=' \ - sudo lsof /dev/sdb; + lsblk; + sudo lsof /dev/$DISK_NAME; sudo dmesg; sudo journalctl -b \ ' From da47abca600be2020e819f111a8822d36ef1ea90 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 10:57:14 +0100 Subject: [PATCH 19/23] fix: use `grep` instead of `awk` --- .github/workflows/deploy-gcp-tests.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 6120b349d73..7ba522517ac 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -194,8 +194,7 @@ jobs: --command=' \ set -ex; # Extract the correct disk name based on the device-name - DISK_IDENTIFIER="google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}"; - export DISK_NAME=$(ls -l /dev/disk/by-id | awk -v id="$DISK_IDENTIFIER" "$9 == id {gsub(\"../../\", \"\"); print $11}"); + export DISK_NAME=$(ls -l 
/dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \; sudo mkfs.ext4 -v /dev/$DISK_NAME \ ' @@ -433,8 +432,7 @@ jobs: --command=' \ set -ex; # Extract the correct disk name based on the device-name - DISK_IDENTIFIER="google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}"; - export DISK_NAME=$(ls -l /dev/disk/by-id | awk -v id="$DISK_IDENTIFIER" "$9 == id {gsub(\"../../\", \"\"); print $11}"); + export DISK_NAME=$(ls -l /dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \; sudo docker run \ --name ${{ inputs.test_id }} \ @@ -505,8 +503,7 @@ jobs: --command=' \ set -ex; # Extract the correct disk name based on the device-name - DISK_IDENTIFIER="google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}"; - export DISK_NAME=$(ls -l /dev/disk/by-id | awk -v id="$DISK_IDENTIFIER" "$9 == id {gsub(\"../../\", \"\"); print $11}"); + export DISK_NAME=$(ls -l /dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \; sudo docker run \ --name ${{ inputs.test_id }} \ From 3a6a549a606779d24c5492eebe88c0211213bc41 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 11:11:22 +0100 Subject: [PATCH 20/23] fix: typo --- .github/workflows/deploy-gcp-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 7ba522517ac..2cb52413d28 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -194,7 +194,7 @@ jobs: --command=' \ set -ex; # Extract the correct disk name based on the device-name - export DISK_NAME=$(ls -l /dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \; + export 
DISK_NAME=$(ls -l /dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \ sudo mkfs.ext4 -v /dev/$DISK_NAME \ ' @@ -432,7 +432,7 @@ jobs: --command=' \ set -ex; # Extract the correct disk name based on the device-name - export DISK_NAME=$(ls -l /dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \; + export DISK_NAME=$(ls -l /dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \ sudo docker run \ --name ${{ inputs.test_id }} \ @@ -503,7 +503,7 @@ jobs: --command=' \ set -ex; # Extract the correct disk name based on the device-name - export DISK_NAME=$(ls -l /dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \; + export DISK_NAME=$(ls -l /dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \ sudo docker run \ --name ${{ inputs.test_id }} \ From f857593e747df55f8d2cb37ba200b9bcb6b21fbd Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 11:30:54 +0100 Subject: [PATCH 21/23] fix: use simpler `grep` command --- .github/workflows/deploy-gcp-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 2cb52413d28..1d28049ffbb 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -194,7 +194,7 @@ jobs: --command=' \ set -ex; # Extract the correct disk name based on the device-name - export DISK_NAME=$(ls -l /dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \ + export DISK_NAME=$(ls -l /dev/disk/by-id | 
grep -oE "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} -> ../../[^ ]+" | grep -oE "/[^/]+$" | cut -c 2-); \ sudo mkfs.ext4 -v /dev/$DISK_NAME \ ' @@ -432,7 +432,7 @@ jobs: --command=' \ set -ex; # Extract the correct disk name based on the device-name - export DISK_NAME=$(ls -l /dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \ + export DISK_NAME=$(ls -l /dev/disk/by-id | grep -oE "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} -> ../../[^ ]+" | grep -oE "/[^/]+$" | cut -c 2-); \ sudo docker run \ --name ${{ inputs.test_id }} \ @@ -503,7 +503,7 @@ jobs: --command=' \ set -ex; # Extract the correct disk name based on the device-name - export DISK_NAME=$(ls -l /dev/disk/by-id | grep --extended-regexp "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}\$" | cut -d" " -f11 | cut -d"/" -f3); \ + export DISK_NAME=$(ls -l /dev/disk/by-id | grep -oE "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} -> ../../[^ ]+" | grep -oE "/[^/]+$" | cut -c 2-); \ sudo docker run \ --name ${{ inputs.test_id }} \ From f649776510cb7fc12e5b98adcabbfeff92224967 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 12:06:17 +0100 Subject: [PATCH 22/23] fix: do not sleep if not required --- .github/workflows/deploy-gcp-tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 08ae9559f49..d6820b9a311 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -180,7 +180,6 @@ jobs: --labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \ --tags ${{ inputs.app_name }} \ --zone ${{ vars.GCP_ZONE }} - sleep 90 # Format the mounted disk if the test doesn't use a cached state.
- name: Format ${{ inputs.test_id }} volume From ce8bc41491fbae723df7ed4baeaf8f7552d1a259 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 9 Oct 2023 12:07:19 +0100 Subject: [PATCH 23/23] chore: reduce diff --- .github/workflows/zcashd-manual-deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/zcashd-manual-deploy.yml b/.github/workflows/zcashd-manual-deploy.yml index b81baf4de6b..7b2c703e79c 100644 --- a/.github/workflows/zcashd-manual-deploy.yml +++ b/.github/workflows/zcashd-manual-deploy.yml @@ -58,7 +58,7 @@ jobs: - name: Create instance template run: | gcloud compute instance-templates create-with-container zcashd-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ - --boot-disk-size 50GB \ + --boot-disk-size 10GB \ --boot-disk-type=pd-ssd \ --image-project=cos-cloud \ --image-family=cos-stable \