From 78108b3073bfdd10a38841ee7348c6828c36e7ec Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 13:20:15 +0000 Subject: [PATCH 01/18] prune docker --- .github/workflows/setup-runner.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/setup-runner.yml b/.github/workflows/setup-runner.yml index 46d9c68f160..e8cad4b17f4 100644 --- a/.github/workflows/setup-runner.yml +++ b/.github/workflows/setup-runner.yml @@ -91,5 +91,9 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} run: ./scripts/attach_ebs_cache.sh ${{ inputs.runner_label }} 128 + - name: Run Docker Prune + # helps with not overuse space + run: docker system prune + - name: Run Earthly Bootstrap run: earthly bootstrap From ac629ec65893c54ff655a627e3760f4c8f1f34d5 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 13:22:31 +0000 Subject: [PATCH 02/18] disable flake c++ test --- .../cpp/src/barretenberg/client_ivc/client_ivc.test.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/barretenberg/cpp/src/barretenberg/client_ivc/client_ivc.test.cpp b/barretenberg/cpp/src/barretenberg/client_ivc/client_ivc.test.cpp index 60d52d73ba8..0dd189112b8 100644 --- a/barretenberg/cpp/src/barretenberg/client_ivc/client_ivc.test.cpp +++ b/barretenberg/cpp/src/barretenberg/client_ivc/client_ivc.test.cpp @@ -111,7 +111,9 @@ class ClientIVCTests : public ::testing::Test { * @brief A full Goblin test using PG that mimicks the basic aztec client architecture * */ -TEST_F(ClientIVCTests, Full) +// TODO fix with https://github.com/AztecProtocol/barretenberg/issues/930 +// intermittent failures, presumably due to uninitialized memory +TEST_F(ClientIVCTests, DISABLED_Full) { using VerificationKey = Flavor::VerificationKey; From 958ff070f6ab3a868ae9f7e6dd9650fdbd3bc501 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 13:28:21 +0000 Subject: [PATCH 03/18] docker prune --- .github/workflows/setup-runner.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/setup-runner.yml b/.github/workflows/setup-runner.yml index e8cad4b17f4..bf1eb4902d9 100644 --- a/.github/workflows/setup-runner.yml +++ b/.github/workflows/setup-runner.yml @@ -93,7 +93,7 @@ jobs: - name: Run Docker Prune # helps with not overuse space - run: docker system prune + run: docker system prune -f - name: Run Earthly Bootstrap run: earthly bootstrap From 09d0988256eedd3538f41f69c9ae130a34b7845b Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 13:50:25 +0000 Subject: [PATCH 04/18] cache /var/lib/docker, bigger cache --- .github/earthly-ci-config.yml | 2 +- .github/workflows/setup-runner.yml | 2 +- .vscode/settings.json | 3 ++- scripts/attach_ebs_cache.sh | 16 ++++++++-------- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/.github/earthly-ci-config.yml b/.github/earthly-ci-config.yml index 47c8813a087..1611ffd4a17 100644 --- a/.github/earthly-ci-config.yml +++ b/.github/earthly-ci-config.yml @@ -1,5 +1,5 @@ global: - cache_size_pct: 75 + cache_size_pct: 50 buildkit_max_parallelism: 50 container_frontend: docker-shell buildkit_additional_args: ["-e", "BUILDKIT_STEP_LOG_MAX_SIZE=-1"] diff --git a/.github/workflows/setup-runner.yml b/.github/workflows/setup-runner.yml index bf1eb4902d9..4c6120b3a90 100644 --- a/.github/workflows/setup-runner.yml +++ b/.github/workflows/setup-runner.yml @@ -92,7 +92,7 @@ jobs: run: ./scripts/attach_ebs_cache.sh ${{ inputs.runner_label }} 128 - name: Run Docker Prune - # helps with not overuse space + # helps to not overuse space run: docker system prune -f - name: Run Earthly Bootstrap diff --git a/.vscode/settings.json b/.vscode/settings.json index 21f55227f99..3b7c9bf2d0e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -169,5 +169,6 @@ "**/target/**": true, "**/l1-contracts/lib/**": true, "**/barretenberg/cpp/build*/**": true - } + }, + "cmake.sourceDirectory": "/mnt/user-data/adam/aztec-packages/barretenberg/cpp" } diff --git a/scripts/attach_ebs_cache.sh b/scripts/attach_ebs_cache.sh index 4bce6c775ba..3b107350330 100755 --- a/scripts/attach_ebs_cache.sh +++ b/scripts/attach_ebs_cache.sh @@ -11,8 +11,8 @@ INSTANCE_ID=$(curl http://169.254.169.254/latest/meta-data/instance-id) # TODO also mount various other aspects of docker image metadata # Check for existing mount, assume we can continue if existing -if mount | grep -q /var/lib/docker/volumes; then - echo "Detected mount existing on /var/lib/docker/volumes already" +if mount | grep -q "/var/lib/docker type ext4"; then + echo "Detected mount existing on /var/lib/docker already" echo "Continuing..." exit 0 fi @@ -20,9 +20,9 @@ fi # Check for existing volume # we don't filter by available - we want to just error if it's attached already # this means we are in a weird state (two spot instances running etc) -EXISTING_VOLUME=$(aws ec2 describe-volumes \ +EXISTING_VOLUME=$(aws ec2 describe-volumes \/var/lib/docker type --region $REGION \ - --filters "Name=tag:username,Values=$EBS_CACHE_TAG" \ + --filters "Name=tag:username,Values=$EBS_CACHE_TAG-$SIZE" \ --query "Volumes[0].VolumeId" \ --output text) @@ -33,7 +33,7 @@ if [ "$EXISTING_VOLUME" == "None" ]; then --availability-zone $AVAILABILITY_ZONE \ --size $SIZE \ --volume-type $VOLUME_TYPE \ - --tag-specifications "ResourceType=volume,Tags=[{Key=username,Value=$EBS_CACHE_TAG}]" \ + --tag-specifications "ResourceType=volume,Tags=[{Key=username,Value=$EBS_CACHE_TAG-$SIZE}]" \ --query "VolumeId" \ --output text) else @@ -77,7 +77,7 @@ while [ "$(aws ec2 describe-volumes \ sleep 1 done -# We are expecting the device to come up as /dev/nvme1n1, but include generic code from +# We are expecting the device to come up as /dev/nvme1n1, but include generic code from # https://github.com/slavivanov/ec2-spotter/blob/master/ec2spotter-remount-root while true; do if lsblk /dev/nvme1n1; then @@ -100,5 +100,5 @@ if ! file -s $BLKDEVICE | grep -q ext4; then fi # Create a mount point and mount the volume -mkdir -p /var/lib/docker/volumes -mount $BLKDEVICE /var/lib/docker/volumes +mkdir -p /var/lib/docker +mount $BLKDEVICE /var/lib/docker From 2f6699cded6bd2be3d48ca00fd713328f2392c2a Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 13:53:44 +0000 Subject: [PATCH 05/18] detect old mount --- scripts/attach_ebs_cache.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/attach_ebs_cache.sh b/scripts/attach_ebs_cache.sh index 3b107350330..ff2043a0469 100755 --- a/scripts/attach_ebs_cache.sh +++ b/scripts/attach_ebs_cache.sh @@ -10,6 +10,13 @@ INSTANCE_ID=$(curl http://169.254.169.254/latest/meta-data/instance-id) # TODO also mount various other aspects of docker image metadata +# Check for existing mount, assume we can continue if existing +if mount | grep -q "/var/lib/docker/volumes type ext4"; then + echo "Detected mount existing on /var/lib/docker/volumes. This is our old mount." + echo "Run the stop spot workflow https://github.com/AztecProtocol/aztec-packages/actions/workflows/stop-spot.yml and rerun all steps in this workflow." + exit 0 +fi + # Check for existing mount, assume we can continue if existing if mount | grep -q "/var/lib/docker type ext4"; then echo "Detected mount existing on /var/lib/docker already" From c5e7564e6139b38fb60ff82992f60e28208094f1 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 13:54:58 +0000 Subject: [PATCH 06/18] fix stop spot workflow --- .github/workflows/setup-runner.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/setup-runner.yml b/.github/workflows/setup-runner.yml index 4c6120b3a90..a53f40b9739 100644 --- a/.github/workflows/setup-runner.yml +++ b/.github/workflows/setup-runner.yml @@ -74,6 +74,7 @@ jobs: setup: needs: start-builder runs-on: ${{ inputs.runner_label }} + if: ${{inputs.subaction != 'stop'}} steps: - name: Checkout Repository uses: actions/checkout@v4 From 6e3c584e6c9a1af55c7552cf40a9159421b30575 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 13:56:21 +0000 Subject: [PATCH 07/18] no stopping arm anymore --- .github/workflows/stop-spot.yml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.github/workflows/stop-spot.yml b/.github/workflows/stop-spot.yml index bccacb313fd..61ce3650066 100644 --- a/.github/workflows/stop-spot.yml +++ b/.github/workflows/stop-spot.yml @@ -3,19 +3,6 @@ name: Stop Personal Spot on: workflow_dispatch: {} jobs: - stop-build-arm: - uses: ./.github/workflows/setup-runner.yml - with: - runner_label: ${{ github.actor }}-arm - subaction: stop - # not used: - ebs_cache_size_gb: 128 - runner_concurrency: 8 - ec2_instance_type: r6g.16xlarge - ec2_ami_id: ami-0d8a9b0419ddb331a - ec2_instance_ttl: 40 - secrets: inherit - stop-build-x86: uses: ./.github/workflows/setup-runner.yml with: From ae9ebb892317907c640c06821611ce03c20b1868 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 13:57:08 +0000 Subject: [PATCH 08/18] fix arm --- .github/workflows/ci-arm.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-arm.yml b/.github/workflows/ci-arm.yml index 7100e8650e5..24891a69bcd 100644 --- a/.github/workflows/ci-arm.yml +++ b/.github/workflows/ci-arm.yml @@ -22,6 +22,7 @@ jobs: runner_label: master-arm ebs_cache_size_gb: 128 runner_concurrency: 8 + subaction: ${{ github.event.inputs.runner_action || 'start' }} ec2_instance_type: r6g.16xlarge ec2_ami_id: ami-0d8a9b0419ddb331a ec2_instance_ttl: 40 # refreshed by jobs From ad22701535c7a93ad320a534bbe304a4c7c5afd3 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 13:57:38 +0000 Subject: [PATCH 09/18] bigger cache disks --- .github/workflows/ci-arm.yml | 2 +- .github/workflows/ci.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-arm.yml b/.github/workflows/ci-arm.yml index 24891a69bcd..4d53c58c270 100644 --- a/.github/workflows/ci-arm.yml +++ b/.github/workflows/ci-arm.yml @@ -20,7 +20,7 @@ jobs: uses: ./.github/workflows/setup-runner.yml with: runner_label: master-arm - ebs_cache_size_gb: 128 + ebs_cache_size_gb: 256 runner_concurrency: 8 subaction: ${{ github.event.inputs.runner_action || 'start' }} ec2_instance_type: r6g.16xlarge diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e05d440eafc..f1d67fb668b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: uses: ./.github/workflows/setup-runner.yml with: runner_label: ${{ github.actor }}-x86 - ebs_cache_size_gb: 128 + ebs_cache_size_gb: 256 runner_concurrency: 50 subaction: ${{ github.event.inputs.runner_action || 'start' }} ec2_instance_type: m6a.32xlarge @@ -121,7 +121,7 @@ jobs: needs: bb-bench-binaries with: runner_label: ${{ github.actor }}-bench-x86 - ebs_cache_size_gb: 32 + ebs_cache_size_gb: 64 runner_concurrency: 1 subaction: ${{ github.event.inputs.runner_action || 'start' }} ec2_instance_type: m6a.4xlarge From cc9edb3f0a536f5b5d062ea9e55bdc5711f9f565 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 14:00:03 +0000 Subject: [PATCH 10/18] [ci restart-spot] From d45515a985ac20afd17262199e99a0dfaf33b6ea Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 14:01:40 +0000 Subject: [PATCH 11/18] [ci restart-spot] From 4c41da558c8cdb0f322c87e19ae8e2304a29b749 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 14:04:53 +0000 Subject: [PATCH 12/18] fix paste-o --- scripts/attach_ebs_cache.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/attach_ebs_cache.sh b/scripts/attach_ebs_cache.sh index ff2043a0469..c53f8f41830 100755 --- a/scripts/attach_ebs_cache.sh +++ b/scripts/attach_ebs_cache.sh @@ -27,7 +27,7 @@ fi # Check for existing volume # we don't filter by available - we want to just error if it's attached already # this means we are in a weird state (two spot instances running etc) -EXISTING_VOLUME=$(aws ec2 describe-volumes \/var/lib/docker type +EXISTING_VOLUME=$(aws ec2 describe-volumes --region $REGION \ --filters "Name=tag:username,Values=$EBS_CACHE_TAG-$SIZE" \ --query "Volumes[0].VolumeId" \ From 330c44f07ff499132d65e285c9ef9447477bae17 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 14:06:35 +0000 Subject: [PATCH 13/18] syntax --- scripts/attach_ebs_cache.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/attach_ebs_cache.sh b/scripts/attach_ebs_cache.sh index c53f8f41830..7eaa988afc2 100755 --- a/scripts/attach_ebs_cache.sh +++ b/scripts/attach_ebs_cache.sh @@ -27,7 +27,7 @@ fi # Check for existing volume # we don't filter by available - we want to just error if it's attached already # this means we are in a weird state (two spot instances running etc) -EXISTING_VOLUME=$(aws ec2 describe-volumes +EXISTING_VOLUME=$(aws ec2 describe-volumes \ --region $REGION \ --filters "Name=tag:username,Values=$EBS_CACHE_TAG-$SIZE" \ --query "Volumes[0].VolumeId" \ From 9e67aac7df0cf04596eee6371a0bd09964b92ec3 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 14:12:14 +0000 Subject: [PATCH 14/18] restart docker properly --- .github/ci-setup-action/action.yml | 10 ---------- .github/workflows/setup-runner.yml | 13 +++++++++++++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/ci-setup-action/action.yml b/.github/ci-setup-action/action.yml index f0f9d78a832..5565cac9f15 100644 --- a/.github/ci-setup-action/action.yml +++ b/.github/ci-setup-action/action.yml @@ -52,16 +52,6 @@ runs: shell: bash run: ./scripts/setup_env.sh ${{ inputs.dockerhub_password }} - - name: Setup Docker - shell: bash - run: | - if ! [ -f /etc/docker/daemon.json ] ; then - echo '{"default-address-pools":[{"base":"172.17.0.0/12","size":20}, {"base":"10.99.0.0/12","size":20}, {"base":"192.168.0.0/16","size":24}]}' > /etc/docker/daemon.json - sudo service docker restart - echo "Configured docker daemon for making many networks." - else - echo "Docker daemon already configured." - fi # As detailed in https://github.com/ben-z/gh-action-mutex # things do not become 'pending' in github actions, and instead just cancel one another # so we can't use the native concurrency in GA diff --git a/.github/workflows/setup-runner.yml b/.github/workflows/setup-runner.yml index a53f40b9739..6bf68b19014 100644 --- a/.github/workflows/setup-runner.yml +++ b/.github/workflows/setup-runner.yml @@ -92,6 +92,19 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} run: ./scripts/attach_ebs_cache.sh ${{ inputs.runner_label }} 128 + - name: Configure and Restart Docker + shell: bash + run: | + # We need to restart after attaching disk cache + # Both only happen once, so we just make sure this happens once + if ! [ -f /etc/docker/daemon.json ] ; then + echo '{"default-address-pools":[{"base":"172.17.0.0/12","size":20}, {"base":"10.99.0.0/12","size":20}, {"base":"192.168.0.0/16","size":24}]}' > /etc/docker/daemon.json + sudo service docker restart + echo "Configured docker daemon for making many networks." + else + echo "Docker daemon already configured." + fi + - name: Run Docker Prune # helps to not overuse space run: docker system prune -f From 5ee332f967d704d9056afcbf1edbc55a171421bd Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 14:19:11 +0000 Subject: [PATCH 15/18] [ci restart-spot] From 8f953398f8f17343e7bd26b4c0d46b8a5dd82560 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 14:26:22 +0000 Subject: [PATCH 16/18] better lock --- .github/ci-setup-action/action.yml | 2 +- .github/workflows/protocol-circuits-gate-diff.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/ci-setup-action/action.yml b/.github/ci-setup-action/action.yml index 77c6f4ffdad..d485708a143 100644 --- a/.github/ci-setup-action/action.yml +++ b/.github/ci-setup-action/action.yml @@ -26,7 +26,7 @@ runs: - name: Cache Submodules id: cache-submodules - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: .git/modules key: submodules-${{ hashFiles('.gitmodules') }}-spot-ebs diff --git a/.github/workflows/protocol-circuits-gate-diff.yml b/.github/workflows/protocol-circuits-gate-diff.yml index 151ac88b1ce..0840e67449b 100644 --- a/.github/workflows/protocol-circuits-gate-diff.yml +++ b/.github/workflows/protocol-circuits-gate-diff.yml @@ -37,7 +37,7 @@ jobs: sudo cp -r clang+llvm-16.0.0-x86_64-linux-gnu-ubuntu-18.04/share/* /usr/local/share/ rm -rf clang+llvm-16.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz clang+llvm-16.0.0-x86_64-linux-gnu-ubuntu-18.04 - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: | barretenberg/cpp/build @@ -60,7 +60,7 @@ jobs: INSTALL_URL: https://raw.githubusercontent.com/noir-lang/noirup/main/install NOIRUP_BIN_URL: https://raw.githubusercontent.com/noir-lang/noirup/main/noirup - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: | ~/.cargo/bin/ From 2c3a3345518f86bbbfc44c373f4b8803d471ec91 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 15:10:18 +0000 Subject: [PATCH 17/18] say who holds the lock --- .github/ci-setup-action/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ci-setup-action/action.yml b/.github/ci-setup-action/action.yml index d485708a143..4eba68046b3 100644 --- a/.github/ci-setup-action/action.yml +++ b/.github/ci-setup-action/action.yml @@ -60,8 +60,8 @@ runs: if: ${{ inputs.concurrency_key }} with: run: | - while [ -f "/run/${{ inputs.concurrency_key }}.lock" ]; do sleep 1 ; echo "Lock is currently held, waiting..." ; done - touch "/run/${{ inputs.concurrency_key }}.lock" + while [ -f "/run/${{ inputs.concurrency_key }}.lock" ]; do sleep 1 ; echo "Lock is currently held by $(cat '/run/${{ inputs.concurrency_key }}.lock'), waiting..." ; done + echo "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" > "/run/${{ inputs.concurrency_key }}.lock" echo "/run/${{ inputs.concurrency_key }}.lock acquired." post: | rm "/run/${{ inputs.concurrency_key }}.lock" From 76325b7c59e8e1465a98ea833f87bcdbdf059eb5 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 12 Apr 2024 11:22:07 -0400 Subject: [PATCH 18/18] Update settings.json --- .vscode/settings.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 3b7c9bf2d0e..21f55227f99 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -169,6 +169,5 @@ "**/target/**": true, "**/l1-contracts/lib/**": true, "**/barretenberg/cpp/build*/**": true - }, - "cmake.sourceDirectory": "/mnt/user-data/adam/aztec-packages/barretenberg/cpp" + } }