From f31772bc74538b1f7af7fb3e911e3b22709dc6fe Mon Sep 17 00:00:00 2001 From: Giorgis Georgakoudis Date: Wed, 18 Sep 2024 09:22:01 -0700 Subject: [PATCH] Update containers and deploy at LLNL - Fix #81, #86 --- .../x86_64-broadwell-cuda11.6.1/Dockerfile | 12 +---- .../x86_64-broadwell-cuda11.6.1/spack.yaml | 7 +-- .../x86_64-broadwell-gcc11.2.1/Dockerfile | 18 +++---- .../x86_64-broadwell-gcc11.2.1/spack.yaml | 7 +-- .github/workflows/ci.yml | 13 ++--- .github/workflows/create-containers.yml | 53 +++++++++++++++++++ .gitlab/custom-jobs-and-variables.yml | 2 +- scripts/gitlab/ci-build-test.sh | 16 ++++-- 8 files changed, 89 insertions(+), 39 deletions(-) create mode 100644 .github/workflows/create-containers.yml diff --git a/.github/containers/x86_64-broadwell-cuda11.6.1/Dockerfile b/.github/containers/x86_64-broadwell-cuda11.6.1/Dockerfile index 826ef662..15bc767f 100644 --- a/.github/containers/x86_64-broadwell-cuda11.6.1/Dockerfile +++ b/.github/containers/x86_64-broadwell-cuda11.6.1/Dockerfile @@ -15,14 +15,6 @@ RUN \ git clone --depth 1 --branch releases/v0.20 https://github.com/spack/spack.git &&\ source spack/share/spack/setup-env.sh &&\ spack env activate -p ams-spack-env &&\ - spack external find --all --not-buildable --exclude openssl --exclude openblas --exclude bzip2 - -FROM setup-spack-env AS install-spack-env -RUN \ - source spack/share/spack/setup-env.sh &&\ - spack env activate -p ams-spack-env &&\ - spack install --fail-fast - -FROM install-spack-env AS clean-spack -RUN \ + spack external find --all --not-buildable --exclude openssl --exclude openblas --exclude bzip2 &&\ + spack install --fail-fast &&\ spack clean --all diff --git a/.github/containers/x86_64-broadwell-cuda11.6.1/spack.yaml b/.github/containers/x86_64-broadwell-cuda11.6.1/spack.yaml index 2830d79a..fb62aa54 100644 --- a/.github/containers/x86_64-broadwell-cuda11.6.1/spack.yaml +++ b/.github/containers/x86_64-broadwell-cuda11.6.1/spack.yaml @@ -20,7 +20,8 @@ spack: - flux-sched - py-pika - amqp-cpp +tcp - - adiak + #- adiak + - nlohmann-json view: local concretizer: unify: true @@ -77,6 +78,6 @@ spack: require: '@0.28' py-pika: require: '@1.3.1' - adiak: - require: '@0.4.0+shared+mpi' + #adiak: + # require: '@0.4.0+shared+mpi' diff --git a/.github/containers/x86_64-broadwell-gcc11.2.1/Dockerfile b/.github/containers/x86_64-broadwell-gcc11.2.1/Dockerfile index 3f07390d..b26e7f41 100644 --- a/.github/containers/x86_64-broadwell-gcc11.2.1/Dockerfile +++ b/.github/containers/x86_64-broadwell-gcc11.2.1/Dockerfile @@ -1,34 +1,34 @@ -FROM centos:7 AS base +FROM almalinux:8 AS base MAINTAINER Giorgis Georgakoudis RUN \ yum install -y dnf &&\ - dnf install -y epel-release &&\ dnf group install -y "Development Tools" &&\ - dnf install -y curl findutils gcc-gfortran gnupg2 hostname iproute redhat-lsb-core python3 python3-pip python3-setuptools unzip python-boto3 centos-release-scl-rh &&\ - dnf install -y devtoolset-11 environment-modules &&\ + dnf install -y git gcc-toolset-11 environment-modules &&\ dnf upgrade -y COPY repo repo RUN \ mkdir -p ams-spack-env COPY spack.yaml ams-spack-env/spack.yaml - FROM base AS setup-spack-env RUN \ source /etc/profile &&\ mkdir -p /usr/share/Modules/modulefiles/gcc &&\ - /usr/share/Modules/bin/createmodule.sh /opt/rh/devtoolset-11/enable > /usr/share/Modules/modulefiles/gcc/11.2.1 &&\ + /usr/share/Modules/bin/createmodule.sh /opt/rh/gcc-toolset-11/enable > /usr/share/Modules/modulefiles/gcc/11.2.1 &&\ module load gcc/11.2.1 &&\ git clone --depth 1 --branch releases/v0.20 https://github.com/spack/spack.git &&\ source spack/share/spack/setup-env.sh &&\ spack compiler find &&\ - spack compiler rm gcc@4.8.5 &&\ + spack compiler rm gcc@8.5.0 &&\ sed -i "s/modules.*/modules: [gcc\/11.2.1]/" ~/.spack/linux/compilers.yaml FROM setup-spack-env AS install-spack-env RUN \ + source /etc/profile &&\ + module load gcc/11.2.1 &&\ source spack/share/spack/setup-env.sh &&\ spack env activate -p ams-spack-env &&\ - spack install &&\ - spack clean --all + spack install --fail-fast &&\ + spack clean --all &&\ + dnf clean all diff --git a/.github/containers/x86_64-broadwell-gcc11.2.1/spack.yaml b/.github/containers/x86_64-broadwell-gcc11.2.1/spack.yaml index 1596d8ec..227d26d8 100644 --- a/.github/containers/x86_64-broadwell-gcc11.2.1/spack.yaml +++ b/.github/containers/x86_64-broadwell-gcc11.2.1/spack.yaml @@ -20,7 +20,8 @@ spack: - flux-sched - py-pika - amqp-cpp +tcp - - adiak + #- adiak + - nlohmann-json view: local concretizer: unify: true @@ -69,6 +70,6 @@ spack: require: '@0.28' py-pika: require: '@1.3.1' - adiak: - require: '@0.4.0+shared+mpi' + #adiak: + # require: '@0.4.0+shared+mpi' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 620fc045..0e6657ee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,23 +1,16 @@ name: CI on: - # Triggers the workflow on push or pull request events but only for the "develop" branch - push: - branches: [ "develop" ] + # Triggers the workflow on pull request events only for the "develop" branch pull_request: branches: [ "develop" ] - workflow_dispatch: jobs: build-run-tests: # The type of runner that the job will run on runs-on: ubuntu-latest - container: ghcr.io/ggeorgakoudis/ams-ci-test-ruby-centos7 - - # Temporary fix for https://github.blog/changelog/2024-03-07-github-actions-all-actions-will-run-on-node20-instead-of-node16-by-default/ - env: - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true # Allow using Node16 actions + container: ghcr.io/llnl/ams-ci-almalinux8:latest # Steps represent a sequence of tasks that will be executed as part of the job steps: @@ -256,7 +249,7 @@ jobs: build-cuda-tests: # The type of runner that the job will run on runs-on: ubuntu-latest - container: ghcr.io/ggeorgakoudis/ams-ci-cuda11.6.1:latest + container: ghcr.io/llnl/ams-ci-cuda11.6.1:latest # Steps represent a sequence of tasks that will be executed as part of the job steps: diff --git a/.github/workflows/create-containers.yml b/.github/workflows/create-containers.yml new file mode 100644 index 00000000..bac1b4fe --- /dev/null +++ b/.github/workflows/create-containers.yml @@ -0,0 +1,53 @@ +name: Deploy containers + +on: + workflow_dispatch: + +jobs: + deploy-containers: + runs-on: ubuntu-latest + steps: + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Free up disk space on runner + run: | + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo apt-get remove -y '^aspnetcore-.*' + sudo apt-get remove -y '^dotnet-.*' --fix-missing + sudo apt-get remove -y '^llvm-.*' --fix-missing + sudo apt-get remove -y google-cloud-cli --fix-missing + sudo apt-get remove -y azure-cli google-chrome-stable firefox powershell mono-devel libgl1-mesa-dri --fix-missing + sudo apt-get autoremove -y + sudo apt-get clean + sudo docker image prune --all --force + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + + - name: Build and push almalinux8 container + uses: docker/build-push-action@v6 + with: + platforms: linux/amd64 + context: "{{defaultContext}}:.github/containers/x86_64-broadwell-gcc11.2.1" + push: true + provenance: false + tags: ghcr.io/llnl/ams-ci-almalinux8:latest + + - name: Build and push cuda container + uses: docker/build-push-action@v6 + with: + platforms: linux/amd64 + context: "{{defaultContext}}:.github/containers/x86_64-broadwell-cuda11.6.1" + push: true + provenance: false + tags: ghcr.io/llnl/ams-ci-cuda11.6.1:latest diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml index 5af33aee..298632a6 100644 --- a/.gitlab/custom-jobs-and-variables.yml +++ b/.gitlab/custom-jobs-and-variables.yml @@ -14,7 +14,7 @@ variables: # Ruby # Arguments for top level allocation - RUBY_SHARED_ALLOC: "--mpi=none --exclusive --reservation=ci --time=20 --nodes=1" + RUBY_SHARED_ALLOC: "--mpi=none --exclusive --reservation=ci --time=30 --nodes=1" # Arguments for job level allocation RUBY_JOB_ALLOC: "--mpi=none --reservation=ci --nodes=1" # Add variables that should apply to all the jobs on a machine: diff --git a/scripts/gitlab/ci-build-test.sh b/scripts/gitlab/ci-build-test.sh index 83224762..bb67830d 100755 --- a/scripts/gitlab/ci-build-test.sh +++ b/scripts/gitlab/ci-build-test.sh @@ -5,12 +5,19 @@ source scripts/gitlab/setup-env.sh export CTEST_OUTPUT_ON_FAILURE=1 # WITH_CUDA is defined in the per machine job yml. +cleanup() { + if [ -n "$VIRTUAL_ENV" ]; then + deactivate + fi + rm -rf ci-venv + rm -rf build +} + build_and_test() { WITH_TORCH=${1} WITH_FAISS=${2} WITH_HDF5=${3} WITH_MPI=${4} - WITH_CALIPER=${5} echo "*******************************************************************************************" echo "Build configuration" \ @@ -21,7 +28,10 @@ build_and_test() { "WITH_CUDA ${WITH_CUDA}" echo "*******************************************************************************************" - rm -rf build + cleanup + + python -m venv ci-venv + source ci-venv/bin/activate mkdir build pushd build @@ -53,7 +63,7 @@ build_and_test() { make test || { echo "Tests failed"; exit 1; } popd - rm -rf build + cleanup } # build_and_test WITH_TORCH WITH_FAISS WITH_HDF5 WITH_MPI