From 2f38d986cb65bc0128422b0ef7eeb528883aba3a Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Tue, 14 Jun 2022 19:11:54 +0200 Subject: [PATCH] Update CI setup (#13291) * drop mamba * use legacy GPU machines --- .azure/gpu-benchmark.yml | 2 +- .azure/gpu-tests.yml | 2 +- .github/workflows/ci_dockers.yml | 4 ++-- .github/workflows/events-nightly.yml | 4 ++-- dockers/base-conda/Dockerfile | 7 +++---- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/.azure/gpu-benchmark.yml b/.azure/gpu-benchmark.yml index cfccbf7081f14..451f5b5646dca 100644 --- a/.azure/gpu-benchmark.yml +++ b/.azure/gpu-benchmark.yml @@ -26,7 +26,7 @@ jobs: - job: benchmarks timeoutInMinutes: "90" cancelTimeoutInMinutes: "2" - pool: azure-gpus-spot + pool: azure-jirka-spot container: image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11" options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g" diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml index 157bb1d535f9a..5ec9db1297b43 100644 --- a/.azure/gpu-tests.yml +++ b/.azure/gpu-tests.yml @@ -29,7 +29,7 @@ jobs: # how much time to give 'run always even if cancelled tasks' before stopping them cancelTimeoutInMinutes: "2" - pool: azure-gpus-spot + pool: azure-jirka-spot container: image: $(image) diff --git a/.github/workflows/ci_dockers.yml b/.github/workflows/ci_dockers.yml index 94d5be6901c5a..c9ed112516ff4 100644 --- a/.github/workflows/ci_dockers.yml +++ b/.github/workflows/ci_dockers.yml @@ -97,7 +97,7 @@ jobs: UBUNTU_VERSION=${{ matrix.ubuntu_version }} file: dockers/base-cuda/Dockerfile push: false - timeout-minutes: 75 + timeout-minutes: 95 build-Conda: runs-on: ubuntu-20.04 @@ -123,7 +123,7 @@ jobs: CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/base-conda/Dockerfile push: false - timeout-minutes: 75 + timeout-minutes: 95 build-ipu: runs-on: ubuntu-20.04 diff --git a/.github/workflows/events-nightly.yml b/.github/workflows/events-nightly.yml index 91a666dd2f6dd..4496e176d9720 100644 --- a/.github/workflows/events-nightly.yml +++ b/.github/workflows/events-nightly.yml @@ -146,7 +146,7 @@ jobs: file: dockers/base-cuda/Dockerfile push: ${{ env.PUSH_TO_HUB }} tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} - timeout-minutes: 85 + timeout-minutes: 95 # report failure to Slack - name: Slack notification @@ -197,7 +197,7 @@ jobs: file: dockers/base-conda/Dockerfile push: ${{ env.PUSH_TO_HUB }} tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} - timeout-minutes: 85 + timeout-minutes: 95 # report failure to Slack - name: Slack notification diff --git a/dockers/base-conda/Dockerfile b/dockers/base-conda/Dockerfile index 05227707b31fa..72603b04ffd64 100644 --- a/dockers/base-conda/Dockerfile +++ b/dockers/base-conda/Dockerfile @@ -72,16 +72,15 @@ COPY environment.yml environment.yml # conda init RUN conda update -n base -c defaults conda && \ - conda install mamba -n base -c conda-forge && \ - mamba create -y --name $CONDA_ENV python=${PYTHON_VERSION} pytorch=${PYTORCH_VERSION} torchvision torchtext cudatoolkit=${CUDA_VERSION} -c nvidia -c pytorch -c pytorch-test -c pytorch-nightly && \ + conda create -y --name $CONDA_ENV python=${PYTHON_VERSION} pytorch=${PYTORCH_VERSION} torchvision torchtext cudatoolkit=${CUDA_VERSION} -c nvidia -c pytorch -c pytorch-test -c pytorch-nightly && \ conda init bash && \ # NOTE: this requires that the channel is presented in the yaml before packages \ printf "import re;\nfname = 'environment.yml';\nreq = open(fname).read();\nfor n in ['python', 'pytorch', 'torchtext', 'torchvision']:\n req = re.sub(rf'- {n}[>=]+', f'# - {n}=', req);\nopen(fname, 'w').write(req)" > prune.py && \ python prune.py && \ rm prune.py && \ cat environment.yml && \ - mamba env update --name $CONDA_ENV --file environment.yml && \ - mamba clean -ya && \ + conda env update --name $CONDA_ENV --file environment.yml && \ + conda clean -ya && \ rm environment.yml ENV \