diff --git a/.azure-pipelines/gpu-benchmark.yml b/.azure-pipelines/gpu-benchmark.yml index cfccbf7081f14..451f5b5646dca 100644 --- a/.azure-pipelines/gpu-benchmark.yml +++ b/.azure-pipelines/gpu-benchmark.yml @@ -26,7 +26,7 @@ jobs: - job: benchmarks timeoutInMinutes: "90" cancelTimeoutInMinutes: "2" - pool: azure-gpus-spot + pool: azure-jirka-spot container: image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11" options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g" diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index 157bb1d535f9a..5ec9db1297b43 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -29,7 +29,7 @@ jobs: # how much time to give 'run always even if cancelled tasks' before stopping them cancelTimeoutInMinutes: "2" - pool: azure-gpus-spot + pool: azure-jirka-spot container: image: $(image) diff --git a/.github/workflows/ci_dockers.yml b/.github/workflows/ci_dockers.yml index 6953ee815c614..ccd3fd27aaf8d 100644 --- a/.github/workflows/ci_dockers.yml +++ b/.github/workflows/ci_dockers.yml @@ -97,7 +97,7 @@ jobs: UBUNTU_VERSION=${{ matrix.ubuntu_version }} file: dockers/base-cuda/Dockerfile push: false - timeout-minutes: 75 + timeout-minutes: 95 build-Conda: runs-on: ubuntu-20.04 @@ -123,7 +123,7 @@ jobs: CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/base-conda/Dockerfile push: false - timeout-minutes: 75 + timeout-minutes: 95 build-ipu: runs-on: ubuntu-20.04 diff --git a/.github/workflows/events-nightly.yml b/.github/workflows/events-nightly.yml index d3f2f5259b731..b4f137e898a1a 100644 --- a/.github/workflows/events-nightly.yml +++ b/.github/workflows/events-nightly.yml @@ -146,7 +146,7 @@ jobs: file: dockers/base-cuda/Dockerfile push: ${{ env.PUSH_TO_HUB }} tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} - timeout-minutes: 85 + timeout-minutes: 95 # report failure to Slack - name: Slack notification @@ -197,7 +197,7 @@ jobs: file: dockers/base-conda/Dockerfile push: ${{ env.PUSH_TO_HUB }} tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} - timeout-minutes: 85 + timeout-minutes: 95 # report failure to Slack - name: Slack notification diff --git a/dockers/base-conda/Dockerfile b/dockers/base-conda/Dockerfile index 05227707b31fa..72603b04ffd64 100644 --- a/dockers/base-conda/Dockerfile +++ b/dockers/base-conda/Dockerfile @@ -72,16 +72,15 @@ COPY environment.yml environment.yml # conda init RUN conda update -n base -c defaults conda && \ - conda install mamba -n base -c conda-forge && \ - mamba create -y --name $CONDA_ENV python=${PYTHON_VERSION} pytorch=${PYTORCH_VERSION} torchvision torchtext cudatoolkit=${CUDA_VERSION} -c nvidia -c pytorch -c pytorch-test -c pytorch-nightly && \ + conda create -y --name $CONDA_ENV python=${PYTHON_VERSION} pytorch=${PYTORCH_VERSION} torchvision torchtext cudatoolkit=${CUDA_VERSION} -c nvidia -c pytorch -c pytorch-test -c pytorch-nightly && \ conda init bash && \ # NOTE: this requires that the channel is presented in the yaml before packages \ printf "import re;\nfname = 'environment.yml';\nreq = open(fname).read();\nfor n in ['python', 'pytorch', 'torchtext', 'torchvision']:\n req = re.sub(rf'- {n}[>=]+', f'# - {n}=', req);\nopen(fname, 'w').write(req)" > prune.py && \ python prune.py && \ rm prune.py && \ cat environment.yml && \ - mamba env update --name $CONDA_ENV --file environment.yml && \ - mamba clean -ya && \ + conda env update --name $CONDA_ENV --file environment.yml && \ + conda clean -ya && \ rm environment.yml ENV \