update CI
Borda committed Jun 14, 2022
1 parent d5b37e9 commit b9ae60d
Showing 15 changed files with 93 additions and 75 deletions.
10 changes: 5 additions & 5 deletions .actions/pull_legacy_checkpoints.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 # Run this script from the project root.
 URL="https://pl-public-data.s3.amazonaws.com/legacy/checkpoints.zip"
-mkdir -p legacy
+mkdir -p test/legacy
 # wget is simpler but does not work on Windows
-python -c "from urllib.request import urlretrieve; urlretrieve('$URL', 'legacy/checkpoints.zip')"
-ls -l legacy/
-unzip -o legacy/checkpoints.zip -d legacy/
-ls -l legacy/checkpoints/
+python -c "from urllib.request import urlretrieve; urlretrieve('$URL', 'test/legacy/checkpoints.zip')"
+ls -l test/legacy/
+unzip -o test/legacy/checkpoints.zip -d test/legacy/
+ls -l test/legacy/checkpoints/
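
Note: with this change the legacy checkpoints land under test/legacy/ instead of legacy/. A minimal usage sketch, assuming the script is run from the repository root as its comment requires:

  # download and unpack the archived checkpoints into test/legacy/
  bash .actions/pull_legacy_checkpoints.sh
  # the unpacked checkpoints should then be available here
  ls -l test/legacy/checkpoints/
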
16 changes: 11 additions & 5 deletions .azure-pipelines/gpu-benchmark.yml
@@ -34,8 +34,14 @@ jobs:
       clean: all
 
     steps:
-    - bash: |
-        python -m pytest tests/benchmarks -v --durations=0
-      displayName: 'Testing: benchmarks'
-      env:
-        PL_RUNNING_BENCHMARKS: 1
+    - bash: |
+        pip install -e . -r requirements/strategies.txt
+        pip list
+      displayName: 'Install package'
+
+    - bash: python -m pytest unittests_pl/benchmarks -v --durations=0
+      env:
+        PL_RUNNING_BENCHMARKS: 1
+      workingDirectory: test
+      displayName: 'Testing: benchmarks'
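
Note: the benchmark job now installs the package in a separate step and runs pytest from the test/ directory. A rough local equivalent, assuming a GPU machine and the new test/unittests_pl layout:

  # editable install plus the strategies extras, mirroring the 'Install package' step
  pip install -e . -r requirements/strategies.txt
  # run the benchmark suite from the test/ directory, as the job does
  cd test
  PL_RUNNING_BENCHMARKS=1 python -m pytest unittests_pl/benchmarks -v --durations=0
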
23 changes: 13 additions & 10 deletions .azure-pipelines/gpu-tests.yml
@@ -17,7 +17,7 @@ pr:
   - "release/*"
 
 jobs:
-  - job: pytest
+  - job: testing
     strategy:
       matrix:
         'PyTorch - LTS':
@@ -28,15 +28,12 @@ jobs:
     timeoutInMinutes: "100"
     # how much time to give 'run always even if cancelled tasks' before stopping them
     cancelTimeoutInMinutes: "2"
-
     pool: azure-jirka-spot
-
     container:
       image: $(image)
       # default shm size is 64m. Increase it to avoid:
       # 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
       options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=512m"
-
     workspace:
       clean: all
 
@@ -56,8 +53,9 @@ jobs:
        python -c "fname = 'requirements/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
        CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
        pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
-       pip install . --requirement requirements/devel.txt
-       pip install . --requirement requirements/strategies.txt
+       pip install -e .
+       pip install --requirement requirements/devel.txt
+       pip install --requirement requirements/strategies.txt
        pip list
      displayName: 'Install dependencies'
@@ -76,8 +74,12 @@ jobs:
-       python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests --ignore tests/benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
-     displayName: 'Testing: standard'
-   - bash: |
-       bash tests/standalone_tests.sh
+   - bash: python -m coverage run --source pytorch_lightning -m pytest unittests_pl --ignore unittests_pl/benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
+     displayName: 'Testing: unittests'
+     workingDirectory: test
+
+   - bash: bash run_standalone_tests.sh
+     workingDirectory: test
      env:
        PL_USE_MOCKED_MNIST: "1"
      displayName: 'Testing: standalone'
@@ -88,6 +90,7 @@ jobs:
        python -m coverage html
        python -m codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) --flags=gpu,pytest --name="GPU-coverage" --env=linux,azure
        ls -l
+     workingDirectory: test
      displayName: 'Statistics'
 
    - task: PublishTestResults@2
@@ -117,6 +120,6 @@ jobs:
        PL_USE_MOCKED_MNIST: "1"
      displayName: 'Testing: examples'
 
-   - bash: |
-       python -m pytest tests/benchmarks -v --maxfail=2 --durations=0
+   - bash: python -m pytest unittests_pl/benchmarks -v --maxfail=2 --durations=0
+     workingDirectory: test
      displayName: 'Testing: benchmarks'
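
Note: the two python -c one-liners in the 'Install dependencies' step are the least obvious part of the hunk above; a commented sketch of what they do, assuming torch with CUDA support is already installed in the image:

  # strip the horovod entry so it is not reinstalled on this image
  python -c "fname = 'requirements/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
  # turn torch's CUDA version into a wheel tag (e.g. '11.3' -> '113') to pick the matching Bagua build
  CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
  pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
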
17 changes: 10 additions & 7 deletions .azure-pipelines/hpu-tests.yml
@@ -14,15 +14,12 @@ pr:
   - "release/*"
 
 jobs:
-  - job: tests
-
+  - job: testing
     # how long to run the job before automatically cancelling
     timeoutInMinutes: "10"
     # how much time to give 'run always even if cancelled tasks' before stopping them
     cancelTimeoutInMinutes: "2"
-
     pool: intel-hpus
-
     workspace:
       clean: all
 
@@ -38,15 +35,21 @@ jobs:
      displayName: 'Install dependencies'
 
    - bash: |
-       python -m pytest -sv tests/accelerators/test_hpu.py --forked --junitxml=hpu1_test-results.xml
+       python -m pytest -sv unittests_pl/accelerators/test_hpu.py --forked --junitxml=hpu1_test-results.xml
+     workingDirectory: test
      displayName: 'Single card HPU test'
 
    - bash: |
-       python -m pytest -sv tests/accelerators/test_hpu.py --forked --hpus 8 --junitxml=hpu8_test-results.xml
+       python -m pytest -sv unittests_pl/accelerators/test_hpu.py --forked --hpus 8 --junitxml=hpu8_test-results.xml
+     workingDirectory: test
      displayName: 'Multi card(8) HPU test'
 
    - bash: |
-       python -m pytest -sv tests/plugins/precision/hpu/test_hpu.py --hmp-bf16 'tests/plugins/precision/hpu/ops_bf16.txt' --hmp-fp32 'tests/plugins/precision/hpu/ops_fp32.txt' --forked --junitxml=hpu1_precision_test-results.xml
+       python -m pytest -sv unittests_pl/plugins/precision/hpu/test_hpu.py --hmp-bf16 \
+         'unittests_pl/plugins/precision/hpu/ops_bf16.txt' --hmp-fp32 \
+         'unittests_pl/plugins/precision/hpu/ops_fp32.txt' --forked \
+         --junitxml=hpu1_precision_test-results.xml
+     workingDirectory: test
      displayName: 'HPU precision test'
 
    - bash: |
20 changes: 12 additions & 8 deletions .azure-pipelines/ipu-tests.yml
@@ -16,12 +16,10 @@ variables:
     value: "poplar_sdk-ubuntu_20_04-2.3.1+793-89796d462d"
 
 jobs:
-  - job: tests
-
+  - job: testing
     # how long to run the job before automatically cancelling
     timeoutInMinutes: "15"
     pool: graphcore-ipus
-
     workspace:
       clean: all
 
@@ -55,7 +53,7 @@ jobs:
        export GIT_TERMINAL_PROMPT=1
        python ./requirements/adjust-versions.py requirements/extra.txt
        python ./requirements/adjust-versions.py requirements/examples.txt
-       pip install . --requirement ./requirements/devel.txt
+       pip install -e . --requirement ./requirements/devel.txt
        pip list
      displayName: 'Install dependencies'
@@ -68,16 +66,22 @@ jobs:
        set -eux
        source ${{ variables.poplar_sdk }}/poplar-ubuntu*/enable.sh
        source ${{ variables.poplar_sdk }}/popart-ubuntu*/enable.sh
        python -c "import poptorch; print(poptorch.__version__)"
      displayName: "Check poptorch installation"
 
    - bash: |
        source ${{ variables.poplar_sdk }}/poplar-ubuntu*/enable.sh
        source ${{ variables.poplar_sdk }}/popart-ubuntu*/enable.sh
-       export POPTORCH_WAIT_FOR_IPU=1
-       export PL_RUN_IPU_TESTS=1
-       python -m coverage run --source pytorch_lightning -m pytest tests -vv --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
+       python -m pytest pytorch_lightning
+     displayName: 'DocTests'
+
+   - bash: |
+       source ${{ variables.poplar_sdk }}/poplar-ubuntu*/enable.sh
+       source ${{ variables.poplar_sdk }}/popart-ubuntu*/enable.sh
+       cd test
+       python -m coverage run --source pytorch_lightning -m pytest unittests_pl -vv --durations=50
      env:
        MKL_THREADING_LAYER: "GNU"
+       POPTORCH_WAIT_FOR_IPU: 1
+       PL_RUN_IPU_TESTS: 1
      displayName: 'Testing: standard'
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -40,7 +40,7 @@ references:
        # the image uses python 2.7 by default, force a different version
        pyenv global 3.7.3
        python --version
-       pip install -r requirements/docs.txt
+       pip install -e . -r requirements/docs.txt
        pip list
        cd docs
        make clean
2 changes: 1 addition & 1 deletion .github/CONTRIBUTING.md
@@ -134,7 +134,7 @@ To learn about development of docs, check out the docs [README.md](https://githu
 
 ### Testing
 
-To learn about tests, check out the tests [README.md](https://github.com/PyTorchLightning/pytorch-lightning/blob/master/tests/README.md).
+To learn about tests, check out the tests [README.md](https://github.com/PyTorchLightning/pytorch-lightning/blob/master/test/README.md).
 
 ### Pull Request
 
14 changes: 7 additions & 7 deletions .github/workflows/ci_test-conda.yml
@@ -39,7 +39,7 @@ jobs:
        run: |
          conda info
          conda list
-         pip install -r requirements/test.txt
+         pip install -e . -r requirements/test.txt
 
      - name: DocTests
        run: |
@@ -67,19 +67,19 @@ jobs:
        run: bash .actions/pull_legacy_checkpoints.sh
 
      - name: UnitTests
-       run: |
-         coverage run --source pytorch_lightning -m pytest --timeout 150 tests -v --durations=50 --junitxml=junit/test-results-${{ runner.os }}-torch${{ matrix.pytorch-version }}.xml
+       working-directory: ./test
+       run: coverage run --source pytorch_lightning -m pytest unittests_pl -v --timeout 150 --durations=50 --junitxml=results-${{ runner.os }}-torch${{ matrix.pytorch-version }}.xml
 
      - name: Upload pytest results
        uses: actions/upload-artifact@v2
        with:
-         name: pytest-results-${{ runner.os }}-torch${{ matrix.pytorch-version }}
-         path: junit/test-results-${{ runner.os }}-torch${{ matrix.pytorch-version }}.xml
-         if-no-files-found: error
+         name: unittest-results-${{ runner.os }}-torch${{ matrix.pytorch-version }}
+         path: test/results-${{ runner.os }}-torch${{ matrix.pytorch-version }}.xml
        if: failure()
 
      - name: Statistics
        if: success()
+       working-directory: ./test
        run: |
          coverage report
          coverage xml
@@ -91,7 +91,7 @@ jobs:
        continue-on-error: true
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
-         file: coverage.xml
+         file: test/coverage.xml
          flags: cpu,pytest,torch${{ matrix.pytorch-version }}
          name: CPU-coverage
          fail_ci_if_error: false
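
Note: the conda workflow above and the full/slow CPU workflows below follow the same pattern after this change: run pytest from the test/ directory against unittests_pl and point codecov at test/coverage.xml. A rough local equivalent of that sequence, assuming the package is already installed:

  cd test
  coverage run --source pytorch_lightning -m pytest unittests_pl -v --durations=50
  coverage report
  coverage xml   # written inside test/, i.e. test/coverage.xml from the repository root
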
19 changes: 9 additions & 10 deletions .github/workflows/ci_test-full.yml
@@ -83,7 +83,7 @@ jobs:
        run: |
          flag=$(python -c "print('--pre' if '${{matrix.release}}' == 'pre' else '')" 2>&1)
          url=$(python -c "print('test/cpu/torch_test.html' if '${{matrix.release}}' == 'pre' else 'cpu/torch_stable.html')" 2>&1)
-         pip install -r requirements.txt --upgrade $flag --find-links "https://download.pytorch.org/whl/${url}"
+         pip install -e . --upgrade $flag --find-links "https://download.pytorch.org/whl/${url}"
          pip install -r requirements/test.txt --upgrade
          pip list
        shell: bash
@@ -123,13 +123,12 @@ jobs:
          key: pl-dataset
 
      - name: Sanity check
-       run: |
-         python requirements/check-avail-extras.py
+       run: python requirements/check-avail-extras.py
 
      - name: UnitTests
-       run: |
-         # NOTE: do not include coverage report here, see: https://github.com/nedbat/coveragepy/issues/1003
-         coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --durations=50 --junitxml=junit/test-results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml
+       working-directory: ./test
+       # NOTE: do not include coverage report here, see: https://github.com/nedbat/coveragepy/issues/1003
+       run: coverage run --source pytorch_lightning -m pytest unittests_pl -v --durations=50 --junitxml=results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml
 
      - name: Examples
        run: |
@@ -141,13 +140,13 @@ jobs:
      - name: Upload pytest results
        uses: actions/upload-artifact@v2
        with:
-         name: pytest-results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}
-         path: junit/test-results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml
-         if-no-files-found: error
+         name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}
+         path: test/results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml
        if: failure()
 
      - name: Statistics
        if: success()
+       working-directory: ./test
        run: |
          coverage report
          coverage xml
@@ -159,7 +158,7 @@ jobs:
        continue-on-error: true
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
-         file: coverage.xml
+         file: test/coverage.xml
          flags: cpu,pytest,python${{ matrix.python-version }}
          name: CPU-coverage
          fail_ci_if_error: false
16 changes: 8 additions & 8 deletions .github/workflows/ci_test-slow.yml
@@ -49,27 +49,27 @@ jobs:
        run: |
          # adjust versions according installed Torch version
          python ./requirements/adjust-versions.py requirements.txt ${{ matrix.pytorch-version }}
-         pip install -r requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
+         pip install -e . --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
          pip install -r requirements/test.txt
          pip list
        shell: bash
 
-     - name: Tests
-       run: |
-         coverage run --source pytorch_lightning -m pytest tests -v --junitxml=junit/test-results-${{ runner.os }}-py${{ matrix.python-version }}.xml
+     - name: UnitTests
+       working-directory: ./test
+       run: coverage run --source pytorch_lightning -m pytest unittests_pl -v --junitxml=results-${{ runner.os }}-py${{ matrix.python-version }}.xml
        env:
          PL_RUN_SLOW_TESTS: 1
 
      - name: Upload pytest test results
        uses: actions/upload-artifact@v2
        with:
-         name: pytest-results-${{ runner.os }}-py${{ matrix.python-version }}
-         path: junit/test-results-${{ runner.os }}-py${{ matrix.python-version }}.xml
-         if-no-files-found: error
+         name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }}
+         path: test/results-${{ runner.os }}-py${{ matrix.python-version }}.xml
        if: failure()
 
      - name: Statistics
        if: success()
+       working-directory: ./test
        run: |
          coverage report
          coverage xml
@@ -81,7 +81,7 @@ jobs:
        continue-on-error: true
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
-         file: coverage.xml
+         file: test/coverage.xml
          flags: cpu,pytest,torch${{ matrix.pytorch-version }}
          name: CPU-coverage
          fail_ci_if_error: false
8 changes: 4 additions & 4 deletions .github/workflows/legacy-checkpoints.yml
@@ -29,12 +29,12 @@ jobs:
      - name: Generate checkpoint
        run: |
          while IFS= read -r line; do
-           bash legacy/generate_checkpoints.sh $line
-         done <<< $(cat legacy/back-compatible-versions.txt)
+           bash test/legacy/generate_checkpoints.sh $line
+         done <<< $(cat test/legacy/back-compatible-versions.txt)
 
      - name: Push files to S3
-       working-directory: ./legacy
+       working-directory: ./test/legacy
        run: |
-         aws s3 sync legacy/checkpoints/ s3://pl-public-data/legacy/checkpoints/
+         aws s3 sync checkpoints/ s3://pl-public-data/legacy/checkpoints/
          zip -r checkpoints.zip checkpoints
          aws s3 cp checkpoints.zip s3://pl-public-data/legacy/ --acl public-read
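
Note: the loop in the 'Generate checkpoint' step feeds each line of back-compatible-versions.txt (which appears to hold one Lightning version per line) to the generation script. An equivalent sketch that reads the file directly instead of going through cat:

  while IFS= read -r line; do
    bash test/legacy/generate_checkpoints.sh "$line"
  done < test/legacy/back-compatible-versions.txt
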
5 changes: 3 additions & 2 deletions .github/workflows/release-pypi.yml
@@ -123,8 +123,9 @@ jobs:
          path: dist
 
      - name: Pull files from S3
+       working-directory: ./test/legacy
        run: |
-         aws s3 cp --recursive s3://pl-public-data/legacy/checkpoints/ legacy/checkpoints/ # --acl public-read
+         aws s3 cp --recursive s3://pl-public-data/legacy/checkpoints/ checkpoints/ # --acl public-read
          ls -l legacy/checkpoints/
      - name: Generate checkpoint
@@ -137,7 +138,7 @@ jobs:
          bash legacy/generate_checkpoints.sh $pl_ver
 
      - name: Push files to S3
-       working-directory: ./legacy
+       working-directory: ./test/legacy
        run: |
          aws s3 sync legacy/checkpoints/ s3://pl-public-data/legacy/checkpoints/
          zip -r checkpoints.zip checkpoints