Merge branch 'master' into fix/restore_step

Lightning-AI · Jul 16, 2022 · 31f1607
2 parents 6ab0608 + 2845e75
commit 31f1607
Show file tree

Hide file tree

Showing 266 changed files with 8,628 additions and 2,741 deletions.
diff --git a/.actions/assistant.py b/.actions/assistant.py
@@ -15,6 +15,7 @@
 from urllib.request import Request, urlopen
 
 import fire
+import pkg_resources
 
 REQUIREMENT_FILES = {
     "pytorch": (
@@ -78,17 +79,12 @@ def requirements_prune_pkgs(packages: Sequence[str], req_files: Sequence[str] =
     @staticmethod
     def _prune_packages(req_file: str, packages: Sequence[str]) -> None:
         """Remove some packages from given requirement files."""
-        with open(req_file) as fp:
-            lines = fp.readlines()
-
-        if isinstance(packages, str):
-            packages = [packages]
-        for pkg in packages:
-            lines = [ln for ln in lines if not ln.startswith(pkg)]
-        pprint(lines)
-
-        with open(req_file, "w") as fp:
-            fp.writelines(lines)
+        path = Path(req_file)
+        assert path.exists()
+        text = path.read_text()
+        final = [str(req) for req in pkg_resources.parse_requirements(text) if req.name not in packages]
+        pprint(final)
+        path.write_text("\n".join(final))
 
     @staticmethod
     def _replace_min(fname: str) -> None:

diff --git a/.azure/gpu-benchmark.yml b/.azure/gpu-benchmark.yml
@@ -46,5 +46,6 @@ jobs:
     - bash: python -m pytest benchmarks -v --durations=0
       env:
         PL_RUNNING_BENCHMARKS: 1
+        PL_RUN_CUDA_TESTS: "1"
       workingDirectory: tests/tests_pytorch
       displayName: 'Testing: PyTorch benchmarks'
diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml
@@ -12,18 +12,23 @@ trigger:
       - "master"
       - "release/*"
       - "refs/tags/*"
+
 pr:
   - "master"
   - "release/*"
 
+variables:
+  - name: continue
+    value: '1'
+
 jobs:
   - job: testing
     strategy:
       matrix:
         'PyTorch - stable':
           image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11"
     # how long to run the job before automatically cancelling
-    timeoutInMinutes: "100"
+    timeoutInMinutes: "80"
     # how much time to give 'run always even if cancelled tasks' before stopping them
     cancelTimeoutInMinutes: "2"
     pool: azure-jirka-spot
@@ -37,6 +42,21 @@ jobs:
 
     steps:
 
+    - bash: |
+        CHANGED_FILES=$(git diff --name-status origin/master -- . | awk  '{print $2}')
+        FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*'
+        echo $CHANGED_FILES > changed_files.txt
+        MATCHES=$(cat changed_files.txt | grep -E $FILTER)
+        echo $MATCHES
+        if [ -z "$MATCHES" ]; then
+            echo "Skip"
+            echo "##vso[task.setvariable variable=continue]0"
+        else
+            echo "Continue"
+            echo "##vso[task.setvariable variable=continue]1"
+        fi
+      displayName: Skipper
+
     - bash: |
         lspci | egrep 'VGA|3D'
         whereis nvidia
@@ -46,6 +66,7 @@ jobs:
         pip --version
         pip list
       displayName: 'Image info & NVIDIA'
+      condition: eq(variables['continue'], '1')
 
     - bash: |
         python -c "fname = 'requirements/pytorch/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
@@ -58,6 +79,7 @@ jobs:
         PACKAGE_NAME: pytorch
         FREEZE_REQUIREMENTS: 1
       displayName: 'Install dependencies'
+      condition: eq(variables['continue'], '1')
 
     - bash: |
         set -e
@@ -66,23 +88,33 @@ jobs:
         python requirements/pytorch/check-avail-strategies.py
         python requirements/pytorch/check-avail-extras.py
       displayName: 'Env details'
+      condition: eq(variables['continue'], '1')
 
     - bash: bash .actions/pull_legacy_checkpoints.sh
       displayName: 'Get legacy checkpoints'
+      condition: eq(variables['continue'], '1')
 
     - bash: python -m coverage run --source pytorch_lightning -m pytest
       workingDirectory: src/pytorch_lightning
       displayName: 'Testing: PyTorch doctests'
+      condition: eq(variables['continue'], '1')
 
     - bash: python -m coverage run --source pytorch_lightning -m pytest --ignore benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
-      displayName: 'Testing: PyTorch standard'
+      env:
+        PL_RUN_CUDA_TESTS: "1"
       workingDirectory: tests/tests_pytorch
+      displayName: 'Testing: PyTorch standard'
+      timeoutInMinutes: "35"
+      condition: eq(variables['continue'], '1')
 
     - bash: bash run_standalone_tests.sh
       workingDirectory: tests/tests_pytorch
       env:
         PL_USE_MOCKED_MNIST: "1"
+        PL_RUN_CUDA_TESTS: "1"
       displayName: 'Testing: PyTorch standalone tests'
+      timeoutInMinutes: "35"
+      condition: eq(variables['continue'], '1')
 
     - bash: |
         python -m coverage report
@@ -92,13 +124,14 @@ jobs:
         ls -l
       workingDirectory: tests/tests_pytorch
       displayName: 'Statistics'
+      condition: eq(variables['continue'], '1')
 
     - task: PublishTestResults@2
       displayName: 'Publish test results'
       inputs:
         testResultsFiles: '$(Build.StagingDirectory)/test-results.xml'
         testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
-      condition: succeededOrFailed()
+      condition: and(succeededOrFailed(), eq(variables['continue'], '1'))
 
     - script: |
         set -e
@@ -110,7 +143,11 @@ jobs:
       env:
         PL_USE_MOCKED_MNIST: "1"
       displayName: 'Testing: PyTorch examples'
+      condition: eq(variables['continue'], '1')
 
     - bash: python -m pytest benchmarks -v --maxfail=2 --durations=0
       workingDirectory: tests/tests_pytorch
+      env:
+        PL_RUN_CUDA_TESTS: "1"
       displayName: 'Testing: PyTorch benchmarks'
+      condition: eq(variables['continue'], '1')
diff --git a/.azure/hpu-tests.yml b/.azure/hpu-tests.yml
@@ -9,10 +9,15 @@ trigger:
       - "master"
       - "release/*"
       - "refs/tags/*"
+
 pr:
   - "master"
   - "release/*"
 
+variables:
+  - name: continue
+    value: '1'
+
 jobs:
   - job: testing
     # how long to run the job before automatically cancelling
@@ -24,27 +29,46 @@ jobs:
       clean: all
 
     steps:
+    - bash: |
+        CHANGED_FILES=$(git diff --name-status origin/master -- . | awk  '{print $2}')
+        FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*'
+        echo $CHANGED_FILES > changed_files.txt
+        MATCHES=$(cat changed_files.txt | grep -E $FILTER)
+        echo $MATCHES
+        if [ -z "$MATCHES" ]; then
+            echo "Skip"
+            echo "##vso[task.setvariable variable=continue]0"
+        else
+            echo "Continue"
+            echo "##vso[task.setvariable variable=continue]1"
+        fi
+      displayName: Skipper
+
     - bash: |
         apt-get install -y hwinfo
         hwinfo --short
       displayName: 'Instance HW info'
+      condition: eq(variables['continue'], '1')
 
     - bash: |
         pip install -e .[extra] -r requirements/pytorch/test.txt
       env:
         PACKAGE_NAME: pytorch
         FREEZE_REQUIREMENTS: 1
       displayName: 'Install dependencies'
+      condition: eq(variables['continue'], '1')
 
     - bash: |
         python -m pytest -sv accelerators/test_hpu.py --forked --junitxml=hpu1_test-results.xml
       workingDirectory: tests/tests_pytorch
       displayName: 'Single card HPU test'
+      condition: eq(variables['continue'], '1')
 
     - bash: |
         python -m pytest -sv accelerators/test_hpu.py --forked --hpus 8 --junitxml=hpu8_test-results.xml
       workingDirectory: tests/tests_pytorch
       displayName: 'Multi card(8) HPU test'
+      condition: eq(variables['continue'], '1')
 
     - bash: |
         python -m pytest -sv plugins/precision/hpu/test_hpu.py --hmp-bf16 \
@@ -53,16 +77,18 @@ jobs:
           --junitxml=hpu1_precision_test-results.xml
       workingDirectory: tests/tests_pytorch
       displayName: 'HPU precision test'
+      condition: eq(variables['continue'], '1')
 
     - bash: |
         export PYTHONPATH="${PYTHONPATH}:$(pwd)"
         python "pl_hpu/mnist_sample.py"
       workingDirectory: examples
       displayName: 'Testing: HPU examples'
+      condition: eq(variables['continue'], '1')
 
     - task: PublishTestResults@2
       inputs:
         testResultsFiles: 'hpu*_test-results.xml'
         testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
-      condition: succeededOrFailed()
+      condition: and(succeededOrFailed(), eq(variables['continue'], '1'))
       displayName: 'Publish test results'
diff --git a/.azure/ipu-tests.yml b/.azure/ipu-tests.yml
@@ -7,13 +7,16 @@ trigger:
       - master
       - release/*
       - refs/tags/*
+
 pr:
   - master
   - release/*
 
 variables:
-- name: poplar_sdk
-  value: "poplar_sdk-ubuntu_20_04-2.3.1+793-89796d462d"
+  - name: poplar_sdk
+    value: "poplar_sdk-ubuntu_20_04-2.3.1+793-89796d462d"
+  - name: continue
+    value: '1'
 
 jobs:
   - job: testing
@@ -24,14 +27,31 @@ jobs:
       clean: all
 
     steps:
+    - bash: |
+        CHANGED_FILES=$(git diff --name-status origin/master -- . | awk  '{print $2}')
+        FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*'
+        echo $CHANGED_FILES > changed_files.txt
+        MATCHES=$(cat changed_files.txt | grep -E $FILTER)
+        echo $MATCHES
+        if [ -z "$MATCHES" ]; then
+            echo "Skip"
+            echo "##vso[task.setvariable variable=continue]0"
+        else
+            echo "Continue"
+            echo "##vso[task.setvariable variable=continue]1"
+        fi
+      displayName: Skipper
+
     - script: tar -xvzf /opt/poplar/${{ variables.poplar_sdk }}.tar.gz
       displayName: "Extract Poplar SDK"
+      condition: eq(variables['continue'], '1')
 
     - script: |
         set -eux
         pip debug --verbose
         pip install ${{ variables.poplar_sdk }}/poptorch-*ubuntu*.whl
       displayName: "Install poptorch"
+      condition: eq(variables['continue'], '1')
 
     - script: |
         set -eux
@@ -48,6 +68,7 @@ jobs:
             i=$((i + 1))
         done
       displayName: "Reset IPU devices"
+      condition: eq(variables['continue'], '1')
 
     - bash: |
         export GIT_TERMINAL_PROMPT=1
@@ -59,18 +80,21 @@ jobs:
         PACKAGE_NAME: pytorch
         FREEZE_REQUIREMENTS: 1
       displayName: 'Install dependencies'
+      condition: eq(variables['continue'], '1')
 
     - bash: |
         python requirements/collect_env_details.py
         python -c "import torch"
       displayName: 'Env details'
+      condition: eq(variables['continue'], '1')
 
     - script: |
         set -eux
         source ${{ variables.poplar_sdk }}/poplar-ubuntu*/enable.sh
         source ${{ variables.poplar_sdk }}/popart-ubuntu*/enable.sh
         python -c "import poptorch; print(poptorch.__version__)"
       displayName: "Check poptorch installation"
+      condition: eq(variables['continue'], '1')
 
     - bash: |
         source ${{ variables.poplar_sdk }}/poplar-ubuntu*/enable.sh
@@ -81,3 +105,4 @@ jobs:
         POPTORCH_WAIT_FOR_IPU: 1
         PL_RUN_IPU_TESTS: 1
       displayName: 'Testing: PyTorch standard'
+      condition: eq(variables['continue'], '1')
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -14,6 +14,7 @@ trigger:
       - "master"
       - "release/*"
       - "refs/tags/*"
+
 pr:
   - "master"
   - "release/*"
@@ -157,9 +158,9 @@ jobs:
 
 workflows:
   version: 2
-  build-docs:
-    jobs:
-      - build-Docs
+  #build-docs:  # FixMe
+  #  jobs:
+  #    - build-Docs
   test-on-tpus:
     jobs:
       - TPU-tests
diff --git a/.github/gatekeeper-config_app.yml b/.github/gatekeeper-config_app.yml
@@ -0,0 +1,13 @@
+approvals:
+  # check will fail if there is no approval
+  minimum: 1
+  groups:
+    - name: 'Lightning Apps'
+      from:
+        - alecmerdler
+        - awaelchli
+        - hhsecond
+        - lantiga
+        - manskx
+        - nohalon
+        - tchaton
diff --git a/.github/approve_config.yml b/.github/gatekeeper-config_pytorch.yml
@@ -3,7 +3,6 @@ approvals:
   minimum: 1
   groups:
     - name: 'PyTorch Lightning'
-      minimum: 1
       from:
         - awaelchli
         - Borda
@@ -15,13 +14,3 @@ approvals:
         - rohitgr7
         - tchaton
         - williamFalcon
-    - name: 'Lightning Apps'
-      minimum: 1
-      from:
-        - alecmerdler
-        - awaelchli
-        - hhsecond
-        - lantiga
-        - manskx
-        - nohalon
-        - tchaton