From 4ecb340f1b6ff59b37320882ec156ec0fad1a089 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 16 Dec 2022 10:49:17 +0100 Subject: [PATCH] Better check for programmatic lightningignore (#16080) Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit b1ce2639f4371b9cac55dcf2fc6ece3945e8a000) --- .github/workflows/ci-app-examples.yml | 3 +- .../basic/hello_components/pl_multinode.py | 2 +- examples/app_multi_node/train_lite.py | 2 +- examples/app_multi_node/train_lt.py | 4 +- examples/app_multi_node/train_lt_script.py | 4 +- examples/app_multi_node/train_pytorch.py | 2 +- .../app_multi_node/train_pytorch_spawn.py | 4 +- src/lightning_app/CHANGELOG.md | 4 ++ src/lightning_app/core/flow.py | 10 +---- src/lightning_app/core/work.py | 9 +--- .../components/multi_node/test_trainer.py | 2 +- tests/tests_app/runners/test_cloud.py | 4 -- tests/tests_examples_app/conftest.py | 10 ++--- tests/tests_examples_app/local/__init__.py | 6 +-- tests/tests_examples_app/public/__init__.py | 6 +-- .../public/test_multi_node.py | 43 +++++++------------ 16 files changed, 44 insertions(+), 71 deletions(-) diff --git a/.github/workflows/ci-app-examples.yml b/.github/workflows/ci-app-examples.yml index 2046cd940aafb..201f3981f2619 100644 --- a/.github/workflows/ci-app-examples.yml +++ b/.github/workflows/ci-app-examples.yml @@ -89,7 +89,8 @@ jobs: - name: Install Lightning package env: PACKAGE_NAME: ${{ matrix.pkg-name }} - run: pip install -e . + # do not use -e because it will make both packages available since it adds `src` to `sys.path` automatically + run: pip install . - name: Adjust tests if: ${{ matrix.pkg-name == 'lightning' }} diff --git a/docs/source-app/levels/basic/hello_components/pl_multinode.py b/docs/source-app/levels/basic/hello_components/pl_multinode.py index e6764ee8fafae..9df12ec732684 100644 --- a/docs/source-app/levels/basic/hello_components/pl_multinode.py +++ b/docs/source-app/levels/basic/hello_components/pl_multinode.py @@ -10,7 +10,7 @@ def run(self): trainer = L.Trainer(max_epochs=10, strategy="ddp") trainer.fit(model) -# 8 GPU: (2 nodes of 4 x v100) +# 8 GPUs: (2 nodes of 4 x v100) component = LightningTrainerMultiNode( LightningTrainerDistributed, num_nodes=4, diff --git a/examples/app_multi_node/train_lite.py b/examples/app_multi_node/train_lite.py index 8e546b270a693..89c983a1f4004 100644 --- a/examples/app_multi_node/train_lite.py +++ b/examples/app_multi_node/train_lite.py @@ -31,7 +31,7 @@ def run(self): optimizer.step() -# Run over 2 nodes of 4 x V100 +# 8 GPUs: (2 nodes of 4 x v100) app = L.LightningApp( LiteMultiNode( LitePyTorchDistributed, diff --git a/examples/app_multi_node/train_lt.py b/examples/app_multi_node/train_lt.py index 4abe375c89b9b..8ed62a10fb9de 100644 --- a/examples/app_multi_node/train_lt.py +++ b/examples/app_multi_node/train_lt.py @@ -11,10 +11,10 @@ def run(self): trainer.fit(model) -# 8 GPU: (2 nodes of 4 x v100) +# 8 GPUs: (2 nodes of 4 x v100) component = LightningTrainerMultiNode( LightningTrainerDistributed, - num_nodes=4, + num_nodes=2, cloud_compute=L.CloudCompute("gpu-fast-multi"), # 4 x v100 ) app = L.LightningApp(component) diff --git a/examples/app_multi_node/train_lt_script.py b/examples/app_multi_node/train_lt_script.py index d2254e19daac0..58f847368346c 100644 --- a/examples/app_multi_node/train_lt_script.py +++ b/examples/app_multi_node/train_lt_script.py @@ -2,11 +2,11 @@ from lightning.app.components import LightningTrainerScript from lightning.app.utilities.packaging.cloud_compute import CloudCompute -# Run over 2 nodes of 4 x V100 +# 8 GPUs: (2 nodes of 4 x v100) app = L.LightningApp( LightningTrainerScript( "pl_boring_script.py", num_nodes=2, - cloud_compute=CloudCompute("gpu-fast-multi"), + cloud_compute=CloudCompute("gpu-fast-multi"), # 4 x v100 ), ) diff --git a/examples/app_multi_node/train_pytorch.py b/examples/app_multi_node/train_pytorch.py index cc9e84297c151..e5a9a1fc93e3b 100644 --- a/examples/app_multi_node/train_pytorch.py +++ b/examples/app_multi_node/train_pytorch.py @@ -56,6 +56,6 @@ def run(self, main_address: str, main_port: int, num_nodes: int, node_rank: int) # 8 GPUs: (2 nodes x 4 v 100) -compute = L.CloudCompute("gpu-fast-multi") # 4xV100 +compute = L.CloudCompute("gpu-fast-multi") # 4 x v100 component = MultiNode(PyTorchDistributed, num_nodes=2, cloud_compute=compute) app = L.LightningApp(component) diff --git a/examples/app_multi_node/train_pytorch_spawn.py b/examples/app_multi_node/train_pytorch_spawn.py index d29ec83562ffb..165a0c77dbfa9 100644 --- a/examples/app_multi_node/train_pytorch_spawn.py +++ b/examples/app_multi_node/train_pytorch_spawn.py @@ -42,11 +42,11 @@ def run( optimizer.step() -# Run over 2 nodes of 4 x V100 +# 8 GPUs: (2 nodes x 4 v 100) app = L.LightningApp( PyTorchSpawnMultiNode( PyTorchDistributed, num_nodes=2, - cloud_compute=L.CloudCompute("gpu-fast-multi"), # 4 x V100 + cloud_compute=L.CloudCompute("gpu-fast-multi"), # 4 x v100 ) ) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 7f0000d1e448d..9e570f9807f4d 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -25,6 +25,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed and improvements of login flow ([#16052](https://github.com/Lightning-AI/lightning/pull/16052)) - Fixed the debugger detection mechanism for lightning App in VSCode ([#16068](https://github.com/Lightning-AI/lightning/pull/16068)) + +- Fixed bug where components that are re-instantiated several times failed to initialize if they were modifying `self.lightningignore` ([#16080](https://github.com/Lightning-AI/lightning/pull/16080)) + + - Fixed a bug where apps that had previously been deleted could not be run again from the CLI ([#16082](https://github.com/Lightning-AI/lightning/pull/16082)) diff --git a/src/lightning_app/core/flow.py b/src/lightning_app/core/flow.py index 302ba344320d1..0be6b6f8ade98 100644 --- a/src/lightning_app/core/flow.py +++ b/src/lightning_app/core/flow.py @@ -10,13 +10,7 @@ from lightning_app.frontend import Frontend from lightning_app.storage import Path from lightning_app.storage.drive import _maybe_create_drive, Drive -from lightning_app.utilities.app_helpers import ( - _is_json_serializable, - _lightning_dispatched, - _LightningAppRef, - _set_child_name, - is_overridden, -) +from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, _set_child_name, is_overridden from lightning_app.utilities.component import _sanitize_state from lightning_app.utilities.exceptions import ExitAppException from lightning_app.utilities.introspection import _is_init_context, _is_run_context @@ -325,7 +319,7 @@ def lightningignore(self) -> Tuple[str, ...]: @lightningignore.setter def lightningignore(self, lightningignore: Tuple[str, ...]) -> None: - if _lightning_dispatched(): + if self._backend is not None: raise RuntimeError( f"Your app has been already dispatched, so modifying the `{self.name}.lightningignore` does not have an" " effect" diff --git a/src/lightning_app/core/work.py b/src/lightning_app/core/work.py index 43ffc0006d5ea..029f01fd2f7ae 100644 --- a/src/lightning_app/core/work.py +++ b/src/lightning_app/core/work.py @@ -11,12 +11,7 @@ from lightning_app.storage import Path from lightning_app.storage.drive import _maybe_create_drive, Drive from lightning_app.storage.payload import Payload -from lightning_app.utilities.app_helpers import ( - _is_json_serializable, - _lightning_dispatched, - _LightningAppRef, - is_overridden, -) +from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, is_overridden from lightning_app.utilities.component import _is_flow_context, _sanitize_state from lightning_app.utilities.enum import ( CacheCallsKeys, @@ -267,7 +262,7 @@ def lightningignore(self) -> Tuple[str, ...]: @lightningignore.setter def lightningignore(self, lightningignore: Tuple[str, ...]) -> None: - if _lightning_dispatched(): + if self._backend is not None: raise RuntimeError( f"Your app has been already dispatched, so modifying the `{self.name}.lightningignore` does not have an" " effect" diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py index c86e0968e2ab0..3cfb26f1441b6 100644 --- a/tests/tests_app/components/multi_node/test_trainer.py +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -66,7 +66,7 @@ def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expe ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), ], ) -@pytest.mark.skipif(not module_available("pytorch"), reason="Lightning is not available") +@pytest.mark.skipif(not module_available("torch"), reason="PyTorch is not available") def test_trainer_run_executor_arguments_choices( args_given: dict, args_expected: dict, diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index b5d05c3e7c5cd..cb4bd5ddaa3c0 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -1582,8 +1582,6 @@ def run(self): def test_programmatic_lightningignore(monkeypatch, caplog, tmpdir): - monkeypatch.setenv("LIGHTNING_DISPATCHED", "0") # this is not cleaned up - mock_client = mock.MagicMock() mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse( memberships=[V1Membership(name="test-project", project_id="test-project-id")] @@ -1650,8 +1648,6 @@ def run(self): assert "2 files were uploaded" # a.txt and .lightningignore assert "files:\n5.0 MB: a.txt\nPerhaps" in caplog.text # only this file appears - # replicate how the app would dispatch the app, and call `run` - monkeypatch.setenv("LIGHTNING_DISPATCHED", "1") flow.run() diff --git a/tests/tests_examples_app/conftest.py b/tests/tests_examples_app/conftest.py index fcefa6287c3c6..fa2c2a14dcbba 100644 --- a/tests/tests_examples_app/conftest.py +++ b/tests/tests_examples_app/conftest.py @@ -5,8 +5,8 @@ import psutil import pytest +from tests_examples_app.public import _PATH_EXAMPLES -from lightning_app import _PROJECT_ROOT from lightning_app.storage.path import _storage_root_dir from lightning_app.utilities.component import _set_context from lightning_app.utilities.packaging import cloud_compute @@ -24,11 +24,11 @@ def pytest_sessionstart(*_): """Pytest hook that get called after the Session object has been created and before performing collection and entering the run test loop.""" for name, url in GITHUB_APP_URLS.items(): - if not os.path.exists(os.path.join(_PROJECT_ROOT, "examples", name)): - path_examples = os.path.join(_PROJECT_ROOT, "examples") - Popen(["git", "clone", url, name], cwd=path_examples).wait(timeout=90) + app_path = _PATH_EXAMPLES / name + if not os.path.exists(app_path): + Popen(["git", "clone", url, name], cwd=_PATH_EXAMPLES).wait(timeout=90) else: - Popen(["git", "pull", "main"], cwd=os.path.join(_PROJECT_ROOT, "examples", name)).wait(timeout=90) + Popen(["git", "pull", "main"], cwd=app_path).wait(timeout=90) def pytest_sessionfinish(session, exitstatus): diff --git a/tests/tests_examples_app/local/__init__.py b/tests/tests_examples_app/local/__init__.py index dbd6181f8e89a..1e7d17cc6b536 100644 --- a/tests/tests_examples_app/local/__init__.py +++ b/tests/tests_examples_app/local/__init__.py @@ -1,5 +1,3 @@ -import os +from pathlib import Path -from lightning_app import _PROJECT_ROOT - -_PATH_APPS = os.path.join(_PROJECT_ROOT, "tests", "tests_examples_app", "apps") +_PATH_APPS = Path(__file__).resolve().parents[1] / "apps" diff --git a/tests/tests_examples_app/public/__init__.py b/tests/tests_examples_app/public/__init__.py index b70149ce10b11..1f1db5e15eaee 100644 --- a/tests/tests_examples_app/public/__init__.py +++ b/tests/tests_examples_app/public/__init__.py @@ -1,5 +1,3 @@ -import os +from pathlib import Path -from lightning_app import _PROJECT_ROOT - -_PATH_EXAMPLES = os.path.join(_PROJECT_ROOT, "examples") +_PATH_EXAMPLES = Path(__file__).resolve().parents[3] / "examples" diff --git a/tests/tests_examples_app/public/test_multi_node.py b/tests/tests_examples_app/public/test_multi_node.py index a5cd2de40811f..e344fab41eeaa 100644 --- a/tests/tests_examples_app/public/test_multi_node.py +++ b/tests/tests_examples_app/public/test_multi_node.py @@ -1,10 +1,11 @@ import os -import sys from unittest import mock import pytest +from lightning_utilities.core.imports import package_available from tests_examples_app.public import _PATH_EXAMPLES +from lightning_app.testing.helpers import _RunIf from lightning_app.testing.testing import application_testing, LightningTestApp @@ -12,33 +13,13 @@ class LightningTestMultiNodeApp(LightningTestApp): def on_before_run_once(self): res = super().on_before_run_once() if self.works and all(w.has_stopped for w in self.works): - assert len([w for w in self.works]) == 2 + assert len(self.works) == 2 return True return res -@pytest.mark.skip(reason="flaky") -@mock.patch("lightning_app.components.multi_node.base.is_running_in_cloud", return_value=True) -def test_multi_node_example(_, monkeypatch): - monkeypatch.chdir(os.path.join(_PATH_EXAMPLES, "app_multi_node")) - command_line = [ - "app.py", - "--blocking", - "False", - "--open-ui", - "False", - ] - result = application_testing(LightningTestMultiNodeApp, command_line) - assert result.exit_code == 0 - - -class LightningTestMultiNodeWorksApp(LightningTestApp): - def on_before_run_once(self): - res = super().on_before_run_once() - if self.works and all(w.has_stopped for w in self.works): - assert len([w for w in self.works]) == 2 - return True - return res +# for the skip to work, the package needs to be installed without editable mode +_SKIP_LIGHTNING_UNAVAILABLE = pytest.mark.skipif(not package_available("lightning"), reason="script requires lightning") @pytest.mark.parametrize( @@ -46,14 +27,20 @@ def on_before_run_once(self): [ "train_pytorch.py", "train_any.py", - # "app_lite_work.py", "train_pytorch_spawn.py", - # "app_pl_work.py": TODO Add once https://github.com/Lightning-AI/lightning/issues/15556 is resolved. + pytest.param("train_lite.py", marks=_SKIP_LIGHTNING_UNAVAILABLE), + pytest.param("train_lt_script.py", marks=_SKIP_LIGHTNING_UNAVAILABLE), + pytest.param("train_lt.py", marks=_SKIP_LIGHTNING_UNAVAILABLE), ], ) -@pytest.mark.skipif(sys.platform == "win32", reason="flaky") +@_RunIf(skip_windows=True) # flaky @mock.patch("lightning_app.components.multi_node.base.is_running_in_cloud", return_value=True) def test_multi_node_examples(_, app_name, monkeypatch): + # note: this test will fail locally: + # * if you installed `lightning_app`, then the examples need to be + # rewritten to use `lightning_app` imports (CI does this) + # * if you installed `lightning`, then the imports in this file and mocks + # need to be changed to use `lightning`. monkeypatch.chdir(os.path.join(_PATH_EXAMPLES, "app_multi_node")) command_line = [ app_name, @@ -62,5 +49,5 @@ def test_multi_node_examples(_, app_name, monkeypatch): "--open-ui", "False", ] - result = application_testing(LightningTestMultiNodeWorksApp, command_line) + result = application_testing(LightningTestMultiNodeApp, command_line) assert result.exit_code == 0