From 7eb5ff55580c9a2036002a471fff17a8cc471d9c Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Thu, 8 Dec 2022 19:50:53 +0100 Subject: [PATCH] Releasing/1.8.4 extra2 (#15960) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [docs] Include all components in the API reference (#15805) * Update docs Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit e6f4c8484baaa82e75cfd37d1719ecfadb04a7d9) * Bump playwright from 1.27.1 to 1.28.0 in /requirements (#15903) * Bump playwright from 1.27.1 to 1.28.0 in /requirements Bumps [playwright](https://github.com/Microsoft/playwright-python) from 1.27.1 to 1.28.0. - [Release notes](https://github.com/Microsoft/playwright-python/releases) - [Commits](https://github.com/Microsoft/playwright-python/compare/v1.27.1...v1.28.0) --- updated-dependencies: - dependency-name: playwright dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * 1.28 Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka (cherry picked from commit 73a6dbec01af6998efa4547555eff936c7630cd2) * [App] Add `configure_layout` method for works (#15926) * Add `configure_layout` method for works * Check for api access availability * Updates from review * Update CHANGELOG.md * Apply suggestions from code review Co-authored-by: Sherin Thomas (cherry picked from commit d5b9c678ba7b354b2d367f26b4a8dc211ad4f97d) * Don't try to aggregate `requirements/__pycache__/base.txt` in setuptools (#15775) Exlucde __pycache__ in setuptools (cherry picked from commit ca5ca0e2c6440d27d50b23e850ef285ca6e7866b) * [App] Multiprocessing-safe work pickling (#15836) (cherry picked from commit df678335d63e0a6d7968dcc2e20a3ccae1ffb7e9) * Multinode on MPS (#15748) * Fix restarting attribute for lr finder * update lite executor * update trainer executor * update spawn executor * add multinode component tests * add testing helpers * add lite tests * add trainer tests * update changelog * update trainer * update workflow * update tests * debug * add reason for skipif * Apply suggestions from code review * switch skipif Co-authored-by: Jirka Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Carlos Mocholí Co-authored-by: Adrian Wälchli Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 36aecde6957ce0c6ebb4e4b8bf8c1738a4c253b6) * [App] Resolve PythonServer on M1 (#15949) Co-authored-by: thomas Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 904323b5b940fc2f22fa60d0be972a3197021806) * Lite: Fix DataLoader shuffling when using DistributedSampler (#15931) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 3004f13d36b3ea0bbcdc7f7f4fbf3e83ccb86bd6) * [App] Temporarily disable ready (#15958) (cherry picked from commit d0b101ce20b5945cdf4d31ca745ede19b68b3cd7) * update date * Fix restarting attribute for lr finder (#15620) (cherry picked from commit 15184c694c41f20812ec25e0aefd2c9e98013211) * [App] Improve pdb for multiprocessing (#15950) Co-authored-by: thomas Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 482b2799f417b7a44601b95023048ae013f7ebeb) * [App] Improve debug triggering (#15951) (cherry picked from commit 772d121ea6a923d3224862d5246858b17fe0e245) * [App] Add automatic conversion to structures (#15961) (cherry picked from commit 67a47d47e1dac383e3cf74b192855c81f24debda) * Make LightningModule torch.jit.script-able again (#15947) * Make LightningModule torch.jit.script-able again * remove skip Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit b5fa8968ecd18eaa7216e18d10b4b2dcf6640ca1) * Fix ImportErrors on Multinode if package not present (#15963) (cherry picked from commit cbd4dd6c5e537f0a34f19d964c60691807462925) * Fix typo in definition of world size in docs (#15954) (cherry picked from commit 7a1e0e801eac755b2da7b84ef3504a3e47b166c8) * [App] Enable running an app from the Gallery (#15941) Co-authored-by: thomas Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ethan Harris Co-authored-by: Jirka (cherry picked from commit 4983083639ed2fbd9509c491a242d6c7203d8112) Co-authored-by: Akihiro Nitta Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ethan Harris Co-authored-by: Sherin Thomas Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> Co-authored-by: thomas chaton Co-authored-by: Adrian Wälchli --- .actions/setup_tools.py | 2 +- .azure/app-cloud-e2e.yml | 2 +- .github/workflows/ci-app-tests.yml | 2 +- docs/source-app/api_references.rst | 8 + .../clouds/cluster_intermediate_1.rst | 2 +- examples/app_dag/requirements.txt | 2 +- pyproject.toml | 11 +- requirements/app/test.txt | 2 +- requirements/app/ui.txt | 2 +- src/lightning_app/CHANGELOG.md | 14 +- src/lightning_app/cli/cmd_install.py | 179 ++++++++++++++++-- src/lightning_app/cli/lightning_cli.py | 80 +------- src/lightning_app/components/auto_scaler.py | 2 - .../components/multi_node/lite.py | 40 +++- .../components/multi_node/pytorch_spawn.py | 2 +- .../components/multi_node/trainer.py | 40 +++- src/lightning_app/components/python/tracer.py | 3 + src/lightning_app/components/serve/gradio.py | 11 +- .../components/serve/python_server.py | 156 ++++++--------- src/lightning_app/components/serve/serve.py | 9 +- .../components/serve/streamlit.py | 3 + src/lightning_app/core/app.py | 12 +- src/lightning_app/core/flow.py | 23 ++- src/lightning_app/core/work.py | 47 ++++- src/lightning_app/perf/pdb.py | 39 ++-- src/lightning_app/utilities/app_helpers.py | 8 +- src/lightning_app/utilities/layout.py | 91 ++++++++- src/lightning_app/utilities/safe_pickle.py | 95 ++++++++++ src/lightning_lite/CHANGELOG.md | 4 +- src/lightning_lite/lite.py | 3 +- src/pytorch_lightning/CHANGELOG.md | 11 +- src/pytorch_lightning/_graveyard/core.py | 24 +-- src/pytorch_lightning/callbacks/lr_finder.py | 2 +- src/pytorch_lightning/core/module.py | 1 - src/pytorch_lightning/tuner/lr_finder.py | 15 +- tests/tests_app/cli/test_cmd_install.py | 54 +++--- .../components/multi_node/__init__.py | 0 .../components/multi_node/test_lite.py | 103 ++++++++++ .../components/multi_node/test_trainer.py | 99 ++++++++++ tests/tests_app/structures/test_structures.py | 26 +++ .../utilities/packaging/test_build_spec.py | 4 +- tests/tests_app/utilities/test_app_helpers.py | 60 +++++- tests/tests_app/utilities/test_layout.py | 143 ++++++++++++++ tests/tests_app/utilities/test_safe_pickle.py | 11 ++ .../utilities/testdata/safe_pickle_app.py | 63 ++++++ tests/tests_lite/test_lite.py | 36 +++- tests/tests_lite/test_parity.py | 8 +- .../core/test_lightning_module.py | 11 ++ tests/tests_pytorch/graveyard/test_core.py | 15 -- tests/tests_pytorch/tuner/test_lr_finder.py | 47 +++++ 50 files changed, 1269 insertions(+), 358 deletions(-) create mode 100644 src/lightning_app/utilities/safe_pickle.py create mode 100644 tests/tests_app/components/multi_node/__init__.py create mode 100644 tests/tests_app/components/multi_node/test_lite.py create mode 100644 tests/tests_app/components/multi_node/test_trainer.py create mode 100644 tests/tests_app/utilities/test_layout.py create mode 100644 tests/tests_app/utilities/test_safe_pickle.py create mode 100644 tests/tests_app/utilities/testdata/safe_pickle_app.py diff --git a/.actions/setup_tools.py b/.actions/setup_tools.py index a91a6681a7383..6ff714d3f6462 100644 --- a/.actions/setup_tools.py +++ b/.actions/setup_tools.py @@ -179,7 +179,7 @@ def _load_aggregate_requirements(req_dir: str = "requirements", freeze_requireme load_requirements(d, file_name="base.txt", unfreeze=not freeze_requirements) for d in glob.glob(os.path.join(req_dir, "*")) # skip empty folder as git artefacts, and resolving Will's special issue - if os.path.isdir(d) and len(glob.glob(os.path.join(d, "*"))) > 0 + if os.path.isdir(d) and len(glob.glob(os.path.join(d, "*"))) > 0 and "__pycache__" not in d ] if not requires: return None diff --git a/.azure/app-cloud-e2e.yml b/.azure/app-cloud-e2e.yml index 821ffc8c426f1..f8e933419f824 100644 --- a/.azure/app-cloud-e2e.yml +++ b/.azure/app-cloud-e2e.yml @@ -51,7 +51,7 @@ jobs: - job: App_cloud_e2e_testing pool: azure-cpus container: - image: mcr.microsoft.com/playwright/python:v1.27.1-focal + image: mcr.microsoft.com/playwright/python:v1.28.0-focal options: "--shm-size=4gb" strategy: matrix: diff --git a/.github/workflows/ci-app-tests.yml b/.github/workflows/ci-app-tests.yml index 5b0619bcdfcbf..f53b3fa9803a3 100644 --- a/.github/workflows/ci-app-tests.yml +++ b/.github/workflows/ci-app-tests.yml @@ -99,7 +99,7 @@ jobs: - name: Adjust tests if: ${{ matrix.pkg-name == 'lightning' }} - run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_app" --target_import="lightning.app" + run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_app,lightning_lite,pytorch_lightning" --target_import="lightning.app,lightning.lite,lightning.pytorch" - name: Adjust examples if: ${{ matrix.pkg-name != 'lightning' }} diff --git a/docs/source-app/api_references.rst b/docs/source-app/api_references.rst index 2540633838502..2272f7bf13c41 100644 --- a/docs/source-app/api_references.rst +++ b/docs/source-app/api_references.rst @@ -32,11 +32,19 @@ ___________________ :nosignatures: :template: classtemplate_no_index.rst + ~database.client.DatabaseClient + ~database.server.Database ~python.popen.PopenPythonScript ~python.tracer.TracerPythonScript ~training.LightningTrainerScript ~serve.gradio.ServeGradio ~serve.serve.ModelInferenceAPI + ~serve.python_server.PythonServer + ~serve.streamlit.ServeStreamlit + ~multi_node.base.MultiNode + ~multi_node.lite.LiteMultiNode + ~multi_node.pytorch_spawn.PyTorchSpawnMultiNode + ~multi_node.trainer.LightningTrainerMultiNode ~auto_scaler.AutoScaler ---- diff --git a/docs/source-pytorch/clouds/cluster_intermediate_1.rst b/docs/source-pytorch/clouds/cluster_intermediate_1.rst index c2d92e200bde0..89d60a85534b2 100644 --- a/docs/source-pytorch/clouds/cluster_intermediate_1.rst +++ b/docs/source-pytorch/clouds/cluster_intermediate_1.rst @@ -24,7 +24,7 @@ PyTorch Lightning follows the design of `PyTorch distributed communication packa - *MASTER_PORT* - required; has to be a free port on machine with NODE_RANK 0 - *MASTER_ADDR* - required (except for NODE_RANK 0); address of NODE_RANK 0 node -- *WORLD_SIZE* - required; how many nodes are in the cluster +- *WORLD_SIZE* - required; the total number of GPUs/processes that you will use - *NODE_RANK* - required; id of the node in the cluster .. _training_script_setup: diff --git a/examples/app_dag/requirements.txt b/examples/app_dag/requirements.txt index 101182e0cd9ab..f669f518e7389 100644 --- a/examples/app_dag/requirements.txt +++ b/examples/app_dag/requirements.txt @@ -1,2 +1,2 @@ -sklearn +scikit-learn pandas diff --git a/pyproject.toml b/pyproject.toml index c3d93a05bf116..8611ef9323deb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,14 +30,12 @@ files = [ "src/lightning_lite", "src/lightning_app", ] +# This section is for folders with "-" as they are not valid python modules exclude = [ + "src/lightning_app/cli/app-template", "src/lightning_app/cli/component-template", "src/lightning_app/cli/pl-app-template", "src/lightning_app/cli/react-ui-template", - "src/lightning_app/cli/app-template", - "src/lightning_app/components/database", - "src/lightning_app/components/multi_node", - "src/lightning_app/frontend/just_py/just_py", ] install_types = "True" non_interactive = "True" @@ -67,7 +65,9 @@ module = [ "lightning_app.api.request_types", "lightning_app.cli.commands.app_commands", "lightning_app.cli.commands.connection", - "lightning_app.cli.react-ui-template.example_app", + "lightning_app.cli.commands.lightning_cli", + "lightning_app.cli.commands.cmd_install", + "lightning_app.cli.cmd_install", "lightning_app.components.database.client", "lightning_app.components.database.server", "lightning_app.components.database.utilities", @@ -94,6 +94,7 @@ module = [ "lightning_app.frontend.streamlit_base", "lightning_app.frontend.utils", "lightning_app.frontend.web", + "lightning_app.perf.pdb", "lightning_app.runners.backends.__init__", "lightning_app.runners.backends.backend", "lightning_app.runners.backends.cloud", diff --git a/requirements/app/test.txt b/requirements/app/test.txt index 1d1bcf271974f..ddbe7f1e0be12 100644 --- a/requirements/app/test.txt +++ b/requirements/app/test.txt @@ -4,7 +4,7 @@ pytest==7.2.0 pytest-timeout==2.1.0 pytest-cov==4.0.0 pytest-doctestplus>=0.9.0 -playwright==1.27.1 +playwright==1.28.0 httpx trio<0.22.0 pympler diff --git a/requirements/app/ui.txt b/requirements/app/ui.txt index fa051d284f0b8..6e73b96c317d3 100644 --- a/requirements/app/ui.txt +++ b/requirements/app/ui.txt @@ -1,2 +1,2 @@ -streamlit>=1.3.1, <=1.11.1 +streamlit>=1.0.0, <=1.15.2 panel>=0.12.7, <=0.13.1 diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 819c4f61e0576..9bae487d2ad88 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). -## [1.8.4] - 2022-12-06 +## [1.8.4] - 2022-12-08 ### Added @@ -12,10 +12,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added the CLI command `lightning run model` to launch a `LightningLite` accelerated script ([#15506](https://github.com/Lightning-AI/lightning/pull/15506)) - Added the CLI command `lightning delete app` to delete a lightning app on the cloud ([#15783](https://github.com/Lightning-AI/lightning/pull/15783)) - Added a CloudMultiProcessBackend which enables running a child App from within the Flow in the cloud ([#15800](https://github.com/Lightning-AI/lightning/pull/15800)) +- Utility for pickling work object safely even from a child process ([#15836](https://github.com/Lightning-AI/lightning/pull/15836)) - Added `AutoScaler` component ([#15769](https://github.com/Lightning-AI/lightning/pull/15769)) - Added the property `ready` of the LightningFlow to inform when the `Open App` should be visible ([#15921](https://github.com/Lightning-AI/lightning/pull/15921)) - Added private work attributed `_start_method` to customize how to start the works ([#15923](https://github.com/Lightning-AI/lightning/pull/15923)) - +- Added a `configure_layout` method to the `LightningWork` which can be used to control how the work is handled in the layout of a parent flow ([#15926](https://github.com/Lightning-AI/lightning/pull/15926)) +- Added the ability to run a Lightning App or Component directly from the Gallery using `lightning run app organization/name` ([#15941](https://github.com/Lightning-AI/lightning/pull/15941)) +- Added automatic conversion of list and dict of works and flows to structures ([#15961](https://github.com/Lightning-AI/lightning/pull/15961)) ### Changed @@ -37,13 +40,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed SSH CLI command listing stopped components ([#15810](https://github.com/Lightning-AI/lightning/pull/15810)) - Fixed bug when launching apps on multiple clusters ([#15484](https://github.com/Lightning-AI/lightning/pull/15484)) - Fixed Sigterm Handler causing thread lock which caused KeyboardInterrupt to hang ([#15881](https://github.com/Lightning-AI/lightning/pull/15881)) +- Fixed MPS error for multinode component (defaults to cpu on mps devices now as distributed operations are not supported by pytorch on mps) ([#15748](https://github.com/Ligtning-AI/lightning/pull/15748)) - Fixed the work not stopped when successful when passed directly to the LightningApp ([#15801](https://github.com/Lightning-AI/lightning/pull/15801)) - Fixed the PyTorch Inference locally on GPU ([#15813](https://github.com/Lightning-AI/lightning/pull/15813)) - Fixed the `enable_spawn` method of the `WorkRunExecutor` ([#15812](https://github.com/Lightning-AI/lightning/pull/15812)) - Fixed require/import decorator ([#15849](https://github.com/Lightning-AI/lightning/pull/15849)) - - Fixed a bug where using `L.app.structures` would cause multiple apps to be opened and fail with an error in the cloud ([#15911](https://github.com/Lightning-AI/lightning/pull/15911)) - +- Fixed PythonServer generating noise on M1 ([#15949](https://github.com/Lightning-AI/lightning/pull/15949)) +- Fixed multiprocessing breakpoint ([#15950](https://github.com/Lightning-AI/lightning/pull/15950)) +- Fixed detection of a Lightning App running in debug mode ([#15951](https://github.com/Lightning-AI/lightning/pull/15951)) +- Fixed `ImportError` on Multinode if package not present ([#15963](https://github.com/Lightning-AI/lightning/pull/15963)) ## [1.8.3] - 2022-11-22 diff --git a/src/lightning_app/cli/cmd_install.py b/src/lightning_app/cli/cmd_install.py index db0467212f147..579a921179b4c 100644 --- a/src/lightning_app/cli/cmd_install.py +++ b/src/lightning_app/cli/cmd_install.py @@ -5,6 +5,7 @@ import sys from typing import Dict, Optional, Tuple +import click import requests from packaging.version import Version @@ -14,7 +15,117 @@ logger = Logger(__name__) -def gallery_component(name: str, yes_arg: bool, version_arg: str, cwd: str = None) -> None: +@click.group(name="install") +def install() -> None: + """Install Lightning AI selfresources.""" + pass + + +@install.command("app") +@click.argument("name", type=str) +@click.option( + "--yes", + "-y", + is_flag=True, + help="disables prompt to ask permission to create env and run install cmds", +) +@click.option( + "--version", + "-v", + type=str, + help="Specify the version to install. By default it uses 'latest'", + default="latest", + show_default=True, +) +@click.option( + "--overwrite", + "-f", + is_flag=True, + default=False, + help="When set, overwrite the app directory without asking if it already exists.", +) +def install_app(name: str, yes: bool, version: str, overwrite: bool = False) -> None: + _install_app_command(name, yes, version, overwrite=overwrite) + + +@install.command("component") +@click.argument("name", type=str) +@click.option( + "--yes", + "-y", + is_flag=True, + help="disables prompt to ask permission to create env and run install cmds", +) +@click.option( + "--version", + "-v", + type=str, + help="Specify the version to install. By default it uses 'latest'", + default="latest", + show_default=True, +) +def install_component(name: str, yes: bool, version: str) -> None: + _install_component_command(name, yes, version) + + +def _install_app_command(name: str, yes: bool, version: str, overwrite: bool = False) -> None: + if "github.com" in name: + if version != "latest": + logger.warn( + "When installing from GitHub, only the 'latest' version is supported. " + f"The provided version ({version}) will be ignored." + ) + return non_gallery_app(name, yes, overwrite=overwrite) + else: + return gallery_app(name, yes, version, overwrite=overwrite) + + +def _install_component_command(name: str, yes: bool, version: str, overwrite: bool = False) -> None: + if "github.com" in name: + if version != "latest": + logger.warn( + "When installing from GitHub, only the 'latest' version is supported. " + f"The provided version ({version}) will be ignored." + ) + return non_gallery_component(name, yes) + else: + return gallery_component(name, yes, version) + + +def gallery_apps_and_components( + name: str, yes_arg: bool, version_arg: str, cwd: str = None, overwrite: bool = False +) -> Optional[str]: + + try: + org, app_or_component = name.split("/") + except Exception: + return None + + entry, kind = _resolve_entry(app_or_component, version_arg) + + if kind == "app": + # give the user the chance to do a manual install + source_url, git_url, folder_name, git_sha = _show_install_app_prompt( + entry, app_or_component, org, yes_arg, resource_type="app" + ) + # run installation if requested + _install_app_from_source(source_url, git_url, folder_name, cwd=cwd, overwrite=overwrite, git_sha=git_sha) + + return os.path.join(os.getcwd(), folder_name, entry["appEntrypointFile"]) + + elif kind == "component": + # give the user the chance to do a manual install + git_url = _show_install_component_prompt(entry, app_or_component, org, yes_arg) + + # run installation if requested + _install_component_from_source(git_url) + + return os.path.join(os.getcwd(), entry["appEntrypointFile"]) + + return None + + +def gallery_component(name: str, yes_arg: bool, version_arg: str, cwd: str = None) -> str: # make sure org/component-name name is correct org, component = _validate_name(name, resource_type="component", example="lightning/LAI-slack-component") @@ -28,7 +139,9 @@ def gallery_component(name: str, yes_arg: bool, version_arg: str, cwd: str = Non git_url = _show_install_component_prompt(component_entry, component, org, yes_arg) # run installation if requested - _install_component(git_url) + _install_component_from_source(git_url) + + return os.path.join(os.getcwd(), component_entry["entrypointFile"]) def non_gallery_component(gh_url: str, yes_arg: bool, cwd: str = None) -> None: @@ -37,10 +150,10 @@ def non_gallery_component(gh_url: str, yes_arg: bool, cwd: str = None) -> None: git_url = _show_non_gallery_install_component_prompt(gh_url, yes_arg) # run installation if requested - _install_component(git_url) + _install_component_from_source(git_url) -def gallery_app(name: str, yes_arg: bool, version_arg: str, cwd: str = None, overwrite: bool = False) -> None: +def gallery_app(name: str, yes_arg: bool, version_arg: str, cwd: str = None, overwrite: bool = False) -> str: # make sure org/app-name syntax is correct org, app = _validate_name(name, resource_type="app", example="lightning/quick-start") @@ -57,7 +170,9 @@ def gallery_app(name: str, yes_arg: bool, version_arg: str, cwd: str = None, ove ) # run installation if requested - _install_app(source_url, git_url, folder_name, cwd=cwd, overwrite=overwrite, git_sha=git_sha) + _install_app_from_source(source_url, git_url, folder_name, cwd=cwd, overwrite=overwrite, git_sha=git_sha) + + return os.path.join(os.getcwd(), folder_name, app_entry["appEntrypointFile"]) def non_gallery_app(gh_url: str, yes_arg: bool, cwd: str = None, overwrite: bool = False) -> None: @@ -66,7 +181,7 @@ def non_gallery_app(gh_url: str, yes_arg: bool, cwd: str = None, overwrite: bool repo_url, folder_name = _show_non_gallery_install_app_prompt(gh_url, yes_arg) # run installation if requested - _install_app(repo_url, repo_url, folder_name, cwd=cwd, overwrite=overwrite) + _install_app_from_source(repo_url, repo_url, folder_name, cwd=cwd, overwrite=overwrite) def _show_install_component_prompt(entry: Dict[str, str], component: str, org: str, yes_arg: bool) -> str: @@ -299,7 +414,35 @@ def _validate_name(name: str, resource_type: str, example: str) -> Tuple[str, st return org, resource -def _resolve_resource(registry_url: str, name: str, version_arg: str, resource_type: str) -> Dict[str, str]: +def _resolve_entry(name, version_arg) -> Tuple[Optional[Dict], Optional[str]]: + entry = None + kind = None + + # resolve registry (orgs can have a private registry through their environment variables) + registry_url = _resolve_app_registry() + + # load the app resource + entry = _resolve_resource(registry_url, name=name, version_arg=version_arg, resource_type="app", raise_error=False) + + if not entry: + + registry_url = _resolve_component_registry() + + # load the component resource + entry = _resolve_resource( + registry_url, name=name, version_arg=version_arg, resource_type="component", raise_error=False + ) + kind = "component" if entry else None + + else: + kind = "app" + + return entry, kind + + +def _resolve_resource( + registry_url: str, name: str, version_arg: str, resource_type: str, raise_error: bool = True +) -> Dict[str, str]: gallery_entries = [] try: response = requests.get(registry_url) @@ -327,7 +470,10 @@ def _resolve_resource(registry_url: str, name: str, version_arg: str, resource_t all_versions.append(x["version"]) if len(entries) == 0: - raise SystemExit(f"{resource_type}: '{name}' is not available on ⚡ Lightning AI ⚡") + if raise_error: + raise SystemExit(f"{resource_type}: '{name}' is not available on ⚡ Lightning AI ⚡") + else: + return None entry = None if version_arg == "latest": @@ -337,11 +483,14 @@ def _resolve_resource(registry_url: str, name: str, version_arg: str, resource_t if e["version"] == version_arg: entry = e break - if entry is None: - raise Exception( - f"{resource_type}: 'Version {version_arg} for {name}' is not available on ⚡ Lightning AI ⚡. " - f"Here is the list of all availables versions:{os.linesep}{os.linesep.join(all_versions)}" - ) + if entry is None and raise_error: + if raise_error: + raise Exception( + f"{resource_type}: 'Version {version_arg} for {name}' is not available on ⚡ Lightning AI ⚡. " + f"Here is the list of all availables versions:{os.linesep}{os.linesep.join(all_versions)}" + ) + else: + return None return entry @@ -381,7 +530,7 @@ def _install_with_env(repo_url: str, folder_name: str, cwd: str = None) -> None: logger.info(m) -def _install_app( +def _install_app_from_source( source_url: str, git_url: str, folder_name: str, cwd: str = None, overwrite: bool = False, git_sha: str = None ) -> None: """Installing lighting app from the `git_url` @@ -458,7 +607,7 @@ def _install_app( logger.info(m) -def _install_component(git_url: str) -> None: +def _install_component_from_source(git_url: str) -> None: logger.info("⚡ RUN: pip install") out = subprocess.check_output(["pip", "install", git_url]) diff --git a/src/lightning_app/cli/lightning_cli.py b/src/lightning_app/cli/lightning_cli.py index 1a9f2ece528ad..68027e7784f0b 100644 --- a/src/lightning_app/cli/lightning_cli.py +++ b/src/lightning_app/cli/lightning_cli.py @@ -231,7 +231,14 @@ def _run_app( secret: tuple, run_app_comment_commands: bool, ) -> None: - file = _prepare_file(file) + + if not os.path.exists(file): + original_file = file + file = cmd_install.gallery_apps_and_components(file, True, "latest", overwrite=False) # type: ignore[assignment] # noqa E501 + if file is None: + click.echo(f"The provided entrypoint `{original_file}` doesn't exist.") + sys.exit(1) + run_app_comment_commands = True if not cloud and cluster_id is not None: raise click.ClickException("Using the flag --cluster-id in local execution is not supported.") @@ -287,7 +294,7 @@ def run() -> None: @run.command("app") -@click.argument("file", type=click.Path(exists=True)) +@click.argument("file", type=str) @click.option("--cloud", type=bool, default=False, is_flag=True) @click.option( "--cluster-id", @@ -366,6 +373,7 @@ def stop() -> None: _main.add_command(get_list) _main.add_command(delete) _main.add_command(create) +_main.add_command(cmd_install.install) @_main.command("ssh") @@ -449,74 +457,6 @@ def ssh(app_name: str = None, component_name: str = None) -> None: os.execv(ssh_path, ["-tt", f"{component_id}@{ssh_endpoint}"]) -@_main.group() -def install() -> None: - """Install a Lightning App and/or component.""" - - -@install.command("app") -@click.argument("name", type=str) -@click.option( - "--yes", - "-y", - is_flag=True, - help="disables prompt to ask permission to create env and run install cmds", -) -@click.option( - "--version", - "-v", - type=str, - help="Specify the version to install. By default it uses 'latest'", - default="latest", - show_default=True, -) -@click.option( - "--overwrite", - "-f", - is_flag=True, - default=False, - help="When set, overwrite the app directory without asking if it already exists.", -) -def install_app(name: str, yes: bool, version: str, overwrite: bool = False) -> None: - if "github.com" in name: - if version != "latest": - logger.warn( - f"The provided version {version} isn't the officially supported one. " - f"The provided version will be ignored." - ) - cmd_install.non_gallery_app(name, yes, overwrite=overwrite) - else: - cmd_install.gallery_app(name, yes, version, overwrite=overwrite) - - -@install.command("component") -@click.argument("name", type=str) -@click.option( - "--yes", - "-y", - is_flag=True, - help="disables prompt to ask permission to create env and run install cmds", -) -@click.option( - "--version", - "-v", - type=str, - help="Specify the version to install. By default it uses 'latest'", - default="latest", - show_default=True, -) -def install_component(name: str, yes: bool, version: str) -> None: - if "github.com" in name: - if version != "latest": - logger.warn( - f"The provided version {version} isn't the officially supported one. " - f"The provided version will be ignored." - ) - cmd_install.non_gallery_component(name, yes) - else: - cmd_install.gallery_component(name, yes, version) - - @_main.group() def init() -> None: """Init a Lightning App and/or component.""" diff --git a/src/lightning_app/components/auto_scaler.py b/src/lightning_app/components/auto_scaler.py index 4d41bf370bef3..62e6180c49665 100644 --- a/src/lightning_app/components/auto_scaler.py +++ b/src/lightning_app/components/auto_scaler.py @@ -206,7 +206,6 @@ async def process_request(self, data: BaseModel): return result def run(self): - logger.info(f"servers: {self.servers}") lock = asyncio.Lock() @@ -271,7 +270,6 @@ async def sys_info(authenticated: bool = Depends(authenticate_private_endpoint)) async def update_servers(servers: List[str], authenticated: bool = Depends(authenticate_private_endpoint)): async with lock: self.servers = servers - self._iter = cycle(self.servers) @fastapi_app.post(self.endpoint, response_model=self._output_type) diff --git a/src/lightning_app/components/multi_node/lite.py b/src/lightning_app/components/multi_node/lite.py index 2a9b33b0880d1..003c6dfb1c75a 100644 --- a/src/lightning_app/components/multi_node/lite.py +++ b/src/lightning_app/components/multi_node/lite.py @@ -1,4 +1,6 @@ +import importlib import os +import warnings from dataclasses import dataclass from typing import Any, Callable, Type @@ -30,8 +32,19 @@ def run( node_rank: int, nprocs: int, ): - from lightning.lite import LightningLite - from lightning.lite.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy + lites = [] + strategies = [] + mps_accelerators = [] + + for pkg_name in ("lightning.lite", "lightning_" + "lite"): + try: + pkg = importlib.import_module(pkg_name) + lites.append(pkg.LightningLite) + strategies.append(pkg.strategies.DDPSpawnShardedStrategy) + strategies.append(pkg.strategies.DDPSpawnStrategy) + mps_accelerators.append(pkg.accelerators.MPSAccelerator) + except (ImportError, ModuleNotFoundError): + continue # Used to configure PyTorch progress group os.environ["MASTER_ADDR"] = main_address @@ -52,7 +65,15 @@ def run( def pre_fn(lite, *args, **kwargs): kwargs["devices"] = nprocs kwargs["num_nodes"] = num_nodes - kwargs["accelerator"] = "auto" + + if any(acc.is_available() for acc in mps_accelerators): + old_acc_value = kwargs.get("accelerator", "auto") + kwargs["accelerator"] = "cpu" + + if old_acc_value != kwargs["accelerator"]: + warnings.warn("Forcing `accelerator=cpu` as MPS does not support distributed training.") + else: + kwargs["accelerator"] = "auto" strategy = kwargs.get("strategy", None) if strategy: if isinstance(strategy, str): @@ -60,15 +81,20 @@ def pre_fn(lite, *args, **kwargs): strategy = "ddp" elif strategy == "ddp_sharded_spawn": strategy = "ddp_sharded" - elif isinstance(strategy, (DDPSpawnStrategy, DDPSpawnShardedStrategy)): - raise Exception("DDP Spawned strategies aren't supported yet.") + elif isinstance(strategy, tuple(strategies)): + raise ValueError("DDP Spawned strategies aren't supported yet.") + + kwargs["strategy"] = strategy + return {}, args, kwargs tracer = Tracer() - tracer.add_traced(LightningLite, "__init__", pre_fn=pre_fn) + for ll in lites: + tracer.add_traced(ll, "__init__", pre_fn=pre_fn) tracer._instrument() - work_run() + ret_val = work_run() tracer._restore() + return ret_val class LiteMultiNode(MultiNode): diff --git a/src/lightning_app/components/multi_node/pytorch_spawn.py b/src/lightning_app/components/multi_node/pytorch_spawn.py index 3119ffc51e0b5..013bdbcaec347 100644 --- a/src/lightning_app/components/multi_node/pytorch_spawn.py +++ b/src/lightning_app/components/multi_node/pytorch_spawn.py @@ -88,7 +88,7 @@ def run( elif world_size > 1: raise Exception("Torch distributed should be available.") - work_run(world_size, node_rank, global_rank, local_rank) + return work_run(world_size, node_rank, global_rank, local_rank) class PyTorchSpawnMultiNode(MultiNode): diff --git a/src/lightning_app/components/multi_node/trainer.py b/src/lightning_app/components/multi_node/trainer.py index 222f71ce59557..76d744e24608c 100644 --- a/src/lightning_app/components/multi_node/trainer.py +++ b/src/lightning_app/components/multi_node/trainer.py @@ -1,4 +1,6 @@ +import importlib import os +import warnings from dataclasses import dataclass from typing import Any, Callable, Type @@ -30,9 +32,19 @@ def run( node_rank: int, nprocs: int, ): - from lightning.lite.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy - from lightning.pytorch import Trainer as LTrainer - from pytorch_lightning import Trainer as PLTrainer + trainers = [] + strategies = [] + mps_accelerators = [] + + for pkg_name in ("lightning.pytorch", "pytorch_" + "lightning"): + try: + pkg = importlib.import_module(pkg_name) + trainers.append(pkg.Trainer) + strategies.append(pkg.strategies.DDPSpawnShardedStrategy) + strategies.append(pkg.strategies.DDPSpawnStrategy) + mps_accelerators.append(pkg.accelerators.MPSAccelerator) + except (ImportError, ModuleNotFoundError): + continue # Used to configure PyTorch progress group os.environ["MASTER_ADDR"] = main_address @@ -50,7 +62,15 @@ def run( def pre_fn(trainer, *args, **kwargs): kwargs["devices"] = nprocs kwargs["num_nodes"] = num_nodes - kwargs["accelerator"] = "auto" + if any(acc.is_available() for acc in mps_accelerators): + old_acc_value = kwargs.get("accelerator", "auto") + kwargs["accelerator"] = "cpu" + + if old_acc_value != kwargs["accelerator"]: + warnings.warn("Forcing `accelerator=cpu` as MPS does not support distributed training.") + else: + kwargs["accelerator"] = "auto" + strategy = kwargs.get("strategy", None) if strategy: if isinstance(strategy, str): @@ -58,16 +78,18 @@ def pre_fn(trainer, *args, **kwargs): strategy = "ddp" elif strategy == "ddp_sharded_spawn": strategy = "ddp_sharded" - elif isinstance(strategy, (DDPSpawnStrategy, DDPSpawnShardedStrategy)): - raise Exception("DDP Spawned strategies aren't supported yet.") + elif isinstance(strategy, tuple(strategies)): + raise ValueError("DDP Spawned strategies aren't supported yet.") + kwargs["strategy"] = strategy return {}, args, kwargs tracer = Tracer() - tracer.add_traced(PLTrainer, "__init__", pre_fn=pre_fn) - tracer.add_traced(LTrainer, "__init__", pre_fn=pre_fn) + for trainer in trainers: + tracer.add_traced(trainer, "__init__", pre_fn=pre_fn) tracer._instrument() - work_run() + ret_val = work_run() tracer._restore() + return ret_val class LightningTrainerMultiNode(MultiNode): diff --git a/src/lightning_app/components/python/tracer.py b/src/lightning_app/components/python/tracer.py index c476f083258fc..d10ca92252ed8 100644 --- a/src/lightning_app/components/python/tracer.py +++ b/src/lightning_app/components/python/tracer.py @@ -22,6 +22,9 @@ class Code(TypedDict): class TracerPythonScript(LightningWork): + + _start_method = "spawn" + def on_before_run(self): """Called before the python script is executed.""" diff --git a/src/lightning_app/components/serve/gradio.py b/src/lightning_app/components/serve/gradio.py index 6e9b1d8777f67..7c07129d39b25 100644 --- a/src/lightning_app/components/serve/gradio.py +++ b/src/lightning_app/components/serve/gradio.py @@ -1,10 +1,8 @@ import abc -import os from functools import partial from types import ModuleType from typing import Any, List, Optional -from lightning_app.components.serve.python_server import _PyTorchSpawnRunExecutor, WorkRunExecutor from lightning_app.core.work import LightningWork from lightning_app.utilities.imports import _is_gradio_available, requires @@ -36,15 +34,13 @@ class ServeGradio(LightningWork, abc.ABC): title: Optional[str] = None description: Optional[str] = None + _start_method = "spawn" + def __init__(self, *args, **kwargs): requires("gradio")(super().__init__(*args, **kwargs)) assert self.inputs assert self.outputs self._model = None - # Note: Enable to run inference on GPUs. - self._run_executor_cls = ( - WorkRunExecutor if os.getenv("LIGHTNING_CLOUD_APP_ID", None) else _PyTorchSpawnRunExecutor - ) @property def model(self): @@ -78,3 +74,6 @@ def run(self, *args, **kwargs): server_port=self.port, enable_queue=self.enable_queue, ) + + def configure_layout(self) -> str: + return self.url diff --git a/src/lightning_app/components/serve/python_server.py b/src/lightning_app/components/serve/python_server.py index 99d51ac1cf4fc..1868b0b357fd3 100644 --- a/src/lightning_app/components/serve/python_server.py +++ b/src/lightning_app/components/serve/python_server.py @@ -1,20 +1,18 @@ import abc import base64 import os +import platform from pathlib import Path from typing import Any, Dict, Optional import uvicorn from fastapi import FastAPI -from lightning_utilities.core.imports import module_available +from lightning_utilities.core.imports import compare_version, module_available from pydantic import BaseModel -from starlette.staticfiles import StaticFiles -from lightning_app.core.queues import MultiProcessQueue from lightning_app.core.work import LightningWork from lightning_app.utilities.app_helpers import Logger from lightning_app.utilities.imports import _is_torch_available, requires -from lightning_app.utilities.proxies import _proxy_setattr, unwrap, WorkRunExecutor, WorkStateObserver logger = Logger(__name__) @@ -28,44 +26,19 @@ __doctest_skip__ += ["PythonServer", "PythonServer.*"] -class _PyTorchSpawnRunExecutor(WorkRunExecutor): +def _get_device(): + import operator - """This Executor enables to move PyTorch tensors on GPU. + import torch - Without this executor, it would raise the following exception: - RuntimeError: Cannot re-initialize CUDA in forked subprocess. - To use CUDA with multiprocessing, you must use the 'spawn' start method - """ + _TORCH_GREATER_EQUAL_1_12 = compare_version("torch", operator.ge, "1.12.0") - enable_start_observer: bool = False + local_rank = int(os.getenv("LOCAL_RANK", "0")) - def __call__(self, *args: Any, **kwargs: Any): - import torch - - with self.enable_spawn(): - queue = self.delta_queue if isinstance(self.delta_queue, MultiProcessQueue) else self.delta_queue.to_dict() - torch.multiprocessing.spawn( - self.dispatch_run, - args=(self.__class__, self.work, queue, args, kwargs), - nprocs=1, - ) - - @staticmethod - def dispatch_run(local_rank, cls, work, delta_queue, args, kwargs): - if local_rank == 0: - if isinstance(delta_queue, dict): - delta_queue = cls.process_queue(delta_queue) - work._request_queue = cls.process_queue(work._request_queue) - work._response_queue = cls.process_queue(work._response_queue) - - state_observer = WorkStateObserver(work, delta_queue=delta_queue) - state_observer.start() - _proxy_setattr(work, delta_queue, state_observer) - - unwrap(work.run)(*args, **kwargs) - - if local_rank == 0: - state_observer.join(0) + if _TORCH_GREATER_EQUAL_1_12 and torch.backends.mps.is_available() and platform.processor() in ("arm", "arm64"): + return torch.device("mps", local_rank) + else: + return torch.device(f"cuda:{local_rank}" if torch.cuda.is_available() else "cpu") class _DefaultInputData(BaseModel): @@ -96,6 +69,9 @@ def _get_sample_data() -> Dict[Any, Any]: class PythonServer(LightningWork, abc.ABC): + + _start_method = "spawn" + @requires(["torch", "lightning_api_access"]) def __init__( # type: ignore self, @@ -114,26 +90,26 @@ def __init__( # type: ignore The default data type is good enough for the basic usecases and it expects the data to be a json object that has one key called `payload` - ``` - input_data = {"payload": "some data"} - ``` + .. code-block:: python + + input_data = {"payload": "some data"} and this can be accessed as `request.payload` in the `predict` method. - ``` - def predict(self, request): - data = request.payload - ``` + .. code-block:: python + + def predict(self, request): + data = request.payload output_type: Optional `output_type` to be provided. This needs to be a pydantic BaseModel class. The default data type is good enough for the basic usecases. It expects the return value of the `predict` method to be a dictionary with one key called `prediction`. - ``` - def predict(self, request): - # some code - return {"prediction": "some data"} - ``` + .. code-block:: python + + def predict(self, request): + # some code + return {"prediction": "some data"} and this can be accessed as `response.json()["prediction"]` in the client if you are using requests library @@ -161,11 +137,6 @@ def predict(self, request): self._input_type = input_type self._output_type = output_type - # Note: Enable to run inference on GPUs. - self._run_executor_cls = ( - WorkRunExecutor if os.getenv("LIGHTNING_CLOUD_APP_ID", None) else _PyTorchSpawnRunExecutor - ) - def setup(self, *args, **kwargs) -> None: """This method is called before the server starts. Override this if you need to download the model or initialize the weights, setting up pipelines etc. @@ -211,60 +182,44 @@ def _get_sample_dict_from_datatype(datatype: Any) -> dict: return out def _attach_predict_fn(self, fastapi_app: FastAPI) -> None: - from torch import inference_mode + from torch import inference_mode, no_grad input_type: type = self.configure_input_type() output_type: type = self.configure_output_type() + device = _get_device() + context = no_grad if device.type == "mps" else inference_mode + def predict_fn(request: input_type): # type: ignore - with inference_mode(): + with context(): return self.predict(request) fastapi_app.post("/predict", response_model=output_type)(predict_fn) - def _attach_frontend(self, fastapi_app: FastAPI) -> None: - from lightning_api_access import APIAccessFrontend - - class_name = self.__class__.__name__ - url = self._future_url if self._future_url else self.url - if not url: - # if the url is still empty, point it to localhost - url = f"http://127.0.0.1:{self.port}" - url = f"{url}/predict" - datatype_parse_error = False - try: - request = self._get_sample_dict_from_datatype(self.configure_input_type()) - except TypeError: - datatype_parse_error = True - - try: - response = self._get_sample_dict_from_datatype(self.configure_output_type()) - except TypeError: - datatype_parse_error = True - - if datatype_parse_error: - - @fastapi_app.get("/") - def index() -> str: - return ( - "Automatic generation of the UI is only supported for simple, " - "non-nested datatype with types string, integer, float and boolean" - ) - - return - - frontend = APIAccessFrontend( - apis=[ - { - "name": class_name, - "url": url, - "method": "POST", - "request": request, - "response": response, - } - ] - ) - fastapi_app.mount("/", StaticFiles(directory=frontend.serve_dir, html=True), name="static") + def configure_layout(self) -> None: + if module_available("lightning_api_access"): + from lightning_api_access import APIAccessFrontend + + class_name = self.__class__.__name__ + url = f"{self.url}/predict" + + try: + request = self._get_sample_dict_from_datatype(self.configure_input_type()) + response = self._get_sample_dict_from_datatype(self.configure_output_type()) + except TypeError: + return None + + return APIAccessFrontend( + apis=[ + { + "name": class_name, + "url": url, + "method": "POST", + "request": request, + "response": response, + } + ] + ) def run(self, *args: Any, **kwargs: Any) -> Any: """Run method takes care of configuring and setting up a FastAPI server behind the scenes. @@ -275,7 +230,6 @@ def run(self, *args: Any, **kwargs: Any) -> Any: fastapi_app = FastAPI() self._attach_predict_fn(fastapi_app) - self._attach_frontend(fastapi_app) logger.info(f"Your app has started. View it in your browser: http://{self.host}:{self.port}") uvicorn.run(app=fastapi_app, host=self.host, port=self.port, log_level="error") diff --git a/src/lightning_app/components/serve/serve.py b/src/lightning_app/components/serve/serve.py index 150ca522e591b..8b6f35364cc38 100644 --- a/src/lightning_app/components/serve/serve.py +++ b/src/lightning_app/components/serve/serve.py @@ -10,7 +10,6 @@ import uvicorn from fastapi import FastAPI from fastapi.responses import JSONResponse -from starlette.responses import RedirectResponse from lightning_app.components.serve.types import _DESERIALIZER, _SERIALIZER from lightning_app.core.work import LightningWork @@ -37,10 +36,6 @@ async def run(self, data) -> Any: return self.serialize(self.predict(self.deserialize(data))) -async def _redirect(): - return RedirectResponse("/docs") - - class ModelInferenceAPI(LightningWork, abc.ABC): def __init__( self, @@ -121,7 +116,6 @@ def run(self): def _populate_app(self, fastapi_service: FastAPI): self._model = self.build_model() - fastapi_service.get("/")(_redirect) fastapi_service.post("/predict", response_class=JSONResponse)( _InferenceCallable( deserialize=_DESERIALIZER[self.input] if self.input else self.deserialize, @@ -134,6 +128,9 @@ def _launch_server(self, fastapi_service: FastAPI): logger.info(f"Your app has started. View it in your browser: http://{self.host}:{self.port}") uvicorn.run(app=fastapi_service, host=self.host, port=self.port, log_level="error") + def configure_layout(self) -> str: + return f"{self.url}/docs" + def _maybe_create_instance() -> Optional[ModelInferenceAPI]: """This function tries to re-create the user `ModelInferenceAPI` if the environment associated with multi diff --git a/src/lightning_app/components/serve/streamlit.py b/src/lightning_app/components/serve/streamlit.py index ed543bd1de7b8..1a325d60fecee 100644 --- a/src/lightning_app/components/serve/streamlit.py +++ b/src/lightning_app/components/serve/streamlit.py @@ -63,6 +63,9 @@ def on_exit(self) -> None: if self._process is not None: self._process.kill() + def configure_layout(self) -> str: + return self.url + class _PatchedWork: """The ``_PatchedWork`` is used to emulate a work instance from a subprocess. This is acheived by patching the diff --git a/src/lightning_app/core/app.py b/src/lightning_app/core/app.py index bcf3c2789098e..d9389ecd27e24 100644 --- a/src/lightning_app/core/app.py +++ b/src/lightning_app/core/app.py @@ -93,12 +93,10 @@ def __init__( >>> from lightning_app.runners import MultiProcessRuntime >>> class RootFlow(LightningFlow): ... def run(self): - ... print("Hello World!") ... self._exit() ... >>> app = LightningApp(RootFlow()) # application can be dispatched using the `runners`. >>> MultiProcessRuntime(app).dispatch() - Hello World! """ self.root_path = root_path # when running behind a proxy @@ -486,7 +484,15 @@ def _run(self) -> bool: """ self._original_state = deepcopy(self.state) done = False - self.ready = self.root.ready + + # TODO: Re-enable the `ready` property once issues are resolved + if not self.root.ready: + warnings.warn( + "One of your Flows returned `.ready` as `False`. " + "This feature is not yet enabled so this will be ignored.", + UserWarning, + ) + self.ready = True self._start_with_flow_works() diff --git a/src/lightning_app/core/flow.py b/src/lightning_app/core/flow.py index 56947b0d2cbef..a79794bac3d20 100644 --- a/src/lightning_app/core/flow.py +++ b/src/lightning_app/core/flow.py @@ -10,7 +10,7 @@ from lightning_app.frontend import Frontend from lightning_app.storage import Path from lightning_app.storage.drive import _maybe_create_drive, Drive -from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, _set_child_name +from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, _set_child_name, is_overridden from lightning_app.utilities.component import _sanitize_state from lightning_app.utilities.exceptions import ExitAppException from lightning_app.utilities.introspection import _is_init_context, _is_run_context @@ -142,6 +142,14 @@ def __setattr__(self, name: str, value: Any) -> None: if name in self._works and value != getattr(self, name): raise AttributeError(f"Cannot set attributes as the work can't be changed once defined: {name}") + if isinstance(value, (list, dict)) and value: + _type = (LightningFlow, LightningWork, List, Dict) + if isinstance(value, list) and all(isinstance(va, _type) for va in value): + value = List(*value) + + if isinstance(value, dict) and all(isinstance(va, _type) for va in value.values()): + value = Dict(**value) + if isinstance(value, LightningFlow): self._flows.add(name) _set_child_name(self, value, name) @@ -163,10 +171,10 @@ def __setattr__(self, name: str, value: Any) -> None: value._register_cloud_compute() elif isinstance(value, (Dict, List)): - value._backend = self._backend self._structures.add(name) _set_child_name(self, value, name) - if self._backend: + if getattr(self, "_backend", None) is not None: + value._backend = self._backend for flow in value.flows: LightningFlow._attach_backend(flow, self._backend) for work in value.works: @@ -232,7 +240,10 @@ def __getattr__(self, item): @property def ready(self) -> bool: - """Override to customize when your App should be ready.""" + """Not currently enabled. + + Override to customize when your App should be ready. + """ flows = self.flows return all(flow.ready for flow in flows.values()) if flows else True @@ -777,4 +788,6 @@ def run(self): self.work.run() def configure_layout(self): - return [{"name": "Main", "content": self.work}] + if is_overridden("configure_layout", self.work): + return [{"name": "Main", "content": self.work}] + return [] diff --git a/src/lightning_app/core/work.py b/src/lightning_app/core/work.py index 857cbc9447ff1..60d1ea62d8afb 100644 --- a/src/lightning_app/core/work.py +++ b/src/lightning_app/core/work.py @@ -3,7 +3,7 @@ import warnings from copy import deepcopy from functools import partial, wraps -from typing import Any, Callable, Dict, List, Optional, Type, Union +from typing import Any, Callable, Dict, List, Optional, Type, TYPE_CHECKING, Union from deepdiff import DeepHash, Delta @@ -33,6 +33,9 @@ ) from lightning_app.utilities.proxies import Action, LightningWorkSetAttrProxy, ProxyWorkRun, unwrap, WorkRunExecutor +if TYPE_CHECKING: + from lightning_app.frontend import Frontend + class LightningWork: @@ -629,3 +632,45 @@ def apply_flow_delta(self, delta: Delta): property_object.fset(self, value) else: self._default_setattr(name, value) + + def configure_layout(self) -> Union[None, str, "Frontend"]: + """Configure the UI of this LightningWork. + + You can either + + 1. Return a single :class:`~lightning_app.frontend.frontend.Frontend` object to serve a user interface + for this Work. + 2. Return a string containing a URL to act as the user interface for this Work. + 3. Return ``None`` to indicate that this Work doesn't currently have a user interface. + + **Example:** Serve a static directory (with at least a file index.html inside). + + .. code-block:: python + + from lightning_app.frontend import StaticWebFrontend + + + class Work(LightningWork): + def configure_layout(self): + return StaticWebFrontend("path/to/folder/to/serve") + + **Example:** Arrange the UI of my children in tabs (default UI by Lightning). + + .. code-block:: python + + class Work(LightningWork): + def configure_layout(self): + return [ + dict(name="First Tab", content=self.child0), + dict(name="Second Tab", content=self.child1), + dict(name="Lightning", content="https://lightning.ai"), + ] + + If you don't implement ``configure_layout``, Lightning will use ``self.url``. + + Note: + This hook gets called at the time of app creation and then again as part of the loop. If desired, a + returned URL can depend on the state. This is not the case if the work returns a + :class:`~lightning_app.frontend.frontend.Frontend`. These need to be provided at the time of app creation + in order for the runtime to start the server. + """ diff --git a/src/lightning_app/perf/pdb.py b/src/lightning_app/perf/pdb.py index 5bd56960715e3..f4b42f96e842d 100644 --- a/src/lightning_app/perf/pdb.py +++ b/src/lightning_app/perf/pdb.py @@ -1,19 +1,36 @@ +import multiprocessing +import os import pdb import sys -from typing import Any +_stdin = [None] +_stdin_lock = multiprocessing.Lock() +try: + _stdin_fd = sys.stdin.fileno() +except Exception: + _stdin_fd = None + +# Taken from https://github.com/facebookresearch/metaseq/blob/main/metaseq/pdb.py class MPPdb(pdb.Pdb): - """debugger for forked programs.""" + """A Pdb wrapper that works in a multiprocessing environment.""" + + def __init__(self) -> None: + pdb.Pdb.__init__(self, nosigint=True) - def interaction(self, *args: Any, **kwargs: Any) -> None: - _stdin = sys.stdin - try: - sys.stdin = open("/dev/stdin") - pdb.Pdb.interaction(self, *args, **kwargs) - finally: - sys.stdin = _stdin + def _cmdloop(self) -> None: + stdin_back = sys.stdin + with _stdin_lock: + try: + if _stdin_fd is not None: + if not _stdin[0]: + _stdin[0] = os.fdopen(_stdin_fd) + sys.stdin = _stdin[0] + self.cmdloop() + finally: + sys.stdin = stdin_back -def set_trace(*args: Any, **kwargs: Any) -> None: - MPPdb().set_trace(*args, **kwargs) +def set_trace() -> None: + pdb = MPPdb() + pdb.set_trace(sys._getframe().f_back) diff --git a/src/lightning_app/utilities/app_helpers.py b/src/lightning_app/utilities/app_helpers.py index f07ae6bc88c1c..665c50889676c 100644 --- a/src/lightning_app/utilities/app_helpers.py +++ b/src/lightning_app/utilities/app_helpers.py @@ -511,14 +511,10 @@ def is_static_method(klass_or_instance, attr) -> bool: return isinstance(inspect.getattr_static(klass_or_instance, attr), staticmethod) -def _debugger_is_active() -> bool: - """Return if the debugger is currently active.""" - return hasattr(sys, "gettrace") and sys.gettrace() is not None - - def _should_dispatch_app() -> bool: return ( - _debugger_is_active() + __debug__ + and "_pytest.doctest" not in sys.modules and not bool(int(os.getenv("LIGHTNING_DISPATCHED", "0"))) and "LIGHTNING_APP_STATE_URL" not in os.environ ) diff --git a/src/lightning_app/utilities/layout.py b/src/lightning_app/utilities/layout.py index 15079fcb6964b..11f26019cb406 100644 --- a/src/lightning_app/utilities/layout.py +++ b/src/lightning_app/utilities/layout.py @@ -4,7 +4,7 @@ import lightning_app from lightning_app.frontend.frontend import Frontend -from lightning_app.utilities.app_helpers import _MagicMockJsonSerializable +from lightning_app.utilities.app_helpers import _MagicMockJsonSerializable, is_overridden from lightning_app.utilities.cloud import is_running_in_cloud @@ -45,9 +45,9 @@ def _collect_layout(app: "lightning_app.LightningApp", flow: "lightning_app.Ligh app.frontends.setdefault(flow.name, "mock") return flow._layout elif isinstance(layout, dict): - layout = _collect_content_layout([layout], flow) + layout = _collect_content_layout([layout], app, flow) elif isinstance(layout, (list, tuple)) and all(isinstance(item, dict) for item in layout): - layout = _collect_content_layout(layout, flow) + layout = _collect_content_layout(layout, app, flow) else: lines = _add_comment_to_literal_code(flow.configure_layout, contains="return", comment=" <------- this guy") m = f""" @@ -76,7 +76,9 @@ def configure_layout(self): return layout -def _collect_content_layout(layout: List[Dict], flow: "lightning_app.LightningFlow") -> List[Dict]: +def _collect_content_layout( + layout: List[Dict], app: "lightning_app.LightningApp", flow: "lightning_app.LightningFlow" +) -> Union[List[Dict], Dict]: """Process the layout returned by the ``configure_layout()`` method if the returned format represents an aggregation of child layouts.""" for entry in layout: @@ -102,12 +104,43 @@ def _collect_content_layout(layout: List[Dict], flow: "lightning_app.LightningFl entry["content"] = entry["content"].name elif isinstance(entry["content"], lightning_app.LightningWork): - if entry["content"].url and not entry["content"].url.startswith("/"): - entry["content"] = entry["content"].url - entry["target"] = entry["content"] - else: + work = entry["content"] + work_layout = _collect_work_layout(work) + + if work_layout is None: entry["content"] = "" - entry["target"] = "" + elif isinstance(work_layout, str): + entry["content"] = work_layout + entry["target"] = work_layout + elif isinstance(work_layout, (Frontend, _MagicMockJsonSerializable)): + if len(layout) > 1: + lines = _add_comment_to_literal_code( + flow.configure_layout, contains="return", comment=" <------- this guy" + ) + m = f""" + The return value of configure_layout() in `{flow.__class__.__name__}` is an + unsupported format: + \n{lines} + + The tab containing a `{work.__class__.__name__}` must be the only tab in the + layout of this flow. + + (see the docs for `LightningWork.configure_layout`). + """ + raise TypeError(m) + + if isinstance(work_layout, Frontend): + # If the work returned a frontend, treat it as belonging to the flow. + # NOTE: This could evolve in the future to run the Frontend directly in the work machine. + frontend = work_layout + frontend.flow = flow + elif isinstance(work_layout, _MagicMockJsonSerializable): + # The import was mocked, we set a dummy `Frontend` so that `is_headless` knows there is a UI. + frontend = "mock" + + app.frontends.setdefault(flow.name, frontend) + return flow._layout + elif isinstance(entry["content"], _MagicMockJsonSerializable): # The import was mocked, we just record dummy content so that `is_headless` knows there is a UI entry["content"] = "mock" @@ -126,3 +159,43 @@ def configure_layout(self): """ raise ValueError(m) return layout + + +def _collect_work_layout(work: "lightning_app.LightningWork") -> Union[None, str, Frontend, _MagicMockJsonSerializable]: + """Check if ``configure_layout`` is overridden on the given work and return the work layout (either a string, a + ``Frontend`` object, or an instance of a mocked import). + + Args: + work: The work to collect the layout for. + + Raises: + TypeError: If the value returned by ``configure_layout`` is not of a supported format. + """ + if is_overridden("configure_layout", work): + work_layout = work.configure_layout() + else: + work_layout = work.url + + if work_layout is None: + return None + elif isinstance(work_layout, str): + url = work_layout + # The URL isn't fully defined yet. Looks something like ``self.work.url + /something``. + if url and not url.startswith("/"): + return url + return "" + elif isinstance(work_layout, (Frontend, _MagicMockJsonSerializable)): + return work_layout + else: + m = f""" + The value returned by `{work.__class__.__name__}.configure_layout()` is of an unsupported type. + + {repr(work_layout)} + + Return a `Frontend` or a URL string, for example: + + class {work.__class__.__name__}(LightningWork): + def configure_layout(self): + return MyFrontend() OR 'http://some/url' + """ + raise TypeError(m) diff --git a/src/lightning_app/utilities/safe_pickle.py b/src/lightning_app/utilities/safe_pickle.py new file mode 100644 index 0000000000000..8788ff22a3cb6 --- /dev/null +++ b/src/lightning_app/utilities/safe_pickle.py @@ -0,0 +1,95 @@ +import contextlib +import pickle +import sys +import types +import typing +from copy import deepcopy +from pathlib import Path + +from lightning_app.core.work import LightningWork +from lightning_app.utilities.app_helpers import _LightningAppRef + +NON_PICKLABLE_WORK_ATTRIBUTES = ["_request_queue", "_response_queue", "_backend", "_setattr_replacement"] + + +@contextlib.contextmanager +def _trimmed_work(work: LightningWork, to_trim: typing.List[str]) -> typing.Iterator[None]: + """Context manager to trim the work object to remove attributes that are not picklable.""" + holder = {} + for arg in to_trim: + holder[arg] = getattr(work, arg) + setattr(work, arg, None) + yield + for arg in to_trim: + setattr(work, arg, holder[arg]) + + +def get_picklable_work(work: LightningWork) -> LightningWork: + """Pickling a LightningWork instance fails if done from the work process + itself. This function is safe to call from the work process within both MultiprocessRuntime + and Cloud. + Note: This function modifies the module information of the work object. Specifically, it injects + the relative module path into the __module__ attribute of the work object. If the object is not + importable from the CWD, then the pickle load will fail. + + Example: + for a directory structure like below and the work class is defined in the app.py where + the app.py is the entrypoint for the app, it will inject `foo.bar.app` into the + __module__ attribute + + └── foo + ├── __init__.py + └── bar + └── app.py + """ + + # If the work object not taken from the app ref, there is a thread lock reference + # somewhere thats preventing it from being pickled. Investigate it later. We + # shouldn't be fetching the work object from the app ref. TODO @sherin + app_ref = _LightningAppRef.get_current() + if app_ref is None: + raise RuntimeError("Cannot pickle LightningWork outside of a LightningApp") + for w in app_ref.works: + if work.name == w.name: + # deep-copying the work object to avoid modifying the original work object + with _trimmed_work(w, to_trim=NON_PICKLABLE_WORK_ATTRIBUTES): + copied_work = deepcopy(w) + break + else: + raise ValueError(f"Work with name {work.name} not found in the app references") + + # if work is defined in the __main__ or __mp__main__ (the entrypoint file for `lightning run app` command), + # pickling/unpickling will fail, hence we need patch the module information + if "_main__" in copied_work.__class__.__module__: + work_class_module = sys.modules[copied_work.__class__.__module__] + work_class_file = work_class_module.__file__ + if not work_class_file: + raise ValueError( + f"Cannot pickle work class {copied_work.__class__.__name__} because we " + f"couldn't identify the module file" + ) + relative_path = Path(work_class_module.__file__).relative_to(Path.cwd()) # type: ignore + expected_module_name = relative_path.as_posix().replace(".py", "").replace("/", ".") + # TODO @sherin: also check if the module is importable from the CWD + fake_module = types.ModuleType(expected_module_name) + fake_module.__dict__.update(work_class_module.__dict__) + fake_module.__dict__["__name__"] = expected_module_name + sys.modules[expected_module_name] = fake_module + for k, v in fake_module.__dict__.items(): + if not k.startswith("__") and hasattr(v, "__module__"): + if "_main__" in v.__module__: + v.__module__ = expected_module_name + return copied_work + + +def dump(work: LightningWork, f: typing.BinaryIO) -> None: + picklable_work = get_picklable_work(work) + pickle.dump(picklable_work, f) + + +def load(f: typing.BinaryIO) -> typing.Any: + # inject current working directory to sys.path + sys.path.insert(1, str(Path.cwd())) + work = pickle.load(f) + sys.path.pop(1) + return work diff --git a/src/lightning_lite/CHANGELOG.md b/src/lightning_lite/CHANGELOG.md index 1ceece1b5cbf2..cceb9b1d97ed5 100644 --- a/src/lightning_lite/CHANGELOG.md +++ b/src/lightning_lite/CHANGELOG.md @@ -4,7 +4,9 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). -## [1.8.4] - 2022-12-06 +## [1.8.4] - 2022-12-08 + +- Fixed `shuffle=False` having no effect when using DDP/DistributedSampler ([#15931](https://github.com/Lightning-AI/lightning/issues/15931)) ## [1.8.3] - 2022-11-22 diff --git a/src/lightning_lite/lite.py b/src/lightning_lite/lite.py index e6890742e42d9..586396859de38 100644 --- a/src/lightning_lite/lite.py +++ b/src/lightning_lite/lite.py @@ -24,7 +24,7 @@ from lightning_utilities.core.rank_zero import rank_zero_warn from torch import Tensor from torch.optim import Optimizer -from torch.utils.data import BatchSampler, DataLoader, DistributedSampler +from torch.utils.data import BatchSampler, DataLoader, DistributedSampler, RandomSampler from lightning_lite.plugins import Precision # avoid circular imports: # isort: split from lightning_lite.accelerators.accelerator import Accelerator @@ -478,6 +478,7 @@ def _requires_distributed_sampler(self, dataloader: DataLoader) -> bool: @staticmethod def _get_distributed_sampler(dataloader: DataLoader, **kwargs: Any) -> DistributedSampler: + kwargs.setdefault("shuffle", isinstance(dataloader.sampler, RandomSampler)) kwargs.setdefault("seed", int(os.getenv("PL_GLOBAL_SEED", 0))) return DistributedSamplerWrapper(dataloader.sampler, **kwargs) diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 88dfec393bb3c..3df4b4a7dcb83 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -4,11 +4,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). -## [1.8.4] - 2022-12-06 +## [1.8.4] - 2022-12-08 + +### Changed - Direct support for compiled models ([#15922](https://github.com/Lightning-AI/lightning/pull/15922)) + +### Fixed + - Fixed issue with unsupported torch.inference_mode() on hpu backends ([#15918](https://github.com/Lightning-AI/lightning/pull/15918)) -- Fix LRScheduler import for PyTorch 2.0 ([#15940](https://github.com/Lightning-AI/lightning/pull/15940)) +- Fixed LRScheduler import for PyTorch 2.0 ([#15940](https://github.com/Lightning-AI/lightning/pull/15940)) +- Fixed `fit_loop.restarting` to be `False` for lr finder ([#15620](https://github.com/Lightning-AI/lightning/pull/15620)) +- Fixed `torch.jit.script`-ing a LightningModule causing an unintended error message about deprecated `use_amp` property ([#15947](https://github.com/Lightning-AI/lightning/pull/15947)) ## [1.8.3] - 2022-11-22 diff --git a/src/pytorch_lightning/_graveyard/core.py b/src/pytorch_lightning/_graveyard/core.py index 49768e46569b8..e4722d60558a1 100644 --- a/src/pytorch_lightning/_graveyard/core.py +++ b/src/pytorch_lightning/_graveyard/core.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import Any -from pytorch_lightning import LightningDataModule, LightningModule +from pytorch_lightning import LightningDataModule def _on_save_checkpoint(_: LightningDataModule, __: Any) -> None: @@ -32,28 +32,6 @@ def _on_load_checkpoint(_: LightningDataModule, __: Any) -> None: ) -def _use_amp(_: LightningModule) -> None: - # Remove in v2.0.0 and the skip in `__jit_unused_properties__` - if not LightningModule._jit_is_scripting: - # cannot use `AttributeError` as it messes up with `nn.Module.__getattr__` - raise RuntimeError( - "`LightningModule.use_amp` was deprecated in v1.6 and is no longer accessible as of v1.8." - " Please use `Trainer.amp_backend`.", - ) - - -def _use_amp_setter(_: LightningModule, __: bool) -> None: - # Remove in v2.0.0 - # cannot use `AttributeError` as it messes up with `nn.Module.__getattr__` - raise RuntimeError( - "`LightningModule.use_amp` was deprecated in v1.6 and is no longer accessible as of v1.8." - " Please use `Trainer.amp_backend`.", - ) - - -# Properties -LightningModule.use_amp = property(fget=_use_amp, fset=_use_amp_setter) - # Methods LightningDataModule.on_save_checkpoint = _on_save_checkpoint LightningDataModule.on_load_checkpoint = _on_load_checkpoint diff --git a/src/pytorch_lightning/callbacks/lr_finder.py b/src/pytorch_lightning/callbacks/lr_finder.py index 4d235751ca791..1c950e64086b9 100644 --- a/src/pytorch_lightning/callbacks/lr_finder.py +++ b/src/pytorch_lightning/callbacks/lr_finder.py @@ -85,7 +85,7 @@ def __init__( max_lr: float = 1, num_training_steps: int = 100, mode: str = "exponential", - early_stop_threshold: float = 4.0, + early_stop_threshold: Optional[float] = 4.0, update_attr: bool = False, ) -> None: mode = mode.lower() diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index 294b031f23996..48cce4ccbe971 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -88,7 +88,6 @@ class LightningModule( "automatic_optimization", "truncated_bptt_steps", "trainer", - "use_amp", # from graveyard ] + _DeviceDtypeModuleMixin.__jit_unused_properties__ + HyperparametersMixin.__jit_unused_properties__ diff --git a/src/pytorch_lightning/tuner/lr_finder.py b/src/pytorch_lightning/tuner/lr_finder.py index 503678b1fca8f..fa55d062320bb 100644 --- a/src/pytorch_lightning/tuner/lr_finder.py +++ b/src/pytorch_lightning/tuner/lr_finder.py @@ -203,7 +203,7 @@ def lr_find( max_lr: float = 1, num_training: int = 100, mode: str = "exponential", - early_stop_threshold: float = 4.0, + early_stop_threshold: Optional[float] = 4.0, update_attr: bool = False, ) -> Optional[_LRFinder]: """See :meth:`~pytorch_lightning.tuner.tuning.Tuner.lr_find`""" @@ -219,6 +219,8 @@ def lr_find( ckpt_path = os.path.join(trainer.default_root_dir, f".lr_find_{uuid.uuid4()}.ckpt") trainer.save_checkpoint(ckpt_path) + start_steps = trainer.global_step + # Arguments we adjust during the lr finder, save for restoring params = __lr_finder_dump_params(trainer) @@ -239,7 +241,7 @@ def lr_find( _try_loop_run(trainer, params) # Prompt if we stopped early - if trainer.global_step != num_training: + if trainer.global_step != num_training + start_steps: log.info(f"LR finder stopped early after {trainer.global_step} steps due to diverging loss.") # Transfer results from callback to lr finder object @@ -263,6 +265,7 @@ def lr_find( # Restore initial state of model trainer._checkpoint_connector.restore(ckpt_path) trainer.strategy.remove_checkpoint(ckpt_path) + trainer.fit_loop.restarting = False # reset restarting flag as checkpoint restoring sets it to True return lr_finder @@ -282,7 +285,7 @@ def __lr_finder_dump_params(trainer: "pl.Trainer") -> Dict[str, Any]: } -def __lr_finder_reset_params(trainer: "pl.Trainer", num_training: int, early_stop_threshold: float) -> None: +def __lr_finder_reset_params(trainer: "pl.Trainer", num_training: int, early_stop_threshold: Optional[float]) -> None: from pytorch_lightning.loggers.logger import DummyLogger trainer.strategy.lr_scheduler_configs = [] @@ -293,8 +296,8 @@ def __lr_finder_reset_params(trainer: "pl.Trainer", num_training: int, early_sto trainer.callbacks = [_LRCallback(num_training, early_stop_threshold, progress_bar_refresh_rate=1)] # No logging trainer.logger = DummyLogger() if trainer.logger is not None else None - # Max step set to number of iterations - trainer.fit_loop.max_steps = num_training + # Max step set to number of iterations starting at current number of iterations + trainer.fit_loop.max_steps = num_training + trainer.global_step trainer.limit_val_batches = num_training @@ -332,7 +335,7 @@ class _LRCallback(Callback): def __init__( self, num_training: int, - early_stop_threshold: float = 4.0, + early_stop_threshold: Optional[float] = 4.0, progress_bar_refresh_rate: int = 0, beta: float = 0.98, ): diff --git a/tests/tests_app/cli/test_cmd_install.py b/tests/tests_app/cli/test_cmd_install.py index aa0c34ba6ed2d..2e2086348cb58 100644 --- a/tests/tests_app/cli/test_cmd_install.py +++ b/tests/tests_app/cli/test_cmd_install.py @@ -17,19 +17,19 @@ def test_valid_org_app_name(): # assert a bad app name should fail fake_app = "fakeuser/impossible/name" - result = runner.invoke(lightning_cli.install_app, [fake_app]) + result = runner.invoke(lightning_cli.cmd_install.install_app, [fake_app]) assert "app name format must have organization/app-name" in result.output # assert a good name (but unavailable name) should work fake_app = "fakeuser/ALKKLJAUHREKJ21234KLAKJDLF" - result = runner.invoke(lightning_cli.install_app, [fake_app]) + result = runner.invoke(lightning_cli.cmd_install.install_app, [fake_app]) assert f"app: '{fake_app}' is not available on ⚡ Lightning AI ⚡" in result.output assert result.exit_code # assert a good (and availablea name) works # This should be an app that's always in the gallery real_app = "lightning/invideo" - result = runner.invoke(lightning_cli.install_app, [real_app]) + result = runner.invoke(lightning_cli.cmd_install.install_app, [real_app]) assert "Press enter to continue:" in result.output @@ -47,16 +47,16 @@ def test_valid_unpublished_app_name(): assert "WARNING" in str(e.output) # assert aborted install - result = runner.invoke(lightning_cli.install_app, [real_app], input="q") + result = runner.invoke(lightning_cli.cmd_install.install_app, [real_app], input="q") assert "Installation aborted!" in result.output # assert a bad app name should fail fake_app = "https://github.com/Lightning-AI/install-appdd" - result = runner.invoke(lightning_cli.install_app, [fake_app, "--yes"]) + result = runner.invoke(lightning_cli.cmd_install.install_app, [fake_app, "--yes"]) assert "Looks like the github url was not found" in result.output # assert a good (and availablea name) works - result = runner.invoke(lightning_cli.install_app, [real_app]) + result = runner.invoke(lightning_cli.cmd_install.install_app, [real_app]) assert "Press enter to continue:" in result.output @@ -81,17 +81,17 @@ def test_valid_org_component_name(): # assert a bad name should fail fake_component = "fakeuser/impossible/name" - result = runner.invoke(lightning_cli.install_component, [fake_component]) + result = runner.invoke(lightning_cli.cmd_install.install_component, [fake_component]) assert "component name format must have organization/component-name" in result.output # assert a good name (but unavailable name) should work fake_component = "fakeuser/ALKKLJAUHREKJ21234KLAKJDLF" - result = runner.invoke(lightning_cli.install_component, [fake_component]) + result = runner.invoke(lightning_cli.cmd_install.install_component, [fake_component]) assert f"component: '{fake_component}' is not available on ⚡ Lightning AI ⚡" in result.output # assert a good (and availablea name) works fake_component = "lightning/lit-slack-messenger" - result = runner.invoke(lightning_cli.install_component, [fake_component]) + result = runner.invoke(lightning_cli.cmd_install.install_component, [fake_component]) assert "Press enter to continue:" in result.output @@ -100,13 +100,13 @@ def test_unpublished_component_url_parsing(): # assert a bad name should fail (no git@) fake_component = "https://github.com/Lightning-AI/LAI-slack-messenger" - result = runner.invoke(lightning_cli.install_component, [fake_component]) + result = runner.invoke(lightning_cli.cmd_install.install_component, [fake_component]) assert "Error, your github url must be in the following format" in result.output # assert a good (and availablea name) works sha = "14f333456ffb6758bd19458e6fa0bf12cf5575e1" real_component = f"git+https://github.com/Lightning-AI/LAI-slack-messenger.git@{sha}" - result = runner.invoke(lightning_cli.install_component, [real_component]) + result = runner.invoke(lightning_cli.cmd_install.install_component, [real_component]) assert "Press enter to continue:" in result.output @@ -148,26 +148,26 @@ def test_prompt_actions(): runner = CliRunner() # assert that the user can cancel the command with any letter other than y - result = runner.invoke(lightning_cli.install_app, [app_to_use], input="b") + result = runner.invoke(lightning_cli.cmd_install.install_app, [app_to_use], input="b") assert "Installation aborted!" in result.output # assert that the install happens with --yes - # result = runner.invoke(lightning_cli.install_app, [app_to_use, "--yes"]) + # result = runner.invoke(lightning_cli.cmd_install.install_app, [app_to_use, "--yes"]) # assert result.exit_code == 0 # assert that the install happens with y - # result = runner.invoke(lightning_cli.install_app, [app_to_use], input='y') + # result = runner.invoke(lightning_cli.cmd_install.install_app, [app_to_use], input='y') # assert result.exit_code == 0 # # assert that the install happens with yes - # result = runner.invoke(lightning_cli.install_app, [app_to_use], input='yes') + # result = runner.invoke(lightning_cli.cmd_install.install_app, [app_to_use], input='yes') # assert result.exit_code == 0 # assert that the install happens with pressing enter - # result = runner.invoke(lightning_cli.install_app, [app_to_use]) + # result = runner.invoke(lightning_cli.cmd_install.install_app, [app_to_use]) # TODO: how to check the output when the user types ctrl+c? - # result = runner.invoke(lightning_cli.install_app, [app_to_use], input='') + # result = runner.invoke(lightning_cli.cmd_install.install_app, [app_to_use], input='') @mock.patch("lightning_app.cli.cmd_install.subprocess", mock.MagicMock()) @@ -178,7 +178,7 @@ def test_version_arg_component(tmpdir, monkeypatch): # Version does not exist component_name = "lightning/lit-slack-messenger" version_arg = "NOT-EXIST" - result = runner.invoke(lightning_cli.install_component, [component_name, f"--version={version_arg}"]) + result = runner.invoke(lightning_cli.cmd_install.install_component, [component_name, f"--version={version_arg}"]) assert f"component: 'Version {version_arg} for {component_name}' is not" in str(result.exception) assert result.exit_code == 1 @@ -186,7 +186,9 @@ def test_version_arg_component(tmpdir, monkeypatch): # This somwehow fail in test but not when you actually run it version_arg = "0.0.1" runner = CliRunner() - result = runner.invoke(lightning_cli.install_component, [component_name, f"--version={version_arg}", "--yes"]) + result = runner.invoke( + lightning_cli.cmd_install.install_component, [component_name, f"--version={version_arg}", "--yes"] + ) assert result.exit_code == 0 @@ -198,14 +200,14 @@ def test_version_arg_app(tmpdir): app_name = "lightning/invideo" version_arg = "NOT-EXIST" runner = CliRunner() - result = runner.invoke(lightning_cli.install_app, [app_name, f"--version={version_arg}"]) + result = runner.invoke(lightning_cli.cmd_install.install_app, [app_name, f"--version={version_arg}"]) assert f"app: 'Version {version_arg} for {app_name}' is not" in str(result.exception) assert result.exit_code == 1 # Version exists version_arg = "0.0.2" runner = CliRunner() - result = runner.invoke(lightning_cli.install_app, [app_name, f"--version={version_arg}", "--yes"]) + result = runner.invoke(lightning_cli.cmd_install.install_app, [app_name, f"--version={version_arg}", "--yes"]) assert result.exit_code == 0 @@ -236,7 +238,9 @@ def test_install_resolve_latest_version(mock_show_install_app_prompt, tmpdir): }, ] } - runner.invoke(lightning_cli.install_app, [app_name, "--yes"]) # no version specified so latest is installed + runner.invoke( + lightning_cli.cmd_install.install_app, [app_name, "--yes"] + ) # no version specified so latest is installed assert mock_show_install_app_prompt.called assert mock_show_install_app_prompt.call_args[0][0]["version"] == "0.0.4" @@ -274,7 +278,7 @@ def test_install_app_shows_error(tmpdir): app_folder_dir.mkdir() with pytest.raises(SystemExit, match=f"Folder {str(app_folder_dir)} exists, please delete it and try again."): - cmd_install._install_app( + cmd_install._install_app_from_source( source_url=mock.ANY, git_url=mock.ANY, folder_name=str(app_folder_dir), overwrite=False ) @@ -360,7 +364,9 @@ def test_install_app_process(subprocess_mock, source_url, git_url, git_sha, tmpd app_folder_dir = Path(tmpdir / "some_random_directory").absolute() app_folder_dir.mkdir() - cmd_install._install_app(source_url, git_url, folder_name=str(app_folder_dir), overwrite=True, git_sha=git_sha) + cmd_install._install_app_from_source( + source_url, git_url, folder_name=str(app_folder_dir), overwrite=True, git_sha=git_sha + ) assert subprocess_mock.check_output.call_args_list[0].args == (["git", "clone", git_url],) if git_sha: assert subprocess_mock.check_output.call_args_list[1].args == (["git", "checkout", git_sha],) diff --git a/tests/tests_app/components/multi_node/__init__.py b/tests/tests_app/components/multi_node/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/tests_app/components/multi_node/test_lite.py b/tests/tests_app/components/multi_node/test_lite.py new file mode 100644 index 0000000000000..9b8aa29779fd2 --- /dev/null +++ b/tests/tests_app/components/multi_node/test_lite.py @@ -0,0 +1,103 @@ +import os +from copy import deepcopy +from functools import partial +from unittest import mock + +import pytest +from lightning_utilities.core.imports import module_available +from tests_app.helpers.utils import no_warning_call + +import lightning_lite as ll +from lightning_app.components.multi_node.lite import _LiteRunExecutor + + +class DummyLite(ll.LightningLite): + def run(self): + pass + + +def dummy_callable(**kwargs): + lite = DummyLite(**kwargs) + return lite._all_passed_kwargs + + +def dummy_init(self, **kwargs): + self._all_passed_kwargs = kwargs + + +def _get_args_after_tracer_injection(**kwargs): + with mock.patch.object(ll.LightningLite, "__init__", dummy_init): + ret_val = _LiteRunExecutor.run( + local_rank=0, + work_run=partial(dummy_callable, **kwargs), + main_address="1.2.3.4", + main_port=5, + node_rank=6, + num_nodes=7, + nprocs=8, + ) + env_vars = deepcopy(os.environ) + return ret_val, env_vars + + +def check_lightning_lite_mps(): + if module_available("lightning_lite"): + return ll.accelerators.MPSAccelerator.is_available() + return False + + +@pytest.mark.skipif(not check_lightning_lite_mps(), reason="Lightning lite not available or mps not available") +@pytest.mark.parametrize("accelerator_given,accelerator_expected", [("cpu", "cpu"), ("auto", "cpu"), ("gpu", "cpu")]) +def test_lite_run_executor_mps_forced_cpu(accelerator_given, accelerator_expected): + warning_str = ( + r"Forcing accelerator=cpu as other accelerators \(specifically MPS\) are not supported " + + "by PyTorch for distributed training on mps capable devices" + ) + if accelerator_expected != accelerator_given: + warning_context = pytest.warns(UserWarning, match=warning_str) + else: + warning_context = no_warning_call(match=warning_str + "*") + + with warning_context: + ret_val, env_vars = _get_args_after_tracer_injection(accelerator=accelerator_given) + assert ret_val["accelerator"] == accelerator_expected + + +@pytest.mark.parametrize( + "args_given,args_expected", + [ + ({"devices": 1, "num_nodes": 1, "accelerator": "gpu"}, {"devices": 8, "num_nodes": 7, "accelerator": "auto"}), + ({"strategy": "ddp_spawn"}, {"strategy": "ddp"}), + ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), + ], +) +@pytest.mark.skipif(not module_available("lightning"), reason="Lightning is required for this test") +def test_trainer_run_executor_arguments_choices(args_given: dict, args_expected: dict): + + # ddp with mps devices not available (tested separately, just patching here for cross-os testing of other args) + if ll.accelerators.MPSAccelerator.is_available(): + args_expected["accelerator"] = "cpu" + + ret_val, env_vars = _get_args_after_tracer_injection(**args_given) + + for k, v in args_expected.items(): + assert ret_val[k] == v + + assert env_vars["MASTER_ADDR"] == "1.2.3.4" + assert env_vars["MASTER_PORT"] == "5" + assert env_vars["GROUP_RANK"] == "6" + assert env_vars["RANK"] == str(0 + 6 * 8) + assert env_vars["LOCAL_RANK"] == "0" + assert env_vars["WORLD_SIZE"] == str(7 * 8) + assert env_vars["LOCAL_WORLD_SIZE"] == "8" + assert env_vars["TORCHELASTIC_RUN_ID"] == "1" + assert env_vars["LT_CLI_USED"] == "1" + + +@pytest.mark.skipif(not module_available("lightning"), reason="Lightning not available") +def test_lite_run_executor_invalid_strategy_instances(): + with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): + _, _ = _get_args_after_tracer_injection(strategy=ll.strategies.DDPSpawnStrategy()) + + with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): + _, _ = _get_args_after_tracer_injection(strategy=ll.strategies.DDPSpawnShardedStrategy()) diff --git a/tests/tests_app/components/multi_node/test_trainer.py b/tests/tests_app/components/multi_node/test_trainer.py new file mode 100644 index 0000000000000..c86e0968e2ab0 --- /dev/null +++ b/tests/tests_app/components/multi_node/test_trainer.py @@ -0,0 +1,99 @@ +import os +from copy import deepcopy +from functools import partial +from unittest import mock + +import pytest +from lightning_utilities.core.imports import module_available +from tests_app.helpers.utils import no_warning_call + +import pytorch_lightning as pl +from lightning_app.components.multi_node.trainer import _LightningTrainerRunExecutor + + +def dummy_callable(**kwargs): + t = pl.Trainer(**kwargs) + return t._all_passed_kwargs + + +def dummy_init(self, **kwargs): + self._all_passed_kwargs = kwargs + + +def _get_args_after_tracer_injection(**kwargs): + with mock.patch.object(pl.Trainer, "__init__", dummy_init): + ret_val = _LightningTrainerRunExecutor.run( + local_rank=0, + work_run=partial(dummy_callable, **kwargs), + main_address="1.2.3.4", + main_port=5, + node_rank=6, + num_nodes=7, + nprocs=8, + ) + env_vars = deepcopy(os.environ) + return ret_val, env_vars + + +def check_lightning_pytorch_and_mps(): + if module_available("pytorch_lightning"): + return pl.accelerators.MPSAccelerator.is_available() + return False + + +@pytest.mark.skipif(not check_lightning_pytorch_and_mps(), reason="pytorch_lightning and mps are required") +@pytest.mark.parametrize("accelerator_given,accelerator_expected", [("cpu", "cpu"), ("auto", "cpu"), ("gpu", "cpu")]) +def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expected): + warning_str = ( + r"Forcing accelerator=cpu as other accelerators \(specifically MPS\) are not supported " + + "by PyTorch for distributed training on mps capable devices" + ) + if accelerator_expected != accelerator_given: + warning_context = pytest.warns(UserWarning, match=warning_str) + else: + warning_context = no_warning_call(match=warning_str + "*") + + with warning_context: + ret_val, env_vars = _get_args_after_tracer_injection(accelerator=accelerator_given) + assert ret_val["accelerator"] == accelerator_expected + + +@pytest.mark.parametrize( + "args_given,args_expected", + [ + ({"devices": 1, "num_nodes": 1, "accelerator": "gpu"}, {"devices": 8, "num_nodes": 7, "accelerator": "auto"}), + ({"strategy": "ddp_spawn"}, {"strategy": "ddp"}), + ({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}), + ], +) +@pytest.mark.skipif(not module_available("pytorch"), reason="Lightning is not available") +def test_trainer_run_executor_arguments_choices( + args_given: dict, + args_expected: dict, +): + + if pl.accelerators.MPSAccelerator.is_available(): + args_expected.pop("accelerator", None) # Cross platform tests -> MPS is tested separately + + ret_val, env_vars = _get_args_after_tracer_injection(**args_given) + + for k, v in args_expected.items(): + assert ret_val[k] == v + + assert env_vars["MASTER_ADDR"] == "1.2.3.4" + assert env_vars["MASTER_PORT"] == "5" + assert env_vars["GROUP_RANK"] == "6" + assert env_vars["RANK"] == str(0 + 6 * 8) + assert env_vars["LOCAL_RANK"] == "0" + assert env_vars["WORLD_SIZE"] == str(7 * 8) + assert env_vars["LOCAL_WORLD_SIZE"] == "8" + assert env_vars["TORCHELASTIC_RUN_ID"] == "1" + + +@pytest.mark.skipif(not module_available("lightning"), reason="lightning not available") +def test_trainer_run_executor_invalid_strategy_instances(): + with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): + _, _ = _get_args_after_tracer_injection(strategy=pl.strategies.DDPSpawnStrategy()) + + with pytest.raises(ValueError, match="DDP Spawned strategies aren't supported yet."): + _, _ = _get_args_after_tracer_injection(strategy=pl.strategies.DDPSpawnShardedStrategy()) diff --git a/tests/tests_app/structures/test_structures.py b/tests/tests_app/structures/test_structures.py index 9c7f492370635..3346da5a858fc 100644 --- a/tests/tests_app/structures/test_structures.py +++ b/tests/tests_app/structures/test_structures.py @@ -518,3 +518,29 @@ def __init__(self): LightningApp(flow) # wrap in app to init all component names assert flow.list_structure[0].name == "root.list_structure.0" assert flow.dict_structure["dict_child"].name == "root.dict_structure.dict_child" + + +class FlowWiStructures(LightningFlow): + def __init__(self): + super().__init__() + + self.ws = [EmptyFlow(), EmptyFlow()] + + self.ws1 = {"a": EmptyFlow(), "b": EmptyFlow()} + + self.ws2 = { + "a": EmptyFlow(), + "b": EmptyFlow(), + "c": List(EmptyFlow(), EmptyFlow()), + "d": Dict(**{"a": EmptyFlow()}), + } + + def run(self): + pass + + +def test_flow_without_structures(): + + flow = FlowWiStructures() + assert isinstance(flow.ws, List) + assert isinstance(flow.ws1, Dict) diff --git a/tests/tests_app/utilities/packaging/test_build_spec.py b/tests/tests_app/utilities/packaging/test_build_spec.py index ba497a5efbdb4..70c4a60374b67 100644 --- a/tests/tests_app/utilities/packaging/test_build_spec.py +++ b/tests/tests_app/utilities/packaging/test_build_spec.py @@ -29,7 +29,7 @@ def test_build_config_requirements_provided(): assert spec.requirements == [ "dask", "pandas", - "pytorch_" + "lightning==1.5.9", # ugly hack due to replacing `pytorch_lightning string` + "pytorch_lightning==1.5.9", "git+https://github.com/mit-han-lab/torchsparse.git@v1.4.0", ] assert spec == BuildConfig.from_dict(spec.to_dict()) @@ -50,7 +50,7 @@ def test_build_config_dockerfile_provided(): spec = BuildConfig(dockerfile="./projects/Dockerfile.cpu") assert not spec.requirements # ugly hack due to replacing `pytorch_lightning string - assert "pytorchlightning/pytorch_" + "lightning" in spec.dockerfile.data[0] + assert "pytorchlightning/pytorch_lightning" in spec.dockerfile.data[0] class DockerfileLightningTestApp(LightningTestApp): diff --git a/tests/tests_app/utilities/test_app_helpers.py b/tests/tests_app/utilities/test_app_helpers.py index 791d2011f7651..2241e262cd381 100644 --- a/tests/tests_app/utilities/test_app_helpers.py +++ b/tests/tests_app/utilities/test_app_helpers.py @@ -1,4 +1,5 @@ import os +from functools import partial from unittest import mock import pytest @@ -10,6 +11,8 @@ ) from lightning_app import LightningApp, LightningFlow, LightningWork +from lightning_app.core.flow import _RootFlow +from lightning_app.frontend import StaticWebFrontend from lightning_app.utilities.app_helpers import ( _handle_is_headless, _is_headless, @@ -119,14 +122,9 @@ def configure_layout(self): return {"name": "test", "content": "https://appurl"} -class FlowWithWorkLayout(Flow): - def __init__(self): - super().__init__() - - self.work = Work() - +class FlowWithFrontend(Flow): def configure_layout(self): - return {"name": "test", "content": self.work} + return StaticWebFrontend(".") class FlowWithMockedFrontend(Flow): @@ -153,16 +151,62 @@ def __init__(self): self.flow = FlowWithURLLayout() +class WorkWithStringLayout(Work): + def configure_layout(self): + return "http://appurl" + + +class WorkWithMockedFrontendLayout(Work): + def configure_layout(self): + return _MagicMockJsonSerializable() + + +class WorkWithFrontendLayout(Work): + def configure_layout(self): + return StaticWebFrontend(".") + + +class WorkWithNoneLayout(Work): + def configure_layout(self): + return None + + +class FlowWithWorkLayout(Flow): + def __init__(self, work): + super().__init__() + + self.work = work() + + def configure_layout(self): + return {"name": "test", "content": self.work} + + +class WorkClassRootFlow(_RootFlow): + """A ``_RootFlow`` which takes a work class rather than the work itself.""" + + def __init__(self, work): + super().__init__(work()) + + @pytest.mark.parametrize( "flow,expected", [ (Flow, True), (FlowWithURLLayout, False), - (FlowWithWorkLayout, False), + (FlowWithFrontend, False), (FlowWithMockedFrontend, False), (FlowWithMockedContent, False), (NestedFlow, True), (NestedFlowWithURLLayout, False), + (partial(WorkClassRootFlow, WorkWithStringLayout), False), + (partial(WorkClassRootFlow, WorkWithMockedFrontendLayout), False), + (partial(WorkClassRootFlow, WorkWithFrontendLayout), False), + (partial(WorkClassRootFlow, WorkWithNoneLayout), True), + (partial(FlowWithWorkLayout, Work), False), + (partial(FlowWithWorkLayout, WorkWithStringLayout), False), + (partial(FlowWithWorkLayout, WorkWithMockedFrontendLayout), False), + (partial(FlowWithWorkLayout, WorkWithFrontendLayout), False), + (partial(FlowWithWorkLayout, WorkWithNoneLayout), True), ], ) def test_is_headless(flow, expected): diff --git a/tests/tests_app/utilities/test_layout.py b/tests/tests_app/utilities/test_layout.py new file mode 100644 index 0000000000000..98921e3d0000e --- /dev/null +++ b/tests/tests_app/utilities/test_layout.py @@ -0,0 +1,143 @@ +import pytest + +from lightning_app.core.flow import LightningFlow +from lightning_app.core.work import LightningWork +from lightning_app.frontend.web import StaticWebFrontend +from lightning_app.utilities.layout import _collect_layout + + +class _MockApp: + def __init__(self) -> None: + self.frontends = {} + + +class FlowWithFrontend(LightningFlow): + def configure_layout(self): + return StaticWebFrontend(".") + + +class WorkWithFrontend(LightningWork): + def run(self): + pass + + def configure_layout(self): + return StaticWebFrontend(".") + + +class FlowWithWorkWithFrontend(LightningFlow): + def __init__(self): + super().__init__() + + self.work = WorkWithFrontend() + + def configure_layout(self): + return {"name": "work", "content": self.work} + + +class FlowWithUrl(LightningFlow): + def configure_layout(self): + return {"name": "test", "content": "https://test"} + + +class WorkWithUrl(LightningWork): + def run(self): + pass + + def configure_layout(self): + return "https://test" + + +class FlowWithWorkWithUrl(LightningFlow): + def __init__(self): + super().__init__() + + self.work = WorkWithUrl() + + def configure_layout(self): + return {"name": "test", "content": self.work} + + +@pytest.mark.parametrize( + "flow,expected_layout,expected_frontends", + [ + (FlowWithFrontend, {}, [("root", StaticWebFrontend)]), + (FlowWithWorkWithFrontend, {}, [("root", StaticWebFrontend)]), + (FlowWithUrl, [{"name": "test", "content": "https://test", "target": "https://test"}], []), + (FlowWithWorkWithUrl, [{"name": "test", "content": "https://test", "target": "https://test"}], []), + ], +) +def test_collect_layout(flow, expected_layout, expected_frontends): + app = _MockApp() + flow = flow() + layout = _collect_layout(app, flow) + + assert layout == expected_layout + assert set(app.frontends.keys()) == {key for key, _ in expected_frontends} + for key, frontend_type in expected_frontends: + assert isinstance(app.frontends[key], frontend_type) + + +class FlowWithBadLayout(LightningFlow): + def configure_layout(self): + return 100 + + +class FlowWithBadLayoutDict(LightningFlow): + def configure_layout(self): + return {"this_key_should_not_be_here": "http://appurl"} + + +class FlowWithBadContent(LightningFlow): + def configure_layout(self): + return {"content": 100} + + +class WorkWithBadLayout(LightningWork): + def run(self): + pass + + def configure_layout(self): + return 100 + + +class FlowWithWorkWithBadLayout(LightningFlow): + def __init__(self): + super().__init__() + + self.work = WorkWithBadLayout() + + def configure_layout(self): + return {"name": "test", "content": self.work} + + +class FlowWithMultipleWorksWithFrontends(LightningFlow): + def __init__(self): + super().__init__() + + self.work1 = WorkWithFrontend() + self.work2 = WorkWithFrontend() + + def configure_layout(self): + return [{"name": "test1", "content": self.work1}, {"name": "test2", "content": self.work2}] + + +@pytest.mark.parametrize( + "flow,error_type,match", + [ + (FlowWithBadLayout, TypeError, "is an unsupported layout format"), + (FlowWithBadLayoutDict, ValueError, "missing a key 'content'."), + (FlowWithBadContent, ValueError, "contains an unsupported entry."), + (FlowWithWorkWithBadLayout, TypeError, "is of an unsupported type."), + ( + FlowWithMultipleWorksWithFrontends, + TypeError, + "The tab containing a `WorkWithFrontend` must be the only tab", + ), + ], +) +def test_collect_layout_errors(flow, error_type, match): + app = _MockApp() + flow = flow() + + with pytest.raises(error_type, match=match): + _collect_layout(app, flow) diff --git a/tests/tests_app/utilities/test_safe_pickle.py b/tests/tests_app/utilities/test_safe_pickle.py new file mode 100644 index 0000000000000..473fe28ed22f7 --- /dev/null +++ b/tests/tests_app/utilities/test_safe_pickle.py @@ -0,0 +1,11 @@ +import subprocess +from pathlib import Path + + +def test_safe_pickle_app(): + test_dir = Path(__file__).parent / "testdata" + proc = subprocess.Popen( + ["lightning", "run", "app", "safe_pickle_app.py", "--open-ui", "false"], stdout=subprocess.PIPE, cwd=test_dir + ) + stdout, _ = proc.communicate() + assert "Exiting the pickling app successfully" in stdout.decode("UTF-8") diff --git a/tests/tests_app/utilities/testdata/safe_pickle_app.py b/tests/tests_app/utilities/testdata/safe_pickle_app.py new file mode 100644 index 0000000000000..f15344360d85f --- /dev/null +++ b/tests/tests_app/utilities/testdata/safe_pickle_app.py @@ -0,0 +1,63 @@ +""" +This app tests three things +1. Can a work pickle `self` +2. Can the pickled work be unpickled in another work +3. Can the pickled work be unpickled from a script +""" + +import subprocess +from pathlib import Path + +from lightning_app import LightningApp, LightningFlow, LightningWork +from lightning_app.utilities import safe_pickle + + +class SelfPicklingWork(LightningWork): + def run(self): + with open("work.pkl", "wb") as f: + safe_pickle.dump(self, f) + + def get_test_string(self): + return f"Hello from {self.__class__.__name__}!" + + +class WorkThatLoadsPickledWork(LightningWork): + def run(self): + with open("work.pkl", "rb") as f: + work = safe_pickle.load(f) + assert work.get_test_string() == "Hello from SelfPicklingWork!" + + +script_load_pickled_work = """ +import pickle +work = pickle.load(open("work.pkl", "rb")) +print(work.get_test_string()) +""" + + +class RootFlow(LightningFlow): + def __init__(self): + super().__init__() + self.self_pickling_work = SelfPicklingWork() + self.work_that_loads_pickled_work = WorkThatLoadsPickledWork() + + def run(self): + self.self_pickling_work.run() + self.work_that_loads_pickled_work.run() + + with open("script_that_loads_pickled_work.py", "w") as f: + f.write(script_load_pickled_work) + + # read the output from subprocess + proc = subprocess.Popen(["python", "script_that_loads_pickled_work.py"], stdout=subprocess.PIPE) + assert "Hello from SelfPicklingWork" in proc.stdout.read().decode("UTF-8") + + # deleting the script + Path("script_that_loads_pickled_work.py").unlink() + # deleting the pkl file + Path("work.pkl").unlink() + + self._exit("Exiting the pickling app successfully!!") + + +app = LightningApp(RootFlow()) diff --git a/tests/tests_lite/test_lite.py b/tests/tests_lite/test_lite.py index 05937cc062b78..a569a159fceee 100644 --- a/tests/tests_lite/test_lite.py +++ b/tests/tests_lite/test_lite.py @@ -22,14 +22,14 @@ from tests_lite.helpers.runif import RunIf from tests_lite.helpers.utils import no_warning_call from torch import nn -from torch.utils.data import DataLoader, DistributedSampler, Sampler +from torch.utils.data import DataLoader, DistributedSampler, RandomSampler, Sampler, SequentialSampler, TensorDataset from lightning_lite.lite import LightningLite from lightning_lite.plugins import Precision from lightning_lite.strategies import ParallelStrategy, SingleDeviceStrategy, Strategy from lightning_lite.utilities import _StrategyType from lightning_lite.utilities.exceptions import MisconfigurationException -from lightning_lite.utilities.seed import pl_worker_init_function +from lightning_lite.utilities.seed import pl_worker_init_function, seed_everything from lightning_lite.utilities.warnings import PossibleUserWarning from lightning_lite.wrappers import _LiteDataLoader, _LiteModule, _LiteOptimizer @@ -298,6 +298,38 @@ def test_setup_dataloaders_distributed_sampler_not_needed(): assert lite_dataloader.sampler is custom_sampler +class DistributedSamplerLite(LightningLite): + def run(self): + # no lite.launch(): pretend we are on rank 0 now + + dataset = TensorDataset(torch.arange(8)) + + # shuffling turned off + no_shuffle_dataloaders = [ + DataLoader(dataset), + DataLoader(dataset, shuffle=False), + DataLoader(dataset, sampler=SequentialSampler(dataset)), + ] + for dataloader in no_shuffle_dataloaders: + dataloader = self.setup_dataloaders(dataloader) + expected = [0, 2, 4, 6] if self.global_rank == 0 else [1, 3, 5, 7] + assert list(t[0].item() for t in iter(dataloader)) == expected + + # shuffling turned on + shuffle_dataloaders = [DataLoader(dataset, shuffle=True), DataLoader(dataset, sampler=RandomSampler(dataset))] + for dataloader in shuffle_dataloaders: + seed_everything(1) + dataloader = self.setup_dataloaders(dataloader) + expected = [5, 0, 2, 1] if self.global_rank == 0 else [4, 7, 3, 6] + assert list(t[0].item() for t in iter(dataloader)) == expected + + +def test_setup_dataloaders_distributed_sampler_shuffle(): + """Test that the DataLoader(shuffle=True|False) setting gets carried over correctly into the distributed + sampler.""" + DistributedSamplerLite(accelerator="cpu", strategy="ddp_spawn", devices=2).run() + + @mock.patch.dict(os.environ, {}, clear=True) def test_seed_everything(): """Test that seed everything is static and sets the worker init function on the dataloader.""" diff --git a/tests/tests_lite/test_parity.py b/tests/tests_lite/test_parity.py index dd82b15020649..a830ab02abfb0 100644 --- a/tests/tests_lite/test_parity.py +++ b/tests/tests_lite/test_parity.py @@ -203,7 +203,7 @@ def test_boring_lite_model_ddp_spawn(precision, strategy, devices, accelerator, ) def test_boring_lite_model_ddp(precision, strategy, devices, accelerator, tmpdir): LightningLite.seed_everything(42) - train_dataloader = DataLoader(RandomDataset(32, 4)) + train_dataloader = DataLoader(RandomDataset(32, 4), shuffle=True) model = BoringModel() num_epochs = 1 state_dict = deepcopy(model.state_dict()) @@ -214,13 +214,13 @@ def test_boring_lite_model_ddp(precision, strategy, devices, accelerator, tmpdir lite_model_state_dict = model.state_dict() for w_pure, w_lite in zip(state_dict.values(), lite_model_state_dict.values()): - assert not torch.equal(w_pure.cpu(), w_lite.cpu()) + assert not torch.allclose(w_pure.cpu(), w_lite.cpu()) LightningLite.seed_everything(42) - train_dataloader = DataLoader(RandomDataset(32, 4)) + train_dataloader = DataLoader(RandomDataset(32, 4), shuffle=True) model = BoringModel() run(lite.global_rank, model, train_dataloader, num_epochs, precision, accelerator, tmpdir) pure_model_state_dict = model.state_dict() for w_pure, w_lite in zip(pure_model_state_dict.values(), lite_model_state_dict.values()): - assert torch.equal(w_pure.cpu(), w_lite.cpu()) + torch.testing.assert_close(w_pure.cpu(), w_lite.cpu()) diff --git a/tests/tests_pytorch/core/test_lightning_module.py b/tests/tests_pytorch/core/test_lightning_module.py index 03bdf8dfa89e7..ba8419a904b3a 100644 --- a/tests/tests_pytorch/core/test_lightning_module.py +++ b/tests/tests_pytorch/core/test_lightning_module.py @@ -425,6 +425,17 @@ def test_proper_refcount(): assert sys.getrefcount(torch_module) == sys.getrefcount(lightning_module) +def test_lightning_module_scriptable(): + """Test that the LightningModule is `torch.jit.script`-able. + + Regression test for #15917. + """ + model = BoringModel() + trainer = Trainer() + model.trainer = trainer + torch.jit.script(model) + + def test_trainer_reference_recursively(): ensemble = LightningModule() inner = LightningModule() diff --git a/tests/tests_pytorch/graveyard/test_core.py b/tests/tests_pytorch/graveyard/test_core.py index 8450f41f6c075..95db542658481 100644 --- a/tests/tests_pytorch/graveyard/test_core.py +++ b/tests/tests_pytorch/graveyard/test_core.py @@ -53,18 +53,3 @@ def on_load_checkpoint(self, checkpoint): match="`LightningDataModule.on_load_checkpoint`.*no longer supported as of v1.8.", ): trainer.fit(model, OnLoadDataModule()) - - -def test_v2_0_0_lightning_module_unsupported_use_amp(): - model = BoringModel() - with pytest.raises( - RuntimeError, - match="`LightningModule.use_amp`.*no longer accessible as of v1.8.", - ): - model.use_amp - - with pytest.raises( - RuntimeError, - match="`LightningModule.use_amp`.*no longer accessible as of v1.8.", - ): - model.use_amp = False diff --git a/tests/tests_pytorch/tuner/test_lr_finder.py b/tests/tests_pytorch/tuner/test_lr_finder.py index d88fe41b5c05f..69202f6fe7056 100644 --- a/tests/tests_pytorch/tuner/test_lr_finder.py +++ b/tests/tests_pytorch/tuner/test_lr_finder.py @@ -438,3 +438,50 @@ def test_if_lr_finder_callback_already_configured(): with pytest.raises(MisconfigurationException, match="Trainer is already configured with a .* callback"): trainer.tune(model) + + +def test_lr_finder_callback_restarting(tmpdir): + """Test that `LearningRateFinder` does not set restarting=True when loading checkpoint.""" + + num_lr_steps = 100 + + class MyBoringModel(BoringModel): + def __init__(self): + super().__init__() + self.learning_rate = 0.123 + + def on_train_batch_start(self, batch, batch_idx): + if getattr(self, "_expected_max_steps", None) is not None: + assert self.trainer.fit_loop.max_steps == self._expected_max_steps + + def configure_optimizers(self): + return torch.optim.SGD(self.parameters(), lr=self.learning_rate) + + class CustomLearningRateFinder(LearningRateFinder): + milestones = (1,) + + def lr_find(self, trainer, pl_module) -> None: + pl_module._expected_max_steps = trainer.global_step + self._num_training_steps + super().lr_find(trainer, pl_module) + pl_module._expected_max_steps = None + assert not trainer.fit_loop.restarting + + def on_train_epoch_start(self, trainer, pl_module): + if trainer.current_epoch in self.milestones or trainer.current_epoch == 0: + self.lr_find(trainer, pl_module) + + model = MyBoringModel() + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=3, + callbacks=[ + CustomLearningRateFinder(early_stop_threshold=None, update_attr=True, num_training_steps=num_lr_steps) + ], + limit_train_batches=10, + limit_val_batches=0, + limit_test_batches=0, + num_sanity_val_steps=0, + enable_model_summary=False, + ) + + trainer.fit(model)