From b0fb647f021a069310037430bb78d13de7724058 Mon Sep 17 00:00:00 2001 From: Luca Furst Date: Thu, 20 Oct 2022 15:14:53 -0400 Subject: [PATCH 1/3] Error instead of breaking --- src/lightning_app/runners/cloud.py | 37 +++++++++++------ tests/tests_app/runners/test_cloud.py | 59 +++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 13 deletions(-) diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index 4a75678afa964..2f3cb0e1b4e22 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -263,19 +263,6 @@ def dispatch( if cluster_id is not None: self._ensure_cluster_project_binding(project.project_id, cluster_id) - lightning_app_release = self.backend.client.lightningapp_v2_service_create_lightningapp_release( - project_id=project.project_id, app_id=lit_app.id, body=release_body - ) - - if cluster_id is not None: - logger.info(f"running app on {lightning_app_release.cluster_id}") - - if lightning_app_release.source_upload_url == "": - raise RuntimeError("The source upload url is empty.") - - repo.package() - repo.upload(url=lightning_app_release.source_upload_url) - # check if user has sufficient credits to run an app # if so set the desired state to running otherwise, create the app in stopped state, # and open the admin ui to add credits and running the app. @@ -293,6 +280,15 @@ def dispatch( queue_server_type = V1QueueServerType.REDIS if CLOUD_QUEUE_TYPE == "redis" else V1QueueServerType.HTTP if find_instances_resp.lightningapps: existing_instance = find_instances_resp.lightningapps[0] + + # TODO: support multiple instances / 1 instance per cluster + if existing_instance.spec.cluster_id != cluster_id: + raise ValueError( + f"Can not start app '{name}' on cluster '{cluster_id}' " + f"since app already exists on '{existing_instance.spec.cluster_id}'. " + "To run this app on another cluster, give it another name with the --name flag" + ) + if existing_instance.status.phase != V1LightningappInstanceState.STOPPED: # TODO(yurij): Implement release switching in the UI and remove this # We can only switch release of the stopped instance @@ -312,6 +308,21 @@ def dispatch( if existing_instance.status.phase != V1LightningappInstanceState.STOPPED: raise RuntimeError("Failed to stop the existing instance.") + # create / upload the new app release / instace + lightning_app_release = self.backend.client.lightningapp_v2_service_create_lightningapp_release( + project_id=project.project_id, app_id=lit_app.id, body=release_body + ) + + if cluster_id is not None: + logger.info(f"running app on {lightning_app_release.cluster_id}") + + if lightning_app_release.source_upload_url == "": + raise RuntimeError("The source upload url is empty.") + + repo.package() + repo.upload(url=lightning_app_release.source_upload_url) + + if find_instances_resp.lightningapps: lightning_app_instance = ( self.backend.client.lightningapp_instance_service_update_lightningapp_instance_release( project_id=project.project_id, diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index 9600fedcef826..b912289370f3f 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -64,6 +64,60 @@ def run(self): class TestAppCreationClient: """Testing the calls made using GridRestClient to create the app.""" + # TODO: remove this test once there is support for multiple instances + @mock.patch("lightning_app.runners.backends.cloud.LightningClient", mock.MagicMock()) + def test_new_instance_on_different_cluster_fails(self, monkeypatch): + app_name = "test-app-name" + original_cluster = 'cluster-001' + new_cluster = 'cluster-002' + + mock_client = mock.MagicMock() + mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse( + memberships=[V1Membership(name="Default Project", project_id="default-project-id")] + ) + + cloud_backend = mock.MagicMock() + cloud_backend.client = mock_client + monkeypatch.setattr(cloud, "LocalSourceCodeDir", mock.MagicMock()) + monkeypatch.setattr(cloud, "_prepare_lightning_wheels_and_requirements", mock.MagicMock()) + monkeypatch.setattr(backends, "CloudBackend", mock.MagicMock(return_value=cloud_backend)) + + app = mock.MagicMock() + app.flows = [] + app.frontend = {} + + existing_instance = MagicMock() + existing_instance.status.phase = V1LightningappInstanceState.STOPPED + existing_instance.spec.cluster_id = original_cluster + mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( + V1ListLightningappInstancesResponse(lightningapps=[existing_instance]) + ) + + cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file="entrypoint.py") + cloud_runtime._check_uploaded_folder = mock.MagicMock() + + # without requirements file + # setting is_file to False so requirements.txt existence check will return False + monkeypatch.setattr(Path, "is_file", lambda *args, **kwargs: False) + monkeypatch.setattr(cloud, "Path", Path) + + # This is the main assertion: + # we have an existing instance on `cluster-001` + # but we want to run this app on `cluster-002` + with pytest.raises(ValueError) as exc: + cloud_runtime.dispatch(name=app_name, cluster_id=new_cluster) + + assert exc.match( + f"Can not start app '{app_name}' on cluster '{new_cluster}' " + f"since app already exists on '{original_cluster}'. " + "To run this app on another cluster, give it another name with the --name flag" + ) + cloud_runtime.backend.client.lightningapp_v2_service_create_lightningapp_release.assert_not_called() + cloud_runtime.backend.client.projects_service_create_project_cluster_binding.assert_called_once_with( + project_id="default-project-id", + body=V1ProjectClusterBinding(cluster_id=new_cluster, project_id="default-project-id"), + ) + @mock.patch("lightning_app.runners.backends.cloud.LightningClient", mock.MagicMock()) def test_run_on_byoc_cluster(self, monkeypatch): mock_client = mock.MagicMock() @@ -218,6 +272,7 @@ def test_call_with_work_app(self, lightningapps, monkeypatch, tmpdir): mock_client = mock.MagicMock() if lightningapps: lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED + lightningapps[0].spec.cluster_id = None mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( V1ListLightningappInstancesResponse(lightningapps=lightningapps) ) @@ -317,6 +372,7 @@ def test_call_with_work_app_and_attached_drives(self, lightningapps, monkeypatch mock_client = mock.MagicMock() if lightningapps: lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED + lightningapps[0].spec.cluster_id = None mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( V1ListLightningappInstancesResponse(lightningapps=lightningapps) ) @@ -444,6 +500,7 @@ def test_call_with_work_app_and_multiple_attached_drives(self, lightningapps, mo mock_client = mock.MagicMock() if lightningapps: lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED + lightningapps[0].spec.cluster_id = None mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( V1ListLightningappInstancesResponse(lightningapps=lightningapps) ) @@ -633,6 +690,7 @@ def test_call_with_work_app_and_attached_mount_and_drive(self, lightningapps, mo mock_client = mock.MagicMock() if lightningapps: lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED + lightningapps[0].spec.cluster_id = None mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = ( V1ListLightningappInstancesResponse(lightningapps=lightningapps) ) @@ -643,6 +701,7 @@ def test_call_with_work_app_and_attached_mount_and_drive(self, lightningapps, mo ) existing_instance = MagicMock() existing_instance.status.phase = V1LightningappInstanceState.STOPPED + existing_instance.spec.cluster_id = None mock_client.lightningapp_service_get_lightningapp = MagicMock(return_value=existing_instance) cloud_backend = mock.MagicMock() cloud_backend.client = mock_client From 202cf8841dca3aafd8c7bf0c7550f60669735b17 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 20 Oct 2022 19:24:53 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/tests_app/runners/test_cloud.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index b912289370f3f..3a2d26a3b0ff4 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -68,8 +68,8 @@ class TestAppCreationClient: @mock.patch("lightning_app.runners.backends.cloud.LightningClient", mock.MagicMock()) def test_new_instance_on_different_cluster_fails(self, monkeypatch): app_name = "test-app-name" - original_cluster = 'cluster-001' - new_cluster = 'cluster-002' + original_cluster = "cluster-001" + new_cluster = "cluster-002" mock_client = mock.MagicMock() mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse( @@ -85,7 +85,7 @@ def test_new_instance_on_different_cluster_fails(self, monkeypatch): app = mock.MagicMock() app.flows = [] app.frontend = {} - + existing_instance = MagicMock() existing_instance.status.phase = V1LightningappInstanceState.STOPPED existing_instance.spec.cluster_id = original_cluster From e5f05251ef22a304b29cca4b249fc86862765b2e Mon Sep 17 00:00:00 2001 From: Luca Furst Date: Fri, 21 Oct 2022 10:35:48 -0400 Subject: [PATCH 3/3] Improve error message && Add changelog --- src/lightning_app/CHANGELOG.md | 1 + src/lightning_app/runners/cloud.py | 4 ++-- tests/tests_app/runners/test_cloud.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 5c4ca74074d77..f2e7f3639b3bf 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -26,6 +26,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed an issue when using the CLI without arguments ([#14877](https://github.com/Lightning-AI/lightning/pull/14877)) - Fixed a bug where the upload files endpoint would raise an error when running locally ([#14924](https://github.com/Lightning-AI/lightning/pull/14924)) +- Fixed a bug when launching an app on multiple clusters ([#15226](https://github.com/Lightning-AI/lightning/pull/15226)) ## [0.6.2] - 2022-09-21 diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index 2f3cb0e1b4e22..5ea14ef8246a4 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -285,8 +285,8 @@ def dispatch( if existing_instance.spec.cluster_id != cluster_id: raise ValueError( f"Can not start app '{name}' on cluster '{cluster_id}' " - f"since app already exists on '{existing_instance.spec.cluster_id}'. " - "To run this app on another cluster, give it another name with the --name flag" + f"since this app already exists on '{existing_instance.spec.cluster_id}'. " + "To run it on another cluster, give it a new name with the --name option." ) if existing_instance.status.phase != V1LightningappInstanceState.STOPPED: diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index 3a2d26a3b0ff4..0328870c1d613 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -109,8 +109,8 @@ def test_new_instance_on_different_cluster_fails(self, monkeypatch): assert exc.match( f"Can not start app '{app_name}' on cluster '{new_cluster}' " - f"since app already exists on '{original_cluster}'. " - "To run this app on another cluster, give it another name with the --name flag" + f"since this app already exists on '{original_cluster}'. " + "To run it on another cluster, give it a new name with the --name option." ) cloud_runtime.backend.client.lightningapp_v2_service_create_lightningapp_release.assert_not_called() cloud_runtime.backend.client.projects_service_create_project_cluster_binding.assert_called_once_with(