Skip to content

Commit

Permalink
[App] Raise error when launching app on multiple clusters (#15484)
Browse files Browse the repository at this point in the history
* Error when running on multiple clusters

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Revert this in separate PR: keep this focused

* Improve testing

* fixup! Improve testing

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* pass flake8

* Update changelog

* Address PR feedback

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* remove unused import

* Reword error message

* Error if running on cluster that doesn't exist

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fixup! Error if running on cluster that doesn't exist

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Remove unsued import

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jirka Borovec <[email protected]>

(cherry picked from commit c5d3bba)
  • Loading branch information
luca3rd authored and Borda committed Nov 30, 2022
1 parent 427ca44 commit 927b305
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 43 deletions.
3 changes: 3 additions & 0 deletions src/lightning_app/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed SSH CLI command listing stopped components ([#15810](https://github.com/Lightning-AI/lightning/pull/15810))


- Fixed bug when launching apps on multiple clusters ([#15484](https://github.com/Lightning-AI/lightning/pull/15484))


## [1.8.3] - 2022-11-22

### Changed
Expand Down
22 changes: 16 additions & 6 deletions src/lightning_app/runners/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@
import time
from dataclasses import dataclass
from pathlib import Path
from textwrap import dedent
from typing import Any, Callable, List, Optional, Union

import click
from lightning_cloud.openapi import (
Body3,
Body4,
Body7,
Body8,
Body9,
Externalv1LightningappInstance,
Gridv1ImageSpec,
V1BuildSpec,
V1DependencyFileInfo,
Expand Down Expand Up @@ -336,6 +337,7 @@ def dispatch(
elif CLOUD_QUEUE_TYPE == "redis":
queue_server_type = V1QueueServerType.REDIS

existing_instance: Optional[Externalv1LightningappInstance] = None
if find_instances_resp.lightningapps:
existing_instance = find_instances_resp.lightningapps[0]

Expand Down Expand Up @@ -374,12 +376,20 @@ def dispatch(
f"Your app last ran on cluster {app_config.cluster_id}, but that cluster "
"doesn't exist anymore."
)
click.confirm(
f"{msg} Do you want to run on Lightning Cloud instead?",
abort=True,
default=True,
raise ValueError(msg)
if existing_instance and existing_instance.spec.cluster_id != app_config.cluster_id:
raise ValueError(
dedent(
f"""\
An app names {app_config.name} is already running on cluster {existing_instance.spec.cluster_id}, and you requested it to run on cluster {app_config.cluster_id}.
In order to proceed, please either:
a. rename the app to run on {app_config.cluster_id} with the --name option
lightning run app {app_entrypoint_file} --name (new name) --cloud --cluster-id {app_config.cluster_id}
b. delete the app running on {existing_instance.spec.cluster_id} in the UI before running this command.
""" # noqa: E501
)
)
app_config.cluster_id = None

if app_config.cluster_id is not None:
self._ensure_cluster_project_binding(project.project_id, app_config.cluster_id)
Expand Down
120 changes: 83 additions & 37 deletions tests/tests_app/runners/test_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import re
import sys
from contextlib import nullcontext as does_not_raise
from copy import copy
from pathlib import Path
from unittest import mock
Expand Down Expand Up @@ -101,75 +102,120 @@ def get_cloud_runtime_request_body(**kwargs) -> "Body8":
return Body8(**default_request_body)


@pytest.fixture
def cloud_backend(monkeypatch):
cloud_backend = mock.MagicMock()
monkeypatch.setattr(cloud, "LocalSourceCodeDir", mock.MagicMock())
monkeypatch.setattr(cloud, "_prepare_lightning_wheels_and_requirements", mock.MagicMock())
monkeypatch.setattr(backends, "CloudBackend", mock.MagicMock(return_value=cloud_backend))
return cloud_backend


@pytest.fixture
def project_id():
return "test-project-id"


DEFAULT_CLUSTER = "litng-ai-03"


class TestAppCreationClient:
"""Testing the calls made using GridRestClient to create the app."""

def test_run_on_deleted_cluster(self, cloud_backend):
app_name = "test-app"

mock_client = mock.MagicMock()
mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse(
memberships=[V1Membership(name="Default Project", project_id=project_id)]
)

mock_client.cluster_service_list_clusters.return_value = V1ListClustersResponse(
[
Externalv1Cluster(id=DEFAULT_CLUSTER),
]
)
cloud_backend.client = mock_client

app = mock.MagicMock()
app.flows = []
app.frontend = {}

existing_instance = MagicMock()
existing_instance.status.phase = V1LightningappInstanceState.STOPPED
existing_instance.spec.cluster_id = DEFAULT_CLUSTER
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=[existing_instance])
)

cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file="entrypoint.py")
cloud_runtime._check_uploaded_folder = mock.MagicMock()

with pytest.raises(ValueError, match="that cluster doesn't exist"):
cloud_runtime.dispatch(name=app_name, cluster_id="unknown-cluster")

# TODO: remove this test once there is support for multiple instances
@mock.patch("lightning_app.runners.backends.cloud.LightningClient", mock.MagicMock())
def test_new_instance_on_different_cluster(self, monkeypatch):
app_name = "test-app-name"
original_cluster = "cluster-001"
new_cluster = "cluster-002"
@pytest.mark.parametrize(
"old_cluster,new_cluster,expected_raise",
[
(
"test",
"other",
pytest.raises(
ValueError,
match="already running on cluster",
),
),
("test", "test", does_not_raise()),
(None, None, does_not_raise()),
(None, "litng-ai-03", does_not_raise()),
("litng-ai-03", None, does_not_raise()),
],
)
def test_new_instance_on_different_cluster(
self, cloud_backend, project_id, old_cluster, new_cluster, expected_raise
):
app_name = "test-app"

mock_client = mock.MagicMock()
mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse(
memberships=[V1Membership(name="Default Project", project_id="default-project-id")]
memberships=[V1Membership(name="Default Project", project_id=project_id)]
)
mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease(
cluster_id=new_cluster
)

# Note:
# backend converts "None" cluster to "litng-ai-03"
# dispatch should receive None, but API calls should return "litng-ai-03"
mock_client.cluster_service_list_clusters.return_value = V1ListClustersResponse(
[Externalv1Cluster(id=original_cluster), Externalv1Cluster(id=new_cluster)]
[
Externalv1Cluster(id=old_cluster or DEFAULT_CLUSTER),
Externalv1Cluster(id=new_cluster or DEFAULT_CLUSTER),
]
)

cloud_backend = mock.MagicMock()
cloud_backend.client = mock_client
monkeypatch.setattr(cloud, "LocalSourceCodeDir", mock.MagicMock())
monkeypatch.setattr(cloud, "_prepare_lightning_wheels_and_requirements", mock.MagicMock())
monkeypatch.setattr(backends, "CloudBackend", mock.MagicMock(return_value=cloud_backend))

app = mock.MagicMock()
app.flows = []
app.frontend = {}

existing_instance = MagicMock()
existing_instance.status.phase = V1LightningappInstanceState.STOPPED
existing_instance.spec.cluster_id = original_cluster
existing_instance.spec.cluster_id = old_cluster or DEFAULT_CLUSTER
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=[existing_instance])
)

cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file="entrypoint.py")
cloud_runtime._check_uploaded_folder = mock.MagicMock()

# without requirements file
# setting is_file to False so requirements.txt existence check will return False
monkeypatch.setattr(Path, "is_file", lambda *args, **kwargs: False)
monkeypatch.setattr(cloud, "Path", Path)

# This is the main assertion:
# we have an existing instance on `cluster-001`
# but we want to run this app on `cluster-002`
cloud_runtime.dispatch(name=app_name, cluster_id=new_cluster)

body = Body8(
cluster_id=new_cluster,
app_entrypoint_file=mock.ANY,
enable_app_server=True,
flow_servers=[],
image_spec=None,
works=[],
local_source=True,
dependency_cache_key=mock.ANY,
user_requested_flow_compute_config=mock.ANY,
)
cloud_runtime.backend.client.lightningapp_v2_service_create_lightningapp_release.assert_called_once_with(
project_id="default-project-id", app_id=mock.ANY, body=body
)
cloud_runtime.backend.client.projects_service_create_project_cluster_binding.assert_called_once_with(
project_id="default-project-id",
body=V1ProjectClusterBinding(cluster_id=new_cluster, project_id="default-project-id"),
)
with expected_raise:
cloud_runtime.dispatch(name=app_name, cluster_id=new_cluster)

@pytest.mark.parametrize("flow_cloud_compute", [None, CloudCompute(name="t2.medium")])
@mock.patch("lightning_app.runners.backends.cloud.LightningClient", mock.MagicMock())
Expand Down

0 comments on commit 927b305

Please sign in to comment.