Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate the combination of CloudCompute and BuildConfig #14929

Merged
merged 16 commits into from
Nov 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/lightning_app/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Enabled MultiNode Components to support state broadcasting ([#15607](https://github.com/Lightning-AI/lightning/pull/15607))


- Added a friendly error message when attempting to run the default cloud compute with a custom base image configured ([#14929](https://github.com/Lightning-AI/lightning/pull/14929))


### Changed

-
Expand Down
12 changes: 12 additions & 0 deletions src/lightning_app/runners/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
)
from lightning_cloud.openapi.rest import ApiException

from lightning_app import LightningWork
from lightning_app.core.app import LightningApp
from lightning_app.core.constants import (
CLOUD_QUEUE_TYPE,
Expand Down Expand Up @@ -143,6 +144,8 @@ def dispatch(

works: List[V1Work] = []
for work in self.app.works:
_validate_build_spec_and_compute(work)

if not work._start_with_flow:
continue

Expand Down Expand Up @@ -519,3 +522,12 @@ def _create_mount_drive_spec(work_name: str, mount: Mount) -> V1LightningworkDri
),
mount_location=str(mount.mount_path),
)


def _validate_build_spec_and_compute(work: LightningWork) -> None:
    """Reject a Work that pairs a custom base image with the default cloud compute.

    Args:
        work: The Work whose ``cloud_build_config.image`` and ``cloud_compute.name`` are inspected.

    Raises:
        ValueError: If ``work.cloud_build_config.image`` is not ``None`` while
            ``work.cloud_compute.name`` equals ``"default"``.
    """
    if work.cloud_build_config.image is not None and work.cloud_compute.name == "default":
        raise ValueError(
            f"You requested a custom base image for the Work with name '{work.name}', but custom images are currently"
            " not supported on the default cloud compute instance. Please choose a different configuration, for example"
            " `CloudCompute('cpu-medium')`."
        )
97 changes: 59 additions & 38 deletions tests/tests_app/runners/test_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@
V1Work,
)

from lightning_app import _PROJECT_ROOT, LightningApp, LightningWork
from lightning_app import _PROJECT_ROOT, BuildConfig, LightningApp, LightningWork
from lightning_app.runners import backends, cloud, CloudRuntime
from lightning_app.runners.cloud import _validate_build_spec_and_compute
from lightning_app.storage import Drive, Mount
from lightning_app.testing.helpers import EmptyFlow
from lightning_app.utilities.cloud import _get_project
Expand All @@ -54,17 +55,17 @@ def run(self):


class WorkWithSingleDrive(LightningWork):
def __init__(self):
super().__init__()
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.drive = None

def run(self):
pass


class WorkWithTwoDrives(LightningWork):
def __init__(self):
super().__init__()
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.lit_drive_1 = None
self.lit_drive_2 = None

Expand Down Expand Up @@ -403,13 +404,13 @@ def test_call_with_work_app(self, lightningapps, start_with_flow, monkeypatch, t
monkeypatch.setattr(cloud, "_prepare_lightning_wheels_and_requirements", mock.MagicMock())
app = mock.MagicMock()

work = MyWork(start_with_flow=start_with_flow)
monkeypatch.setattr(work, "_name", "test-work")
monkeypatch.setattr(work._cloud_build_config, "build_commands", lambda: ["echo 'start'"])
monkeypatch.setattr(work._cloud_build_config, "requirements", ["torch==1.0.0", "numpy==1.0.0"])
monkeypatch.setattr(work._cloud_build_config, "image", "random_base_public_image")
monkeypatch.setattr(work._cloud_compute, "disk_size", 0)
monkeypatch.setattr(work, "_port", 8080)
work = MyWork(start_with_flow=start_with_flow, cloud_compute=CloudCompute("custom"))
work._name = "test-work"
work._cloud_build_config.build_commands = lambda: ["echo 'start'"]
work._cloud_build_config.requirements = ["torch==1.0.0", "numpy==1.0.0"]
work._cloud_build_config.image = "random_base_public_image"
work._cloud_compute.disk_size = 0
work._port = 8080

app.works = [work]
cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file=(source_code_root_dir / "entrypoint.py"))
Expand Down Expand Up @@ -450,7 +451,7 @@ def test_call_with_work_app(self, lightningapps, start_with_flow, monkeypatch, t
),
drives=[],
user_requested_compute_config=V1UserRequestedComputeConfig(
name="default",
name="custom",
count=1,
disk_size=0,
shm_size=0,
Expand Down Expand Up @@ -585,7 +586,7 @@ def test_call_with_work_app_and_attached_drives(self, lightningapps, monkeypatch
# should be the results of the deepcopy operation (an instance of the original class)
mocked_drive.__deepcopy__.return_value = copy(mocked_drive)

work = WorkWithSingleDrive()
work = WorkWithSingleDrive(cloud_compute=CloudCompute("custom"))
monkeypatch.setattr(work, "drive", mocked_drive)
monkeypatch.setattr(work, "_state", {"_port", "drive"})
monkeypatch.setattr(work, "_name", "test-work")
Expand Down Expand Up @@ -646,7 +647,7 @@ def test_call_with_work_app_and_attached_drives(self, lightningapps, monkeypatch
),
],
user_requested_compute_config=V1UserRequestedComputeConfig(
name="default",
name="custom",
count=1,
disk_size=0,
shm_size=0,
Expand Down Expand Up @@ -709,14 +710,14 @@ def test_call_with_work_app_and_app_comment_command_execution_set(self, lightnin
monkeypatch.setattr(cloud, "_prepare_lightning_wheels_and_requirements", mock.MagicMock())
app = mock.MagicMock()

work = MyWork()
monkeypatch.setattr(work, "_state", {"_port"})
monkeypatch.setattr(work, "_name", "test-work")
monkeypatch.setattr(work._cloud_build_config, "build_commands", lambda: ["echo 'start'"])
monkeypatch.setattr(work._cloud_build_config, "requirements", ["torch==1.0.0", "numpy==1.0.0"])
monkeypatch.setattr(work._cloud_build_config, "image", "random_base_public_image")
monkeypatch.setattr(work._cloud_compute, "disk_size", 0)
monkeypatch.setattr(work, "_port", 8080)
work = MyWork(cloud_compute=CloudCompute("custom"))
work._state = {"_port"}
work._name = "test-work"
work._cloud_build_config.build_commands = lambda: ["echo 'start'"]
work._cloud_build_config.requirements = ["torch==1.0.0", "numpy==1.0.0"]
work._cloud_build_config.image = "random_base_public_image"
work._cloud_compute.disk_size = 0
work._port = 8080

app.works = [work]
cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file=(source_code_root_dir / "entrypoint.py"))
Expand Down Expand Up @@ -755,7 +756,7 @@ def test_call_with_work_app_and_app_comment_command_execution_set(self, lightnin
),
drives=[],
user_requested_compute_config=V1UserRequestedComputeConfig(
name="default", count=1, disk_size=0, shm_size=0, preemptible=mock.ANY
name="custom", count=1, disk_size=0, shm_size=0, preemptible=mock.ANY
),
network_config=[V1NetworkConfig(name=mock.ANY, host=None, port=8080)],
cluster_id=mock.ANY,
Expand Down Expand Up @@ -835,16 +836,16 @@ def test_call_with_work_app_and_multiple_attached_drives(self, lightningapps, mo
# should be the results of the deepcopy operation (an instance of the original class)
mocked_lit_drive.__deepcopy__.return_value = copy(mocked_lit_drive)

work = WorkWithTwoDrives()
monkeypatch.setattr(work, "lit_drive_1", mocked_lit_drive)
monkeypatch.setattr(work, "lit_drive_2", mocked_lit_drive)
monkeypatch.setattr(work, "_state", {"_port", "_name", "lit_drive_1", "lit_drive_2"})
monkeypatch.setattr(work, "_name", "test-work")
monkeypatch.setattr(work._cloud_build_config, "build_commands", lambda: ["echo 'start'"])
monkeypatch.setattr(work._cloud_build_config, "requirements", ["torch==1.0.0", "numpy==1.0.0"])
monkeypatch.setattr(work._cloud_build_config, "image", "random_base_public_image")
monkeypatch.setattr(work._cloud_compute, "disk_size", 0)
monkeypatch.setattr(work, "_port", 8080)
work = WorkWithTwoDrives(cloud_compute=CloudCompute("custom"))
work.lit_drive_1 = mocked_lit_drive
work.lit_drive_2 = mocked_lit_drive
work._state = {"_port", "_name", "lit_drive_1", "lit_drive_2"}
work._name = "test-work"
work._cloud_build_config.build_commands = lambda: ["echo 'start'"]
work._cloud_build_config.requirements = ["torch==1.0.0", "numpy==1.0.0"]
work._cloud_build_config.image = "random_base_public_image"
work._cloud_compute.disk_size = 0
work._port = 8080

app.works = [work]
cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file=(source_code_root_dir / "entrypoint.py"))
Expand Down Expand Up @@ -914,7 +915,7 @@ def test_call_with_work_app_and_multiple_attached_drives(self, lightningapps, mo
),
drives=[lit_drive_2_spec, lit_drive_1_spec],
user_requested_compute_config=V1UserRequestedComputeConfig(
name="default",
name="custom",
count=1,
disk_size=0,
shm_size=0,
Expand Down Expand Up @@ -953,7 +954,7 @@ def test_call_with_work_app_and_multiple_attached_drives(self, lightningapps, mo
),
drives=[lit_drive_1_spec, lit_drive_2_spec],
user_requested_compute_config=V1UserRequestedComputeConfig(
name="default",
name="custom",
count=1,
disk_size=0,
shm_size=0,
Expand Down Expand Up @@ -1043,7 +1044,7 @@ def test_call_with_work_app_and_attached_mount_and_drive(self, lightningapps, mo
setattr(mocked_mount, "mount_path", "/content/foo")
setattr(mocked_mount, "protocol", "s3://")

work = WorkWithSingleDrive()
work = WorkWithSingleDrive(cloud_compute=CloudCompute("custom"))
monkeypatch.setattr(work, "drive", mocked_drive)
monkeypatch.setattr(work, "_state", {"_port", "drive"})
monkeypatch.setattr(work, "_name", "test-work")
Expand Down Expand Up @@ -1119,7 +1120,7 @@ def test_call_with_work_app_and_attached_mount_and_drive(self, lightningapps, mo
),
],
user_requested_compute_config=V1UserRequestedComputeConfig(
name="default",
name="custom",
count=1,
disk_size=0,
shm_size=0,
Expand Down Expand Up @@ -1227,3 +1228,23 @@ def test_load_app_from_file_module_error():
empty_app = CloudRuntime.load_app_from_file(os.path.join(_PROJECT_ROOT, "examples", "app_v0", "app.py"))
assert isinstance(empty_app, LightningApp)
assert isinstance(empty_app.root, EmptyFlow)


def test_incompatible_cloud_compute_and_build_config():
    """Test that an exception is raised when a build config has a custom image defined, but the cloud compute is
    the default.

    This combination is not supported by the platform.
    """

    class Work(LightningWork):
        def __init__(self):
            super().__init__()
            # The exact combination the validator rejects: default compute plus a custom image.
            self.cloud_compute = CloudCompute(name="default")
            self.cloud_build_config = BuildConfig(image="custom")

        def run(self):
            pass

    with pytest.raises(ValueError, match="You requested a custom base image for the Work with name"):
        _validate_build_spec_and_compute(Work())