diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 154d162756003..5d059d6f3127d 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -15,6 +15,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Enabled MultiNode Components to support state broadcasting ([#15607](https://github.com/Lightning-AI/lightning/pull/15607)) +- Added a friendly error message when attempting to run the default cloud compute with a custom base image configured ([#14929](https://github.com/Lightning-AI/lightning/pull/14929)) + + ### Changed - diff --git a/src/lightning_app/runners/cloud.py b/src/lightning_app/runners/cloud.py index 36ca4f21bd79c..ebe54cfb83881 100644 --- a/src/lightning_app/runners/cloud.py +++ b/src/lightning_app/runners/cloud.py @@ -43,6 +43,7 @@ ) from lightning_cloud.openapi.rest import ApiException +from lightning_app import LightningWork from lightning_app.core.app import LightningApp from lightning_app.core.constants import ( CLOUD_QUEUE_TYPE, @@ -143,6 +144,8 @@ def dispatch( works: List[V1Work] = [] for work in self.app.works: + _validate_build_spec_and_compute(work) + if not work._start_with_flow: continue @@ -519,3 +522,12 @@ def _create_mount_drive_spec(work_name: str, mount: Mount) -> V1LightningworkDri ), mount_location=str(mount.mount_path), ) + + +def _validate_build_spec_and_compute(work: LightningWork) -> None: + if work.cloud_build_config.image is not None and work.cloud_compute.name == "default": + raise ValueError( + f"You requested a custom base image for the Work with name '{work.name}', but custom images are currently" + " not supported on the default cloud compute instance. Please choose a different configuration, for example" + " `CloudCompute('cpu-medium')`." + ) diff --git a/tests/tests_app/runners/test_cloud.py b/tests/tests_app/runners/test_cloud.py index f6764bb692868..e00892e22dce0 100644 --- a/tests/tests_app/runners/test_cloud.py +++ b/tests/tests_app/runners/test_cloud.py @@ -39,8 +39,9 @@ V1Work, ) -from lightning_app import _PROJECT_ROOT, LightningApp, LightningWork +from lightning_app import _PROJECT_ROOT, BuildConfig, LightningApp, LightningWork from lightning_app.runners import backends, cloud, CloudRuntime +from lightning_app.runners.cloud import _validate_build_spec_and_compute from lightning_app.storage import Drive, Mount from lightning_app.testing.helpers import EmptyFlow from lightning_app.utilities.cloud import _get_project @@ -54,8 +55,8 @@ def run(self): class WorkWithSingleDrive(LightningWork): - def __init__(self): - super().__init__() + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.drive = None def run(self): @@ -63,8 +64,8 @@ def run(self): class WorkWithTwoDrives(LightningWork): - def __init__(self): - super().__init__() + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.lit_drive_1 = None self.lit_drive_2 = None @@ -403,13 +404,13 @@ def test_call_with_work_app(self, lightningapps, start_with_flow, monkeypatch, t monkeypatch.setattr(cloud, "_prepare_lightning_wheels_and_requirements", mock.MagicMock()) app = mock.MagicMock() - work = MyWork(start_with_flow=start_with_flow) - monkeypatch.setattr(work, "_name", "test-work") - monkeypatch.setattr(work._cloud_build_config, "build_commands", lambda: ["echo 'start'"]) - monkeypatch.setattr(work._cloud_build_config, "requirements", ["torch==1.0.0", "numpy==1.0.0"]) - monkeypatch.setattr(work._cloud_build_config, "image", "random_base_public_image") - monkeypatch.setattr(work._cloud_compute, "disk_size", 0) - monkeypatch.setattr(work, "_port", 8080) + work = MyWork(start_with_flow=start_with_flow, cloud_compute=CloudCompute("custom")) + work._name = "test-work" + work._cloud_build_config.build_commands = lambda: ["echo 'start'"] + work._cloud_build_config.requirements = ["torch==1.0.0", "numpy==1.0.0"] + work._cloud_build_config.image = "random_base_public_image" + work._cloud_compute.disk_size = 0 + work._port = 8080 app.works = [work] cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file=(source_code_root_dir / "entrypoint.py")) @@ -450,7 +451,7 @@ def test_call_with_work_app(self, lightningapps, start_with_flow, monkeypatch, t ), drives=[], user_requested_compute_config=V1UserRequestedComputeConfig( - name="default", + name="custom", count=1, disk_size=0, shm_size=0, @@ -585,7 +586,7 @@ def test_call_with_work_app_and_attached_drives(self, lightningapps, monkeypatch # should be the results of the deepcopy operation (an instance of the original class) mocked_drive.__deepcopy__.return_value = copy(mocked_drive) - work = WorkWithSingleDrive() + work = WorkWithSingleDrive(cloud_compute=CloudCompute("custom")) monkeypatch.setattr(work, "drive", mocked_drive) monkeypatch.setattr(work, "_state", {"_port", "drive"}) monkeypatch.setattr(work, "_name", "test-work") @@ -646,7 +647,7 @@ def test_call_with_work_app_and_attached_drives(self, lightningapps, monkeypatch ), ], user_requested_compute_config=V1UserRequestedComputeConfig( - name="default", + name="custom", count=1, disk_size=0, shm_size=0, @@ -709,14 +710,14 @@ def test_call_with_work_app_and_app_comment_command_execution_set(self, lightnin monkeypatch.setattr(cloud, "_prepare_lightning_wheels_and_requirements", mock.MagicMock()) app = mock.MagicMock() - work = MyWork() - monkeypatch.setattr(work, "_state", {"_port"}) - monkeypatch.setattr(work, "_name", "test-work") - monkeypatch.setattr(work._cloud_build_config, "build_commands", lambda: ["echo 'start'"]) - monkeypatch.setattr(work._cloud_build_config, "requirements", ["torch==1.0.0", "numpy==1.0.0"]) - monkeypatch.setattr(work._cloud_build_config, "image", "random_base_public_image") - monkeypatch.setattr(work._cloud_compute, "disk_size", 0) - monkeypatch.setattr(work, "_port", 8080) + work = MyWork(cloud_compute=CloudCompute("custom")) + work._state = {"_port"} + work._name = "test-work" + work._cloud_build_config.build_commands = lambda: ["echo 'start'"] + work._cloud_build_config.requirements = ["torch==1.0.0", "numpy==1.0.0"] + work._cloud_build_config.image = "random_base_public_image" + work._cloud_compute.disk_size = 0 + work._port = 8080 app.works = [work] cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file=(source_code_root_dir / "entrypoint.py")) @@ -755,7 +756,7 @@ def test_call_with_work_app_and_app_comment_command_execution_set(self, lightnin ), drives=[], user_requested_compute_config=V1UserRequestedComputeConfig( - name="default", count=1, disk_size=0, shm_size=0, preemptible=mock.ANY + name="custom", count=1, disk_size=0, shm_size=0, preemptible=mock.ANY ), network_config=[V1NetworkConfig(name=mock.ANY, host=None, port=8080)], cluster_id=mock.ANY, @@ -835,16 +836,16 @@ def test_call_with_work_app_and_multiple_attached_drives(self, lightningapps, mo # should be the results of the deepcopy operation (an instance of the original class) mocked_lit_drive.__deepcopy__.return_value = copy(mocked_lit_drive) - work = WorkWithTwoDrives() - monkeypatch.setattr(work, "lit_drive_1", mocked_lit_drive) - monkeypatch.setattr(work, "lit_drive_2", mocked_lit_drive) - monkeypatch.setattr(work, "_state", {"_port", "_name", "lit_drive_1", "lit_drive_2"}) - monkeypatch.setattr(work, "_name", "test-work") - monkeypatch.setattr(work._cloud_build_config, "build_commands", lambda: ["echo 'start'"]) - monkeypatch.setattr(work._cloud_build_config, "requirements", ["torch==1.0.0", "numpy==1.0.0"]) - monkeypatch.setattr(work._cloud_build_config, "image", "random_base_public_image") - monkeypatch.setattr(work._cloud_compute, "disk_size", 0) - monkeypatch.setattr(work, "_port", 8080) + work = WorkWithTwoDrives(cloud_compute=CloudCompute("custom")) + work.lit_drive_1 = mocked_lit_drive + work.lit_drive_2 = mocked_lit_drive + work._state = {"_port", "_name", "lit_drive_1", "lit_drive_2"} + work._name = "test-work" + work._cloud_build_config.build_commands = lambda: ["echo 'start'"] + work._cloud_build_config.requirements = ["torch==1.0.0", "numpy==1.0.0"] + work._cloud_build_config.image = "random_base_public_image" + work._cloud_compute.disk_size = 0 + work._port = 8080 app.works = [work] cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file=(source_code_root_dir / "entrypoint.py")) @@ -914,7 +915,7 @@ def test_call_with_work_app_and_multiple_attached_drives(self, lightningapps, mo ), drives=[lit_drive_2_spec, lit_drive_1_spec], user_requested_compute_config=V1UserRequestedComputeConfig( - name="default", + name="custom", count=1, disk_size=0, shm_size=0, @@ -953,7 +954,7 @@ def test_call_with_work_app_and_multiple_attached_drives(self, lightningapps, mo ), drives=[lit_drive_1_spec, lit_drive_2_spec], user_requested_compute_config=V1UserRequestedComputeConfig( - name="default", + name="custom", count=1, disk_size=0, shm_size=0, @@ -1043,7 +1044,7 @@ def test_call_with_work_app_and_attached_mount_and_drive(self, lightningapps, mo setattr(mocked_mount, "mount_path", "/content/foo") setattr(mocked_mount, "protocol", "s3://") - work = WorkWithSingleDrive() + work = WorkWithSingleDrive(cloud_compute=CloudCompute("custom")) monkeypatch.setattr(work, "drive", mocked_drive) monkeypatch.setattr(work, "_state", {"_port", "drive"}) monkeypatch.setattr(work, "_name", "test-work") @@ -1119,7 +1120,7 @@ def test_call_with_work_app_and_attached_mount_and_drive(self, lightningapps, mo ), ], user_requested_compute_config=V1UserRequestedComputeConfig( - name="default", + name="custom", count=1, disk_size=0, shm_size=0, @@ -1227,3 +1228,23 @@ def test_load_app_from_file_module_error(): empty_app = CloudRuntime.load_app_from_file(os.path.join(_PROJECT_ROOT, "examples", "app_v0", "app.py")) assert isinstance(empty_app, LightningApp) assert isinstance(empty_app.root, EmptyFlow) + + +def test_incompatible_cloud_compute_and_build_config(): + """Test that an exception is raised when a build config has a custom image defined, but the cloud compute is + the default. + + This combination is not supported by the platform. + """ + + class Work(LightningWork): + def __init__(self): + super().__init__() + self.cloud_compute = CloudCompute(name="default") + self.cloud_build_config = BuildConfig(image="custom") + + def run(self): + pass + + with pytest.raises(ValueError, match="You requested a custom base image for the Work with name"): + _validate_build_spec_and_compute(Work())