Skip to content

Commit

Permalink
[UX] skip provisioning stages if cluster is already available
Browse files: browse the repository at this point in the history
  • Loading branch information
cg505 committed Oct 24, 2024
1 parent f2991b1 commit 51c98d3
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 2 deletions.
2 changes: 1 addition & 1 deletion sky/backends/cloud_vm_ray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2737,7 +2737,7 @@ def _provision(
(e.g., cluster name invalid) or a region/zone throwing
resource unavailability.
exceptions.CommandError: any ssh command error.
RuntimeErorr: raised when 'rsync' is not installed.
RuntimeError: raised when 'rsync' is not installed.
# TODO(zhwu): complete the list of exceptions.
"""
# FIXME: ray up for Azure with different cluster_names will overwrite
Expand Down
35 changes: 34 additions & 1 deletion sky/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from sky import optimizer
from sky import sky_logging
from sky.backends import backend_utils
from sky.exceptions import ClusterNotUpError
from sky.usage import usage_lib
from sky.utils import admin_policy_utils
from sky.utils import controller_utils
Expand Down Expand Up @@ -451,15 +452,47 @@ def launch(
controller_utils.check_cluster_name_not_controller(
cluster_name, operation_str='sky.launch')

handle = None
stages = None
# Check if cluster exists
if cluster_name is not None:
maybe_handle = global_user_state.get_handle_from_cluster_name(
cluster_name)
if maybe_handle is not None:
try:
# This raises (e.g., ClusterNotUpError) if the cluster is not available
backend_utils.check_cluster_available(
cluster_name,
operation='executing tasks',
check_cloud_vm_ray_backend=False,
dryrun=dryrun)
# The cluster is available: reuse its handle and run only the stages listed below (the commented-out stages are skipped)
handle = maybe_handle
stages = [
# Stage.CLONE_DISK,
# Stage.PROVISION,
# Stage.OPTIMIZE,
Stage.SYNC_WORKDIR,
Stage.SYNC_FILE_MOUNTS,
# Stage.SETUP,
Stage.PRE_EXEC,
Stage.EXEC,
Stage.DOWN
]
except ClusterNotUpError:
# Proceed with normal provisioning
pass

return _execute(
entrypoint=entrypoint,
dryrun=dryrun,
down=down,
stream_logs=stream_logs,
handle=None,
handle=handle,
backend=backend,
retry_until_up=retry_until_up,
optimize_target=optimize_target,
stages=stages,
cluster_name=cluster_name,
detach_setup=detach_setup,
detach_run=detach_run,
Expand Down

0 comments on commit 51c98d3

Please sign in to comment.