From 14b67e78228740ed8b9777482faeedcacfa25769 Mon Sep 17 00:00:00 2001
From: Kye
Date: Fri, 29 Mar 2024 09:03:27 -0700
Subject: [PATCH] [CLEANUP]

---
Review note: activate_tf.sh was revised in review (Terraform directory check,
cron entry path, non-interactive Terraform flags). The blob id on its "index"
line predates that revision.

 scripts/terraform_scripts/activate_tf.sh |  67 ++++++++
 swarms_cloud/sky_api.py                  | 200 -----------------------
 2 files changed, 67 insertions(+), 200 deletions(-)
 create mode 100644 scripts/terraform_scripts/activate_tf.sh
 delete mode 100644 swarms_cloud/sky_api.py

diff --git a/scripts/terraform_scripts/activate_tf.sh b/scripts/terraform_scripts/activate_tf.sh
new file mode 100644
index 0000000..6076c43
--- /dev/null
+++ b/scripts/terraform_scripts/activate_tf.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# Apply the multi-cloud Consul Terraform configuration with timestamped
+# logging, then make sure a nightly cron entry re-runs this script.
+#
+# Environment overrides:
+#   TERRAFORM_DIR  directory holding the Terraform configuration
+#   LOG_FILE       file that receives log messages and Terraform output
+
+# Absolute path of this script, resolved before any `cd` so the cron entry
+# below points at a real file.
+SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
+
+# Terraform operates on a directory, not on a single .tf file.
+TERRAFORM_DIR="${TERRAFORM_DIR:-/swarms-cloud/scripts/terraform_scripts/multi_cloud_consul}"
+
+# Define the log file path
+LOG_FILE="${LOG_FILE:-/var/log/terraform_apply.log}"
+
+# Function to log messages with timestamps
+log() {
+    printf '[%s] %s\n' "$(date +"%Y-%m-%d %T")" "$1" >> "$LOG_FILE"
+}
+
+# Ensure the Terraform directory exists
+if [ ! -d "$TERRAFORM_DIR" ]; then
+    log "The specified Terraform directory does not exist: $TERRAFORM_DIR"
+    exit 1
+fi
+
+# Navigate to the Terraform directory
+if ! cd "$TERRAFORM_DIR"; then
+    log "Could not change into the Terraform directory: $TERRAFORM_DIR"
+    exit 1
+fi
+
+# Begin Terraform process
+log "Starting Terraform apply..."
+
+# Initialize Terraform. -input=false makes a missing value fail fast instead
+# of waiting on a prompt that nobody can answer under cron.
+if ! terraform init -input=false >> "$LOG_FILE" 2>&1; then
+    log "Terraform init failed."
+    exit 1
+fi
+log "Terraform init succeeded."
+
+# Apply Terraform configuration
+if ! terraform apply -input=false -auto-approve >> "$LOG_FILE" 2>&1; then
+    log "Terraform apply failed."
+    exit 1
+fi
+log "Terraform apply succeeded."
+
+log "Terraform apply completed successfully."
+
+# Ensure exactly one cron entry for this script: drop any existing line that
+# mentions it, then append the current schedule (daily at 03:00).
+# `crontab -l` exits non-zero when the user has no crontab yet; its error
+# output is discarded so a first run still installs the entry.
+CRON_JOB="0 3 * * * $SCRIPT_PATH"
+if ( crontab -l 2>/dev/null | grep -Fv "$SCRIPT_PATH" ; echo "$CRON_JOB" ) | crontab -; then
+    log "Cron job for Terraform apply script ensured."
+else
+    log "Failed to install the cron job for the Terraform apply script."
+    exit 1
+fi
diff --git a/swarms_cloud/sky_api.py b/swarms_cloud/sky_api.py deleted file mode 100644 index 00c2239..0000000 --- a/swarms_cloud/sky_api.py +++ /dev/null @@ -1,200 +0,0 @@ -from typing import List - -import sky -from sky import Task - - -class SkyInterface: - """ - - SkyInterface is a wrapper around the sky Python API. It provides a - simplified interface for launching, executing, stopping, starting, and - tearing down clusters. - - Attributes: - clusters (dict): A dictionary of clusters that have been launched. - The keys are the names of the clusters and the values are the handles - to the clusters. - - Methods: - launch: Launch a cluster - execute: Execute a task on a cluster - stop: Stop a cluster - start: Start a cluster - down: Tear down a cluster - status: Get the status of a cluster - autostop: Set the autostop of a cluster - - Example: - >>> sky_interface = SkyInterface() - >>> job_id = sky_interface.launch("task", "cluster_name") - >>> sky_interface.execute("task", "cluster_name") - >>> sky_interface.stop("cluster_name") - >>> sky_interface.start("cluster_name") - >>> sky_interface.down("cluster_name") - >>> sky_interface.status() - >>> sky_interface.autostop("cluster_name") - - - """ - - def __init__( - self, - task_name: str = None, - cluster_name: str = None, - gpus: str = "T4:1", - stream_logs_enabled: bool = False, - *args, - **kwargs, - ): - self.task_name = task_name - self.cluster_name = cluster_name - self.gpus = gpus - self.stream_logs_enabled = stream_logs_enabled - self.clusters = {} - - def launch(self, task: Task = None, cluster_name: str = None, **kwargs): - """Launch a task on a cluster - - Args: - task (str): code to execute on the cluster - cluster_name (_type_, optional): _description_. Defaults to None. 
- - Returns: - _type_: _description_ - """ - cluster = None - try: - cluster = sky.launch( - task=task, - cluster_name=cluster_name, - stream_logs=self.stream_logs_enabled, - **kwargs, - ) - print(f"Launched job {cluster} on cluster {cluster_name}") - return cluster - except Exception as error: - # Deep error logging - print( - f"Error launching job {cluster} on cluster {cluster_name} with" - f" error {error}" - ) - raise error - - def execute(self, task: Task = None, cluster_name: str = None, **kwargs): - """Execute a task on a cluster - - Args: - task (_type_): _description_ - cluster_name (_type_): _description_ - - Raises: - ValueError: _description_ - - Returns: - _type_: _description_ - """ - if cluster_name not in self.clusters: - raise ValueError(f"Cluster {cluster_name} does not exist") - try: - return sky.exec( - task=task, - cluster_name=cluster_name, - stream_logs=self.stream_logs_enabled, - **kwargs, - ) - except Exception as e: - print("Error executing on cluster:", e) - - def stop(self, cluster_name: str = None, **kwargs): - """Stop a cluster - - Args: - cluster_name (str): name of the cluster to stop - """ - try: - sky.stop(cluster_name, **kwargs) - except (ValueError, RuntimeError) as e: - print("Error stopping cluster:", e) - - def start(self, cluster_name: str = None, **kwargs): - """start a cluster - - Args: - cluster_name (str): name of the cluster to start - """ - try: - sky.start(cluster_name, **kwargs) - except Exception as e: - print("Error starting cluster:", e) - - def down(self, cluster_name: str = None, **kwargs): - """Down a cluster - - Args: - cluster_name (str): name of the cluster to tear down - """ - try: - sky.down(cluster_name, **kwargs) - if cluster_name in self.clusters: - del self.clusters[cluster_name] - except (ValueError, RuntimeError) as e: - print("Error tearing down cluster:", e) - - def status(self, cluster_names: List[str] = None, **kwargs): - """Save a cluster - - Returns: - r: the status of the cluster - """ - try: - 
return sky.status(cluster_names, **kwargs) - except Exception as e: - print("Error getting status:", e) - - def autostop(self, cluster_name: str = None, **kwargs): - """Autostop a cluster - - Args: - cluster_name (str): name of the cluster to autostop - """ - try: - sky.autostop(cluster_name, **kwargs) - except Exception as e: - print("Error setting autostop:", e) - - def create_task( - self, - name: str = None, - setup: str = None, - run: str = None, - workdir: str = None, - task: str = None, - *args, - **kwargs, - ): - """_summary_ - - Args: - name (str, optional): _description_. Defaults to None. - setup (str, optional): _description_. Defaults to None. - run (str, optional): _description_. Defaults to None. - workdir (str, optional): _description_. Defaults to None. - task (str, optional): _description_. Defaults to None. - - Returns: - _type_: _description_ - - # A Task that will sync up local workdir '.', containing - # requirements.txt and train.py. - sky.Task(setup='pip install requirements.txt', - run='python train.py', - workdir='.') - - # An empty Task for provisioning a cluster. - task = sky.Task(num_nodes=n).set_resources(...) - - # Chaining setters. - sky.Task().set_resources(...).set_file_mounts(...) - """ - return Task(name=name, setup=setup, run=run, workdir=workdir, *args, **kwargs)