diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 0000000..dbdfad4
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,51 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - main
+
+  pull_request:
+    branches:
+      - main
+
+jobs:
+
+  docker-image:
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # See explanation: https://github.com/orgs/community/discussions/25678
+      - name: Clean disk
+        run: |
+          rm -rf /opt/hostedtoolcache
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        with:
+          images: ghcr.io/kyryl-opens-ml/gpu-jobs-comparison
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+        with:
+          context: .
+          push: true
+          tags: |
+            ${{ steps.meta.outputs.tags }}
+            ghcr.io/kyryl-opens-ml/gpu-jobs-comparison:latest
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..4ba6157
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+FROM huggingface/transformers-pytorch-gpu:4.35.2
+
+WORKDIR /app
+
+ENV LC_ALL=C.UTF-8
+ENV LANG=C.UTF-8
+
+COPY requirements.txt requirements.txt
+RUN pip install -r requirements.txt
+RUN MAX_JOBS=4 pip install flash-attn==2.5.7 --no-build-isolation
+
+RUN ln -s /usr/bin/python3 /usr/bin/python
+
+ENV PYTHONPATH=/app
+COPY . .
+
+CMD [ "bash" ]
\ No newline at end of file
diff --git a/README.md b/README.md
index 9700064..8eee7b3 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,84 @@
-# gpu-jobs-comparison
\ No newline at end of file
+# gpu-jobs-comparison
+
+## TL;DR
+
+![Comparison of GPU job approaches](./docs/result.png)
+
+| Approach | Setup effort | Dev UX | Scalability | Price (USD) |
+|-----------|--------------|--------|-------------|-------------|
+| SSH | 🛋️-⚖️ | 4/5 | 1/5 | ~2.43 |
+| Modal | 🛋️ | 5/5 | 5/5 | ~1.65 |
+| K8S | ⚖️-🏋️‍♂️ | 3/5 | 5/5 | ~2.43 |
+| SageMaker | ⚖️ | 3/5 | 5/5 | ~2.24 |
+
+Setup effort: 🛋️ = minimal, ⚖️ = moderate, 🏋️‍♂️ = heavy lifting.
+
+## SSH into a VM
+
+```bash
+export HF_TOKEN=****
+export WANDB_PROJECT=gpu-jobs-comparison
+export WANDB_API_KEY=****
+export RUN_NAME=phi-3-text2sql-ssh
+
+pip install -r requirements.txt
+python text2sql_training.py
+```
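+
+If the VM is remote, it helps to detach the run from the SSH session so it survives a dropped connection. A minimal sketch, assuming the VM is reachable as `ubuntu@gpu-vm`, this repo is already cloned to `~/gpu-jobs-comparison`, and the variables above are exported in that shell:
+
+```bash
+ssh ubuntu@gpu-vm
+cd ~/gpu-jobs-comparison
+nohup python text2sql_training.py > train.log 2>&1 &
+tail -f train.log
+```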
+
+## Kubernetes
+
+```bash
+minikube start --driver docker --container-runtime docker --gpus all
+
+export HF_TOKEN=****
+export WANDB_API_KEY=****
+
+kubectl create secret generic gpu-job-secrets --from-literal=HF_TOKEN=$HF_TOKEN --from-literal=WANDB_API_KEY=$WANDB_API_KEY
+kubectl create -f gpu-job/kubernetes/job-app-ml.yaml
+```
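+
+Once the Job is submitted, progress can be followed with standard `kubectl` commands (the Job name comes from `gpu-job/kubernetes/job-app-ml.yaml`):
+
+```bash
+kubectl get jobs
+kubectl logs -f job/phi-3-text2sql-k8s-job
+```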
+
+## Modal
+
+```bash
+export HF_TOKEN=****
+export WANDB_PROJECT=gpu-jobs-comparison
+export WANDB_API_KEY=****
+export RUN_NAME=phi-3-text2sql-modal
+
+pip install modal
+
+modal setup
+modal deploy ./gpu-job/modal/run_training_job.py
+
+python ./gpu-job/modal/run_training_job.py
+```
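+
+The last command only spawns the remote function and returns immediately. One way to check on the detached run is the Modal CLI (the app name `gpu-jobs` is set in `gpu-job/modal/run_training_job.py`); the exact flags may differ across Modal versions:
+
+```bash
+modal app list
+modal app logs gpu-jobs
+```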
+
+## AWS SageMaker
+
+```bash
+export AWS_ACCESS_KEY_ID=****
+export AWS_SECRET_ACCESS_KEY=****
+export AWS_DEFAULT_REGION=us-east-1
+export AWS_ACCOUNT_ID=****
+
+pip install boto3 sagemaker awscli
+
+aws iam create-role --role-name sagemaker-execution-role --assume-role-policy-document file://gpu-job/aws-sagemaker/trust-policy.json
+aws iam attach-role-policy --role-name sagemaker-execution-role --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
+aws iam attach-role-policy --role-name sagemaker-execution-role --policy-arn arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess
+
+aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com
+aws ecr create-repository --repository-name gpu-jobs-comparison
+docker pull ghcr.io/kyryl-opens-ml/gpu-jobs-comparison:latest
+docker tag ghcr.io/kyryl-opens-ml/gpu-jobs-comparison:latest $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/gpu-jobs-comparison:latest
+docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/gpu-jobs-comparison:latest
+
+export HF_TOKEN=****
+export WANDB_PROJECT=gpu-jobs-comparison
+export WANDB_API_KEY=****
+export RUN_NAME=phi-3-text2sql-sagemaker
+python ./gpu-job/aws-sagemaker/run_processing.py
+```
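+
+`run_processing.py` blocks until the Processing job finishes; if the local shell goes away, the job keeps running server-side. A quick way to check recent jobs from the CLI (or use the SageMaker console):
+
+```bash
+aws sagemaker list-processing-jobs --sort-by CreationTime --sort-order Descending --max-results 5
+```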
diff --git a/docs/result.png b/docs/result.png
new file mode 100644
index 0000000..60d6e5e
Binary files /dev/null and b/docs/result.png differ
diff --git a/gpu-job/aws-sagemaker/run_processing.py b/gpu-job/aws-sagemaker/run_processing.py
new file mode 100644
index 0000000..0cd03fc
--- /dev/null
+++ b/gpu-job/aws-sagemaker/run_processing.py
@@ -0,0 +1,31 @@
+from sagemaker.processing import Processor
+import os
+
+
+env = {
+    "WANDB_PROJECT": os.getenv("WANDB_PROJECT"),
+    "WANDB_API_KEY": os.getenv("WANDB_API_KEY"),
+    "HF_TOKEN": os.getenv("HF_TOKEN"),
+    "RUN_NAME": os.getenv("RUN_NAME"),
+}
+
+sagemaker_role_arn = f"arn:aws:iam::{os.getenv('AWS_ACCOUNT_ID')}:role/sagemaker-execution-role"
+image_uri = f"{os.getenv('AWS_ACCOUNT_ID')}.dkr.ecr.{os.getenv('AWS_DEFAULT_REGION')}.amazonaws.com/gpu-jobs-comparison:latest"
+
+processor = Processor(
+    role=sagemaker_role_arn,
+    image_uri=image_uri,
+    instance_count=1,
+    instance_type='ml.g5.2xlarge',
+    env=env
+)
+
+# Define processing inputs and outputs (if any)
+processing_inputs = []
+processing_outputs = []
+
+processor.run(
+    inputs=processing_inputs,
+    outputs=processing_outputs,
+    arguments=['python', 'text2sql_training.py']
+)
diff --git a/gpu-job/aws-sagemaker/trust-policy.json b/gpu-job/aws-sagemaker/trust-policy.json
new file mode 100644
index 0000000..3312141
--- /dev/null
+++ b/gpu-job/aws-sagemaker/trust-policy.json
@@ -0,0 +1,12 @@
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Principal": {
+        "Service": "sagemaker.amazonaws.com"
+      },
+      "Action": "sts:AssumeRole"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/gpu-job/azure-ml/.todo b/gpu-job/azure-ml/.todo
new file mode 100644
index 0000000..e69de29
diff --git a/gpu-job/dstack/.todo b/gpu-job/dstack/.todo
new file mode 100644
index 0000000..e69de29
diff --git a/gpu-job/gcp-vertex/.todo b/gpu-job/gcp-vertex/.todo
new file mode 100644
index 0000000..e69de29
diff --git a/gpu-job/gcp-vertex/run_custom_job.py b/gpu-job/gcp-vertex/run_custom_job.py
new file mode 100644
index 0000000..f3f3e2e
--- /dev/null
+++ b/gpu-job/gcp-vertex/run_custom_job.py
@@ -0,0 +1,52 @@
+import os
+from google.cloud import aiplatform
+
+# Set your project ID and location
+PROJECT_ID = os.getenv('GOOGLE_CLOUD_PROJECT', 'gothic-doodad-323015')
+LOCATION = os.getenv('GOOGLE_CLOUD_LOCATION', 'us-central1')
+STAGING_BUCKET = 'gs://gpu-jobs-comparison'  # Replace with your staging bucket
+
+# Initialize Vertex AI
+aiplatform.init(project=PROJECT_ID, location=LOCATION, staging_bucket=STAGING_BUCKET)
+
+# Define environment variables
+env = {
+    "WANDB_PROJECT": os.getenv("WANDB_PROJECT"),
+    "WANDB_API_KEY": os.getenv("WANDB_API_KEY"),
+    "HF_TOKEN": os.getenv("HF_TOKEN"),
+    "RUN_NAME": os.getenv("RUN_NAME"),
+}
+
+# Define the container image URI from Artifact Registry
+image_uri = "us-central1-docker.pkg.dev/gothic-doodad-323015/gpu-jobs-comparison/gpu-jobs-comparis"
+
+# Define the worker pool specification
+worker_pool_specs = [
+    {
+        "machine_spec": {
+            "machine_type": "n1-standard-4",
+            "accelerator_type": "NVIDIA_TESLA_T4",
+            "accelerator_count": 1,
+        },
+        "replica_count": 1,
+        "container_spec": {
+            "image_uri": image_uri,
+            "command": ["python", "text2sql_training.py"],  # Command to run the training script
+            "args": [],  # Additional arguments can be added here
+            "env": [{"name": key, "value": value} for key, value in env.items()]
+        },
+    }
+]
+
+# Create the CustomJob
+job = aiplatform.CustomJob(
+    display_name='text2sql-training',
+    worker_pool_specs=worker_pool_specs,
+    labels={'env': 'production'},  # Example label
+)
+
+# Run the job
+job.run(sync=True)
\ No newline at end of file
diff --git a/gpu-job/kubernetes/job-app-ml.yaml b/gpu-job/kubernetes/job-app-ml.yaml
new file mode 100644
index 0000000..901fc5b
--- /dev/null
+++ b/gpu-job/kubernetes/job-app-ml.yaml
@@ -0,0 +1,34 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: phi-3-text2sql-k8s-job
+spec:
+  parallelism: 1
+  template:
+    spec:
+      restartPolicy: Never
+      containers:
+        - image: ghcr.io/kyryl-opens-ml/gpu-jobs-comparison:latest
+          name: training
+          env:
+            - name: HF_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: gpu-job-secrets
+                  key: HF_TOKEN
+            - name: WANDB_PROJECT
+              value: gpu-jobs-comparison
+            - name: WANDB_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: gpu-job-secrets
+                  key: WANDB_API_KEY
+            - name: RUN_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+          command: ["python"]
+          args: ["text2sql_training.py"]
+          resources:
+            limits:
+              nvidia.com/gpu: 1
\ No newline at end of file
diff --git a/gpu-job/modal/run_training_job.py b/gpu-job/modal/run_training_job.py
new file mode 100644
index 0000000..3cd69b9
--- /dev/null
+++ b/gpu-job/modal/run_training_job.py
@@ -0,0 +1,29 @@
+import os
+
+import modal
+from modal import Image
+
+app = modal.App("gpu-jobs")
+env = {
+    "WANDB_PROJECT": os.getenv("WANDB_PROJECT"),
+    "WANDB_API_KEY": os.getenv("WANDB_API_KEY"),
+    "HF_TOKEN": os.getenv("HF_TOKEN"),
+    "RUN_NAME": os.getenv("RUN_NAME"),
+}
+custom_image = Image.from_registry("ghcr.io/kyryl-opens-ml/gpu-jobs-comparison:latest").env(env)
+
+
+@app.function(image=custom_image, gpu="a10g", timeout=10 * 60 * 60)
+def run_training():
+    from text2sql_training import main
+    main()
+
+
+def run_from_python():
+    fn = modal.Function.lookup("gpu-jobs", "run_training")
+    fn_id = fn.spawn()
+    print(f"Run training object: {fn_id}")
+
+
+if __name__ == "__main__":
+    run_from_python()
diff --git a/gpu-job/runpod/run_training_job.py b/gpu-job/runpod/run_training_job.py
new file mode 100644
index 0000000..cb60e7c
--- /dev/null
+++ b/gpu-job/runpod/run_training_job.py
@@ -0,0 +1,21 @@
+import os
+
+import runpod
+
+# Read the API key from the environment instead of committing it to the repo
+runpod.api_key = os.getenv("RUNPOD_API_KEY")
+
+# Get all my pods
+pods = runpod.get_pods()
+
+# Create a pod
+pod = runpod.create_pod("test", "runpod/stack", "NVIDIA GeForce RTX 3070")
+
+# Get the pod that was just created
+pod = runpod.get_pod(pod.id)
+
+# Stop the pod
+runpod.stop_pod(pod.id)
+
+# Resume the pod
+runpod.resume_pod(pod.id)
+
+# Terminate the pod
+runpod.terminate_pod(pod.id)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b421643
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,13 @@
+transformers==4.42.3
+datasets==2.15.0
+trl==0.9.6
+accelerate==0.32.1
+typer==0.6.1
+wandb==0.17.4
+ruff==0.5.0
+great-expectations==0.15.25
+pytest-cov==3.0.0
+peft==0.11.1
+evaluate==0.4.2
+packaging==23.2
+ninja==1.11.1.1
\ No newline at end of file
diff --git a/text2sql_training.py b/text2sql_training.py
new file mode 100644
index 0000000..40afeed
--- /dev/null
+++ b/text2sql_training.py
@@ -0,0 +1,215 @@
+import logging
+import os
+import sys
+from dataclasses import dataclass
+from functools import partial
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+import datasets
+import evaluate
+import torch
+import transformers
+from datasets import Dataset, DatasetDict, load_dataset
+from peft import AutoPeftModelForCausalLM, LoraConfig, TaskType
+from tqdm import tqdm
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    HfArgumentParser,
+    TextGenerationPipeline,
+    TrainingArguments,
+    pipeline,
+    set_seed,
+)
+from trl import SFTTrainer
+
+logger = logging.getLogger()
+
+
+@dataclass
+class ModelArguments:
+    model_id: str
+    lora_r: int
+    lora_alpha: int
+    lora_dropout: float
+    subsample: Optional[float]
+    target_modules: List[str]
+    max_seq_length: int
+
+
+def setup_logger(logger):
+    logging.basicConfig(
+        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+        datefmt="%m/%d/%Y %H:%M:%S",
+        handlers=[logging.StreamHandler(sys.stdout)],
+    )
+    log_level = logging.INFO
+    logger.setLevel(log_level)
+    datasets.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.enable_default_handler()
+    transformers.utils.logging.enable_explicit_format()
+
+
+def get_sql_data(random_state: int = 42, subsample: Optional[float] = None) -> DatasetDict:
+    dataset_name = "b-mc2/sql-create-context"
+    dataset = load_dataset(dataset_name, split="train")
+    print(f"dataset size: {len(dataset)}")
+    print(dataset.shuffle()[0])
+
+    if subsample is not None:
+        dataset = dataset.shuffle(seed=random_state).select(range(int(len(dataset) * subsample)))
+        print(f"dataset new size: {len(dataset)}")
+
+    dataset = dataset.train_test_split(test_size=0.05, seed=random_state)
+    return dataset
+
+
+@torch.no_grad()
+def predict(pipe: TextGenerationPipeline, question: str, context: str) -> str:
+    messages = [{"content": f"{context}\n Input: {question}", "role": "user"}]
+    prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, num_beams=1, temperature=0.3, top_k=50, top_p=0.95, max_time=180)
+    sql = outputs[0]["generated_text"][len(prompt):].strip()
+    return sql
+
+
+def get_pipeline(model_load_path: str) -> TextGenerationPipeline:
+    device_map = {"": 0}
+    new_model = AutoPeftModelForCausalLM.from_pretrained(
+        model_load_path,
+        low_cpu_mem_usage=True,
+        return_dict=True,
+        torch_dtype=torch.bfloat16,
+        trust_remote_code=True,
+        device_map=device_map,
+    )
+    merged_model = new_model.merge_and_unload()
+
+    tokenizer = AutoTokenizer.from_pretrained(model_load_path, trust_remote_code=True)
+    pipe = pipeline("text-generation", model=merged_model, tokenizer=tokenizer)
+    return pipe
+
+
+def run_evaluate_on_json(json_path: Path, model_load_path: Path):
+    df = Dataset.from_json(str(json_path)).to_pandas()
+    pipe = get_pipeline(model_load_path=model_load_path)
+    rouge = evaluate.load("rouge")
+
+    generated_sql = []
+    for idx in tqdm(range(len(df))):
+        context = df.iloc[idx]["context"]
+        question = df.iloc[idx]["question"]
+        sql = predict(question=question, context=context, pipe=pipe)
+        generated_sql.append(sql)
+
+    results = rouge.compute(predictions=generated_sql, references=df["answer"].values)
+    print(f"Metrics {results}")
+
+
+def create_message_column(row: Dict[str, str]) -> Dict[str, List[Dict[str, str]]]:
+    messages = []
+    user = {"content": f"{row['context']}\n Input: {row['question']}", "role": "user"}
+    messages.append(user)
+    assistant = {"content": f"{row['answer']}", "role": "assistant"}
+    messages.append(assistant)
+    return {"messages": messages}
+
+
+def format_dataset_chatml(row: Dict[str, List[Dict[str, str]]], tokenizer: AutoTokenizer) -> Dict[str, str]:
+    return {"text": tokenizer.apply_chat_template(row["messages"], add_generation_prompt=False, tokenize=False)}
+
+
+def process_dataset(model_id: str, dataset: DatasetDict) -> DatasetDict:
+    tokenizer_id = model_id
+    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
+    tokenizer.padding_side = "right"
+
+    dataset_chatml = dataset.map(create_message_column)
+    dataset_chatml = dataset_chatml.map(partial(format_dataset_chatml, tokenizer=tokenizer))
+    return dataset_chatml
+
+
+def get_model(model_id: str, device_map: Dict[str, int]) -> Tuple[AutoTokenizer, AutoModelForCausalLM]:
+    # Prefer bfloat16 + FlashAttention 2 when the GPU supports it; otherwise fall back to float16 + SDPA.
+    if torch.cuda.is_bf16_supported():
+        compute_dtype = torch.bfloat16
+        attn_implementation = "flash_attention_2"
+    else:
+        compute_dtype = torch.float16
+        attn_implementation = "sdpa"
+    # Log which attention implementation was selected.
+    print(attn_implementation)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, add_eos_token=True, use_fast=True)
+    tokenizer.pad_token = tokenizer.unk_token
+    tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
+    tokenizer.padding_side = "left"
+
+    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=compute_dtype, trust_remote_code=True, device_map=device_map, attn_implementation=attn_implementation)
+    return tokenizer, model
+
+
+def train(config: Dict[str, Any]):
+    setup_logger(logger)
+
+    parser = HfArgumentParser((ModelArguments, TrainingArguments))
+    model_args, training_args = parser.parse_dict(config)
+
+    logger.info(f"model_args = {model_args}")
+    logger.info(f"training_args = {training_args}")
+    set_seed(training_args.seed)
+
+    dataset = get_sql_data(subsample=model_args.subsample)
+    dataset_chatml = process_dataset(model_id=model_args.model_id, dataset=dataset)
+    logger.info(dataset_chatml["train"][0])
+
+    device_map = {"": 0}
+    tokenizer, model = get_model(model_id=model_args.model_id, device_map=device_map)
+    peft_config = LoraConfig(r=model_args.lora_r, lora_alpha=model_args.lora_alpha, lora_dropout=model_args.lora_dropout, task_type=TaskType.CAUSAL_LM, target_modules=model_args.target_modules)
+
+    trainer = SFTTrainer(model=model, train_dataset=dataset_chatml["train"], eval_dataset=dataset_chatml["test"], peft_config=peft_config, dataset_text_field="text", max_seq_length=model_args.max_seq_length, tokenizer=tokenizer, args=training_args)
+    trainer.train()
+    trainer.save_model()
+    trainer.create_model_card()
+
+
+def main():
+    run_name = os.getenv("RUN_NAME", 'phi-3-text2sql-default')
+    config = {
+        "num_train_epochs": 1,
+        "subsample": None,
+        "output_dir": run_name,
+        "model_id": "microsoft/Phi-3-mini-4k-instruct",
+        "lora_r": 16,
+        "lora_alpha": 16,
+        "lora_dropout": 0.05,
+        "target_modules": ["k_proj", "q_proj", "v_proj", "o_proj", "gate_proj", "down_proj", "up_proj"],
+        "max_seq_length": 512,
+        "push_to_hub": True,
+        "eval_strategy": "steps",
+        "do_eval": True,
+        "per_device_train_batch_size": 4,
+        "per_device_eval_batch_size": 4,
+        "gradient_accumulation_steps": 4,
+        "learning_rate": 0.0001,
+        "optim": "adamw_torch",
+        "warmup_ratio": 0.1,
+        "logging_first_step": True,
+        "logging_steps": 500,
+        "save_steps": 500,
+        "eval_steps": 500,
+        "eval_on_start": True,
+        "seed": 42,
+        "bf16": True,
+        "fp16": False,
+        "report_to": ["wandb"],
+        "lr_scheduler_type": "linear",
+        "log_level": "debug",
+    }
+    train(config=config)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file