
feat: Upgrade ray version on mistral7b on inf2 blueprint #550

Merged 1 commit on Jun 4, 2024
66 changes: 0 additions & 66 deletions gen-ai/inference/gradio-ui/gradio-app-mistral.py

This file was deleted.

19 changes: 7 additions & 12 deletions gen-ai/inference/mistral-7b-rayserve-inf2/Dockerfile
@@ -1,5 +1,5 @@
# https://hub.docker.com/layers/rayproject/ray/2.11.0-py310/images/sha256-de798e487b76a8f2412c718c43c5f342b3eb05e0705a71325102904cd27c3613?context=explore
-FROM rayproject/ray:2.11.0-py310
+FROM rayproject/ray:2.22.0-py310

# Maintainer label
LABEL maintainer="DoEKS"
@@ -15,27 +15,22 @@ RUN . /etc/os-release && \
    sudo echo "deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main" > /etc/apt/sources.list.d/neuron.list && \
    sudo wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - && \
    sudo apt-get update -y && \
-   sudo apt-get install git -y && \
-   sudo apt-get install aws-neuronx-dkms=2.* -y && \
-   sudo apt-get install aws-neuronx-collectives=2.* -y && \
-   sudo apt-get install aws-neuronx-runtime-lib=2.* -y && \
-   sudo apt-get install aws-neuronx-tools=2.* -y
+   sudo apt-get install aws-neuronx-dkms aws-neuronx-collectives=2.* aws-neuronx-runtime-lib=2.* aws-neuronx-tools=2.* -y && \
+   sudo apt-get clean

# Switch back to a non-root user for the subsequent commands
USER $USER

# Set pip repository pointing to the Neuron repository and install required Python packages
# huggingface_hub is needed to login to huggingface repo for the model access
RUN pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com && \
-   pip install neuronx-cc==2.* torch-neuronx==1.13.* torchvision transformers-neuronx sentencepiece transformers && \
-   pip install starlette==0.34.0 && \
-   pip install huggingface_hub
+   pip install wget awscli regex neuronx-cc==2.* torch-neuronx torchvision transformers-neuronx sentencepiece transformers

# Add Neuron path to PATH
ENV PATH /opt/aws/neuron/bin:$PATH

# Set LD_LIBRARY_PATH to include the directory with libpython3.10.so.1.0
ENV LD_LIBRARY_PATH /home/ray/anaconda3/lib:$LD_LIBRARY_PATH

WORKDIR /serve_app

COPY ray_serve_mistral.py /serve_app/ray_serve_mistral.py
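
A quick way to sanity-check the rebuilt image is to import the Neuron packages the pip step above installs. This is a minimal sketch, not part of the PR; the image tag is a placeholder for whatever you build:

# check_neuron.py -- minimal import smoke test for the packages installed above.
# Hypothetical usage (tag is a placeholder): docker run --rm <image-tag> python check_neuron.py
import torch                 # installed as a dependency of torch-neuronx
import torch_neuronx         # PyTorch integration for AWS Neuron
import transformers_neuronx  # Neuron-optimized transformer runtime
import sentencepiece         # tokenizer backend used by Mistral
import transformers

print("torch:", torch.__version__)
print("transformers:", transformers.__version__)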
137 changes: 137 additions & 0 deletions gen-ai/inference/mistral-7b-rayserve-inf2/gradio-ui.yaml
@@ -0,0 +1,137 @@
# gradio-deploy.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: gradio-mistral7b-inf2
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gradio-deployment
  namespace: gradio-mistral7b-inf2
  labels:
    app: gradio
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gradio
  template:
    metadata:
      labels:
        app: gradio
    spec:
      containers:
      - name: gradio
        image: public.ecr.aws/data-on-eks/gradio-web-app-base:latest
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 7860
        resources:
          requests:
            cpu: "512m"
            memory: "2048Mi"
          limits:
            cpu: "1"
            memory: "4096Mi"
        env:
        - name: MODEL_ENDPOINT
          value: "/infer"
        - name: SERVICE_NAME
          value: "http://mistral-serve-svc.mistral.svc.cluster.local:8000"
        volumeMounts:
        - name: gradio-app-script
          mountPath: /app/gradio-app.py
          subPath: gradio-app-mistral7b-inf2.py
      volumes:
      - name: gradio-app-script
        configMap:
          name: gradio-app-script
---
apiVersion: v1
kind: Service
metadata:
  name: gradio-service
  namespace: gradio-mistral7b-inf2
spec:
  selector:
    app: gradio
  ports:
  - name: http
    protocol: TCP
    port: 7860
    targetPort: 7860
  type: ClusterIP
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: gradio-app-script
  namespace: gradio-mistral7b-inf2
data:
  gradio-app-mistral7b-inf2.py: |
    import gradio as gr
    import requests
    import os


    # Model endpoint and service name, injected via the Deployment env vars
    model_endpoint = os.environ.get("MODEL_ENDPOINT", "/infer")
    service_name = os.environ.get("SERVICE_NAME", "http://localhost:8000")

    # Function to generate text
    def generate_text(message, history):
        prompt = message

        # Create the URL for the inference request
        url = f"{service_name}{model_endpoint}"

        try:
            # Send the request to the model service
            response = requests.get(url, params={"sentence": prompt}, timeout=180)
            response.raise_for_status()  # Raise an exception for HTTP errors
            prompt_to_replace = "[INST]" + prompt + "[/INST]"

            # Remove the original prompt and instruction tags from the output
            text = response.text.replace(prompt_to_replace, "", 1).strip('["]?\n')
            # Remove the '<s>' and '</s>' sentinel tokens ('<s>' would render as strikethrough markdown)
            if text.startswith("<s>"):
                text = text.replace("<s>", "", 1)

            text = text.replace("</s>", "", 1)

            answer_only = text

            # Safety filter to remove harmful or inappropriate content
            answer_only = filter_harmful_content(answer_only)
            return answer_only
        except requests.exceptions.RequestException as e:
            # Handle any request exceptions (e.g., connection errors)
            return f"AI: Error: {str(e)}"


    # Define the safety filter function (implement as needed)
    def filter_harmful_content(text):
        # TODO: Implement a safety filter to remove harmful or inappropriate content

        # For now, simply return the text as-is
        return text


    # Define the Gradio ChatInterface
    chat_interface = gr.ChatInterface(
        generate_text,
        chatbot=gr.Chatbot(height=300),
        textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
        title="Mistral AI Chat",
        description="Ask me any question",
        theme="soft",
        examples=["How Big Is Observable Universe", "How to kill a linux process"],
        cache_examples=False,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )

    # Launch the ChatInterface
    chat_interface.launch()
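
For reference, the Serve endpoint this app talks to can also be exercised directly from any pod with cluster DNS access. This sketch mirrors the request shape generate_text() uses, with the URL assembled from the SERVICE_NAME and MODEL_ENDPOINT values above; the prompt string is just an example:

import requests

# Same request shape as generate_text() in the ConfigMap above.
url = "http://mistral-serve-svc.mistral.svc.cluster.local:8000/infer"
response = requests.get(url, params={"sentence": "What is Amazon EKS?"}, timeout=180)
response.raise_for_status()
print(response.text)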
17 changes: 13 additions & 4 deletions gen-ai/inference/mistral-7b-rayserve-inf2/ray-service-mistral.yaml
@@ -31,6 +31,8 @@ spec:
        env_vars:
          MODEL_ID: "mistralai/Mistral-7B-Instruct-v0.2"
          NEURON_CC_FLAGS: "-O1"
+         LD_LIBRARY_PATH: "/home/ray/anaconda3/lib:$LD_LIBRARY_PATH"
+         NEURON_CORES: "2"
        deployments:
        - name: mistral-7b
          autoscaling_config:
@@ -46,8 +48,11 @@
          ray_actor_options:
            num_cpus: 10
            resources: {"neuron_cores": 2}
+           runtime_env:
+             env_vars:
+               LD_LIBRARY_PATH: "/home/ray/anaconda3/lib:$LD_LIBRARY_PATH"
  rayClusterConfig:
-   rayVersion: '2.11.0'
+   rayVersion: '2.22.0'
    enableInTreeAutoscaling: true
    headGroupSpec:
      headService:
@@ -60,15 +65,15 @@
        spec:
          containers:
          - name: head
-           image: public.ecr.aws/data-on-eks/ray2.11.0-py310-mistral7b-neuron:latest
+           image: public.ecr.aws/data-on-eks/ray2.22.0-py310-mistral7b-neuron:latest
            imagePullPolicy: Always # Ensure the image is always pulled when updated
            lifecycle:
              preStop:
                exec:
                  command: ["/bin/sh", "-c", "ray stop"]
            ports:
            - containerPort: 6379
-             name: gcs-server
+             name: gcs
            - containerPort: 8265
              name: dashboard
            - containerPort: 10001
@@ -86,6 +91,8 @@
                cpu: "2"
                memory: "20G"
            env:
+           - name: LD_LIBRARY_PATH
+             value: "/home/ray/anaconda3/lib:$LD_LIBRARY_PATH"
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
@@ -108,7 +115,7 @@ spec:
        spec:
          containers:
          - name: worker
-           image: public.ecr.aws/data-on-eks/ray2.11.0-py310-mistral7b-neuron:latest
+           image: public.ecr.aws/data-on-eks/ray2.22.0-py310-mistral7b-neuron:latest
            imagePullPolicy: Always # Ensure the image is always pulled when updated
            lifecycle:
              preStop:
@@ -125,6 +132,8 @@
                memory: "360G" # All memory of inf2.24xlarge; 24G for daemonset overhead
                aws.amazon.com/neuron: "6" # All Neuron cores of inf2.24xlarge
            env:
+           - name: LD_LIBRARY_PATH
+             value: "/home/ray/anaconda3/lib:$LD_LIBRARY_PATH"
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
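
A note on the new env vars: ray_serve_mistral.py itself is not part of this diff, but the MODEL_ID, NEURON_CORES, and NEURON_CC_FLAGS values added above suggest the serve script consumes them roughly along these lines. This is a hypothetical sketch; names and defaults are assumptions, not the actual code:

import os

# Hypothetical sketch only; the real ray_serve_mistral.py is not shown in this PR.
model_id = os.environ.get("MODEL_ID", "mistralai/Mistral-7B-Instruct-v0.2")
neuron_cores = int(os.environ.get("NEURON_CORES", "2"))  # Neuron cores per replica
cc_flags = os.environ.get("NEURON_CC_FLAGS", "-O1")      # Neuron compiler optimization level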