Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync release branch with main in preparation for RHOAI 2.10 release #212

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions config/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,34 @@ generatorOptions:
disableNameSuffixHash: true

vars:
- fieldref:
fieldPath: data.caikit-tgis-image
name: caikit-tgis-image
objref:
apiVersion: v1
kind: ConfigMap
name: odh-model-controller-parameters
- fieldref:
fieldPath: data.tgis-image
name: tgis-image
objref:
apiVersion: v1
kind: ConfigMap
name: odh-model-controller-parameters
- fieldref:
fieldPath: data.ovms-image
name: ovms-image
objref:
apiVersion: v1
kind: ConfigMap
name: odh-model-controller-parameters
- fieldref:
fieldPath: data.vllm-image
name: vllm-image
objref:
apiVersion: v1
kind: ConfigMap
name: odh-model-controller-parameters
- fieldref:
fieldPath: metadata.namespace
name: mesh-namespace
Expand Down
4 changes: 4 additions & 0 deletions config/base/params.env
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
# Image references substituted into the manifests by kustomize var expansion
# (consumed via the odh-model-controller-parameters ConfigMap).
odh-model-controller=quay.io/opendatahub/odh-model-controller:v0.12.0-latest
# Serving-runtime images referenced as $(caikit-tgis-image) etc. in the
# Template objects under config/runtimes/.
caikit-tgis-image=quay.io/opendatahub/caikit-tgis-serving:stable-01d6d99
tgis-image=quay.io/opendatahub/text-generation-inference:stable-ed9d828
ovms-image=quay.io/opendatahub/openvino_model_server:stable-nightly-2024-05-26
vllm-image=quay.io/opendatahub/vllm:stable-affc486
1 change: 1 addition & 0 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ bases:
- ../rbac
- ../manager
- ../webhook
- ../runtimes
17 changes: 10 additions & 7 deletions config/overlays/odh/params.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
varReference:
- path: metadata/name
kind: ClusterRoleBinding
apiGroup: rbac.authorization.k8s.io/v1
- path: spec/template/spec/containers[]/image
kind: Deployment
apiVersion: apps/v1
# Fields in which kustomize expands $(VAR) references for this overlay.
varReference:
  # Parameterized ClusterRoleBinding names.
  - path: metadata/name
    kind: ClusterRoleBinding
    apiGroup: rbac.authorization.k8s.io/v1
  # Controller Deployment container image (e.g. the odh-model-controller image).
  - path: spec/template/spec/containers[]/image
    kind: Deployment
    apiVersion: apps/v1
  # Container images inside the serving-runtime Template objects
  # (caikit-tgis-image, tgis-image, ovms-image, vllm-image).
  - path: objects[]/spec/containers[]/image
    kind: Template
    apiVersion: template.openshift.io/v1
81 changes: 81 additions & 0 deletions config/runtimes/caikit-tgis-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
apiVersion: template.openshift.io/v1
kind: Template
metadata:
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/ootb: 'true'
  annotations:
    description: Caikit is an AI toolkit that enables users to manage models through a set of developer friendly APIs. It provides a consistent format for creating and using AI models against a wide variety of data domains and tasks.
    openshift.io/provider-display-name: Red Hat, Inc.
    tags: rhods,rhoai,kserve,servingruntime
    # FIX: restored canonical domain — the scraped source carried the mirror
    # domain "github.com" instead of "github.com".
    template.openshift.io/documentation-url: https://github.com/opendatahub-io/caikit-tgis-serving
    template.openshift.io/long-description: This template defines resources needed to deploy caikit-tgis-serving servingruntime with Red Hat Data Science KServe for LLM model
    template.openshift.io/support-url: https://access.redhat.com
    opendatahub.io/modelServingSupport: '["single"]'
    opendatahub.io/apiProtocol: 'REST'
  name: caikit-tgis-serving-template
objects:
  # Two-container runtime: TGIS loads/serves the model, the Caikit container
  # fronts it with a REST API (gRPC disabled, HTTP enabled below).
  - apiVersion: serving.kserve.io/v1alpha1
    kind: ServingRuntime
    metadata:
      name: caikit-tgis-runtime
      annotations:
        openshift.io/display-name: Caikit TGIS ServingRuntime for KServe
        opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
      labels:
        opendatahub.io/dashboard: 'true'
    spec:
      annotations:
        prometheus.io/port: '3000'
        prometheus.io/path: /metrics
      multiModel: false
      supportedModelFormats:
        - autoSelect: true
          name: caikit
      containers:
        # Model server; image resolved from params.env via kustomize vars.
        - name: kserve-container
          image: $(tgis-image)
          command:
            - text-generation-launcher
          args:
            - --model-name=/mnt/models/artifacts/
          env:
            - name: HF_HOME
              value: /tmp/hf_home
        # Caikit API layer in front of TGIS.
        - name: transformer-container
          image: $(caikit-tgis-image)
          env:
            - name: RUNTIME_LOCAL_MODELS_DIR
              value: /mnt/models
            - name: HF_HOME
              value: /tmp/hf_home
            - name: RUNTIME_GRPC_ENABLED
              value: 'false'
            - name: RUNTIME_HTTP_ENABLED
              value: 'true'
          ports:
            - containerPort: 8080
              protocol: TCP
          readinessProbe:
            exec:
              command:
                - python
                - -m
                - caikit_health_probe
                - readiness
            initialDelaySeconds: 5
          livenessProbe:
            exec:
              command:
                - python
                - -m
                - caikit_health_probe
                - liveness
            initialDelaySeconds: 5
          # Probes the Caikit HTTP port declared above (containerPort 8080).
          startupProbe:
            httpGet:
              port: 8080
              path: /health
            # Allow 12 mins to start
            failureThreshold: 24
            periodSeconds: 30
11 changes: 11 additions & 0 deletions config/runtimes/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Labels stamped onto every runtime Template resource below.
commonLabels:
  app: odh-dashboard
  app.kubernetes.io/part-of: odh-dashboard

# Out-of-the-box serving-runtime Templates shipped with the controller.
resources:
  - ovms-mm-template.yaml
  - caikit-tgis-template.yaml
  - tgis-template.yaml
  - ovms-kserve-template.yaml
  - vllm-template.yaml
64 changes: 64 additions & 0 deletions config/runtimes/ovms-kserve-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
kind: Template
apiVersion: template.openshift.io/v1
metadata:
  name: kserve-ovms
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/ootb: 'true'
  annotations:
    tags: 'kserve-ovms,servingruntime'
    description: 'OpenVino Model Serving Definition'
    opendatahub.io/modelServingSupport: '["single"]'
    opendatahub.io/apiProtocol: 'REST'
objects:
  # Single-model OpenVINO runtime for KServe.
  - apiVersion: serving.kserve.io/v1alpha1
    kind: ServingRuntime
    metadata:
      annotations:
        openshift.io/display-name: OpenVINO Model Server
        opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
      name: kserve-ovms
      labels:
        opendatahub.io/dashboard: 'true'
    spec:
      multiModel: false
      annotations:
        prometheus.io/port: '8888'
        prometheus.io/path: /metrics
      supportedModelFormats:
        - name: openvino_ir
          version: opset13
          autoSelect: true
        # NOTE(review): onnx is the only entry without autoSelect — confirm intentional.
        - name: onnx
          version: '1'
        - name: tensorflow
          version: '1'
          autoSelect: true
        - name: tensorflow
          version: '2'
          autoSelect: true
        - name: paddle
          version: '2'
          autoSelect: true
        - name: pytorch
          version: '2'
          autoSelect: true
      protocolVersions:
        - v2
        - grpc-v2
      containers:
        - name: kserve-container
          # Image resolved from params.env via kustomize vars.
          image: $(ovms-image)
          args:
            - '--model_name={{.Name}}'
            - '--port=8001'
            - '--rest_port=8888'
            - '--model_path=/mnt/models'
            - '--file_system_poll_wait_seconds=0'
            - '--grpc_bind_address=0.0.0.0'
            - '--rest_bind_address=0.0.0.0'
            - '--target_device=AUTO'
            - '--metrics_enable'
          ports:
            # REST port (matches --rest_port above).
            - containerPort: 8888
              protocol: TCP
65 changes: 65 additions & 0 deletions config/runtimes/ovms-mm-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
kind: Template
apiVersion: template.openshift.io/v1
metadata:
  name: ovms
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/ootb: 'true'
  annotations:
    tags: 'ovms,servingruntime'
    description: 'OpenVino Model Serving Definition'
    opendatahub.io/modelServingSupport: '["multi"]'
    opendatahub.io/apiProtocol: 'REST'
objects:
  # Multi-model (ModelMesh) OpenVINO runtime.
  - apiVersion: serving.kserve.io/v1alpha1
    kind: ServingRuntime
    metadata:
      name: ovms
      annotations:
        openshift.io/display-name: 'OpenVINO Model Server'
        opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
      labels:
        opendatahub.io/dashboard: 'true'
    spec:
      # Adapter settings used by ModelMesh to manage model loads in OVMS.
      builtInAdapter:
        env:
          - name: OVMS_FORCE_TARGET_DEVICE
            value: AUTO
        memBufferBytes: 134217728
        modelLoadingTimeoutMillis: 90000
        runtimeManagementPort: 8888
        serverType: ovms
      containers:
        - args:
            - '--port=8001'
            - '--rest_port=8888'
            - '--config_path=/models/model_config_list.json'
            - '--file_system_poll_wait_seconds=0'
            - '--grpc_bind_address=0.0.0.0'
            - '--rest_bind_address=0.0.0.0'
          image: $(ovms-image)
          name: ovms
          # NOTE(review): zero requests/limits look like placeholders to be
          # overridden at deploy time — confirm against the consumer.
          resources:
            limits:
              cpu: '0'
              memory: 0Gi
            requests:
              cpu: '0'
              memory: 0Gi
      grpcDataEndpoint: 'port:8001'
      grpcEndpoint: 'port:8085'
      multiModel: true
      protocolVersions:
        - grpc-v1
      replicas: 1
      supportedModelFormats:
        - autoSelect: true
          name: openvino_ir
          version: opset1
        - autoSelect: true
          name: onnx
          version: '1'
        - autoSelect: true
          name: tensorflow
          version: '2'
parameters: []
68 changes: 68 additions & 0 deletions config/runtimes/tgis-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
apiVersion: template.openshift.io/v1
kind: Template
metadata:
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/ootb: 'true'
  annotations:
    description: Text Generation Inference Server (TGIS) is a high performance inference engine that deploys and serves Large Language Models.
    openshift.io/display-name: TGIS Standalone ServingRuntime for KServe
    openshift.io/provider-display-name: Red Hat, Inc.
    tags: rhods,rhoai,kserve,servingruntime
    # FIX: restored canonical domain — the scraped source carried the mirror
    # domain "github.com" instead of "github.com".
    template.openshift.io/documentation-url: https://github.com/opendatahub-io/text-generation-inference
    template.openshift.io/long-description: This template defines resources needed to deploy TGIS standalone servingruntime with KServe in Red Hat OpenShift AI
    opendatahub.io/modelServingSupport: '["single"]'
    opendatahub.io/apiProtocol: 'gRPC'
  name: tgis-grpc-serving-template
objects:
  # Standalone TGIS runtime: gRPC inference on 8033, HTTP health/metrics on 3000.
  - apiVersion: serving.kserve.io/v1alpha1
    kind: ServingRuntime
    metadata:
      name: tgis-grpc-runtime
      annotations:
        openshift.io/display-name: TGIS Standalone ServingRuntime for KServe
        opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
      labels:
        opendatahub.io/dashboard: 'true'
    spec:
      annotations:
        prometheus.io/port: '3000'
        prometheus.io/path: '/metrics'
      multiModel: false
      supportedModelFormats:
        - autoSelect: true
          name: pytorch
      containers:
        - name: kserve-container
          # Image resolved from params.env via kustomize vars.
          image: $(tgis-image)
          command: ['text-generation-launcher']
          args:
            - '--model-name=/mnt/models/'
            - '--port=3000'
            - '--grpc-port=8033'
          env:
            - name: HF_HOME
              value: /tmp/hf_home
          readinessProbe:
            exec:
              command:
                - curl
                - localhost:3000/health
            initialDelaySeconds: 5
          livenessProbe:
            exec:
              command:
                - curl
                - localhost:3000/health
            initialDelaySeconds: 5
          startupProbe:
            httpGet:
              # FIX: was 8080 — nothing in this container listens there. The
              # launcher is started with --port=3000 and both readiness and
              # liveness probe localhost:3000/health, so the startup probe
              # must target 3000 as well or the pod never passes startup.
              port: 3000
              path: /health
            # Allow 12 mins to start
            failureThreshold: 24
            periodSeconds: 30
          ports:
            - containerPort: 8033
              name: h2c
              protocol: TCP
Loading