Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync release branch with main in preparation for RHOAI 2.10 release #212

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions config/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,34 @@ generatorOptions:
disableNameSuffixHash: true

vars:
- fieldref:
fieldPath: data.caikit-tgis-image
name: caikit-tgis-image
objref:
apiVersion: v1
kind: ConfigMap
name: odh-model-controller-parameters
- fieldref:
fieldPath: data.tgis-image
name: tgis-image
objref:
apiVersion: v1
kind: ConfigMap
name: odh-model-controller-parameters
- fieldref:
fieldPath: data.ovms-image
name: ovms-image
objref:
apiVersion: v1
kind: ConfigMap
name: odh-model-controller-parameters
- fieldref:
fieldPath: data.vllm-image
name: vllm-image
objref:
apiVersion: v1
kind: ConfigMap
name: odh-model-controller-parameters
- fieldref:
fieldPath: metadata.namespace
name: mesh-namespace
Expand Down
4 changes: 4 additions & 0 deletions config/base/params.env
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
# Image references substituted into the manifests by kustomize var expansion
# (consumed via the odh-model-controller-parameters ConfigMap).
odh-model-controller=quay.io/opendatahub/odh-model-controller:v0.12.0-latest
# Serving-runtime images referenced as $(caikit-tgis-image) etc. in the
# Template objects under config/runtimes/.
caikit-tgis-image=quay.io/opendatahub/caikit-tgis-serving:stable-01d6d99
tgis-image=quay.io/opendatahub/text-generation-inference:stable-ed9d828
ovms-image=quay.io/opendatahub/openvino_model_server:stable-nightly-2024-05-26
vllm-image=quay.io/opendatahub/vllm:stable-affc486
1 change: 1 addition & 0 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ bases:
- ../rbac
- ../manager
- ../webhook
- ../runtimes
17 changes: 10 additions & 7 deletions config/overlays/odh/params.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
varReference:
- path: metadata/name
kind: ClusterRoleBinding
apiGroup: rbac.authorization.k8s.io/v1
- path: spec/template/spec/containers[]/image
kind: Deployment
apiVersion: apps/v1
# Fields in which kustomize expands $(VAR) references for this overlay.
varReference:
  # Parameterized ClusterRoleBinding names.
  - path: metadata/name
    kind: ClusterRoleBinding
    apiGroup: rbac.authorization.k8s.io/v1
  # Controller Deployment container image (e.g. the odh-model-controller image).
  - path: spec/template/spec/containers[]/image
    kind: Deployment
    apiVersion: apps/v1
  # Container images inside the serving-runtime Template objects
  # (caikit-tgis-image, tgis-image, ovms-image, vllm-image).
  - path: objects[]/spec/containers[]/image
    kind: Template
    apiVersion: template.openshift.io/v1
81 changes: 81 additions & 0 deletions config/runtimes/caikit-tgis-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
apiVersion: template.openshift.io/v1
kind: Template
metadata:
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/ootb: 'true'
  annotations:
    description: Caikit is an AI toolkit that enables users to manage models through a set of developer friendly APIs. It provides a consistent format for creating and using AI models against a wide variety of data domains and tasks.
    openshift.io/provider-display-name: Red Hat, Inc.
    tags: rhods,rhoai,kserve,servingruntime
    # FIX: restored canonical domain — the scraped source carried the mirror
    # domain "github.com" instead of "github.com".
    template.openshift.io/documentation-url: https://github.com/opendatahub-io/caikit-tgis-serving
    template.openshift.io/long-description: This template defines resources needed to deploy caikit-tgis-serving servingruntime with Red Hat Data Science KServe for LLM model
    template.openshift.io/support-url: https://access.redhat.com
    opendatahub.io/modelServingSupport: '["single"]'
    opendatahub.io/apiProtocol: 'REST'
  name: caikit-tgis-serving-template
objects:
  # Two-container runtime: TGIS loads/serves the model, the Caikit container
  # fronts it with a REST API (gRPC disabled, HTTP enabled below).
  - apiVersion: serving.kserve.io/v1alpha1
    kind: ServingRuntime
    metadata:
      name: caikit-tgis-runtime
      annotations:
        openshift.io/display-name: Caikit TGIS ServingRuntime for KServe
        opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
      labels:
        opendatahub.io/dashboard: 'true'
    spec:
      annotations:
        prometheus.io/port: '3000'
        prometheus.io/path: /metrics
      multiModel: false
      supportedModelFormats:
        - autoSelect: true
          name: caikit
      containers:
        # Model server; image resolved from params.env via kustomize vars.
        - name: kserve-container
          image: $(tgis-image)
          command:
            - text-generation-launcher
          args:
            - --model-name=/mnt/models/artifacts/
          env:
            - name: HF_HOME
              value: /tmp/hf_home
        # Caikit API layer in front of TGIS.
        - name: transformer-container
          image: $(caikit-tgis-image)
          env:
            - name: RUNTIME_LOCAL_MODELS_DIR
              value: /mnt/models
            - name: HF_HOME
              value: /tmp/hf_home
            - name: RUNTIME_GRPC_ENABLED
              value: 'false'
            - name: RUNTIME_HTTP_ENABLED
              value: 'true'
          ports:
            - containerPort: 8080
              protocol: TCP
          readinessProbe:
            exec:
              command:
                - python
                - -m
                - caikit_health_probe
                - readiness
            initialDelaySeconds: 5
          livenessProbe:
            exec:
              command:
                - python
                - -m
                - caikit_health_probe
                - liveness
            initialDelaySeconds: 5
          # Probes the Caikit HTTP port declared above (containerPort 8080).
          startupProbe:
            httpGet:
              port: 8080
              path: /health
            # Allow 12 mins to start
            failureThreshold: 24
            periodSeconds: 30
11 changes: 11 additions & 0 deletions config/runtimes/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Labels stamped onto every runtime Template resource below.
commonLabels:
  app: odh-dashboard
  app.kubernetes.io/part-of: odh-dashboard

# Out-of-the-box serving-runtime Templates shipped with the controller.
resources:
  - ovms-mm-template.yaml
  - caikit-tgis-template.yaml
  - tgis-template.yaml
  - ovms-kserve-template.yaml
  - vllm-template.yaml
64 changes: 64 additions & 0 deletions config/runtimes/ovms-kserve-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
kind: Template
apiVersion: template.openshift.io/v1
metadata:
  name: kserve-ovms
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/ootb: 'true'
  annotations:
    tags: 'kserve-ovms,servingruntime'
    description: 'OpenVino Model Serving Definition'
    opendatahub.io/modelServingSupport: '["single"]'
    opendatahub.io/apiProtocol: 'REST'
objects:
  # Single-model OpenVINO runtime for KServe.
  - apiVersion: serving.kserve.io/v1alpha1
    kind: ServingRuntime
    metadata:
      annotations:
        openshift.io/display-name: OpenVINO Model Server
        opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
      name: kserve-ovms
      labels:
        opendatahub.io/dashboard: 'true'
    spec:
      multiModel: false
      annotations:
        prometheus.io/port: '8888'
        prometheus.io/path: /metrics
      supportedModelFormats:
        - name: openvino_ir
          version: opset13
          autoSelect: true
        # NOTE(review): onnx is the only entry without autoSelect — confirm intentional.
        - name: onnx
          version: '1'
        - name: tensorflow
          version: '1'
          autoSelect: true
        - name: tensorflow
          version: '2'
          autoSelect: true
        - name: paddle
          version: '2'
          autoSelect: true
        - name: pytorch
          version: '2'
          autoSelect: true
      protocolVersions:
        - v2
        - grpc-v2
      containers:
        - name: kserve-container
          # Image resolved from params.env via kustomize vars.
          image: $(ovms-image)
          args:
            - '--model_name={{.Name}}'
            - '--port=8001'
            - '--rest_port=8888'
            - '--model_path=/mnt/models'
            - '--file_system_poll_wait_seconds=0'
            - '--grpc_bind_address=0.0.0.0'
            - '--rest_bind_address=0.0.0.0'
            - '--target_device=AUTO'
            - '--metrics_enable'
          ports:
            # REST port (matches --rest_port above).
            - containerPort: 8888
              protocol: TCP
65 changes: 65 additions & 0 deletions config/runtimes/ovms-mm-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
kind: Template
apiVersion: template.openshift.io/v1
metadata:
  name: ovms
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/ootb: 'true'
  annotations:
    tags: 'ovms,servingruntime'
    description: 'OpenVino Model Serving Definition'
    opendatahub.io/modelServingSupport: '["multi"]'
    opendatahub.io/apiProtocol: 'REST'
objects:
  # Multi-model (ModelMesh) OpenVINO runtime.
  - apiVersion: serving.kserve.io/v1alpha1
    kind: ServingRuntime
    metadata:
      name: ovms
      annotations:
        openshift.io/display-name: 'OpenVINO Model Server'
        opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
      labels:
        opendatahub.io/dashboard: 'true'
    spec:
      # Adapter settings used by ModelMesh to manage model loads in OVMS.
      builtInAdapter:
        env:
          - name: OVMS_FORCE_TARGET_DEVICE
            value: AUTO
        memBufferBytes: 134217728
        modelLoadingTimeoutMillis: 90000
        runtimeManagementPort: 8888
        serverType: ovms
      containers:
        - args:
            - '--port=8001'
            - '--rest_port=8888'
            - '--config_path=/models/model_config_list.json'
            - '--file_system_poll_wait_seconds=0'
            - '--grpc_bind_address=0.0.0.0'
            - '--rest_bind_address=0.0.0.0'
          image: $(ovms-image)
          name: ovms
          # NOTE(review): zero requests/limits look like placeholders to be
          # overridden at deploy time — confirm against the consumer.
          resources:
            limits:
              cpu: '0'
              memory: 0Gi
            requests:
              cpu: '0'
              memory: 0Gi
      grpcDataEndpoint: 'port:8001'
      grpcEndpoint: 'port:8085'
      multiModel: true
      protocolVersions:
        - grpc-v1
      replicas: 1
      supportedModelFormats:
        - autoSelect: true
          name: openvino_ir
          version: opset1
        - autoSelect: true
          name: onnx
          version: '1'
        - autoSelect: true
          name: tensorflow
          version: '2'
parameters: []
68 changes: 68 additions & 0 deletions config/runtimes/tgis-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
apiVersion: template.openshift.io/v1
kind: Template
metadata:
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/ootb: 'true'
  annotations:
    description: Text Generation Inference Server (TGIS) is a high performance inference engine that deploys and serves Large Language Models.
    openshift.io/display-name: TGIS Standalone ServingRuntime for KServe
    openshift.io/provider-display-name: Red Hat, Inc.
    tags: rhods,rhoai,kserve,servingruntime
    # FIX: restored canonical domain — the scraped source carried the mirror
    # domain "github.com" instead of "github.com".
    template.openshift.io/documentation-url: https://github.com/opendatahub-io/text-generation-inference
    template.openshift.io/long-description: This template defines resources needed to deploy TGIS standalone servingruntime with KServe in Red Hat OpenShift AI
    opendatahub.io/modelServingSupport: '["single"]'
    opendatahub.io/apiProtocol: 'gRPC'
  name: tgis-grpc-serving-template
objects:
  # Standalone TGIS runtime: gRPC inference on 8033, HTTP health/metrics on 3000.
  - apiVersion: serving.kserve.io/v1alpha1
    kind: ServingRuntime
    metadata:
      name: tgis-grpc-runtime
      annotations:
        openshift.io/display-name: TGIS Standalone ServingRuntime for KServe
        opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
      labels:
        opendatahub.io/dashboard: 'true'
    spec:
      annotations:
        prometheus.io/port: '3000'
        prometheus.io/path: '/metrics'
      multiModel: false
      supportedModelFormats:
        - autoSelect: true
          name: pytorch
      containers:
        - name: kserve-container
          # Image resolved from params.env via kustomize vars.
          image: $(tgis-image)
          command: ['text-generation-launcher']
          args:
            - '--model-name=/mnt/models/'
            - '--port=3000'
            - '--grpc-port=8033'
          env:
            - name: HF_HOME
              value: /tmp/hf_home
          readinessProbe:
            exec:
              command:
                - curl
                - localhost:3000/health
            initialDelaySeconds: 5
          livenessProbe:
            exec:
              command:
                - curl
                - localhost:3000/health
            initialDelaySeconds: 5
          startupProbe:
            httpGet:
              # FIX: was 8080 — nothing in this container listens there. The
              # launcher is started with --port=3000 and both readiness and
              # liveness probe localhost:3000/health, so the startup probe
              # must target 3000 as well or the pod never passes startup.
              port: 3000
              path: /health
            # Allow 12 mins to start
            failureThreshold: 24
            periodSeconds: 30
          ports:
            - containerPort: 8033
              name: h2c
              protocol: TCP
Loading