From 62316151993945e419b49738dcf368a3146ef1ca Mon Sep 17 00:00:00 2001 From: "MOATTI@il.ibm.com" Date: Thu, 7 Dec 2023 09:25:27 -0600 Subject: [PATCH 01/41] grpc-call and http-call replaced by inference-call which takes a mandatory protocol arg: either http or grpc --- demo/kserve/scripts/test/grpc-call.sh | 27 ---------- demo/kserve/scripts/test/http-call.sh | 27 ---------- demo/kserve/scripts/test/inference-call.sh | 61 ++++++++++++++++++++++ 3 files changed, 61 insertions(+), 54 deletions(-) delete mode 100755 demo/kserve/scripts/test/grpc-call.sh delete mode 100755 demo/kserve/scripts/test/http-call.sh create mode 100755 demo/kserve/scripts/test/inference-call.sh diff --git a/demo/kserve/scripts/test/grpc-call.sh b/demo/kserve/scripts/test/grpc-call.sh deleted file mode 100755 index 1feb0ef5..00000000 --- a/demo/kserve/scripts/test/grpc-call.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -set -o pipefail -set -o nounset -set -o errtrace -# set -x #Uncomment this to debug script. - -source "$(dirname "$(realpath "$0")")/../env.sh" -source "$(dirname "$(realpath "$0")")/../utils.sh" - -echo -echo "Wait until runtime is READY" - -wait_for_pods_ready "serving.kserve.io/inferenceservice=caikit-example-isvc" "${TEST_NS}" -oc wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=caikit-example-isvc -n ${TEST_NS} --timeout=300s - -echo -echo "Testing all token in a single call" -echo - -export KSVC_HOSTNAME=$(oc get ksvc caikit-example-isvc-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) -grpcurl -insecure -d '{"text": "At what temperature does liquid Nitrogen boil?"}' -H "mm-model-id: flan-t5-small-caikit" ${KSVC_HOSTNAME}:443 caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict - -echo -echo "Testing streams of token" -echo - -grpcurl -insecure -d '{"text": "At what temperature does liquid Nitrogen boil?"}' -H "mm-model-id: flan-t5-small-caikit" ${KSVC_HOSTNAME}:443 
caikit.runtime.Nlp.NlpService/ServerStreamingTextGenerationTaskPredict diff --git a/demo/kserve/scripts/test/http-call.sh b/demo/kserve/scripts/test/http-call.sh deleted file mode 100755 index 46890da5..00000000 --- a/demo/kserve/scripts/test/http-call.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -set -o pipefail -set -o nounset -set -o errtrace -# set -x #Uncomment this to debug script. - -source "$(dirname "$(realpath "$0")")/../env.sh" -source "$(dirname "$(realpath "$0")")/../utils.sh" - -echo -echo "Wait until runtime is READY" - -wait_for_pods_ready "serving.kserve.io/inferenceservice=caikit-example-isvc" "${TEST_NS}" -oc wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=caikit-example-isvc -n ${TEST_NS} --timeout=300s - -echo -echo "Testing all token in a single call" -echo - -export KSVC_HOSTNAME=$(oc get ksvc caikit-example-isvc-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) -curl -kL -H 'Content-Type: application/json' -d '{"model_id": "flan-t5-small-caikit", "inputs": "At what temperature does Nitrogen boil?"}' https://${KSVC_HOSTNAME}/api/v1/task/text-generation - -echo -echo "Testing streams of token" -echo - -curl -kL -H 'Content-Type: application/json' -d '{"model_id": "flan-t5-small-caikit", "inputs": "At what temperature does Nitrogen boil?"}' https://${KSVC_HOSTNAME}/api/v1/task/server-streaming-text-generation diff --git a/demo/kserve/scripts/test/inference-call.sh b/demo/kserve/scripts/test/inference-call.sh new file mode 100755 index 00000000..5e467d83 --- /dev/null +++ b/demo/kserve/scripts/test/inference-call.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -o pipefail +set -o nounset +set -o errtrace +# set -x #Uncomment this to debug script. + +# Usage: a single! 
arg: "http" or "grpc" - the protocol to be used + +# Check if a single argument is passed +if [ "$#" -ne 1 ]; then + echo "Error: exactly one argument is required: either 'http' or 'grpc'" + exit 1 +fi + +# Check if the argument is either "http" or "grpc" +if [ "$1" = "http" ] || [ "$1" = "grpc" ]; then + INF_PROTO=$1 +else + echo "Error: Argument must be either 'http' or 'grpc'." + exit 1 +fi + +source "$(dirname "$(realpath "$0")")/../env.sh" +source "$(dirname "$(realpath "$0")")/../utils.sh" +export TEST_NS=${TEST_NS}"-$INF_PROTO" + +echo +echo "Wait until $INF_PROTO runtime is READY" + +ISVC_NAME=caikit-tgis-isvc-"$INF_PROTO" +wait_for_pods_ready "serving.kserve.io/inferenceservice=$ISVC_NAME" "${TEST_NS}" +oc wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=$ISVC_NAME -n ${TEST_NS} --timeout=300s + +echo +echo "Testing all token in a single call" +echo + +export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) +export THE_QUESTION="At what temperature does Nitrogen boil?" 
+export THE_MODEL="flan-t5-small-caikit" + +### Invoke the inferences: + +if [ "$INF_PROTO" = "http" ]; then + curl -kL -H 'Content-Type: application/json' -d '{"model_id": "$THE_MODEL", "inputs": "$THE_QUESTION"}' https://${KSVC_HOSTNAME}/api/v1/task/text-generation + + echo + echo "Testing streams of token" + echo + + curl -kL -H 'Content-Type: application/json' -d '{"model_id": "$THE_MODEL", "inputs": "$THE_QUESTION"}' https://${KSVC_HOSTNAME}/api/v1/task/server-streaming-text-generation +elif [ "$INF_PROTO" = "grpc" ]; then + grpcurl -insecure -d '{"text": "$THE_QUESTION"}' -H "mm-model-id: $THE_MODEL" ${KSVC_HOSTNAME}:443 caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict + + echo + echo "Testing streams of token" + echo + + grpcurl -insecure -d '{"text": "$THE_QUESTION"}' -H "mm-model-id: $THE_MODEL" ${KSVC_HOSTNAME}:443 caikit.runtime.Nlp.NlpService/ServerStreamingTextGenerationTaskPredict +fi + From 3cf84bcf23c9c8cfd79640d62c7e0591f4d4af46 Mon Sep 17 00:00:00 2001 From: "MOATTI@il.ibm.com" Date: Thu, 7 Dec 2023 09:57:54 -0600 Subject: [PATCH 02/41] In custom-manifests/caikit caikit-tgis-servingruntime-grpc.yaml and caikit-tgis-servingruntime.yaml replaced by caikit-tgis-isvc-template.yaml --- ...rpc.yaml => caikit-tgis-isvc-template.yaml} | 12 +++++------- .../caikit/caikit-tgis-isvc.yaml | 18 ------------------ 2 files changed, 5 insertions(+), 25 deletions(-) rename demo/kserve/custom-manifests/caikit/{caikit-tgis-isvc-grpc.yaml => caikit-tgis-isvc-template.yaml} (61%) delete mode 100644 demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml similarity index 61% rename from demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml rename to demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml index dfc66aac..953781ef 100644 --- 
a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml @@ -5,18 +5,16 @@ metadata: serving.knative.openshift.io/enablePassthrough: "true" sidecar.istio.io/inject: "true" sidecar.istio.io/rewriteAppHTTPProbers: "true" - name: caikit-tgis-example-isvc + name: caikit-tgis-isvc- spec: predictor: + serviceAccountName: sa model: modelFormat: name: caikit - runtime: caikit-tgis-runtime - ports: - - containerPort: 8085 - name: h2c - protocol: TCP - storageUri: proto://path/to/model # single model here + runtime: caikit-tgis-runtime- + storageUri: s3://modelmesh-example-models/llm/models/flan-t5-small-caikit # single model here + # storageUri: proto://path/to/model # single model here # Example, using a pvc: # storageUri: pvc://caikit-pvc/flan-t5-small-caikit/ # Target directory must contain a config.yml diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml deleted file mode 100644 index 9f46cd09..00000000 --- a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: serving.kserve.io/v1beta1 -kind: InferenceService -metadata: - annotations: - serving.knative.openshift.io/enablePassthrough: "true" - sidecar.istio.io/inject: "true" - sidecar.istio.io/rewriteAppHTTPProbers: "true" - name: caikit-tgis-example-isvc -spec: - predictor: - model: - modelFormat: - name: caikit - runtime: caikit-tgis-runtime - storageUri: proto://path/to/model # single model here - # Example, using a pvc: - # storageUri: pvc://caikit-pvc/flan-t5-small-caikit/ - # Target directory must contain a config.yml From 26a8a90bf7dcfbc28734c87ae91d92a400cc188d Mon Sep 17 00:00:00 2001 From: "MOATTI@il.ibm.com" Date: Thu, 7 Dec 2023 10:03:16 -0600 Subject: [PATCH 03/41] In custom-manifests/caikit caikit-tgis-servingruntime.yaml replaced by specific runtimes caikit-tgis-servingruntime-grpc.yaml and 
caikit-tgis-servingruntime-http.yaml --- .../caikit-tgis-servingruntime-grpc.yaml | 35 +++++++++++++++++++ ...l => caikit-tgis-servingruntime-http.yaml} | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-grpc.yaml rename demo/kserve/custom-manifests/caikit/{caikit-tgis-servingruntime.yaml => caikit-tgis-servingruntime-http.yaml} (96%) diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-grpc.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-grpc.yaml new file mode 100644 index 00000000..18eac728 --- /dev/null +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-grpc.yaml @@ -0,0 +1,35 @@ +apiVersion: serving.kserve.io/v1alpha1 +kind: ServingRuntime +metadata: + name: caikit-tgis-runtime-grpc +spec: + multiModel: false + supportedModelFormats: + # Note: this currently *only* supports caikit format models + - autoSelect: true + name: caikit + containers: + - name: kserve-container + image: quay.io/opendatahub/text-generation-inference:stable + command: ["text-generation-launcher"] + args: ["--model-name=/mnt/models/artifacts/"] + env: + - name: TRANSFORMERS_CACHE + value: /tmp/transformers_cache + # resources: # configure as required + # requests: + # cpu: 8 + # memory: 16Gi + - name: transformer-container + image: quay.io/opendatahub/caikit-tgis-serving:stable + env: + - name: RUNTIME_LOCAL_MODELS_DIR + value: /mnt/models + ports: + - containerPort: 8085 + name: h2c + protocol: TCP + # resources: # configure as required + # requests: + # cpu: 8 + # memory: 16Gi \ No newline at end of file diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-http.yaml similarity index 96% rename from demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime.yaml rename to demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-http.yaml 
index 8528909c..93d0f342 100644 --- a/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime.yaml +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-http.yaml @@ -1,7 +1,7 @@ apiVersion: serving.kserve.io/v1alpha1 kind: ServingRuntime metadata: - name: caikit-tgis-runtime + name: caikit-tgis-runtime-http spec: multiModel: false supportedModelFormats: From 4f6559a9c80a04db550d45d936be5370d3676ff9 Mon Sep 17 00:00:00 2001 From: "MOATTI@il.ibm.com" Date: Thu, 7 Dec 2023 10:10:49 -0600 Subject: [PATCH 04/41] One mandatary arg the protocol, either http or grpc. The isvc and the namespace will be specific to the protocol name --- demo/kserve/scripts/test/deploy-model.sh | 32 ++++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/demo/kserve/scripts/test/deploy-model.sh b/demo/kserve/scripts/test/deploy-model.sh index 8f64b1c1..d153b974 100755 --- a/demo/kserve/scripts/test/deploy-model.sh +++ b/demo/kserve/scripts/test/deploy-model.sh @@ -4,7 +4,24 @@ set -o nounset set -o errtrace # set -x #Uncomment this to debug script. +# Usage: a single! arg: "http" or "grpc" - the protocol to be used + +# Check if a single argument is passed +if [ "$#" -ne 1 ]; then + echo "Error: exactly one argument is required: either 'http' or 'grpc'" + exit 1 +fi + +# Check if the argument is either "http" or "grpc" +if [ "$1" = "http" ] || [ "$1" = "grpc" ]; then + INF_PROTO=$1 +else + echo "Error: Argument must be either 'http' or 'grpc'." + exit 1 +fi + source "$(dirname "$(realpath "$0")")/../env.sh" +export TEST_NS=${TEST_NS}"-$INF_PROTO" # Deploy Minio ACCESS_KEY_ID=THEACCESSKEY @@ -30,18 +47,23 @@ if [[ $? 
== 1 ]] then oc new-project ${TEST_NS} - oc apply -f ./custom-manifests/caikit/caikit-tgis-servingruntime.yaml -n ${TEST_NS} + oc apply -f ./custom-manifests/caikit/caikit-tgis-servingruntime-"$INF_PROTO".yaml -n ${TEST_NS} oc apply -f ${BASE_DIR}/minio-secret-current.yaml -n ${TEST_NS} oc apply -f ${BASE_DIR}/serviceaccount-minio-current.yaml -n ${TEST_NS} - oc apply -f ./custom-manifests/caikit/caikit-tgis-isvc.yaml -n ${TEST_NS} + ### create the isvc. First step: create the yaml file + ISVC_NAME=caikit-tgis-isvc-"$INF_PROTO" + sed "s//$INF_PROTO/g" ./custom-manifests/caikit/caikit-tgis-isvc-template.yaml > ./custom-manifests/caikit/"$ISVC_NAME".yaml + oc apply -f ./custom-manifests/caikit/"$ISVC_NAME".yaml -n ${TEST_NS} # Resources needed to enable metrics for the model # The metrics service needs the correct label in the `matchLabel` field. The expected value of this label is `-predictor-default` - # The metrics service in this repo is configured to work with the example model. If you are deploying a different model or using a different model name, change the label accordingly. - oc apply -f custom-manifests/metrics/caikit-metrics-service.yaml -n ${TEST_NS} - oc apply -f custom-manifests/metrics/caikit-metrics-servicemonitor.yaml -n ${TEST_NS} + # The metrics service in this repo is configured to work with the example model. If you are deploying a different model or using a different model name, change the label accordingly. + + ### TBD: Following 2 line should take into account the changed names + # oc apply -f custom-manifests/metrics/caikit-metrics-service.yaml -n ${TEST_NS} + # oc apply -f custom-manifests/metrics/caikit-metrics-servicemonitor.yaml -n ${TEST_NS} else echo echo "* ${TEST_NS} exist. 
Please remove the namespace or use another namespace name" From 5b47666bb03e5b36f45fe362fbdc6b5ae8cee6e3 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Thu, 7 Dec 2023 10:25:22 -0600 Subject: [PATCH 05/41] Fix the step-by-step docummentation for deploying and removing an LLM model --- demo/kserve/deploy-remove.md | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index aff9b196..b7cf1a17 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -30,6 +30,7 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu ACCESS_KEY_ID=admin SECRET_ACCESS_KEY=password MINIO_NS=minio + INF_PROTO=http ### If INF_PROTO is set to "http", only HTTP (e.g., curl) can be used to invoke inferences. If set to "grpc" only gRPC (e.g., grpcurl) can be used. oc new-project ${MINIO_NS} oc apply -f ./custom-manifests/minio/minio.yaml -n ${MINIO_NS} @@ -42,14 +43,16 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu a. Create a new namespace. ```bash - export TEST_NS=kserve-demo + export TEST_NS=kserve-demo"-${INF_PROTO}" oc new-project ${TEST_NS} ``` - b. Create a caikit `ServingRuntime`. By default, it requests 4CPU and 8Gi of memory. You can adjust these values as needed. + b. Create a caikit `ServingRuntime`. + + By default, it requests 4CPU and 8Gi of memory. You can adjust these values as needed. ```bash - oc apply -f ./custom-manifests/caikit/caikit-tgis-servingruntime.yaml -n ${TEST_NS} + oc apply -f ./custom-manifests/caikit/caikit-tgis-servingruntime-"$INF_PROTO".yaml -n ${TEST_NS} ``` c. Deploy the MinIO data connection and service account. @@ -62,23 +65,31 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu d. Deploy the inference service. It will point to the model located in the `modelmesh-example-models/llm/models` directory. 
```bash - oc apply -f ./custom-manifests/caikit/caikit-tgis-isvc.yaml -n ${TEST_NS} + + ISVC_NAME=caikit-tgis-isvc-"$INF_PROTO" + sed "s//$INF_PROTO/g" ./custom-manifests/caikit/caikit-tgis-isvc-template.yaml > ./"$ISVC_NAME".yaml + oc apply -f ./"$ISVC_NAME".yaml -n ${TEST_NS} ``` e. Verify that the inference service's `READY` state is `True`. ```bash - oc get isvc/caikit-example-isvc -n ${TEST_NS} + oc get isvc/$ISVC_NAME -n ${TEST_NS} ``` -3. Perform inference using HTTP (default) or gRPC +3. Perform inference using either HTTP or gRPC + + Compute KSVC_HOSTNAME: + ```bash + + export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) + ``` 3-http. Perform inference with HTTP. This example uses cURL. a. Run the following `curl` command for all tokens in a single call: ```bash - export KSVC_HOSTNAME=$(oc get ksvc caikit-example-isvc-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) curl -kL -H 'Content-Type: application/json' -d '{"model_id": "flan-t5-small-caikit", "inputs": "At what temperature does Nitrogen boil?"}' https://${KSVC_HOSTNAME}/api/v1/task/text-generation ``` @@ -156,7 +167,6 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu c. 
Run the following `grpcurl` command for all tokens in a single call: ```bash - export KSVC_HOSTNAME=$(oc get ksvc caikit-example-isvc-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) grpcurl -insecure -d '{"text": "At what temperature does liquid Nitrogen boil?"}' -H "mm-model-id: flan-t5-small-caikit" ${KSVC_HOSTNAME}:443 caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict ``` From ee7d8f720290b09f8e4a12847d00a3a9065d5d92 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Thu, 7 Dec 2023 10:28:32 -0600 Subject: [PATCH 06/41] Modified so that both kserve-demo-http and kserver-demo-grpc may be removed --- demo/kserve/scripts/test/delete-model.sh | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/demo/kserve/scripts/test/delete-model.sh b/demo/kserve/scripts/test/delete-model.sh index add24f77..306be92c 100755 --- a/demo/kserve/scripts/test/delete-model.sh +++ b/demo/kserve/scripts/test/delete-model.sh @@ -4,8 +4,24 @@ set -o nounset set -o errtrace # set -x #Uncomment this to debug script. +### This script will remove, if relevant, both kserve-demo-http and kserve-demo-grpc namespaces and their content and then will remove the minio namespace + source "$(dirname "$(realpath "$0")")/../env.sh" +export TEST_NS_HTTP=${TEST_NS}"-http" +export TEST_NS_GRPC=${TEST_NS}"-grpc" + +oc get ns ${TEST_NS_HTTP}} +if [[ $? == 0 ]] +then + oc delete isvc,pod --all -n ${TEST_NS_HTTP} --force --grace-period=0 +fi + +oc get ns ${TEST_NS_GRPC}} +if [[ $? 
== 0 ]] +then + oc delete isvc,pod --all -n ${TEST_NS_GRPC} --force --grace-period=0 +fi -oc delete isvc,pod --all -n ${TEST_NS} --force --grace-period=0 -oc delete ns ${TEST_NS} ${MINIO_NS} --force --grace-period=0 +### common to all protocols: +oc delete ns ${MINIO_NS} --force --grace-period=0 From 962751f14ff186dc694fa7c4a85636758cba6bdd Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Thu, 7 Dec 2023 10:30:06 -0600 Subject: [PATCH 07/41] Modified to handle both kserve-demo-http and kserver-demo-grpc --- .../scripts/uninstall/dependencies-uninstall.sh | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/demo/kserve/scripts/uninstall/dependencies-uninstall.sh b/demo/kserve/scripts/uninstall/dependencies-uninstall.sh index 6bda5a42..057fe1f9 100755 --- a/demo/kserve/scripts/uninstall/dependencies-uninstall.sh +++ b/demo/kserve/scripts/uninstall/dependencies-uninstall.sh @@ -5,6 +5,8 @@ set -o errtrace # set -x #Uncomment this to debug script. source "$(dirname "$(realpath "$0")")/../env.sh" +export TEST_NS_HTTP=${TEST_NS}"-http" +export TEST_NS_GRPC=${TEST_NS}"-grpc" # Delete the Knative gateways oc delete -f custom-manifests/serverless/gateways.yaml @@ -45,4 +47,16 @@ oc delete csv OperatorGroup serverless-operators -n openshift-serverless oc delete project istio-system oc delete project knative-serving oc delete project knative-eventing -oc delete project $TEST_NS + +oc get ns ${TEST_NS_HTTP}} +if [[ $? == 0 ]] +then + oc delete project $TEST_NS_HTTP +fi + +oc get ns ${TEST_NS_GRPC}} +if [[ $? 
== 0 ]] +then + oc delete project $TEST_NS_GRPC +fi + From 150d45e680f25262ec8d55c5c4323f754632f522 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Thu, 7 Dec 2023 10:30:44 -0600 Subject: [PATCH 08/41] Modified to handle both kserve-demo-http and kserver-demo-grpc --- demo/kserve/scripts/uninstall/kserve-uninstall.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/demo/kserve/scripts/uninstall/kserve-uninstall.sh b/demo/kserve/scripts/uninstall/kserve-uninstall.sh index 88d56e65..56373492 100755 --- a/demo/kserve/scripts/uninstall/kserve-uninstall.sh +++ b/demo/kserve/scripts/uninstall/kserve-uninstall.sh @@ -6,6 +6,8 @@ set -o errtrace # set -x #Uncomment this to debug script. source "$(dirname "$(realpath "$0")")/../env.sh" +TEST_NS_HTTP=${TEST_NS}"-http" +TEST_NS_GRPC=${TEST_NS}"-grpc" if [[ ! -n "${TARGET_OPERATOR+x}" ]] then @@ -27,10 +29,11 @@ export TARGET_OPERATOR_NS=$(getOpNS ${TARGET_OPERATOR_TYPE}) oc delete validatingwebhookconfiguration inferencegraph.serving.kserve.io inferenceservice.serving.kserve.io oc delete mutatingwebhookconfiguration inferenceservice.serving.kserve.io -oc delete isvc,pod --all -n ${TEST_NS} --force --grace-period=0 +oc delete isvc,pod --all -n ${TEST_NS_HTTP} --force --grace-period=0 +oc delete isvc,pod --all -n ${TEST_NS_GRPC} --force --grace-period=0 echo "It would take around around 3~4 mins" -oc delete ns ${TEST_NS} ${MINIO_NS} --force --grace-period=0 +oc delete ns ${TEST_NS_HTTP} ${TEST_NS_GRPC} ${MINIO_NS} --force --grace-period=0 oc delete secret wildcard-certs -n istio-system oc delete DataScienceCluster --all -n "${KSERVE_OPERATOR_NS}" From fafd57bc7e91d257ec0ff549a1abd8fb441d8fe5 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Thu, 7 Dec 2023 11:10:26 -0600 Subject: [PATCH 09/41] created yaml files for caikit-tgis-isvcs are put in ./custom-manifests/caikit dir --- demo/kserve/deploy-remove.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index b7cf1a17..851cb8b2 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -67,8 +67,8 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu ```bash ISVC_NAME=caikit-tgis-isvc-"$INF_PROTO" - sed "s//$INF_PROTO/g" ./custom-manifests/caikit/caikit-tgis-isvc-template.yaml > ./"$ISVC_NAME".yaml - oc apply -f ./"$ISVC_NAME".yaml -n ${TEST_NS} + sed "s//$INF_PROTO/g" ./custom-manifests/caikit/caikit-tgis-isvc-template.yaml > ./custom-manifests/caikit/"$ISVC_NAME".yaml + oc apply -f ./custom-manifests/caikit/"$ISVC_NAME".yaml -n ${TEST_NS} ``` e. Verify that the inference service's `READY` state is `True`. @@ -82,7 +82,7 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu Compute KSVC_HOSTNAME: ```bash - export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) + export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) ``` 3-http. Perform inference with HTTP. This example uses cURL. From 24e7e10425225d27a34e5cb9040ef69f1affd231 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Thu, 7 Dec 2023 13:31:45 -0600 Subject: [PATCH 10/41] fix the documentation for scripted deployment/removal of sample models --- demo/kserve/deploy-remove-scripts.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/demo/kserve/deploy-remove-scripts.md b/demo/kserve/deploy-remove-scripts.md index 498d7216..04459292 100644 --- a/demo/kserve/deploy-remove-scripts.md +++ b/demo/kserve/deploy-remove-scripts.md @@ -14,26 +14,27 @@ Note: If you prefer to deploy and remove an LLM model by using step-by-step comm **Procedure** -1. Deploy a sample LLM model. +1. Choose HTTP or gRPC. 
~~~ - ./scripts/test/deploy-model.sh + export INF_PROT="http" ### If HTTP is to be used (e.g., curl) + ### or ### + export INF_PROT="grpc" ### If gRPC is to be used (e.g., grpcurl) ~~~ -2. Perform inference with a HTTP or gRPC call. +2. Deploy a sample LLM model - 2-http. If using HTTP: ~~~ - ./scripts/test/http-call.sh + ./scripts/test/deploy-model.sh ${INF_PROT} ~~~ +3. Perform inference with a HTTP or gRPC call. - 2-grpc. If using gRPC: ~~~ - ./scripts/test/grpc-call.sh + ./scripts/test/inference-call.sh ${INF_PROT} ~~~ -3. Delete the sample model and the MinIO namespace. +4. Delete the sample model(s) and the MinIO namespace. ~~~ ./scripts/test/delete-model.sh From 840ea9ed355e2353ffc773dceb725e76fb701b63 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Thu, 7 Dec 2023 13:33:01 -0600 Subject: [PATCH 11/41] 3 bugs fixed --- demo/kserve/scripts/test/delete-model.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/demo/kserve/scripts/test/delete-model.sh b/demo/kserve/scripts/test/delete-model.sh index 306be92c..17bb3e10 100755 --- a/demo/kserve/scripts/test/delete-model.sh +++ b/demo/kserve/scripts/test/delete-model.sh @@ -4,24 +4,24 @@ set -o nounset set -o errtrace # set -x #Uncomment this to debug script. -### This script will remove, if relevant, both kserve-demo-http and kserve-demo-grpc namespaces and their content and then will remove the minio namespace +### This script will remove, if they exist, kserve-demo-http and kserve-demo-grpc namespaces and their content and then will remove the minio namespace source "$(dirname "$(realpath "$0")")/../env.sh" export TEST_NS_HTTP=${TEST_NS}"-http" export TEST_NS_GRPC=${TEST_NS}"-grpc" -oc get ns ${TEST_NS_HTTP}} +oc get ns ${TEST_NS_HTTP} if [[ $? == 0 ]] then oc delete isvc,pod --all -n ${TEST_NS_HTTP} --force --grace-period=0 fi -oc get ns ${TEST_NS_GRPC}} +oc get ns ${TEST_NS_GRPC} if [[ $? 
== 0 ]] then oc delete isvc,pod --all -n ${TEST_NS_GRPC} --force --grace-period=0 fi ### common to all protocols: -oc delete ns ${MINIO_NS} --force --grace-period=0 +oc delete ns ${TEST_NS_HTTP} ${TEST_NS_GRPC} ${MINIO_NS} --force --grace-period=0 From a625554256b5e4e2eb4c1dea327408c50372ef8b Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Sun, 10 Dec 2023 14:14:29 +0200 Subject: [PATCH 12/41] Add comment --- demo/kserve/scripts/uninstall/kserve-uninstall.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/demo/kserve/scripts/uninstall/kserve-uninstall.sh b/demo/kserve/scripts/uninstall/kserve-uninstall.sh index 56373492..efda2011 100755 --- a/demo/kserve/scripts/uninstall/kserve-uninstall.sh +++ b/demo/kserve/scripts/uninstall/kserve-uninstall.sh @@ -1,10 +1,11 @@ - #!/bin/bash set -o pipefail set -o nounset set -o errtrace # set -x #Uncomment this to debug script. +# Will perform the uninstall for all present namespaces (e.g., kserve-demo-http or hserve-demo-grpc) + source "$(dirname "$(realpath "$0")")/../env.sh" TEST_NS_HTTP=${TEST_NS}"-http" TEST_NS_GRPC=${TEST_NS}"-grpc" From c8197eb22b683a8e0538b6dc1ba8bd752f9fc5e2 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Sun, 10 Dec 2023 14:17:56 +0200 Subject: [PATCH 13/41] Fix comment --- demo/kserve/scripts/uninstall/kserve-uninstall.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/kserve/scripts/uninstall/kserve-uninstall.sh b/demo/kserve/scripts/uninstall/kserve-uninstall.sh index efda2011..72cdaf35 100755 --- a/demo/kserve/scripts/uninstall/kserve-uninstall.sh +++ b/demo/kserve/scripts/uninstall/kserve-uninstall.sh @@ -4,7 +4,7 @@ set -o nounset set -o errtrace # set -x #Uncomment this to debug script. 
-# Will perform the uninstall for all present namespaces (e.g., kserve-demo-http or hserve-demo-grpc) +# Uninstalls the minio namespace as well as protocol specific namespaces such as kserve-demo-http or kserve-demo-grpc source "$(dirname "$(realpath "$0")")/../env.sh" TEST_NS_HTTP=${TEST_NS}"-http" From 04af1a39d134b7b718e9d35dbf31d22ee9695c62 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Sun, 10 Dec 2023 12:35:50 -0600 Subject: [PATCH 14/41] fix bug in delete-model.sh minio ns should not be deleted! --- demo/kserve/scripts/test/delete-model.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/kserve/scripts/test/delete-model.sh b/demo/kserve/scripts/test/delete-model.sh index 17bb3e10..a06d9ae2 100755 --- a/demo/kserve/scripts/test/delete-model.sh +++ b/demo/kserve/scripts/test/delete-model.sh @@ -23,5 +23,5 @@ then fi ### common to all protocols: -oc delete ns ${TEST_NS_HTTP} ${TEST_NS_GRPC} ${MINIO_NS} --force --grace-period=0 +oc delete ns ${TEST_NS_HTTP} ${TEST_NS_GRPC} --force --grace-period=0 From c172968052b61c7eba1170a3000e14fdbc34c5d1 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Sun, 10 Dec 2023 12:38:43 -0600 Subject: [PATCH 15/41] fix comment in delete-model.sh! --- demo/kserve/scripts/test/delete-model.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/kserve/scripts/test/delete-model.sh b/demo/kserve/scripts/test/delete-model.sh index a06d9ae2..4fd57efa 100755 --- a/demo/kserve/scripts/test/delete-model.sh +++ b/demo/kserve/scripts/test/delete-model.sh @@ -4,7 +4,7 @@ set -o nounset set -o errtrace # set -x #Uncomment this to debug script. 
-### This script will remove, if they exist, kserve-demo-http and kserve-demo-grpc namespaces and their content and then will remove the minio namespace +### This script will remove, if they exist, kserve-demo-http and kserve-demo-grpc namespaces and their content source "$(dirname "$(realpath "$0")")/../env.sh" export TEST_NS_HTTP=${TEST_NS}"-http" From b9e249d6cefcca80c9ecc99a1f401fab7ebee990 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Mon, 11 Dec 2023 09:19:18 +0200 Subject: [PATCH 16/41] delete-model is now for a specific protocol --- demo/kserve/deploy-remove-scripts.md | 9 +++---- demo/kserve/scripts/test/delete-model.sh | 30 ++++++++++++++---------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/demo/kserve/deploy-remove-scripts.md b/demo/kserve/deploy-remove-scripts.md index 04459292..1586ec49 100644 --- a/demo/kserve/deploy-remove-scripts.md +++ b/demo/kserve/deploy-remove-scripts.md @@ -14,10 +14,11 @@ Note: If you prefer to deploy and remove an LLM model by using step-by-step comm **Procedure** -1. Choose HTTP or gRPC. +1. Choose the protocol to be used when invoking inferences: HTTP or gRPC. + You may use both simultaneously by invoking the first 3 steps once with "http" and once with "grpc" ~~~ - export INF_PROT="http" ### If HTTP is to be used (e.g., curl) + export INF_PROT="http" ### When HTTP is to be used (e.g., curl) ### or ### export INF_PROT="grpc" ### If gRPC is to be used (e.g., grpcurl) ~~~ @@ -34,8 +35,8 @@ Note: If you prefer to deploy and remove an LLM model by using step-by-step comm ./scripts/test/inference-call.sh ${INF_PROT} ~~~ -4. Delete the sample model(s) and the MinIO namespace. +4. 
Delete the sample model for a specific protocol: ~~~ - ./scripts/test/delete-model.sh + ./scripts/test/delete-model.sh ${INF_PROT} ~~~ diff --git a/demo/kserve/scripts/test/delete-model.sh b/demo/kserve/scripts/test/delete-model.sh index 4fd57efa..35bf8581 100755 --- a/demo/kserve/scripts/test/delete-model.sh +++ b/demo/kserve/scripts/test/delete-model.sh @@ -4,24 +4,30 @@ set -o nounset set -o errtrace # set -x #Uncomment this to debug script. -### This script will remove, if they exist, kserve-demo-http and kserve-demo-grpc namespaces and their content +### This script will remove objects related to the protocol specified (http or grpc) as single and mandatory parameter -source "$(dirname "$(realpath "$0")")/../env.sh" -export TEST_NS_HTTP=${TEST_NS}"-http" -export TEST_NS_GRPC=${TEST_NS}"-grpc" +# Check if a single argument is passed +if [ "$#" -ne 1 ]; then + echo "Error: exactly one argument is required: either 'http' or 'grpc'" + exit 1 +fi -oc get ns ${TEST_NS_HTTP} -if [[ $? == 0 ]] -then - oc delete isvc,pod --all -n ${TEST_NS_HTTP} --force --grace-period=0 +# Check if the argument is either "http" or "grpc" +if [ "$1" = "http" ] || [ "$1" = "grpc" ]; then + INF_PROTO=$1 +else + echo "Error: Argument must be either 'http' or 'grpc'." + exit 1 fi -oc get ns ${TEST_NS_GRPC} +source "$(dirname "$(realpath "$0")")/../env.sh" +export TEST_NS_REMOVE=${TEST_NS}"-"${INF_PROTO} + +oc get ns ${TEST_NS_REMOVE} if [[ $? 
== 0 ]] then - oc delete isvc,pod --all -n ${TEST_NS_GRPC} --force --grace-period=0 + oc delete isvc,pod --all -n ${TEST_NS_REMOVE} --force --grace-period=0 + oc delete ns ${TEST_NS_REMOVE} --force --grace-period=0 fi -### common to all protocols: -oc delete ns ${TEST_NS_HTTP} ${TEST_NS_GRPC} --force --grace-period=0 From 428e1afcc252a68967978a6fb087f83925205d92 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Tue, 12 Dec 2023 10:35:16 -0600 Subject: [PATCH 17/41] inference-call.sh replaced by grpc-call.sh and http-call.sh --- demo/kserve/scripts/test/grpc-call.sh | 38 ++++++++++++++ demo/kserve/scripts/test/http-call.sh | 39 ++++++++++++++ demo/kserve/scripts/test/inference-call.sh | 61 ---------------------- 3 files changed, 77 insertions(+), 61 deletions(-) create mode 100755 demo/kserve/scripts/test/grpc-call.sh create mode 100755 demo/kserve/scripts/test/http-call.sh delete mode 100755 demo/kserve/scripts/test/inference-call.sh diff --git a/demo/kserve/scripts/test/grpc-call.sh b/demo/kserve/scripts/test/grpc-call.sh new file mode 100755 index 00000000..d167f49b --- /dev/null +++ b/demo/kserve/scripts/test/grpc-call.sh @@ -0,0 +1,38 @@ +#!/bin/bash +set -o pipefail +set -o nounset +set -o errtrace +# set -x #Uncomment this to debug script. + +# Performs inference using HTTP + +PREFIX="-" +INF_PROTO="grpc" + +source "$(dirname "$(realpath "$0")")/../env.sh" +source "$(dirname "$(realpath "$0")")/../utils.sh" + +echo +echo "Wait until $INF_PROTO runtime is READY" + +ISVC_NAME=caikit-tgis-isvc"${PREFIX}${INF_PROTO}" +wait_for_pods_ready "serving.kserve.io/inferenceservice=$ISVC_NAME" "${TEST_NS}" +oc wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=$ISVC_NAME -n ${TEST_NS} --timeout=300s + +echo +echo "Testing all token in a single call" +echo + +export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) +#export THE_QUESTION="At what temperature does Nitrogen boil?" 
+# export THE_MODEL="flan-t5-small-caikit" + +### Invoke the inferences: +grpcurl -insecure -d '{"text": "At what temperature does Nitrogen boil?"}' -H "mm-model-id: flan-t5-small-caikit" ${KSVC_HOSTNAME}:443 caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict + +echo +echo "Testing streams of token" +echo + +grpcurl -insecure -d '{"text": "At what temperature does Nitrogen boil?"}' -H "mm-model-id: flan-t5-small-caikit" ${KSVC_HOSTNAME}:443 caikit.runtime.Nlp.NlpService/ServerStreamingTextGenerationTaskPredict + diff --git a/demo/kserve/scripts/test/http-call.sh b/demo/kserve/scripts/test/http-call.sh new file mode 100755 index 00000000..bfd723eb --- /dev/null +++ b/demo/kserve/scripts/test/http-call.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -o pipefail +set -o nounset +set -o errtrace +# set -x #Uncomment this to debug script. + +# Performs inference using HTTP + +PREFIX="" +INF_PROTO="" + +source "$(dirname "$(realpath "$0")")/../env.sh" +source "$(dirname "$(realpath "$0")")/../utils.sh" + +echo +echo "Wait until $INF_PROTO runtime is READY" + +ISVC_NAME=caikit-tgis-isvc"${PREFIX}${INF_PROTO}" +wait_for_pods_ready "serving.kserve.io/inferenceservice=$ISVC_NAME" "${TEST_NS}" +oc wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=$ISVC_NAME -n ${TEST_NS} --timeout=300s + +echo +echo "Testing all token in a single call" +echo + +export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) +#export THE_QUESTION="At what temperature does Nitrogen boil?" 
+# export THE_MODEL="flan-t5-small-caikit" + +### Invoke the inferences: + +curl -kL -H 'Content-Type: application/json' -d '{"model_id": "flan-t5-small-caikit", "inputs": "At what temperature does Nitrogen boil?"}' https://${KSVC_HOSTNAME}/api/v1/task/text-generation + +echo +echo "Testing streams of token" +echo + +curl -kL -H 'Content-Type: application/json' -d '{"model_id": "flan-t5-small-caikit", "inputs": "At what temperature does Nitrogen boil?"}' https://${KSVC_HOSTNAME}/api/v1/task/server-streaming-text-generation + diff --git a/demo/kserve/scripts/test/inference-call.sh b/demo/kserve/scripts/test/inference-call.sh deleted file mode 100755 index 5e467d83..00000000 --- a/demo/kserve/scripts/test/inference-call.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash -set -o pipefail -set -o nounset -set -o errtrace -# set -x #Uncomment this to debug script. - -# Usage: a single! arg: "http" or "grpc" - the protocol to be used - -# Check if a single argument is passed -if [ "$#" -ne 1 ]; then - echo "Error: exactly one argument is required: either 'http' or 'grpc'" - exit 1 -fi - -# Check if the argument is either "http" or "grpc" -if [ "$1" = "http" ] || [ "$1" = "grpc" ]; then - INF_PROTO=$1 -else - echo "Error: Argument must be either 'http' or 'grpc'." - exit 1 -fi - -source "$(dirname "$(realpath "$0")")/../env.sh" -source "$(dirname "$(realpath "$0")")/../utils.sh" -export TEST_NS=${TEST_NS}"-$INF_PROTO" - -echo -echo "Wait until $INF_PROTO runtime is READY" - -ISVC_NAME=caikit-tgis-isvc-"$INF_PROTO" -wait_for_pods_ready "serving.kserve.io/inferenceservice=$ISVC_NAME" "${TEST_NS}" -oc wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=$ISVC_NAME -n ${TEST_NS} --timeout=300s - -echo -echo "Testing all token in a single call" -echo - -export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) -export THE_QUESTION="At what temperature does Nitrogen boil?" 
-export THE_MODEL="flan-t5-small-caikit" - -### Invoke the inferences: - -if [ "$INF_PROTO" = "http" ]; then - curl -kL -H 'Content-Type: application/json' -d '{"model_id": "$THE_MODEL", "inputs": "$THE_QUESTION"}' https://${KSVC_HOSTNAME}/api/v1/task/text-generation - - echo - echo "Testing streams of token" - echo - - curl -kL -H 'Content-Type: application/json' -d '{"model_id": "$THE_MODEL", "inputs": "$THE_QUESTION"}' https://${KSVC_HOSTNAME}/api/v1/task/server-streaming-text-generation -elif [ "$INF_PROTO" = "grpc" ]; then - grpcurl -insecure -d '{"text": "$THE_QUESTION"}' -H "mm-model-id: $THE_MODEL" ${KSVC_HOSTNAME}:443 caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict - - echo - echo "Testing streams of token" - echo - - grpcurl -insecure -d '{"text": "$THE_QUESTION"}' -H "mm-model-id: $THE_MODEL" ${KSVC_HOSTNAME}:443 caikit.runtime.Nlp.NlpService/ServerStreamingTextGenerationTaskPredict -fi - From 926b4a97b914306e313d7f8741a0e428498611bb Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Tue, 12 Dec 2023 10:37:21 -0600 Subject: [PATCH 18/41] delete-model.sh reverted to original version - no change for PR 183 --- demo/kserve/scripts/test/delete-model.sh | 26 ++---------------------- 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/demo/kserve/scripts/test/delete-model.sh b/demo/kserve/scripts/test/delete-model.sh index 35bf8581..add24f77 100755 --- a/demo/kserve/scripts/test/delete-model.sh +++ b/demo/kserve/scripts/test/delete-model.sh @@ -4,30 +4,8 @@ set -o nounset set -o errtrace # set -x #Uncomment this to debug script. 
-### This script will remove objects related to the protocol specified (http or grpc) as single and mandatory parameter - -# Check if a single argument is passed -if [ "$#" -ne 1 ]; then - echo "Error: exactly one argument is required: either 'http' or 'grpc'" - exit 1 -fi - -# Check if the argument is either "http" or "grpc" -if [ "$1" = "http" ] || [ "$1" = "grpc" ]; then - INF_PROTO=$1 -else - echo "Error: Argument must be either 'http' or 'grpc'." - exit 1 -fi - source "$(dirname "$(realpath "$0")")/../env.sh" -export TEST_NS_REMOVE=${TEST_NS}"-"${INF_PROTO} - -oc get ns ${TEST_NS_REMOVE} -if [[ $? == 0 ]] -then - oc delete isvc,pod --all -n ${TEST_NS_REMOVE} --force --grace-period=0 - oc delete ns ${TEST_NS_REMOVE} --force --grace-period=0 -fi +oc delete isvc,pod --all -n ${TEST_NS} --force --grace-period=0 +oc delete ns ${TEST_NS} ${MINIO_NS} --force --grace-period=0 From 07f3dbd0bb63d496971a81e8dc05af871be69271 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Tue, 12 Dec 2023 10:47:47 -0600 Subject: [PATCH 19/41] Move to single namespace: kserve-demo, HTTP becomes default --- demo/kserve/deploy-remove-scripts.md | 27 ++++++----- demo/kserve/deploy-remove.md | 42 +++++++++++------ demo/kserve/scripts/test/deploy-model.sh | 59 +++++++++++++----------- 3 files changed, 74 insertions(+), 54 deletions(-) diff --git a/demo/kserve/deploy-remove-scripts.md b/demo/kserve/deploy-remove-scripts.md index 1586ec49..f95c1561 100644 --- a/demo/kserve/deploy-remove-scripts.md +++ b/demo/kserve/deploy-remove-scripts.md @@ -14,29 +14,32 @@ Note: If you prefer to deploy and remove an LLM model by using step-by-step comm **Procedure** -1. Choose the protocol to be used when invoking inferences: HTTP or gRPC. - You may use simulatneously both by invoking the 3 first steps one with "http" and once with "grpc" +1. 
Deploy a sample LLM model + For HTTP: ~~~ - export INF_PROT="http" ### When HTTP is to be used (e.g., curl) - ### or ### - export INF_PROT="grpc" ### If gRPC is to be used (e.g., grpcurl) + ./scripts/test/deploy-model.sh ~~~ -2. Deploy a sample LLM model - + For gRPC: ~~~ - ./scripts/test/deploy-model.sh ${INF_PROT} + ./scripts/test/deploy-model.sh grpc ~~~ -3. Perform inference with a HTTP or gRPC call. +2. Perform inference: + + For HTTP: + ~~~ + ./scripts/test/http-call.sh + ~~~ + For gRPC: ~~~ - ./scripts/test/inference-call.sh ${INF_PROT} + ./scripts/test/grpc-call.sh ~~~ -4. Delete the sample model for a specific protocol: +3. Delete the sample model: ~~~ - ./scripts/test/delete-model.sh ${INF_PROT} + ./scripts/test/delete-model.sh ~~~ diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index 851cb8b2..b32179f5 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -30,8 +30,9 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu ACCESS_KEY_ID=admin SECRET_ACCESS_KEY=password MINIO_NS=minio - INF_PROTO=http ### If INF_PROTO is set to "http", only HTTP (e.g., curl) can be used to invoke inferences. If set to "grpc" only gRPC (e.g., grpcurl) can be used. + ``` + ``` oc new-project ${MINIO_NS} oc apply -f ./custom-manifests/minio/minio.yaml -n ${MINIO_NS} sed "s//$MINIO_NS/g" ./custom-manifests/minio/minio-secret.yaml | tee ./minio-secret-current.yaml | oc -n ${MINIO_NS} apply -f - @@ -40,38 +41,49 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu 2. Deploy the LLM model with Caikit+TGIS Serving runtime - a. Create a new namespace. + a. Choose protocol to be used to invoke inferences: + Default protocol is HTTP (e.g., curl commands). + If you want to use gRPC, set INF_PROTO to "-grpc"; otherwise skip the following command lines. + + ``` + INF_PROTO="-grpc" + ``` + + b. Create a new namespace. 
```bash - export TEST_NS=kserve-demo"-${INF_PROTO}" + export TEST_NS="kserve-demo" oc new-project ${TEST_NS} ``` - b. Create a caikit `ServingRuntime`. + c. Create a caikit `ServingRuntime`. By default, it requests 4CPU and 8Gi of memory. You can adjust these values as needed. ```bash - oc apply -f ./custom-manifests/caikit/caikit-tgis-servingruntime-"$INF_PROTO".yaml -n ${TEST_NS} + oc apply -f ./custom-manifests/caikit/caikit-tgis-servingruntime"$INF_PROTO".yaml -n ${TEST_NS} ``` - c. Deploy the MinIO data connection and service account. + d. Deploy the MinIO data connection and service account. ```bash oc apply -f ./minio-secret-current.yaml -n ${TEST_NS} oc create -f ./serviceaccount-minio-current.yaml -n ${TEST_NS} ``` - d. Deploy the inference service. It will point to the model located in the `modelmesh-example-models/llm/models` directory. + e. Deploy the inference service. - ```bash + ./custom-manifests/caikit/caikit-tgis-isvc-template.yaml shows how to define a generic ISVC. + If you've deployed Minio with the flan-t5-small model, as explained earlier in this document, + you can use this specific ISVC to get it up and running: ./custom-manifests/caikit/caikit-tgis-isvc.yaml + It will point to the model located in the `modelmesh-example-models/llm/models` directory. - ISVC_NAME=caikit-tgis-isvc-"$INF_PROTO" - sed "s//$INF_PROTO/g" ./custom-manifests/caikit/caikit-tgis-isvc-template.yaml > ./custom-manifests/caikit/"$ISVC_NAME".yaml + ```bash + ISVC_NAME=caikit-tgis-isvc$INF_PROTO oc apply -f ./custom-manifests/caikit/"$ISVC_NAME".yaml -n ${TEST_NS} ``` - e. Verify that the inference service's `READY` state is `True`. + f. Verify that the inference service's `READY` state is `True`. 
```bash oc get isvc/$ISVC_NAME -n ${TEST_NS} @@ -81,7 +93,6 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu Compute KSVC_HOSTNAME: ```bash - export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) ``` @@ -224,16 +235,17 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu .... ``` -1. Remove the LLM model +4. Remove the LLM model - a. To remove (undeploy) the LLM model, delete the Inference Service. + a. To remove (undeploy) the LLM model, delete the Inference Service and its containing namespace: ```bash oc delete isvc --all -n ${TEST_NS} --force --grace-period=0 + oc delete ns ${TEST_NS} ``` b. Delete the MinIO resources by deleting the MinIO namespace. ```bash - oc delete ns ${TEST_NS} ${MINIO_NS} + oc delete ns ${MINIO_NS} ``` diff --git a/demo/kserve/scripts/test/deploy-model.sh b/demo/kserve/scripts/test/deploy-model.sh index d153b974..74d9dbc7 100755 --- a/demo/kserve/scripts/test/deploy-model.sh +++ b/demo/kserve/scripts/test/deploy-model.sh @@ -4,24 +4,30 @@ set -o nounset set -o errtrace # set -x #Uncomment this to debug script. -# Usage: a single! arg: "http" or "grpc" - the protocol to be used +# Deploys model for HTTP (default) or gRPC if "grpc" is passed as argument -# Check if a single argument is passed -if [ "$#" -ne 1 ]; then - echo "Error: exactly one argument is required: either 'http' or 'grpc'" +# Check if at most one argument is passed +if [ "$#" -gt 1 ]; then + echo "Error: at most a single argument ('http' or 'grpc') or no argument, default protocol being 'http'" exit 1 fi -# Check if the argument is either "http" or "grpc" -if [ "$1" = "http" ] || [ "$1" = "grpc" ]; then - INF_PROTO=$1 -else - echo "Error: Argument must be either 'http' or 'grpc'." 
- exit 1 +# Default values that fit the default 'http' protocol: +INF_PROTO="" + +# If we have an argument, check that it is either "http" or "grpc" +if [ "$#" -eq 1 ]; then + if [ "$1" = "http" ]; then + : ### nothing to be done + elif [ "$1" = "grpc" ]; then + INF_PROTO="-grpc" + else + echo "Error: Argument must be either 'http' or 'grpc'." + exit 1 + fi fi source "$(dirname "$(realpath "$0")")/../env.sh" -export TEST_NS=${TEST_NS}"-$INF_PROTO" # Deploy Minio ACCESS_KEY_ID=THEACCESSKEY @@ -41,29 +47,28 @@ else fi sed "s//$MINIO_NS/g" ./custom-manifests/minio/serviceaccount-minio.yaml | tee ${BASE_DIR}/serviceaccount-minio-current.yaml -# Deploy a sample model +# Test if ${TEST_NS} namespace already exists: oc get ns ${TEST_NS} if [[ $? == 1 ]] then - oc new-project ${TEST_NS} - - oc apply -f ./custom-manifests/caikit/caikit-tgis-servingruntime-"$INF_PROTO".yaml -n ${TEST_NS} + oc new-project ${TEST_NS} + + oc apply -f ./custom-manifests/caikit/caikit-tgis-servingruntime"${INF_PROTO}".yaml -n ${TEST_NS} - oc apply -f ${BASE_DIR}/minio-secret-current.yaml -n ${TEST_NS} - oc apply -f ${BASE_DIR}/serviceaccount-minio-current.yaml -n ${TEST_NS} + oc apply -f ${BASE_DIR}/minio-secret-current.yaml -n ${TEST_NS} + oc apply -f ${BASE_DIR}/serviceaccount-minio-current.yaml -n ${TEST_NS} - ### create the isvc. First step: create the yaml file - ISVC_NAME=caikit-tgis-isvc-"$INF_PROTO" - sed "s//$INF_PROTO/g" ./custom-manifests/caikit/caikit-tgis-isvc-template.yaml > ./custom-manifests/caikit/"$ISVC_NAME".yaml - oc apply -f ./custom-manifests/caikit/"$ISVC_NAME".yaml -n ${TEST_NS} + ### create the isvc. First step: create the yaml file + ISVC_NAME=caikit-tgis-isvc"${INF_PROTO}" + oc apply -f ./custom-manifests/caikit/"$ISVC_NAME".yaml -n ${TEST_NS} - # Resources needed to enable metrics for the model - # The metrics service needs the correct label in the `matchLabel` field. 
The expected value of this label is `-predictor-default` - # The metrics service in this repo is configured to work with the example model. If you are deploying a different model or using a different model name, change the label accordingly. + # Resources needed to enable metrics for the model + # The metrics service needs the correct label in the `matchLabel` field. The expected value of this label is `-predictor-default` + # The metrics service in this repo is configured to work with the example model. If you are deploying a different model or using a different model name, change the label accordingly. - ### TBD: Following 2 line should take into account the changed names - # oc apply -f custom-manifests/metrics/caikit-metrics-service.yaml -n ${TEST_NS} - # oc apply -f custom-manifests/metrics/caikit-metrics-servicemonitor.yaml -n ${TEST_NS} + ### TBD: Following 2 line should take into account the changed names + # oc apply -f custom-manifests/metrics/caikit-metrics-service.yaml -n ${TEST_NS} + # oc apply -f custom-manifests/metrics/caikit-metrics-servicemonitor.yaml -n ${TEST_NS} else echo echo "* ${TEST_NS} exist. 
Please remove the namespace or use another namespace name" From 5c56f1b2c0a90adfb0e8d0d0a6820cde86604768 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Tue, 12 Dec 2023 10:49:31 -0600 Subject: [PATCH 20/41] We have now specific http and grpc yaml files for flan5 LLM and in addition a generic yaml file --- .../caikit/caikit-tgis-isvc-grpc.yaml | 20 +++++++++++++++++++ .../caikit/caikit-tgis-isvc-template.yaml | 7 +++---- .../caikit/caikit-tgis-isvc.yaml | 20 +++++++++++++++++++ 3 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml create mode 100644 demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml new file mode 100644 index 00000000..909e827d --- /dev/null +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml @@ -0,0 +1,20 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + annotations: + serving.knative.openshift.io/enablePassthrough: "true" + sidecar.istio.io/inject: "true" + sidecar.istio.io/rewriteAppHTTPProbers: "true" + name: caikit-tgis-isvc-grpc +spec: + predictor: + serviceAccountName: sa + model: + modelFormat: + name: caikit + runtime: caikit-tgis-runtime-grpc + storageUri: s3://modelmesh-example-models/llm/models/flan-t5-small-caikit # single model here + # storageUri: proto://path/to/model # single model here + # Example, using a pvc: + # storageUri: pvc://caikit-pvc/flan-t5-small-caikit/ + # Target directory must contain a config.yml diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml index 953781ef..1c683920 100644 --- a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml @@ -5,16 +5,15 @@ metadata: 
serving.knative.openshift.io/enablePassthrough: "true" sidecar.istio.io/inject: "true" sidecar.istio.io/rewriteAppHTTPProbers: "true" - name: caikit-tgis-isvc- + name: caikit-tgis-isvc spec: predictor: serviceAccountName: sa model: modelFormat: name: caikit - runtime: caikit-tgis-runtime- - storageUri: s3://modelmesh-example-models/llm/models/flan-t5-small-caikit # single model here - # storageUri: proto://path/to/model # single model here + runtime: caikit-tgis-runtime + storageUri: proto://path/to/model # single model here # Example, using a pvc: # storageUri: pvc://caikit-pvc/flan-t5-small-caikit/ # Target directory must contain a config.yml diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml new file mode 100644 index 00000000..847a2201 --- /dev/null +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml @@ -0,0 +1,20 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + annotations: + serving.knative.openshift.io/enablePassthrough: "true" + sidecar.istio.io/inject: "true" + sidecar.istio.io/rewriteAppHTTPProbers: "true" + name: caikit-tgis-isvc +spec: + predictor: + serviceAccountName: sa + model: + modelFormat: + name: caikit + runtime: caikit-tgis-runtime + storageUri: s3://modelmesh-example-models/llm/models/flan-t5-small-caikit # single model here + # storageUri: proto://path/to/model # single model here + # Example, using a pvc: + # storageUri: pvc://caikit-pvc/flan-t5-small-caikit/ + # Target directory must contain a config.yml From 75264f7b92cc715a08d3ea742743914bd4ee80bb Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Tue, 12 Dec 2023 10:50:53 -0600 Subject: [PATCH 21/41] HTTP is default, thus caikit-tgis-servingruntime-http.yaml is renamed caikit-tgis-servingruntime.yaml --- ...servingruntime-http.yaml => caikit-tgis-servingruntime.yaml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename 
demo/kserve/custom-manifests/caikit/{caikit-tgis-servingruntime-http.yaml => caikit-tgis-servingruntime.yaml} (96%) diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-http.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime.yaml similarity index 96% rename from demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-http.yaml rename to demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime.yaml index 93d0f342..8528909c 100644 --- a/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-http.yaml +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime.yaml @@ -1,7 +1,7 @@ apiVersion: serving.kserve.io/v1alpha1 kind: ServingRuntime metadata: - name: caikit-tgis-runtime-http + name: caikit-tgis-runtime spec: multiModel: false supportedModelFormats: From d50f0ba12d4f002cc58f5bd26e6ba6ae9105dd23 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 13 Dec 2023 10:35:40 -0600 Subject: [PATCH 22/41] remove PREFIX and INF_PROTO variables --- demo/kserve/scripts/test/grpc-call.sh | 11 +++-------- demo/kserve/scripts/test/http-call.sh | 9 ++------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/demo/kserve/scripts/test/grpc-call.sh b/demo/kserve/scripts/test/grpc-call.sh index d167f49b..a33ddfd3 100755 --- a/demo/kserve/scripts/test/grpc-call.sh +++ b/demo/kserve/scripts/test/grpc-call.sh @@ -6,17 +6,14 @@ set -o errtrace # Performs inference using HTTP -PREFIX="-" -INF_PROTO="grpc" - source "$(dirname "$(realpath "$0")")/../env.sh" source "$(dirname "$(realpath "$0")")/../utils.sh" echo -echo "Wait until $INF_PROTO runtime is READY" +echo "Wait until grpc runtime is READY" -ISVC_NAME=caikit-tgis-isvc"${PREFIX}${INF_PROTO}" -wait_for_pods_ready "serving.kserve.io/inferenceservice=$ISVC_NAME" "${TEST_NS}" +ISVC_NAME=caikit-tgis-isvc-grpc +wait_for_pods_ready "serving.kserve.io/inferenceservice=${ISVC_NAME}" "${TEST_NS}" oc wait --for=condition=ready pod -l 
serving.kserve.io/inferenceservice=$ISVC_NAME -n ${TEST_NS} --timeout=300s echo @@ -24,8 +21,6 @@ echo "Testing all token in a single call" echo export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) -#export THE_QUESTION="At what temperature does Nitrogen boil?" -# export THE_MODEL="flan-t5-small-caikit" ### Invoke the inferences: grpcurl -insecure -d '{"text": "At what temperature does Nitrogen boil?"}' -H "mm-model-id: flan-t5-small-caikit" ${KSVC_HOSTNAME}:443 caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict diff --git a/demo/kserve/scripts/test/http-call.sh b/demo/kserve/scripts/test/http-call.sh index bfd723eb..09ae0017 100755 --- a/demo/kserve/scripts/test/http-call.sh +++ b/demo/kserve/scripts/test/http-call.sh @@ -6,16 +6,13 @@ set -o errtrace # Performs inference using HTTP -PREFIX="" -INF_PROTO="" - source "$(dirname "$(realpath "$0")")/../env.sh" source "$(dirname "$(realpath "$0")")/../utils.sh" echo -echo "Wait until $INF_PROTO runtime is READY" +echo "Wait until http runtime is READY" -ISVC_NAME=caikit-tgis-isvc"${PREFIX}${INF_PROTO}" +ISVC_NAME=caikit-tgis-isvc wait_for_pods_ready "serving.kserve.io/inferenceservice=$ISVC_NAME" "${TEST_NS}" oc wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=$ISVC_NAME -n ${TEST_NS} --timeout=300s @@ -24,8 +21,6 @@ echo "Testing all token in a single call" echo export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) -#export THE_QUESTION="At what temperature does Nitrogen boil?" 
-# export THE_MODEL="flan-t5-small-caikit" ### Invoke the inferences: From 448920dbc6760b5a93fcc8ebc3dc2af789e11e04 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 13 Dec 2023 10:39:16 -0600 Subject: [PATCH 23/41] fix bug in comment --- demo/kserve/scripts/test/grpc-call.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/kserve/scripts/test/grpc-call.sh b/demo/kserve/scripts/test/grpc-call.sh index a33ddfd3..62834320 100755 --- a/demo/kserve/scripts/test/grpc-call.sh +++ b/demo/kserve/scripts/test/grpc-call.sh @@ -4,7 +4,7 @@ set -o nounset set -o errtrace # set -x #Uncomment this to debug script. -# Performs inference using HTTP +# Performs inference using gRPC source "$(dirname "$(realpath "$0")")/../env.sh" source "$(dirname "$(realpath "$0")")/../utils.sh" From 4afbef0ba1beb1775d282d99f4339728ce431023 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 13 Dec 2023 10:57:58 -0600 Subject: [PATCH 24/41] consistent use of brackets around env variables --- demo/kserve/scripts/test/grpc-call.sh | 4 ++-- demo/kserve/scripts/test/http-call.sh | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/demo/kserve/scripts/test/grpc-call.sh b/demo/kserve/scripts/test/grpc-call.sh index 62834320..d4bccad2 100755 --- a/demo/kserve/scripts/test/grpc-call.sh +++ b/demo/kserve/scripts/test/grpc-call.sh @@ -14,13 +14,13 @@ echo "Wait until grpc runtime is READY" ISVC_NAME=caikit-tgis-isvc-grpc wait_for_pods_ready "serving.kserve.io/inferenceservice=${ISVC_NAME}" "${TEST_NS}" -oc wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=$ISVC_NAME -n ${TEST_NS} --timeout=300s +oc wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=${ISVC_NAME} -n ${TEST_NS} --timeout=300s echo echo "Testing all token in a single call" echo -export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) +export KSVC_HOSTNAME=$(oc get ksvc "${ISVC_NAME}"-predictor -n 
${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) ### Invoke the inferences: grpcurl -insecure -d '{"text": "At what temperature does Nitrogen boil?"}' -H "mm-model-id: flan-t5-small-caikit" ${KSVC_HOSTNAME}:443 caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict diff --git a/demo/kserve/scripts/test/http-call.sh b/demo/kserve/scripts/test/http-call.sh index 09ae0017..39e6053d 100755 --- a/demo/kserve/scripts/test/http-call.sh +++ b/demo/kserve/scripts/test/http-call.sh @@ -13,14 +13,14 @@ echo echo "Wait until http runtime is READY" ISVC_NAME=caikit-tgis-isvc -wait_for_pods_ready "serving.kserve.io/inferenceservice=$ISVC_NAME" "${TEST_NS}" -oc wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=$ISVC_NAME -n ${TEST_NS} --timeout=300s +wait_for_pods_ready "serving.kserve.io/inferenceservice=${ISVC_NAME}" "${TEST_NS}" +oc wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=${ISVC_NAME} -n ${TEST_NS} --timeout=300s echo echo "Testing all token in a single call" echo -export KSVC_HOSTNAME=$(oc get ksvc "$ISVC_NAME"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) +export KSVC_HOSTNAME=$(oc get ksvc "${ISVC_NAME}"-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) ### Invoke the inferences: From 6dcc663698d9c2bd715b22e5b0e2dca81e6447d7 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Tue, 19 Dec 2023 11:14:47 -0600 Subject: [PATCH 25/41] add set -u to prevent usage of any non initialized env variable --- demo/kserve/scripts/test/grpc-call.sh | 1 + demo/kserve/scripts/test/http-call.sh | 3 +++ 2 files changed, 4 insertions(+) diff --git a/demo/kserve/scripts/test/grpc-call.sh b/demo/kserve/scripts/test/grpc-call.sh index d4bccad2..83323f89 100755 --- a/demo/kserve/scripts/test/grpc-call.sh +++ b/demo/kserve/scripts/test/grpc-call.sh @@ -2,6 +2,7 @@ set -o pipefail set -o nounset set -o errtrace +set -u ### any reference to an unset variable will be considered as an error and will 
immediately stop execution # set -x #Uncomment this to debug script. # Performs inference using gRPC diff --git a/demo/kserve/scripts/test/http-call.sh b/demo/kserve/scripts/test/http-call.sh index 39e6053d..7bfd20ca 100755 --- a/demo/kserve/scripts/test/http-call.sh +++ b/demo/kserve/scripts/test/http-call.sh @@ -2,6 +2,9 @@ set -o pipefail set -o nounset set -o errtrace +set -u ### any reference to an unset variable will be considered as an error and will immediately stop execution + + # set -x #Uncomment this to debug script. # Performs inference using HTTP From 7fc47e3cce5cd601e5436949cdbc24f724b77c56 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Tue, 19 Dec 2023 15:18:41 -0600 Subject: [PATCH 26/41] modify manifests so as to directly spin the http/grpc servers --- .../custom-manifests/caikit/caikit-tgis-servingruntime-grpc.yaml | 1 + .../custom-manifests/caikit/caikit-tgis-servingruntime.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-grpc.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-grpc.yaml index 18eac728..0da088b5 100644 --- a/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-grpc.yaml +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime-grpc.yaml @@ -22,6 +22,7 @@ spec: # memory: 16Gi - name: transformer-container image: quay.io/opendatahub/caikit-tgis-serving:stable + command: ["python", "-m", "caikit.runtime.grpc_server"] env: - name: RUNTIME_LOCAL_MODELS_DIR value: /mnt/models diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime.yaml index 8528909c..38a139e6 100644 --- a/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime.yaml +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-servingruntime.yaml @@ -22,6 +22,7 @@ spec: # memory: 16Gi - name: transformer-container image: quay.io/opendatahub/caikit-tgis-serving:stable + 
command: ["python", "-m", "caikit.runtime.http_server"] env: - name: RUNTIME_LOCAL_MODELS_DIR value: /mnt/models From 1d94e5f4e5ce3aaa93925c937e954f9cec0d5c7f Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 05:30:24 -0600 Subject: [PATCH 27/41] improve the ISVC template by removing specific sa name and adding comments --- .../custom-manifests/caikit/caikit-tgis-isvc-template.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml index 1c683920..1d43ff28 100644 --- a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml @@ -8,7 +8,9 @@ metadata: name: caikit-tgis-isvc spec: predictor: - serviceAccountName: sa + # replace in following with the + # ServiceAccount that has the secret for accessing the model + serviceAccountName: model: modelFormat: name: caikit From 8048b158752cada1e51bb6bc3d21aa04869292d3 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 06:23:47 -0600 Subject: [PATCH 28/41] comment improvement --- .../custom-manifests/caikit/caikit-tgis-isvc-template.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml index 1d43ff28..21fed98c 100644 --- a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml @@ -8,9 +8,9 @@ metadata: name: caikit-tgis-isvc spec: predictor: - # replace in following with the - # ServiceAccount that has the secret for accessing the model - serviceAccountName: + # replace in following with the name + # of a ServiceAccount that has the secret for accessing the model + serviceAccountName: model: modelFormat: name: caikit From 
4d6ba0ac4e032388c9d14c9de9a30ec600d166c5 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 06:53:40 -0600 Subject: [PATCH 29/41] yet another comment improve --- .../custom-manifests/caikit/caikit-tgis-isvc-template.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml index 21fed98c..177c439f 100644 --- a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml @@ -8,9 +8,9 @@ metadata: name: caikit-tgis-isvc spec: predictor: - # replace in following with the name + # replace in following with the name # of a ServiceAccount that has the secret for accessing the model - serviceAccountName: + serviceAccountName: model: modelFormat: name: caikit From e3fc1e7b0f47321dde32f76d05653e330bcc6061 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 08:42:23 -0600 Subject: [PATCH 30/41] Improve documentation in deploy-remove.md for step 2.e as well as comments in ISVC template file --- .../caikit/caikit-tgis-isvc-template.yaml | 5 ++- demo/kserve/deploy-remove.md | 40 +++++++++++++++++-- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml index 177c439f..6da3600f 100644 --- a/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml +++ b/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml @@ -5,7 +5,10 @@ metadata: serving.knative.openshift.io/enablePassthrough: "true" sidecar.istio.io/inject: "true" sidecar.istio.io/rewriteAppHTTPProbers: "true" - name: caikit-tgis-isvc + # The following should be set to the + # actual name of the inference service. 
(e.g., caikit-tgis-isvc + # for HTTP and caikit-tgis-isvc-grpc for gRPC) + name: spec: predictor: # replace in following with the name diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index b32179f5..72f987ca 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -73,10 +73,44 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu e. Deploy the inference service. - ./custom-manifests/caikit/caikit-tgis-isvc-template.yaml shows how to define a generic ISVC. + The [ISVC Template file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml) + shows how to define a generic ISVC: + + ```bash +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + annotations: + serving.knative.openshift.io/enablePassthrough: "true" + sidecar.istio.io/inject: "true" + sidecar.istio.io/rewriteAppHTTPProbers: "true" + # The following should be set to the + # actual name of the inference service. (e.g., caikit-tgis-isvc + # for HTTP and caikit-tgis-isvc-grpc for gRPC) + name: +spec: + predictor: + # replace in following with the name + # of a ServiceAccount that has the secret for accessing the model + serviceAccountName: + model: + modelFormat: + name: caikit + runtime: caikit-tgis-runtime + storageUri: proto://path/to/model # single model here + # Example, using a pvc: + # storageUri: pvc://caikit-pvc/flan-t5-small-caikit/ + # Target directory must contain a config.yml + ``` + Note that you should modify 3 places: + i. should be replaced by the name of the inference + ii. should be replaced by the actual name of the Service Account + iii. 
proto://path/to/model should be replaced by the actual path to the model that will run the inferences + If you've deployed Minio with the flan-t5-small model, as explained earlier in this document, - you can use this specific ISVC to get it up and running: ./custom-manifests/caikit/caikit-tgis-isvc.yaml - It will point to the model located in the `modelmesh-example-models/llm/models` directory. + the following 2 lines will point to the specific yaml code (as function of your chosen protocol) + that is either to [ISVC Template file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml) for HTTP or + to [ISVC Template file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml) for gRPC ```bash ISVC_NAME=caikit-tgis-isvc$INF_PROTO From edebf6cbf5b093c0efd3f0ca2982d1bd436026ff Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 08:47:52 -0600 Subject: [PATCH 31/41] Further improve documentation in deploy-remove.md for step 2.e --- demo/kserve/deploy-remove.md | 49 ++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index 72f987ca..f98050ff 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -77,31 +77,32 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu shows how to define a generic ISVC: ```bash -apiVersion: serving.kserve.io/v1beta1 -kind: InferenceService -metadata: - annotations: - serving.knative.openshift.io/enablePassthrough: "true" - sidecar.istio.io/inject: "true" - sidecar.istio.io/rewriteAppHTTPProbers: "true" - # The following should be set to the - # actual name of the inference service. 
(e.g., caikit-tgis-isvc - # for HTTP and caikit-tgis-isvc-grpc for gRPC) - name: <caikit-tgis-isvc-name> -spec: - predictor: - # replace in following <NameOfAServiceAccount> with the name - # of a ServiceAccount that has the secret for accessing the model - serviceAccountName: <NameOfAServiceAccount> - model: - modelFormat: - name: caikit - runtime: caikit-tgis-runtime - storageUri: proto://path/to/model # single model here - # Example, using a pvc: - # storageUri: pvc://caikit-pvc/flan-t5-small-caikit/ - # Target directory must contain a config.yml + apiVersion: serving.kserve.io/v1beta1 + kind: InferenceService + metadata: + annotations: + serving.knative.openshift.io/enablePassthrough: "true" + sidecar.istio.io/inject: "true" + sidecar.istio.io/rewriteAppHTTPProbers: "true" + # The following should be set to the + # actual name of the inference service. (e.g., caikit-tgis-isvc + # for HTTP and caikit-tgis-isvc-grpc for gRPC) + name: <caikit-tgis-isvc-name> + spec: + predictor: + # replace in following <NameOfAServiceAccount> with the name + # of a ServiceAccount that has the secret for accessing the model + serviceAccountName: <NameOfAServiceAccount> + model: + modelFormat: + name: caikit + runtime: caikit-tgis-runtime + storageUri: proto://path/to/model # single model here + # Example, using a pvc: + # storageUri: pvc://caikit-pvc/flan-t5-small-caikit/ + # Target directory must contain a config.yml ``` + Note that you should modify 3 places: i. <caikit-tgis-isvc-name> should be replaced by the name of the inference ii.
<NameOfAServiceAccount> should be replaced by the actual name of the Service Account From 1e154696d0e8831d7844e5d05fd2bfa8de1b2700 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 08:57:23 -0600 Subject: [PATCH 32/41] Further improve documentation in deploy-remove.md for step 2.e --- demo/kserve/deploy-remove.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index f98050ff..e015c835 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -103,10 +103,11 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu # Target directory must contain a config.yml ``` - Note that you should modify 3 places: - i. <caikit-tgis-isvc-name> should be replaced by the name of the inference - ii. <NameOfAServiceAccount> should be replaced by the actual name of the Service Account - iii. proto://path/to/model should be replaced by the actual path to the model that will run the inferences + **Note that you should adapt this template by modifying** + +- <caikit-tgis-isvc-name> should be replaced by the name of the inference +- <NameOfAServiceAccount> should be replaced by the actual name of the Service Account +- proto://path/to/model should be replaced by the actual path to the model that will run the inferences If you've deployed Minio with the flan-t5-small model, as explained earlier in this document, the following 2 lines will point to the specific yaml code (as function of your chosen protocol) From aa0e672daa06fb577ba9e9e4b6c4a27eea092479 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 08:59:59 -0600 Subject: [PATCH 33/41] Yet another documentation improvement in deploy-remove.md for step 2.e --- demo/kserve/deploy-remove.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index e015c835..f23b199b 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -103,7 +103,7 @@ Note: The **flan-t5-small** LLM model has been
containerized into an S3 MinIO bu # Target directory must contain a config.yml ``` - **Note that you should adapt this template by modifying** + **Note** you should adapt this template by modifying: - should be replaced by the name of the inference - should be replaced by the actual name of the Service Account @@ -111,8 +111,8 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu If you've deployed Minio with the flan-t5-small model, as explained earlier in this document, the following 2 lines will point to the specific yaml code (as function of your chosen protocol) - that is either to [ISVC Template file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml) for HTTP or - to [ISVC Template file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml) for gRPC + that is either to [HTTP ISVC file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml) or + to [gRPC ISVC file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml) ```bash ISVC_NAME=caikit-tgis-isvc$INF_PROTO From 2ea48f91f18bf06134e4dd608d0e92368f3c7234 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 09:01:48 -0600 Subject: [PATCH 34/41] Yet another documentation improvement in deploy-remove.md for step 2.e --- demo/kserve/deploy-remove.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index f23b199b..14461f00 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -110,9 +110,10 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu - proto://path/to/model should be replaced by the actual path to the model that will run the inferences If you've deployed Minio with the flan-t5-small model, as explained earlier in this document, - the following 2 lines will point to the specific yaml code (as function of your chosen protocol) + the following 2 lines will use the specific yaml code (as 
needed per chosen protocol) that is either to [HTTP ISVC file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml) or - to [gRPC ISVC file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml) + to [gRPC ISVC file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml) and create + the needed Inference Service: ```bash ISVC_NAME=caikit-tgis-isvc$INF_PROTO From 24b264de9608fbf33cd9939a450e178575547bec Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 09:07:21 -0600 Subject: [PATCH 35/41] Yet another documentation improvement in deploy-remove.md for step 2.e --- demo/kserve/deploy-remove.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index 14461f00..dd3b3193 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -105,8 +105,8 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu **Note** you should adapt this template by modifying: -- should be replaced by the name of the inference -- should be replaced by the actual name of the Service Account +- <caikit-tgis-isvc-name> should be replaced by the name of the inference +- <NameOfAServiceAccount> should be replaced by the actual name of the Service Account - proto://path/to/model should be replaced by the actual path to the model that will run the inferences If you've deployed Minio with the flan-t5-small model, as explained earlier in this document, From 2f704582b9003c9cd0e57ef7a64ca12e42f48371 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 09:08:18 -0600 Subject: [PATCH 36/41] Yet another documentation improvement in deploy-remove.md for step 2.e --- demo/kserve/deploy-remove.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index dd3b3193..0d4ac17e 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -103,7 
+103,7 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu # Target directory must contain a config.yml ``` - **Note** you should adapt this template by modifying: + **Note** you should adapt this template as follows: - <caikit-tgis-isvc-name> should be replaced by the name of the inference - <NameOfAServiceAccount> should be replaced by the actual name of the Service Account From df575a73896a69229f7940f00d891b4057d71107 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 09:09:17 -0600 Subject: [PATCH 37/41] Yet another documentation improvement in deploy-remove.md for step 2.e --- demo/kserve/deploy-remove.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index 0d4ac17e..f440d9b4 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -111,8 +111,8 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu If you've deployed Minio with the flan-t5-small model, as explained earlier in this document, the following 2 lines will use the specific yaml code (as needed per chosen protocol) - that is either to [HTTP ISVC file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml) or - to [gRPC ISVC file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml) and create + that is either [HTTP ISVC file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml) or + [gRPC ISVC file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml) and create the needed Inference Service: ```bash From a9a47cc01800f47cf52f415d3e8255832238ff4a Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 09:33:48 -0600 Subject: [PATCH 38/41] Yet another documentation improvement in deploy-remove.md for step 2.e --- demo/kserve/deploy-remove.md | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/demo/kserve/deploy-remove.md 
b/demo/kserve/deploy-remove.md index f440d9b4..9a4128dd 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -105,15 +105,13 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu **Note** you should adapt this template as follows: -- <caikit-tgis-isvc-name> should be replaced by the name of the inference -- <NameOfAServiceAccount> should be replaced by the actual name of the Service Account -- proto://path/to/model should be replaced by the actual path to the model that will run the inferences - - If you've deployed Minio with the flan-t5-small model, as explained earlier in this document, - the following 2 lines will use the specific yaml code (as needed per chosen protocol) - that is either [HTTP ISVC file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc.yaml) or - [gRPC ISVC file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-grpc.yaml) and create - the needed Inference Service: + - `<caikit-tgis-isvc-name>` should be replaced by the name of the inference + - `<NameOfAServiceAccount>` should be replaced by the actual name of the Service Account + - `proto://path/to/model` should be replaced by the actual path to the model that will run the inferences + + Note: If you followed all the steps to this point, the following code will + create the needed Inference Service using the Minio with the flan-t5-small + model and the service account that have been created in the previous steps. 
```bash ISVC_NAME=caikit-tgis-isvc$INF_PROTO From d37977aa54213a21050645f955b5e41dbe2c9db4 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 09:35:41 -0600 Subject: [PATCH 39/41] Yet another documentation improvement in deploy-remove.md for step 2.e --- demo/kserve/deploy-remove.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index 9a4128dd..3cf7b66e 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -105,8 +105,8 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu **Note** you should adapt this template as follows: - - `<caikit-tgis-isvc-name>` should be replaced by the name of the inference - - `<NameOfAServiceAccount>` should be replaced by the actual name of the Service Account + - `` should be replaced by the name of the inference + - `` should be replaced by the actual name of the Service Account - `proto://path/to/model` should be replaced by the actual path to the model that will run the inferences Note: If you followed all the steps to this point, the following code will From 47a4a6491bbfcad4f1d52c60f5975df68924eb1f Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 10:31:05 -0600 Subject: [PATCH 40/41] Yet another documentation improvement in deploy-remove.md for step 2.e --- demo/kserve/deploy-remove.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index 3cf7b66e..7ed76139 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -73,8 +73,7 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu e. Deploy the inference service. 
- The [ISVC Template file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml) - shows how to define a generic ISVC: + The [ISVC template file](/demo/kserve/custom-manifests/caikit/caikit-tgis-isvc-template.yaml) shown below contains all that is needed to set up the Inference Service ```bash apiVersion: serving.kserve.io/v1beta1 @@ -103,7 +102,7 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu # Target directory must contain a config.yml ``` - **Note** you should adapt this template as follows: + Before using it, the following details to be added: - `<caikit-tgis-isvc-name>` should be replaced by the name of the inference - `<NameOfAServiceAccount>` should be replaced by the actual name of the Service Account From 7f33b245a9cb7a46abca59d0a6c852a56ddde2d3 Mon Sep 17 00:00:00 2001 From: Yosef Moatti Date: Wed, 20 Dec 2023 10:32:20 -0600 Subject: [PATCH 41/41] Yet another documentation improvement in deploy-remove.md for step 2.e --- demo/kserve/deploy-remove.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/kserve/deploy-remove.md b/demo/kserve/deploy-remove.md index 7ed76139..2c551e69 100644 --- a/demo/kserve/deploy-remove.md +++ b/demo/kserve/deploy-remove.md @@ -102,7 +102,7 @@ Note: The **flan-t5-small** LLM model has been containerized into an S3 MinIO bu # Target directory must contain a config.yml ``` - Before using it, the following details to be added: + Before using it, the following details have to be added: - `<caikit-tgis-isvc-name>` should be replaced by the name of the inference - `<NameOfAServiceAccount>` should be replaced by the actual name of the Service Account