From f68c015c193541521aca01d4d686682122fe61d4 Mon Sep 17 00:00:00 2001 From: svaruag Date: Fri, 11 Aug 2023 10:25:55 -0700 Subject: [PATCH 1/4] add default score file for non hftgi --- .../llama-safe-online-deployment.ipynb | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb index 83c502925b..98e12b0a74 100644 --- a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb +++ b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb @@ -298,12 +298,16 @@ " reg_client.models.list(model_name)\n", ") # list available versions of the model\n", "llama_model = None\n", + "hf_tgi = False # If text-generation-inference (hf container) is supported for model\n", "\n", "if len(version_list) == 0:\n", " raise Exception(f\"No model named {model_name} found in registry\")\n", "else:\n", " model_version = version_list[0].version\n", " llama_model = reg_client.models.get(model_name, model_version)\n", + " if \"inference_supported_envs\" in llama_model.tags:\n", + " if \"hf_tgi\" in llama_model.tags[\"inference_supported_envs\"]:\n", + " hf_tgi = True\n", " print(\n", " f\"Using model name: {llama_model.name}, version: {llama_model.version}, id: {llama_model.id} for inferencing\"\n", " )" @@ -388,7 +392,7 @@ "\n", " # Trigger the endpoint creation\n", " try:\n", - " ml_client.begin_create_or_update(endpoint).wait()\n", + " # ml_client.begin_create_or_update(endpoint).wait()\n", " print(\"\\n---Endpoint created successfully---\\n\")\n", " except Exception as err:\n", " raise RuntimeError(\n", @@ -448,16 +452,23 @@ "source": [ "from azure.ai.ml.entities import (\n", " OnlineRequestSettings,\n", + " CodeConfiguration,\n", " ManagedOnlineDeployment,\n", " ProbeSettings,\n", ")\n", "\n", + "# For HF TGI inferencing, the scoring script is baked into the container\n", + "code_configuration = CodeConfiguration(\n", + " code=\"./llama-files/score/default/\", scoring_script=\"score.py\"\n", + ") if not hf_tgi else None\n", + "\n", "deployment = ManagedOnlineDeployment(\n", " name=deployment_name,\n", " endpoint_name=endpoint_name,\n", " model=llama_model.id,\n", " instance_type=sku_name,\n", " instance_count=1,\n", + " code_configuration=code_configuration,\n", " environment_variables=deployment_env_vars,\n", " request_settings=OnlineRequestSettings(request_timeout_ms=REQUEST_TIMEOUT_MS),\n", " liveness_probe=ProbeSettings(\n", @@ -476,7 +487,7 @@ "\n", "# Trigger the deployment creation\n", "try:\n", - " ml_client.begin_create_or_update(deployment).wait()\n", + " # ml_client.begin_create_or_update(deployment).wait()\n", " print(\"\\n---Deployment created successfully---\\n\")\n", "except Exception as err:\n", " raise RuntimeError(\n", From d871d252d2bf91967cdc42a5ae8ce1e84f9f986f Mon Sep 17 00:00:00 2001 From: svaruag Date: Fri, 11 Aug 2023 10:28:35 -0700 Subject: [PATCH 2/4] rev --- .../text-generation/llama-safe-online-deployment.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb index 98e12b0a74..c97bf942a5 100644 --- a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb +++ b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb @@ -361,7 +361,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -392,7 +392,7 @@ "\n", " # Trigger the endpoint creation\n", " try:\n", - " # ml_client.begin_create_or_update(endpoint).wait()\n", + " ml_client.begin_create_or_update(endpoint).wait()\n", " print(\"\\n---Endpoint created successfully---\\n\")\n", " except Exception as err:\n", " raise RuntimeError(\n", @@ -487,7 +487,7 @@ "\n", "# Trigger the deployment creation\n", "try:\n", - " # ml_client.begin_create_or_update(deployment).wait()\n", + " ml_client.begin_create_or_update(deployment).wait()\n", " print(\"\\n---Deployment created successfully---\\n\")\n", "except Exception as err:\n", " raise RuntimeError(\n", From 5c7570e4cfeddd656abf220ae6fb91a5ee6891e0 Mon Sep 17 00:00:00 2001 From: svaruag Date: Fri, 11 Aug 2023 15:02:10 -0700 Subject: [PATCH 3/4] black --- .../text-generation/llama-safe-online-deployment.ipynb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb index c97bf942a5..a27ff8583b 100644 --- a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb +++ b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb @@ -361,7 +361,6 @@ }, { "cell_type": "code", - "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -458,9 +457,11 @@ ")\n", "\n", "# For HF TGI inferencing, the scoring script is baked into the container\n", - "code_configuration = CodeConfiguration(\n", - " code=\"./llama-files/score/default/\", scoring_script=\"score.py\"\n", - ") if not hf_tgi else None\n", + "code_configuration = (\n", + " CodeConfiguration(code=\"./llama-files/score/default/\", scoring_script=\"score.py\")\n", + " if not hf_tgi\n", + " else None\n", + ")\n", "\n", "deployment = ManagedOnlineDeployment(\n", " name=deployment_name,\n", From e67645bff8ce03afd570b698d639e8682a687639 Mon Sep 17 00:00:00 2001 From: svaruag Date: Fri, 11 Aug 2023 15:28:00 -0700 Subject: [PATCH 4/4] add excount --- .../inference/text-generation/llama-safe-online-deployment.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb index a27ff8583b..5ab2370012 100644 --- a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb +++ b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb @@ -361,6 +361,7 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], "source": [