From 60da9e8b57d9783056cbfd312f8941945e16b701 Mon Sep 17 00:00:00 2001 From: Rupal Jain Date: Thu, 3 Aug 2023 21:55:07 +0530 Subject: [PATCH 1/5] updates for sdk/cli inference examples --- .../image-classification-batch-endpoint.sh | 1 - .../image-classification-online-endpoint.sh | 10 +++++---- ...ge-instance-segmentation-batch-endpoint.sh | 1 - ...e-instance-segmentation-online-endpoint.sh | 1 - .../image-object-detection-batch-endpoint.sh | 1 - .../image-object-detection-online-endpoint.sh | 1 - .../image-classification-batch-endpoint.ipynb | 14 ++++-------- ...image-classification-online-endpoint.ipynb | 14 +++++------- ...instance-segmentation-batch-endpoint.ipynb | 12 +++------- ...nstance-segmentation-online-endpoint.ipynb | 22 ++++++------------- ...mage-object-detection-batch-endpoint.ipynb | 11 +++++----- ...age-object-detection-online-endpoint.ipynb | 14 +++++------- 12 files changed, 37 insertions(+), 65 deletions(-) diff --git a/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh b/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh index ed571d33f4d..d678213a475 100644 --- a/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh +++ b/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh @@ -12,7 +12,6 @@ workspace_name="" # This is the model from system registry that needs to be deployed model_name="microsoft-beit-base-patch16-224-pt22k-ft22k" - model_label="latest" deployment_compute="cpu-cluster" diff --git a/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh b/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh index fe983060641..a5d1666acdd 100644 --- a/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh +++ b/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh @@ -10,8 +10,7 @@ workspace_name="" # This is the model from system registry that needs to be deployed model_name="microsoft-beit-base-patch16-224-pt22k-ft22k" -# using the latest version of the model - not working yet -model_version=2 +model_label="latest" version=$(date +%s) endpoint_name="image-classification-$version" @@ -42,12 +41,15 @@ workspace_info="--resource-group $resource_group_name --workspace-name $workspac # 2. Check if the model exists in the registry # need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name +if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name then - echo "Model $model_name:$model_version does not exist in registry $registry_name" + echo "Model $model_name:$model_label does not exist in registry $registry_name" exit 1 fi +# get the latest model version +model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv) + # 3. 
Deploy the model to an endpoint # create online endpoint az ml online-endpoint create --name $endpoint_name $workspace_info || { diff --git a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh index 9ca5e5a2dbc..1d39330b162 100644 --- a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh +++ b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh @@ -9,7 +9,6 @@ workspace_name="" # This is the model from system registry that needs to be deployed model_name="mask_rcnn_swin-t-p4-w7_fpn_1x_coco" - model_label="latest" deployment_compute="cpu-cluster" diff --git a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh index c7f2c2bda43..ebbfe5f7267 100644 --- a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh +++ b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh @@ -10,7 +10,6 @@ workspace_name="" # This is the model from system registry that needs to be deployed model_name="mask_rcnn_swin-t-p4-w7_fpn_1x_coco" -# using the latest version of the model - not working yet model_label="latest" version=$(date +%s) diff --git a/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh b/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh index ff3290629f1..47bc745d53f 100644 --- a/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh +++ b/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh @@ -9,7 +9,6 @@ workspace_name="" # This is the model from system registry that needs to be deployed model_name="yolof_r50_c5_8x8_1x_coco" - model_label="latest" deployment_compute="cpu-cluster" diff --git a/cli/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.sh b/cli/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.sh index 8b74f780ff6..ceba7c69d07 100644 --- a/cli/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.sh +++ b/cli/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.sh @@ -10,7 +10,6 @@ workspace_name="" # This is the model from system registry that needs to be deployed model_name="yolof_r50_c5_8x8_1x_coco" -# using the latest version of the model - not working yet model_label="latest" version=$(date +%s) diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb index 7b04391c216..90eda4ebb11 100644 --- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb @@ -16,7 +16,7 @@ "* MultiLabel: An 
image can be categorised into more than one class.\n",
 " \n",
 "### Model\n",
- "Models that can perform the `image-classification` task are tagged with `image-classification`. We will use the `microsoft-beit-base-patch16-224-pt22k-ft22k` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+ "Models that can perform the `image-classification` task are tagged with `image-classification`. We will use the `microsoft-beit-base-patch16-224-pt22k-ft22k` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import_model_into_registry.ipynb) and then use them for inference. \n",
 "\n",
 "### Inference data\n",
 "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip) dataset.\n",
@@ -92,7 +92,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "#### Create a compute cluster.\n",
+ "#### Create a compute cluster\n",
 "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as size below. If you already have a sufficient compute cluster, you can simply define the name in compute_name in the following code block."
 ]
 },
@@ -130,7 +130,7 @@
 "source": [
 "### 2. Pick a model to deploy\n",
 "\n",
- "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-classification` task. In this example, we use the `microsoft-beit-base-patch16-224-pt22k-ft22k ` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+ "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-classification` task. In this example, we use the `microsoft-beit-base-patch16-224-pt22k-ft22k` model. If you have opened this notebook for a different model, replace the model name accordingly. "
 ]
 },
 {
@@ -239,7 +239,7 @@
 "\n",
 "We can provide input images to batch inference either in a folder containing images or in a csv file containing \"image\" named column having images in base 64 format.\n",
 "\n",
- "Note: If job failed with error Assertion Error (The actual length exceeded max length 100 MB) then please try with less number of input images or use ImageFolder Input mode."
+ "Note: If the job fails with an Assertion Error (`The actual length exceeded max length 100 MB`), please try with fewer input images or use the ImageFolder input mode."
]
 },
 {
@@ -276,12 +276,6 @@
 "Image(filename=sample_image)"
 ]
 },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": []
- },
 {
 "attachments": {},
 "cell_type": "markdown",
diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb
index a4da5c12646..9cb194a284a 100644
--- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb
@@ -16,7 +16,7 @@
 "* MultiLabel: An image can be categorised into more than one class.\n",
 " \n",
 "### Model\n",
- "Models that can perform the `image-classification` task are tagged with `image-classification`. We will use the `microsoft-beit-base-patch16-224-pt22k-ft22k` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+ "Models that can perform the `image-classification` task are tagged with `image-classification`. We will use the `microsoft-beit-base-patch16-224-pt22k-ft22k` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import_model_into_registry.ipynb) and then use them for inference.\n",
 "\n",
 "### Inference data\n",
 "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip) dataset.\n",
@@ -77,7 +77,6 @@
 " credential,\n",
 " subscription_id,\n",
 " resource_group,\n",
- " # workspace_name\n",
 " registry_name=\"azureml-staging\",\n",
 ")\n",
 "# genrating a unique timestamp that can be used for names and versions that need to be unique\n",
@@ -91,7 +90,7 @@
 "source": [
 "### 2. Pick a model to deploy\n",
 "\n",
- "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-classification` task. In this example, we use the `microsoft-beit-base-patch16-224-pt22k-ft22k ` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+ "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-classification` task. In this example, we use the `microsoft-beit-base-patch16-224-pt22k-ft22k` model. If you have opened this notebook for a different model, replace the model name accordingly. 
" ] }, { @@ -101,9 +100,8 @@ "outputs": [], "source": [ "model_name = \"microsoft-beit-base-patch16-224-pt22k-ft22k\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "\n", + "foundation_models = registry_ml_client.models.list(name=model_name)\n", + "foundation_model = max(foundation_models, key=lambda x: x.version)\n", "print(\n", " f\"\\n\\nUsing model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing\"\n", ")" @@ -226,7 +224,7 @@ " endpoint_name=online_endpoint_name,\n", " model=foundation_model.id,\n", " # use GPU instance type like Standard_NC6s_v3 for faster explanations\n", - " instance_type=\"Standard_DS3_V2\", # \"Standard_DS3_V2\",\n", + " instance_type=\"Standard_DS3_V2\",\n", " instance_count=1,\n", " request_settings=OnlineRequestSettings(\n", " max_concurrent_requests_per_instance=1, request_timeout_ms=5000, max_queue_wait_ms=500 # 90000,\n", @@ -258,7 +256,7 @@ "source": [ "### 5. Test the endpoint with sample data\n", "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then show the display the scored labels alongside the ground truth labels" + "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then show the scored labels alongside the ground truth labels." ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb index 675b61eabe3..ddeb3e0cbd1 100644 --- a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb @@ -127,7 +127,7 @@ "source": [ "### 2. Pick a model to deploy\n", "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-instance-segmentation` task. In this example, we use the `mask_rcnn_swin-t-p4-w7_fpn_1x_coco` model. If you have opened this notebook for a different model, replace the model name and version accordingly. This is a pre-trained model and may not give correct prediction for your dataset. You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. Please refer to the [notebook.](../../finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb)" + "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-instance-segmentation` task. In this example, we use the `mask_rcnn_swin-t-p4-w7_fpn_1x_coco` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct prediction for your dataset. We strongly recommend to TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
Please refer to the [notebook](../../finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb)."
 ]
 },
 {
@@ -137,9 +137,8 @@
 "outputs": [],
 "source": [
 "model_name = \"mask_rcnn_swin-t-p4-w7_fpn_1x_coco\"\n",
- "\n",
- "foundation_model = registry_ml_client.models.get(model_name, label=\"latest\")\n",
- "\n",
+ "foundation_models = registry_ml_client.models.list(name=model_name)\n",
+ "foundation_model = max(foundation_models, key=lambda x: int(x.version))  # versions are strings; compare numerically\n",
 "print(\n",
 " f\"\\n\\nUsing model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing\"\n",
 ")"
 ]
 },
@@ -481,11 +480,6 @@
 }
 ],
 "metadata": {
- "kernelspec": {
- "display_name": "temp",
- "language": "python",
- "name": "python3"
- },
 "language_info": {
 "name": "python"
 }
diff --git a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb
index f0e150e1a2d..4b76fe58a5a 100644
--- a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb
@@ -38,7 +38,7 @@
 "### 1. Setup pre-requisites\n",
 "* Install dependencies\n",
 "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n",
- "* Connect to `azureml` system registry"
+ "* Connect to `azureml-staging` system registry"
 ]
 },
 {
@@ -76,7 +76,6 @@
 " credential,\n",
 " subscription_id,\n",
 " resource_group,\n",
- " # workspace_name\n",
 " registry_name=\"azureml-staging\",\n",
 ")\n",
 "# genrating a unique timestamp that can be used for names and versions that need to be unique\n",
@@ -90,7 +89,7 @@
 "source": [
 "### 2. Pick a model to deploy\n",
 "\n",
- "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-instance-segmentation` task. In this example, we use the `mask_rcnn_swin-t-p4-w7_fpn_1x_coco` model. If you have opened this notebook for a different model, replace the model name and version accordingly. This is a pre-trained model and may not give correct prediction for your dataset. You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. Please refer to the [notebook.](../../finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb)"
+ "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-instance-segmentation` task. In this example, we use the `mask_rcnn_swin-t-p4-w7_fpn_1x_coco` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct predictions for your dataset. We strongly recommend training this model on a downstream task to be able to use it for predictions and inference. Please refer to the [notebook](../../finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb)."
]
 },
 {
@@ -100,9 +99,8 @@
 "outputs": [],
 "source": [
 "model_name = \"mask_rcnn_swin-t-p4-w7_fpn_1x_coco\"\n",
- "# foundation_model = registry_ml_client.models.get(model_name, label=\"latest\")\n",
- "foundation_model = workspace_ml_client.models.get(model_name, label=\"latest\")\n",
- "\n",
+ "foundation_models = registry_ml_client.models.list(name=model_name)\n",
+ "foundation_model = max(foundation_models, key=lambda x: int(x.version))  # versions are strings; compare numerically\n",
 "print(\n",
 " f\"\\n\\nUsing model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing\"\n",
 ")"
 ]
 },
@@ -221,7 +219,9 @@
 " instance_type=\"Standard_DS3_v2\",\n",
 " instance_count=1,\n",
 " request_settings=OnlineRequestSettings(\n",
- " max_concurrent_requests_per_instance=1, request_timeout_ms=5000, max_queue_wait_ms=500 # 90000,\n",
+ " max_concurrent_requests_per_instance=1,\n",
+ " request_timeout_ms=5000, # 90000,\n",
+ " max_queue_wait_ms=500\n",
 " ),\n",
 " liveness_probe=ProbeSettings(\n",
 " failure_threshold=30,\n",
@@ -286,13 +286,10 @@
 "\n",
 "sample_image = os.path.join(dataset_dir, \"images\", \"99.jpg\")\n",
 "\n",
- "\n",
 "def read_image(image_path):\n",
 " with open(image_path, \"rb\") as f:\n",
 " return f.read()\n",
 "\n",
- "\n",
- "# {\"inputs\":{\"image\":[\"\"]}}\n",
 "request_json = {\n",
 " \"input_data\": \n",
 " {\n",
@@ -350,11 +347,6 @@
 }
 ],
 "metadata": {
- "kernelspec": {
- "display_name": "temp",
- "language": "python",
- "name": "python3"
- },
 "language_info": {
 "name": "python"
 }
diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb
index 7bb74677412..9d65c2e42c6 100644
--- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb
@@ -38,7 +38,7 @@
 "### 1. Setup pre-requisites\n",
 "* Install dependencies\n",
 "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n",
- "* Connect to `azureml` system registry"
+ "* Connect to `azureml-staging` system registry"
 ]
 },
 {
@@ -89,7 +89,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "#### Create a compute cluster.\n",
+ "#### Create a compute cluster\n",
 "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as size below. If you already have a sufficient compute cluster, you can simply define the name in compute_name in the following code block."
 ]
 },
@@ -127,7 +127,7 @@
 "source": [
 "### 2. Pick a model to deploy\n",
 "\n",
- "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-object-detection` task. In this example, we use the `yolof_r50_c5_8x8_1x_coco ` model. If you have opened this notebook for a different model, replace the model name and version accordingly. This is a pre-trained model and may not give correct prediction for your dataset. You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
Please refer to the [notebook.](../../finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb)"
+ "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-object-detection` task. In this example, we use the `yolof_r50_c5_8x8_1x_coco` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct predictions for your dataset. We strongly recommend training this model on a downstream task to be able to use it for predictions and inference. Please refer to the [notebook](../../finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb)."
 ]
 },
 {
@@ -137,9 +137,8 @@
 "outputs": [],
 "source": [
 "model_name = \"yolof_r50_c5_8x8_1x_coco\"\n",
- "\n",
- "foundation_model = registry_ml_client.models.get(model_name, label=\"latest\")\n",
- "\n",
+ "foundation_models = registry_ml_client.models.list(name=model_name)\n",
+ "foundation_model = max(foundation_models, key=lambda x: int(x.version))  # versions are strings; compare numerically\n",
 "print(\n",
 " f\"\\n\\nUsing model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing\"\n",
 ")"
 ]
 },
diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb
index b4b244c7412..2e945265316 100644
--- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb
@@ -74,7 +74,6 @@
 " credential,\n",
 " subscription_id,\n",
 " resource_group,\n",
- " # workspace_name\n",
 " registry_name=\"azureml-staging\",\n",
 ")\n",
 "# genrating a unique timestamp that can be used for names and versions that need to be unique\n",
@@ -88,7 +87,7 @@
 "source": [
 "### 2. Pick a model to deploy\n",
 "\n",
- "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-object-detection` task. In this example, we use the `yolof_r50_c5_8x8_1x_coco ` model. If you have opened this notebook for a different model, replace the model name and version accordingly. This is a pre-trained model and may not give correct prediction for your dataset. You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. Please refer to the [notebook.](../../finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb)"
+ "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-object-detection` task. In this example, we use the `yolof_r50_c5_8x8_1x_coco` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct predictions for your dataset. We strongly recommend training this model on a downstream task to be able to use it for predictions and inference. Please refer to the [notebook](../../finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb)."
]
 },
 {
@@ -98,8 +97,8 @@
 "outputs": [],
 "source": [
 "model_name = \"yolof_r50_c5_8x8_1x_coco\"\n",
- "foundation_model = registry_ml_client.models.get(model_name, label=\"latest\")\n",
- "\n",
+ "foundation_models = registry_ml_client.models.list(name=model_name)\n",
+ "foundation_model = max(foundation_models, key=lambda x: int(x.version))  # versions are strings; compare numerically\n",
 "print(\n",
 " f\"\\n\\nUsing model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing\"\n",
 ")"
 ]
 },
@@ -218,7 +217,9 @@
 " instance_type=\"Standard_DS3_v2\",\n",
 " instance_count=1,\n",
 " request_settings=OnlineRequestSettings(\n",
- " max_concurrent_requests_per_instance=1, request_timeout_ms=5000, max_queue_wait_ms=500 # 90000,\n",
+ " max_concurrent_requests_per_instance=1,\n",
+ " request_timeout_ms=5000, # 90000,\n",
+ " max_queue_wait_ms=500\n",
 " ),\n",
 " liveness_probe=ProbeSettings(\n",
 " failure_threshold=30,\n",
@@ -283,13 +284,10 @@
 "\n",
 "sample_image = os.path.join(dataset_dir, \"images\", \"99.jpg\")\n",
 "\n",
- "\n",
 "def read_image(image_path):\n",
 " with open(image_path, \"rb\") as f:\n",
 " return f.read()\n",
 "\n",
- "\n",
- "# {\"inputs\":{\"image\":[\"\"]}}\n",
 "request_json = {\n",
 " \"input_data\": \n",
 " {\n",

From 20aabdc7b538e7bc9a343cf3a18042197116f3e5 Mon Sep 17 00:00:00 2001
From: Rupal Jain
Date: Thu, 3 Aug 2023 23:58:17 +0530
Subject: [PATCH 2/5] deploying from azureml-staging erroring out

---
 .../image-classification-batch-endpoint.sh | 2 +-
 .../image-classification-online-endpoint.sh | 2 +-
 .../image-instance-segmentation-batch-endpoint.sh | 2 +-
 .../image-instance-segmentation-online-endpoint.sh | 2 +-
 .../image-object-detection-batch-endpoint.sh | 2 +-
 .../image-object-detection-online-endpoint.sh | 2 +-
 .../image-classification-batch-endpoint.ipynb | 4 ++--
 .../image-classification-online-endpoint.ipynb | 6 +++---
 .../image-instance-segmentation-batch-endpoint.ipynb | 4 ++--
 .../image-instance-segmentation-online-endpoint.ipynb | 6 +++---
 .../image-object-detection-batch-endpoint.ipynb | 6 +++---
 .../image-object-detection-online-endpoint.ipynb | 4 ++--
 12 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh b/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh
index d678213a475..86822b8a3f6 100644
--- a/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh
+++ b/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh
@@ -5,7 +5,7 @@ set -x
 # the sample scoring file available in the same folder as the above notebook

 # script inputs
-registry_name="azureml-staging"
+registry_name="azureml-preview"
 subscription_id=""
 resource_group_name=""
 workspace_name=""

diff --git a/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh b/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh
index a5d1666acdd..4247dc7e2e7 100644
--- a/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh
+++ b/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh
@@ -3,7 +3,7 @@ set -x
 # the sample scoring file available in the same folder as the above notebook

 # script inputs
-registry_name="azureml-staging"
+registry_name="azureml-preview"
 subscription_id=""
resource_group_name="" workspace_name="" diff --git a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh index 1d39330b162..5a4e5a2c07c 100644 --- a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh +++ b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh @@ -2,7 +2,7 @@ set -x # the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-image-instance-segmentation # script inputs -registry_name="azureml-staging" +registry_name="azureml-preview" subscription_id="" resource_group_name="" workspace_name="" diff --git a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh index ebbfe5f7267..ac850721fc9 100644 --- a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh +++ b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh @@ -3,7 +3,7 @@ set -x # the sample scoring file available in the same folder as the above notebook # script inputs -registry_name="azureml-staging" +registry_name="azureml-preview" subscription_id="" resource_group_name="" workspace_name="" diff --git a/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh b/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh index 47bc745d53f..517c72765dc 100644 --- a/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh +++ b/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh @@ -2,7 +2,7 @@ set -x # the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-image-object-detection # script inputs -registry_name="azureml-staging" +registry_name="azureml-preview" subscription_id="" resource_group_name="" workspace_name="" diff --git a/cli/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.sh b/cli/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.sh index ceba7c69d07..f5d6db257f7 100644 --- a/cli/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.sh +++ b/cli/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.sh @@ -3,7 +3,7 @@ set -x # the sample scoring file available in the same folder as the above notebook # script inputs -registry_name="azureml-staging" +registry_name="azureml-preview" subscription_id="" resource_group_name="" workspace_name="" diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb index 90eda4ebb11..dc2707e9b7b 100644 --- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb +++ 
b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb @@ -76,12 +76,12 @@ "\n", "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n", "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-staging\"\n", + "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", "registry_ml_client = MLClient(\n", " credential,\n", " subscription_id,\n", " resource_group,\n", - " registry_name=\"azureml-staging\",\n", + " registry_name=\"azureml-preview\",\n", ")\n", "# generating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time()))" diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb index 9cb194a284a..b4a720fd7b5 100644 --- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb @@ -39,7 +39,7 @@ "### 1. Setup pre-requisites\n", "* Install dependencies\n", "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml-staging` system registry" + "* Connect to `azureml-preview` system registry" ] }, { @@ -72,12 +72,12 @@ " workspace_name = \"\"\n", "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n", "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-staging\"\n", + "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", "registry_ml_client = MLClient(\n", " credential,\n", " subscription_id,\n", " resource_group,\n", - " registry_name=\"azureml-staging\",\n", + " registry_name=\"azureml-preview\",\n", ")\n", "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time()))" diff --git a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb index ddeb3e0cbd1..fd6d9798ea6 100644 --- a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb @@ -73,12 +73,12 @@ "\n", "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n", "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-staging\"\n", + "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", "registry_ml_client = MLClient(\n", " credential,\n", " subscription_id,\n", " resource_group,\n", - " registry_name=\"azureml-staging\",\n", + " 
registry_name=\"azureml-preview\",\n", ")\n", "# generating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time()))" diff --git a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb index 4b76fe58a5a..d901e888e71 100644 --- a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb @@ -38,7 +38,7 @@ "### 1. Setup pre-requisites\n", "* Install dependencies\n", "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml-staging` system registry" + "* Connect to `azureml-preview` system registry" ] }, { @@ -71,12 +71,12 @@ " workspace_name = \"\"\n", "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n", "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-staging\"\n", + "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", "registry_ml_client = MLClient(\n", " credential,\n", " subscription_id,\n", " resource_group,\n", - " registry_name=\"azureml-staging\",\n", + " registry_name=\"azureml-preview\",\n", ")\n", "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time()))" diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb index 9d65c2e42c6..dda7b239689 100644 --- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb @@ -38,7 +38,7 @@ "### 1. Setup pre-requisites\n", "* Install dependencies\n", "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). 
Replace ``, `` and `` below.\n", - "* Connect to `azureml-staging` system registry" + "* Connect to `azureml-preview` system registry" ] }, { @@ -73,12 +73,12 @@ "\n", "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n", "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-staging\"\n", + "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", "registry_ml_client = MLClient(\n", " credential,\n", " subscription_id,\n", " resource_group,\n", - " registry_name=\"azureml-staging\",\n", + " registry_name=\"azureml-preview\",\n", ")\n", "# generating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time()))" diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb index 2e945265316..c153480c98e 100644 --- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb @@ -69,12 +69,12 @@ " workspace_name = \"\"\n", "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n", "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-staging\"\n", + "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", "registry_ml_client = MLClient(\n", " credential,\n", " subscription_id,\n", " resource_group,\n", - " registry_name=\"azureml-staging\",\n", + " registry_name=\"azureml-preview\",\n", ")\n", "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time()))" From 956faa7eb62ec86e90241bf33e9d264262bbabf1 Mon Sep 17 00:00:00 2001 From: Rupal Jain Date: Fri, 4 Aug 2023 16:01:29 +0530 Subject: [PATCH 3/5] updates to sdk, cli examples --- ...ts-multiclass-classification-pipeline.yaml | 37 ++-- ...fridgeobjects-multiclass-classification.sh | 17 +- .../multiclass-classification/prepare_data.py | 12 +- ...ts-multilabel-classification-pipeline.yaml | 35 ++-- ...fridgeobjects-multilabel-classification.sh | 21 +-- .../multilabel-classification/prepare_data.py | 12 +- .../jsonl_converter.py | 14 +- ...objects-instance-segmentation-pipeline.yml | 46 ++--- ...ion-fridgeobjects-instance-segmentation.sh | 23 ++- .../prepare_data.py | 20 +-- ...ction-fridgeobjects-detection-pipeline.yml | 47 +++--- .../mmdetection-fridgeobjects-detection.sh | 20 +-- .../image-classification-batch-endpoint.sh | 20 +-- .../image-classification-online-endpoint.sh | 14 +- ...ge-instance-segmentation-batch-endpoint.sh | 24 +-- ...e-instance-segmentation-online-endpoint.sh | 13 +- .../image-object-detection-batch-endpoint.sh | 25 ++- ...dgeobjects-multiclass-classification.ipynb | 146 ++++++---------- ...dgeobjects-multilabel-classification.ipynb | 159 +++++++----------- ...-fridgeobjects-instance-segmentation.ipynb | 141 ++++++---------- ...ction-fridgeobjects-object-detection.ipynb | 104 +++++++----- .../image-classification-batch-endpoint.ipynb | 71 +++++--- ...image-classification-online-endpoint.ipynb | 71 ++++---- 
...instance-segmentation-batch-endpoint.ipynb | 77 +++++----
 ...nstance-segmentation-online-endpoint.ipynb | 56 +++---
 ...mage-object-detection-batch-endpoint.ipynb | 73 ++++----
 ...age-object-detection-online-endpoint.ipynb | 53 +++---
 27 files changed, 644 insertions(+), 707 deletions(-)

diff --git a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification-pipeline.yaml b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification-pipeline.yaml
index acb957a0652..5d6b92949f3 100644
--- a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification-pipeline.yaml
+++ b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification-pipeline.yaml
@@ -4,7 +4,7 @@
 type: pipeline
 experiment_name: AzureML-Train-Finetune-Vision-MultiClass-Samples

 inputs:
- # # model - specify the foundation model available in the azureml system registry
+ # # Model - specify the foundation model available in the azureml system registry
 mlflow_model_path:
 path: azureml://registries/azureml-staging/models/google-vit-base-patch16-224/versions/1
 type: mlflow_model
@@ -25,8 +25,7 @@ inputs:
 compute_finetune: sample-finetune-cluster-gpu

 outputs:
- # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model
- # registering the model is required to deploy the model to an online or batch endpoint
+ # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint
 trained_model:
 type: mlflow_model

@@ -46,10 +45,10 @@ jobs:
 process_count_per_instance: 1
 instance_count: 1

- # model
+ # Model import args
 task_name: image-classification
 model_family: HuggingFaceImage
- # # specify the model_name instead of mlflow_model if you want to use a model from the huggingface hub
+ # # Specify the model_name instead of mlflow_model if you want to use a model from the huggingface hub
 mlflow_model: ${{parent.inputs.mlflow_model_path}}
 # model_name: ${{parent.inputs.model_name}}

@@ -57,28 +56,17 @@ jobs:
 training_data: ${{parent.inputs.training_data}}
 validation_data: ${{parent.inputs.validation_data}}

+ # Finetuning args
 image_width: -1
 image_height: -1
- metric_for_best_model: accuracy
 apply_augmentations: True
 number_of_workers: 8
 apply_deepspeed: False
 deepspeed_config: ${{parent.inputs.ds_finetune}}
 apply_ort: False
- number_of_epochs: 15
- max_steps: -1
- training_batch_size: 4
- validation_batch_size: 4
 auto_find_batch_size: False
- learning_rate: 5e-5
- learning_rate_scheduler: warmup_linear
- warmup_steps: 0
- optimizer: adamw_hf
- weight_decay: 0.0
 extra_optim_args: ""
- gradient_accumulation_step: 1
 precision: 32
- label_smoothing_factor: 0.0
 random_seed: 42
 evaluation_strategy: epoch
 evaluation_steps: 500
@@ -89,9 +77,22 @@ jobs:
 save_total_limit: -1
 early_stopping: False
 early_stopping_patience: 1
- max_grad_norm: 1.0
 resume_from_checkpoint: False
 save_as_mlflow_model: True
+ # # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values. 
+ # metric_for_best_model: accuracy + # number_of_epochs: 15 + # max_steps: -1 + # training_batch_size: 4 + # validation_batch_size: 4 + # learning_rate: 5e-5 + # learning_rate_scheduler: warmup_linear + # warmup_steps: 0 + # optimizer: adamw_hf + # weight_decay: 0.0 + # gradient_accumulation_step: 1 + # label_smoothing_factor: 0.0 + # max_grad_norm: 1.0 outputs: mlflow_model_folder: ${{parent.outputs.trained_model}} diff --git a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.sh b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.sh index 0d8ed62b948..2811a22fa33 100644 --- a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.sh +++ b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.sh @@ -9,7 +9,7 @@ workspace_name="" compute_cluster_model_import="sample-model-import-cluster" compute_cluster_finetune="sample-finetune-cluster-gpu" -# if above compute cluster does not exist, create it with the following vm size +# If above compute cluster does not exist, create it with the following vm size compute_model_import_sku="Standard_D12" compute_finetune_sku="STANDARD_NC6s_v3" # This is the number of GPUs in a single node of the selected 'vm_size' compute. @@ -20,7 +20,6 @@ gpus_per_node=1 # huggingFace model huggingface_model_name="microsoft/beit-base-patch16-224-pt22k-ft22k" # This is the foundation model for finetuning from azureml system registry -# using the latest version of the model - not working yet aml_registry_model_name="microsoft-beit-base-patch16-224-pt22k-ft22k" model_label="latest" @@ -87,7 +86,7 @@ fi az account set -s $subscription_id workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" -# check if $compute_cluster_model_import exists, else create it +# Check if $compute_cluster_model_import exists, else create it if az ml compute show --name $compute_cluster_model_import $workspace_info then echo "Compute cluster $compute_cluster_model_import already exists" @@ -99,7 +98,7 @@ else } fi -# check if $compute_cluster_finetune exists, else create it +# Check if $compute_cluster_finetune exists, else create it if az ml compute show --name $compute_cluster_finetune $workspace_info then echo "Compute cluster $compute_cluster_finetune already exists" @@ -111,7 +110,7 @@ else } fi -# check if the finetuning pipeline component exists +# Check if the finetuning pipeline component exists if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name then echo "Finetuning pipeline component $finetuning_pipeline_component does not exist" @@ -126,7 +125,7 @@ then exit 1 fi -# get the latest model version +# Get the latest model version model_version=$(az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name --query version --output tsv) # 4. Prepare data @@ -148,8 +147,6 @@ fi # 5. Submit finetuning job using pipeline.yaml for a HuggingFace Transformers model -# # Need to switch to using latest version for model, currently blocked with a bug. 
- # # If you want to use a HuggingFace model, specify the inputs.model_name instead of inputs.mlflow_model_path.path like below # inputs.model_name=$huggingface_model_name @@ -179,12 +176,12 @@ az ml model create --name $finetuned_huggingface_model_name --version $version - } # 7. Deploy the fine-tuned HuggingFace Transformers model to an endpoint -# create online endpoint +# Create online endpoint az ml online-endpoint create --name $huggingface_endpoint_name $workspace_info || { echo "endpoint create failed"; exit 1; } -# deploy model from registry to endpoint in workspace +# Deploy model from registry to endpoint in workspace az ml online-deployment create --file ./deploy.yaml $workspace_info --all-traffic --set \ endpoint_name=$huggingface_endpoint_name model=azureml:$finetuned_huggingface_model_name:$version \ instance_type=$deployment_sku || { diff --git a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py index 72ba3730663..b7dd55383f0 100644 --- a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py @@ -38,7 +38,7 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): dataset_parent_dir = os.path.dirname(dataset_dir) - # We'll copy each JSONL file within its related MLTable folder + # We will copy each JSONL file within its related MLTable folder training_mltable_path = os.path.join(dataset_parent_dir, "training-mltable-folder") validation_mltable_path = os.path.join(dataset_parent_dir, "validation-mltable-folder") @@ -72,10 +72,10 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): json_line["label"] = class_name if index % train_validation_ratio == 0: - # validation annotation + # Validation annotation validation_f.write(json.dumps(json_line) + "\n") else: - # train annotation + # Train annotation train_f.write(json.dumps(json_line) + "\n") index += 1 print("done") @@ -93,7 +93,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data + # Download data print("Downloading data.") download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip" @@ -108,12 +108,12 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Download data from public url urllib.request.urlretrieve(download_url, filename=data_file) - # extract files + # Extract files with ZipFile(data_file, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file + # Delete zip file os.remove(data_file) # Upload data and create a data asset URI folder diff --git a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification-pipeline.yaml b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification-pipeline.yaml index 4f4d5d4ce55..ea62d20f907 100644 --- a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification-pipeline.yaml +++ 
b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification-pipeline.yaml
@@ -4,7 +4,7 @@
 type: pipeline
 experiment_name: AzureML-Train-Finetune-Vision-MultiLabel-Samples

 inputs:
- # # model - specify the foundation model available in the azureml system registry
+ # # Model - specify the foundation model available in the azureml system registry
 mlflow_model_path:
 path: azureml://registries/azureml-staging/models/google-vit-base-patch16-224/versions/1
 type: mlflow_model
@@ -26,8 +26,7 @@ inputs:

 outputs:
- # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model
- # registering the model is required to deploy the model to an online or batch endpoint
+ # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint
 trained_model:
 type: mlflow_model

@@ -50,7 +49,7 @@ jobs:
 # model
 task_name: image-classification-multilabel
 model_family: HuggingFaceImage
- # # specify the model_name instead of mlflow_model if you want to use a model from the huggingface hub
+ # # Specify the model_name instead of mlflow_model if you want to use a model from the huggingface hub
 mlflow_model: ${{parent.inputs.mlflow_model_path}}
 # model_name: ${{parent.inputs.model_name}}

@@ -58,28 +57,17 @@ jobs:
 training_data: ${{parent.inputs.training_data}}
 validation_data: ${{parent.inputs.validation_data}}

+ # Finetuning args
 image_width: -1
 image_height: -1
- metric_for_best_model: iou
 apply_augmentations: True
 number_of_workers: 8
 apply_deepspeed: False
 deepspeed_config: ${{parent.inputs.ds_finetune}}
 apply_ort: False
- number_of_epochs: 15
- max_steps: -1
- training_batch_size: 4
- validation_batch_size: 4
 auto_find_batch_size: False
- learning_rate: 5e-5
- learning_rate_scheduler: warmup_linear
- warmup_steps: 0
- optimizer: adamw_hf
- weight_decay: 0.0
 extra_optim_args: ""
- gradient_accumulation_step: 1
 precision: 32
- label_smoothing_factor: 0.0
 random_seed: 42
 evaluation_strategy: epoch
 evaluation_steps: 500
@@ -90,9 +78,22 @@ jobs:
 save_total_limit: -1
 early_stopping: False
 early_stopping_patience: 1
- max_grad_norm: 1.0
 resume_from_checkpoint: False
 save_as_mlflow_model: True
+ # # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values. 
+ # metric_for_best_model: iou + # number_of_epochs: 15 + # max_steps: -1 + # training_batch_size: 4 + # validation_batch_size: 4 + # learning_rate: 5e-5 + # learning_rate_scheduler: warmup_linear + # warmup_steps: 0 + # optimizer: adamw_hf + # weight_decay: 0.0 + # gradient_accumulation_step: 1 + # label_smoothing_factor: 0.0 + # max_grad_norm: 1.0 outputs: mlflow_model_folder: ${{parent.outputs.trained_model}} \ No newline at end of file diff --git a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.sh b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.sh index a4ecd279324..4f1a115db61 100644 --- a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.sh +++ b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.sh @@ -9,7 +9,7 @@ workspace_name="" compute_cluster_model_import="sample-model-import-cluster" compute_cluster_finetune="sample-finetune-cluster-gpu" -# if above compute cluster does not exist, create it with the following vm size +# If above compute cluster does not exist, create it with the following vm size compute_model_import_sku="Standard_D12" compute_finetune_sku="STANDARD_NC6s_v3" # This is the number of GPUs in a single node of the selected 'vm_size' compute. @@ -17,10 +17,9 @@ compute_finetune_sku="STANDARD_NC6s_v3" # Setting this to more than the number of GPUs will result in an error. gpus_per_node=1 -# huggingFace model +# HuggingFace model huggingface_model_name="microsoft/beit-base-patch16-224-pt22k-ft22k" # This is the foundation model for finetuning from azureml system registry -# using the latest version of the model - not working yet aml_registry_model_name="microsoft-beit-base-patch16-224-pt22k-ft22k" model_label="latest" @@ -86,7 +85,7 @@ fi az account set -s $subscription_id workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" -# check if $compute_cluster_model_import exists, else create it +# Check if $compute_cluster_model_import exists, else create it if az ml compute show --name $compute_cluster_model_import $workspace_info then echo "Compute cluster $compute_cluster_model_import already exists" @@ -98,7 +97,7 @@ else } fi -# check if $compute_cluster_finetune exists, else create it +# Check if $compute_cluster_finetune exists, else create it if az ml compute show --name $compute_cluster_finetune $workspace_info then echo "Compute cluster $compute_cluster_finetune already exists" @@ -110,7 +109,7 @@ else } fi -# check if the finetuning pipeline component exists +# Check if the finetuning pipeline component exists if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name then echo "Finetuning pipeline component $finetuning_pipeline_component does not exist" @@ -118,14 +117,14 @@ then fi # 3. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) +# Need to confirm model show command works for registries outside the tenant (aka system registry) if ! 
az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name then echo "Model $aml_registry_model_name:$model_label does not exist in registry $registry_name" exit 1 fi -# get the latest model version +# Get the latest model version model_version=$(az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name --query version --output tsv) # 4. Prepare data @@ -147,8 +146,6 @@ fi # 5. Submit finetuning job using pipeline.yaml for a HuggingFace Transformers model -# # Need to switch to using latest version for model, currently blocked with a bug. - # # If you want to use a HuggingFace model, specify the inputs.model_name instead of inputs.mlflow_model_path.path like below # inputs.model_name=$huggingface_model_name @@ -178,12 +175,12 @@ az ml model create --name $finetuned_huggingface_model_name --version $version - } # 7. Deploy the fine-tuned HuggingFace Transformers model to an endpoint -# create online endpoint +# Create online endpoint az ml online-endpoint create --name $huggingface_endpoint_name $workspace_info || { echo "endpoint create failed"; exit 1; } -# deploy model from registry to endpoint in workspace +# Deploy model from registry to endpoint in workspace az ml online-deployment create --file ./deploy.yaml $workspace_info --all-traffic --set \ endpoint_name=$huggingface_endpoint_name model=azureml:$finetuned_huggingface_model_name:$version \ instance_type=$deployment_sku || { diff --git a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py index 1cf05eda13e..2f5d392dbc8 100644 --- a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py @@ -38,7 +38,7 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): dataset_parent_dir = os.path.dirname(dataset_dir) - # We'll copy each JSONL file within its related MLTable folder + # We will copy each JSONL file within its related MLTable folder training_mltable_path = os.path.join(dataset_parent_dir, "training-mltable-folder") validation_mltable_path = os.path.join(dataset_parent_dir, "validation-mltable-folder") @@ -76,10 +76,10 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): json_line["label"] = line_split[1].strip().split(" ") if i % train_validation_ratio == 0: - # validation annotation + # Validation annotation validation_f.write(json.dumps(json_line) + "\n") else: - # train annotation + # Train annotation train_f.write(json.dumps(json_line) + "\n") print("done") @@ -96,7 +96,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data + # Download data print("Downloading data.") download_url = ( "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip" @@ -113,12 +113,12 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Download data from public url urllib.request.urlretrieve(download_url, filename=data_file) - # extract files + # Extract files with ZipFile(data_file, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file + # Delete zip file 
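+# (The archive is removed below because the extracted folder is all the later steps
+# need. For reference, the download/extract/cleanup flow above can be exercised on
+# its own; a minimal standalone sketch using the same stdlib calls, with "./data"
+# as an assumed destination:
+#
+#     import os, urllib.request
+#     from zipfile import ZipFile
+#     url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip"
+#     dest = "./data"
+#     os.makedirs(dest, exist_ok=True)
+#     archive = os.path.join(dest, os.path.basename(url))
+#     urllib.request.urlretrieve(url, filename=archive)
+#     with ZipFile(archive, "r") as zf:
+#         zf.extractall(path=dest)
+#     os.remove(archive)
+# )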
os.remove(data_file) # Upload data and create a data asset URI folder diff --git a/cli/foundation-models/system/finetune/image-instance-segmentation/jsonl_converter.py b/cli/foundation-models/system/finetune/image-instance-segmentation/jsonl_converter.py index 13a8047375f..9e500d776bb 100644 --- a/cli/foundation-models/system/finetune/image-instance-segmentation/jsonl_converter.py +++ b/cli/foundation-models/system/finetune/image-instance-segmentation/jsonl_converter.py @@ -130,13 +130,13 @@ def convert_mask_in_VOC_to_jsonl(dataset_dir: str, remote_path: str) -> None: dataset_parent_dir = os.path.dirname(dataset_dir) print(dataset_dir, dataset_parent_dir) - # We'll copy each JSONL file within its related MLTable folder + # We will copy each JSONL file within its related MLTable folder training_mltable_path = os.path.join(dataset_parent_dir, "training-mltable-folder") validation_mltable_path = os.path.join( dataset_parent_dir, "validation-mltable-folder" ) - # First, let's create the folders if they don't exist + # Create the folders if they don't exist os.makedirs(training_mltable_path, exist_ok=True) os.makedirs(validation_mltable_path, exist_ok=True) @@ -154,7 +154,7 @@ def convert_mask_in_VOC_to_jsonl(dataset_dir: str, remote_path: str) -> None: annotations_folder = os.path.join(dataset_dir, "annotations") mask_folder = os.path.join(dataset_dir, "segmentation-masks") - # sample json line dictionary + # Sample json line dictionary json_line_sample = { "image_url": remote_path, "image_details": {"format": None, "width": None, "height": None}, @@ -176,7 +176,7 @@ def convert_mask_in_VOC_to_jsonl(dataset_dir: str, remote_path: str) -> None: width = int(root.find("size/width").text) height = int(root.find("size/height").text) - # convert mask into polygon + # Convert mask into polygon mask_fname = os.path.join(mask_folder, filename[:-4] + ".png") polygons = parsing_mask(mask_fname) @@ -193,7 +193,7 @@ def convert_mask_in_VOC_to_jsonl(dataset_dir: str, remote_path: str) -> None: } ) - # build the jsonl file + # Build the jsonl file image_filename = root.find("filename").text _, file_extension = os.path.splitext(image_filename) json_line = dict(json_line_sample) @@ -206,10 +206,10 @@ def convert_mask_in_VOC_to_jsonl(dataset_dir: str, remote_path: str) -> None: json_line["label"] = labels if i % train_validation_ratio == 0: - # validation annotation + # Validation annotation validation_f.write(json.dumps(json_line) + "\n") else: - # train annotation + # Train annotation train_f.write(json.dumps(json_line) + "\n") diff --git a/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation-pipeline.yml b/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation-pipeline.yml index 9ab5438ab54..2ff3075d87b 100644 --- a/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation-pipeline.yml +++ b/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation-pipeline.yml @@ -19,7 +19,7 @@ inputs: compute_model_import: sample-model-import-cluster compute_finetune: sample-finetune-cluster-gpu # model_name: microsoft/beit-base-patch16-224 - # model - specify the foundation model available in the azureml system registry + # # Model - specify the foundation model available in the azureml system registry # pytorch_model: # path: 
azureml://registries/azureml-staging/models/mask_rcnn_swin-t-p4-w7_fpn_1x_coco/versions/1
 #   type: custom_model
@@ -28,8 +28,7 @@ inputs:
     type: mlflow_model
 
 outputs:
-  # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model
-  # registering the model is required to deploy the model to an online or batch endpoint
+  # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint
   trained_model:
     type: mlflow_model
 
@@ -40,8 +39,8 @@ settings:
 jobs:
   mmdetection_model_finetune_job:
     type: pipeline
-    # TODO change the asset id
     component: azureml://registries/azureml-staging/components/mmdetection_image_objectdetection_instancesegmentation_pipeline/labels/latest
+
     inputs:
 
       # Compute
@@ -50,40 +49,28 @@
       instance_count: 1
       process_count_per_instance: 1
 
-      # model selection
+      # Model import args
       task_name: image-instance-segmentation
-      # model_name: ${{parent.inputs.model_name}}
       # pytorch_model: ${{parent.inputs.pytorch_model}}
       mlflow_model: ${{parent.inputs.mlflow_model}}
       model_family: MmDetectionImage
 
-      # data
+      # Data
       training_data: ${{parent.inputs.training_data}}
       validation_data: ${{parent.inputs.validation_data}}
 
-      # finetuning parameters
+      # Finetuning parameters
       apply_augmentations: True
-      image_min_size: -1
-      image_max_size: -1
-      metric_for_best_model: mean_average_precision
      number_of_workers: 8
       apply_deepspeed: False
       deepspeed_config: ${{parent.inputs.ds_finetune}}
       apply_ort: False
-      number_of_epochs: 15
-      max_steps: -1
-      training_batch_size: 4
-      validation_batch_size: 4
       auto_find_batch_size: False
-      learning_rate: 5e-5
-      learning_rate_scheduler: warmup_linear
-      warmup_steps: 0
-      optimizer: adamw_hf
-      weight_decay: 0.0
       extra_optim_args: ""
-      gradient_accumulation_step: 1
       precision: 32
+      iou_threshold: 0.5
+      box_score_threshold: 0.3
       random_seed: 42
       evaluation_strategy: epoch
       evaluation_steps: 500
@@ -94,8 +81,23 @@
       save_total_limit: -1
       early_stopping: False
       early_stopping_patience: 1
-      max_grad_norm: 1.0
       resume_from_checkpoint: False
+      save_as_mlflow_model: True
+      # # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.
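+      # # (Each value in the commented list below mirrors a previously hardcoded
+      # # setting; leave a line commented to keep the component's autoselected default.)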
+ # image_min_size: -1 + # image_max_size: -1 + # metric_for_best_model: mean_average_precision + # number_of_epochs: 15 + # max_steps: -1 + # training_batch_size: 4 + # validation_batch_size: 4 + # learning_rate: 5e-5 + # learning_rate_scheduler: warmup_linear + # warmup_steps: 0 + # optimizer: adamw_hf + # weight_decay: 0.0 + # gradient_accumulation_step: 1 + # max_grad_norm: 1.0 outputs: mlflow_model_folder: ${{parent.outputs.trained_model}} diff --git a/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.sh b/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.sh index 1c10b79b766..ada8f66047c 100644 --- a/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.sh +++ b/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.sh @@ -11,7 +11,7 @@ workspace_name="" compute_cluster_model_import="sample-model-import-cluster" compute_cluster_finetune="sample-finetune-cluster-gpu" -# if above compute cluster does not exist, create it with the following vm size +# If above compute cluster does not exist, create it with the following vm size compute_model_import_sku="Standard_D12" compute_finetune_sku="Standard_NC6s_v3" # This is the number of GPUs in a single node of the selected 'vm_size' compute. @@ -20,13 +20,11 @@ compute_finetune_sku="Standard_NC6s_v3" gpus_per_node=1 # This is the foundation model for finetuning -# TODO: update the model name once it registered in preview registry -# using the latest version of the model - not working yet mmdetection_model_name="mask_rcnn_swin-t-p4-w7_fpn_1x_coco" model_label="latest" version=$(date +%s) -finetuned_mmdetection_model_name="mask_rcnn_swin-t-p4-w7_fpn_1x_coco_fridge_is" +finetuned_mmdetection_model_name="$mmdetection_model_name-fridge-is" mmdetection_endpoint_name="mmd-is-fridge-items-$version" deployment_sku="Standard_DS3_V2" @@ -36,15 +34,15 @@ ds_finetune="./deepspeed_configs/zero1.json" # Scoring file mmdetection_sample_request_data="./mmdetection_sample_request_data.json" -# finetuning job parameters +# Finetuning job parameters finetuning_pipeline_component="mmdetection_image_objectdetection_instancesegmentation_pipeline" # Training settings process_count_per_instance=$gpus_per_node # set to the number of GPUs available in the compute # 1. Install dependencies -pip install azure-ai-ml==1.0.0 -pip install azure-identity +pip install azure-ai-ml==1.8.0 +pip install azure-identity==1.13.0 pip install datasets==2.12.0 unameOut=$(uname -a) @@ -89,7 +87,7 @@ fi az account set -s $subscription_id workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" -# check if $compute_cluster_model_import exists, else create it +# Check if $compute_cluster_model_import exists, else create it if az ml compute show --name $compute_cluster_model_import $workspace_info then echo "Compute cluster $compute_cluster_model_import already exists" @@ -101,7 +99,7 @@ else } fi -# check if $compute_cluster_finetune exists, else create it +# Check if $compute_cluster_finetune exists, else create it if az ml compute show --name $compute_cluster_finetune $workspace_info then echo "Compute cluster $compute_cluster_finetune already exists" @@ -113,7 +111,7 @@ else } fi -# check if the finetuning pipeline component exists +# Check if the finetuning pipeline component exists if ! 
az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name then echo "Finetuning pipeline component $finetuning_pipeline_component does not exist" @@ -151,6 +149,7 @@ then fi # 5. Submit finetuning job using pipeline.yaml for a open-mmlab mmdetection model + # If you want to use a MMDetection model, specify the inputs.model_name instead of inputs.mlflow_model_path.path like below # inputs.model_name="mask_rcnn_swin-t-p4-w7_fpn_1x_coco" @@ -182,12 +181,12 @@ az ml model create --name $finetuned_mmdetection_model_name --version $version - } # 7. Deploy the fine-tuned mmdetection model to an endpoint -# create online endpoint +# Create online endpoint az ml online-endpoint create --name $mmdetection_endpoint_name $workspace_info || { echo "endpoint create failed"; exit 1; } -# deploy registered model to endpoint in workspace +# Deploy registered model to endpoint in workspace az ml online-deployment create --file ./deploy.yaml $workspace_info --all-traffic --set \ endpoint_name=$mmdetection_endpoint_name model=azureml:$finetuned_mmdetection_model_name:$version \ instance_type=$deployment_sku || { diff --git a/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py b/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py index 7825159302f..274d4785591 100644 --- a/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py @@ -53,7 +53,7 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): dataset_parent_dir = os.path.dirname(dataset_dir) - # We'll copy each JSONL file within its related MLTable folder + # We will copy each JSONL file within its related MLTable folder training_mltable_path = os.path.join(dataset_parent_dir, "training-mltable-folder") validation_mltable_path = os.path.join( dataset_parent_dir, "validation-mltable-folder" @@ -117,7 +117,7 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): "isCrowd": isCrowd, } ) - # build the jsonl file + # Build the jsonl file image_filename = root.find("filename").text _, file_extension = os.path.splitext(image_filename) json_line = dict(json_line_sample) @@ -130,10 +130,10 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): json_line["label"] = labels if i % train_validation_ratio == 0: - # validation annotation + # Validation annotation validation_f.write(json.dumps(json_line) + "\n") else: - # train annotation + # Train annotation train_f.write(json.dumps(json_line) + "\n") print("done") @@ -153,10 +153,10 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Download data from public url - # create data folder if it doesnt exist. + # Create data folder if it doesnt exist. 
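+# (os.makedirs with exist_ok=True below makes the call idempotent. As an aside on
+# the jsonl hunks above: the `i % train_validation_ratio` test is the entire
+# train/validation split. A minimal sketch, assuming a ratio of 5, which matches
+# the roughly 20% validation holdout these samples describe:
+#
+#     import json
+#     records = [{"image_url": f"images/{i}.jpg", "label": ["can"]} for i in range(10)]
+#     with open("train.jsonl", "w") as train_f, open("val.jsonl", "w") as val_f:
+#         for i, line in enumerate(records):
+#             out = val_f if i % 5 == 0 else train_f
+#             out.write(json.dumps(line) + "\n")
+# )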
os.makedirs(dataset_parent_dir, exist_ok=True) - # download data + # Download data download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip" # Extract current dataset name from dataset url @@ -170,12 +170,12 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Download the dataset urllib.request.urlretrieve(download_url, filename=data_file) - # extract files + # Extract files with ZipFile(data_file, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file + # Delete zip file os.remove(data_file) # Upload data and create a data asset URI folder @@ -207,13 +207,13 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): convert_mask_in_VOC_to_jsonl(dataset_dir, uri_folder_data_asset.path) print("done") - # We'll copy each JSONL file within its related MLTable folder + # We will copy each JSONL file within its related MLTable folder training_mltable_path = os.path.join(dataset_parent_dir, "training-mltable-folder") validation_mltable_path = os.path.join( dataset_parent_dir, "validation-mltable-folder" ) - # First, let's create the folders if they don't exist + # Create the folders if they don't exist os.makedirs(training_mltable_path, exist_ok=True) os.makedirs(validation_mltable_path, exist_ok=True) diff --git a/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection-pipeline.yml b/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection-pipeline.yml index 68944a10365..e69d2e411d2 100644 --- a/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection-pipeline.yml +++ b/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection-pipeline.yml @@ -15,9 +15,11 @@ inputs: ds_finetune: path: deepspeed_configs/zero1.json type: uri_file + # compute compute_model_import: sample-model-import-cluster compute_finetune: sample-finetune-cluster-gpu + # model_name: yolof_r50_c5_8x8_1x_coco # # model - specify the foundation model available in the azureml system registry # pytorch_model: @@ -28,8 +30,7 @@ inputs: type: mlflow_model outputs: - # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model - # registering the model is required to deploy the model to an online or batch endpoint + # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. 
Registering the model is required to deploy the model to an online or batch endpoint
   trained_model:
     type: mlflow_model
 
@@ -40,50 +41,37 @@ settings:
 jobs:
   mmdetection_model_finetune_job:
     type: pipeline
-    # TODO change the asset id
     component: azureml://registries/azureml-staging/components/mmdetection_image_objectdetection_instancesegmentation_pipeline/labels/latest
     inputs:
-      # Compute
+      # # Compute
       compute_model_import: ${{parent.inputs.compute_model_import}}
       compute_finetune: ${{parent.inputs.compute_finetune}}
       instance_count: 1
       process_count_per_instance: 1
 
-      # model selection
+      # # Model import args
       task_name: image-object-detection
-      # model_name: ${{parent.inputs.model_name}}
       # pytorch_model: ${{parent.inputs.pytorch_model}}
       mlflow_model: ${{parent.inputs.mlflow_model}}
       model_family: MmDetectionImage
 
-      # data
+      # # Data
       training_data: ${{parent.inputs.training_data}}
       validation_data: ${{parent.inputs.validation_data}}
 
-      # finetuning parameters
+      # # Finetuning parameters
       apply_augmentations: True
-      image_min_size: -1
-      image_max_size: -1
-      metric_for_best_model: mean_average_precision
       number_of_workers: 8
       apply_deepspeed: False
       deepspeed_config: ${{parent.inputs.ds_finetune}}
       apply_ort: False
-      number_of_epochs: 15
-      max_steps: -1
-      training_batch_size: 4
-      validation_batch_size: 4
       auto_find_batch_size: False
-      learning_rate: 5e-5
-      learning_rate_scheduler: warmup_linear
-      warmup_steps: 0
-      optimizer: adamw_hf
-      weight_decay: 0.0
       extra_optim_args: ""
-      gradient_accumulation_step: 1
       precision: 32
+      iou_threshold: 0.5
+      box_score_threshold: 0.3
       random_seed: 42
       evaluation_strategy: epoch
       evaluation_steps: 500
@@ -94,8 +82,23 @@
       save_total_limit: -1
       early_stopping: False
       early_stopping_patience: 1
-      max_grad_norm: 1.0
       resume_from_checkpoint: False
+      save_as_mlflow_model: True
+      # # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.
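+      # # (As in the other pipeline samples, the commented values are examples only;
+      # # anything left commented falls back to the component's autoselected default.)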
+ # image_min_size: -1 + # image_max_size: -1 + # metric_for_best_model: mean_average_precision + # number_of_epochs: 15 + # max_steps: -1 + # training_batch_size: 4 + # validation_batch_size: 4 + # learning_rate: 5e-5 + # learning_rate_scheduler: warmup_linear + # warmup_steps: 0 + # optimizer: adamw_hf + # weight_decay: 0.0 + # gradient_accumulation_step: 1 + # max_grad_norm: 1.0 outputs: mlflow_model_folder: ${{parent.outputs.trained_model}} diff --git a/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection.sh b/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection.sh index 0319c031556..7cff0139ef5 100644 --- a/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection.sh +++ b/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection.sh @@ -19,13 +19,11 @@ compute_finetune_sku="Standard_NC6s_v3" gpus_per_node=1 # This is the foundation model for finetuning -# TODO: update the model name once it registered in preview registry -# using the latest version of the model - not working yet mmdetection_model_name="yolof_r50_c5_8x8_1x_coco" model_label="latest" version=$(date +%s) -finetuned_mmdetection_model_name="yolof_r50_c5_8x8_1x_coco_fridge_od" +finetuned_mmdetection_model_name="$mmdetection_model_name-fridge-od" mmdetection_endpoint_name="mmd-od-fridge-items-$version" deployment_sku="Standard_DS3_V2" @@ -36,15 +34,14 @@ ds_finetune="./deepspeed_configs/zero1.json" mmdetection_sample_request_data="./mmdetection_sample_request_data.json" # finetuning job parameters -# TODO: update with preview registry component name finetuning_pipeline_component="mmdetection_image_objectdetection_instancesegmentation_pipeline" # Training settings process_count_per_instance=$gpus_per_node # set to the number of GPUs available in the compute # 1. Install dependencies -pip install azure-ai-ml==1.0.0 -pip install azure-identity +pip install azure-ai-ml==1.8.0 +pip install azure-identity==1.13.0 pip install datasets==2.12.0 unameOut=$(uname -a) @@ -89,7 +86,7 @@ fi az account set -s $subscription_id workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" -# check if $compute_cluster_model_import exists, else create it +# Check if $compute_cluster_model_import exists, else create it if az ml compute show --name $compute_cluster_model_import $workspace_info then echo "Compute cluster $compute_cluster_model_import already exists" @@ -101,7 +98,7 @@ else } fi -# check if $compute_cluster_finetune exists, else create it +# Check if $compute_cluster_finetune exists, else create it if az ml compute show --name $compute_cluster_finetune $workspace_info then echo "Compute cluster $compute_cluster_finetune already exists" @@ -113,7 +110,7 @@ else } fi -# check if the finetuning pipeline component exists +# Check if the finetuning pipeline component exists if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name then echo "Finetuning pipeline component $finetuning_pipeline_component does not exist" @@ -151,6 +148,7 @@ then fi # 5. 
Submit finetuning job using pipeline.yaml for a open-mmlab mmdetection model + # If you want to use a MMDetection model, specify the inputs.model_name instead of inputs.mlflow_model_path.path like below # inputs.model_name="conditional_detr_r50_8xb2-50e_coco" @@ -182,12 +180,12 @@ az ml model create --name $finetuned_mmdetection_model_name --version $version - } # 7. Deploy the fine-tuned mmdetection model to an endpoint -# create online endpoint +# Create online endpoint az ml online-endpoint create --name $mmdetection_endpoint_name $workspace_info || { echo "endpoint create failed"; exit 1; } -# deploy registered model to endpoint in workspace +# Deploy registered model to endpoint in workspace az ml online-deployment create --file ./deploy.yaml $workspace_info --all-traffic --set \ endpoint_name=$mmdetection_endpoint_name model=azureml:$finetuned_mmdetection_model_name:$version \ instance_type=$deployment_sku || { diff --git a/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh b/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh index 86822b8a3f6..2d901dfa840 100644 --- a/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh +++ b/cli/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.sh @@ -1,8 +1,8 @@ set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-image-classification -# the sample scoring file available in the same folder as the above notebook +# The commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-image-classification +# The sample scoring file available in the same folder as the above notebook. # script inputs registry_name="azureml-preview" @@ -49,17 +49,17 @@ az account set -s $subscription_id workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" # 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) +# Need to confirm model show command works for registries outside the tenant (aka system registry) if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name then echo "Model $model_name:$model_label does not exist in registry $registry_name" exit 1 fi -# get the latest model version +# Get the latest model version model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv) -# 3. check if compute $deployment_compute exists, else create it +# 3. Check if compute $deployment_compute exists, else create it if az ml compute show --name $deployment_compute $workspace_info then echo "Compute cluster $deployment_compute already exists" @@ -72,12 +72,12 @@ else fi # 4. 
Deploy the model to an endpoint -# create online endpoint +# Create online endpoint az ml batch-endpoint create --name $endpoint_name $workspace_info || { echo "endpoint create failed"; exit 1; } -# deploy model from registry to endpoint in workspace +# Deploy model from registry to endpoint in workspace az ml batch-deployment create --file ./deploy-batch.yaml $workspace_info --set \ endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ compute=$deployment_compute \ @@ -97,14 +97,14 @@ else exit 1 fi -# invoke the endpoint +# Invoke the endpoint folder_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \ --deployment-name $deployment_name --input $sample_request_folder --input-type \ uri_folder $workspace_info --query name --output tsv) || { echo "endpoint invoke failed"; exit 1; } -# wait for the job to complete +# Wait for the job to complete az ml job stream --name $folder_inference_job $workspace_info || { echo "job stream failed"; exit 1; } @@ -122,7 +122,7 @@ else exit 1 fi -# invoke the endpoint +# Invoke the endpoint csv_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \ --deployment-name $deployment_name --input $sample_request_csv --input-type \ uri_file $workspace_info --query name --output tsv) || { diff --git a/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh b/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh index 4247dc7e2e7..b8398f1ad34 100644 --- a/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh +++ b/cli/foundation-models/system/inference/image-classification/image-classification-online-endpoint.sh @@ -1,6 +1,6 @@ set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-sdk-image-classification -# the sample scoring file available in the same folder as the above notebook +# The commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-sdk-image-classification +# The sample scoring file available in the same folder as the above notebook # script inputs registry_name="azureml-preview" @@ -15,7 +15,7 @@ model_label="latest" version=$(date +%s) endpoint_name="image-classification-$version" -# todo: fetch deployment_sku from the min_inference_sku tag of the model +# Todo: fetch deployment_sku from the min_inference_sku tag of the model deployment_sku="Standard_DS3_v2" # Prepare data for deployment @@ -40,23 +40,23 @@ az account set -s $subscription_id workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" # 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) +# Need to confirm model show command works for registries outside the tenant (aka system registry) if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name then echo "Model $model_name:$model_label does not exist in registry $registry_name" exit 1 fi -# get the latest model version +# Get the latest model version model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv) # 3. 
Deploy the model to an endpoint -# create online endpoint +# Create online endpoint az ml online-endpoint create --name $endpoint_name $workspace_info || { echo "endpoint create failed"; exit 1; } -# deploy model from registry to endpoint in workspace +# Deploy model from registry to endpoint in workspace az ml online-deployment create --file deploy-online.yaml $workspace_info --all-traffic --set \ endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ instance_type=$deployment_sku || { diff --git a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh index 5a4e5a2c07c..8c66f857b97 100644 --- a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh +++ b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.sh @@ -1,5 +1,5 @@ set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-image-instance-segmentation +# The commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-image-instance-segmentation # script inputs registry_name="azureml-preview" @@ -12,7 +12,7 @@ model_name="mask_rcnn_swin-t-p4-w7_fpn_1x_coco" model_label="latest" deployment_compute="cpu-cluster" -# todo: fetch deployment_sku from the min_inference_sku tag of the model +# Todo: fetch deployment_sku from the min_inference_sku tag of the model deployment_sku="Standard_DS3_v2" @@ -24,10 +24,10 @@ deployment_name="demo-$version" data_path="data_batch" python ./prepare_data.py --data_path $data_path --mode "batch" -# sample request data in csv format with image column +# Sample request data in csv format with image column sample_request_csv="./data_batch/odFridgeObjectsMask/image_instance_segmentation_list.csv" -# sample request data in image folder format +# Sample request data in image folder format sample_request_folder="./data_batch/odFridgeObjectsMask/images" # 1. Setup pre-requisites @@ -42,7 +42,7 @@ az account set -s $subscription_id workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" # 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) +# Need to confirm model show command works for registries outside the tenant (aka system registry) if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name then echo "Model $model_name:$model_label does not exist in registry $registry_name" @@ -52,7 +52,7 @@ fi model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv) -# 3. check if compute $deployment_compute exists, else create it +# 3. Check if compute $deployment_compute exists, else create it if az ml compute show --name $deployment_compute $workspace_info then echo "Compute cluster $deployment_compute already exists" @@ -65,12 +65,12 @@ else fi # 4. 
Deploy the model to an endpoint -# create batch endpoint +# Create batch endpoint az ml batch-endpoint create --name $endpoint_name $workspace_info || { echo "endpoint create failed"; exit 1; } -# deploy model from registry to endpoint in workspace +# Deploy model from registry to endpoint in workspace az ml batch-deployment create --file ./deploy-batch.yaml $workspace_info --set \ endpoint_name=$endpoint_name \ model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ @@ -91,14 +91,14 @@ else exit 1 fi -# invoke the endpoint +# Invoke the endpoint folder_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \ --deployment-name $deployment_name --input $sample_request_folder --input-type \ uri_folder $workspace_info --query name --output tsv) || { echo "endpoint invoke failed"; exit 1; } -# wait for the job to complete +# Wait for the job to complete az ml job stream --name $folder_inference_job $workspace_info || { echo "job stream failed"; exit 1; } @@ -116,14 +116,14 @@ else exit 1 fi -# invoke the endpoint +# Invoke the endpoint csv_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \ --deployment-name $deployment_name --input $sample_request_csv --input-type \ uri_file $workspace_info --query name --output tsv) || { echo "endpoint invoke failed"; exit 1; } -# wait for the job to complete +# Wait for the job to complete az ml job stream --name $csv_inference_job $workspace_info || { echo "job stream failed"; exit 1; } diff --git a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh index ac850721fc9..86ea55405bd 100644 --- a/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh +++ b/cli/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.sh @@ -1,6 +1,6 @@ set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-sdk-image-instance-segmentation -# the sample scoring file available in the same folder as the above notebook +# The commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-sdk-image-instance-segmentation +# The sample scoring file available in the same folder as the above notebook # script inputs registry_name="azureml-preview" @@ -15,13 +15,12 @@ model_label="latest" version=$(date +%s) endpoint_name="image-is-$version" -# todo: fetch deployment_sku from the min_inference_sku tag of the model +# Todo: fetch deployment_sku from the min_inference_sku tag of the model deployment_sku="Standard_DS3_v2" # Prepare data for deployment python ./prepare_data.py --data_path "data_online" # sample_request_data - sample_request_data="./data_online/odFridgeObjectsMask/sample_request_data.json" @@ -37,7 +36,7 @@ az account set -s $subscription_id workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" # 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) +# Need to confirm model show command works for registries outside the tenant (aka system registry) if ! 
az ml model show --name $model_name --label $model_label --registry-name $registry_name then echo "Model $model_name:$model_version does not exist in registry $registry_name" @@ -47,12 +46,12 @@ fi model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv) # 3. Deploy the model to an endpoint -# create online endpoint +# Create online endpoint az ml online-endpoint create --name $endpoint_name $workspace_info || { echo "endpoint create failed"; exit 1; } -# deploy model from registry to endpoint in workspace +# Deploy model from registry to endpoint in workspace az ml online-deployment create --file deploy-online.yaml $workspace_info --all-traffic --set \ endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ instance_type=$deployment_sku || { diff --git a/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh b/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh index 517c72765dc..f7e80669ac9 100644 --- a/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh +++ b/cli/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.sh @@ -1,5 +1,5 @@ set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-image-object-detection +# The commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-image-object-detection # script inputs registry_name="azureml-preview" @@ -12,10 +12,9 @@ model_name="yolof_r50_c5_8x8_1x_coco" model_label="latest" deployment_compute="cpu-cluster" -# todo: fetch deployment_sku from the min_inference_sku tag of the model +# Todo: fetch deployment_sku from the min_inference_sku tag of the model deployment_sku="Standard_DS3_v2" - version=$(date +%s) endpoint_name="image-od-$version" deployment_name="demo-$version" @@ -24,10 +23,10 @@ deployment_name="demo-$version" data_path="data_batch" python ./prepare_data.py --data_path $data_path --mode "batch" -# sample request data in csv format with image column +# Sample request data in csv format with image column sample_request_csv="./data_batch/odFridgeObjects/image_object_detection_list.csv" -# sample request data in image folder format +# Sample request data in image folder format sample_request_folder="./data_batch/odFridgeObjects/images" # 1. Setup pre-requisites @@ -42,7 +41,7 @@ az account set -s $subscription_id workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" # 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) +# Need to confirm model show command works for registries outside the tenant (aka system registry) if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name then echo "Model $model_name:$model_label does not exist in registry $registry_name" @@ -52,7 +51,7 @@ fi model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv) -# 3. check if compute $deployment_compute exists, else create it +# 3. 
Check if compute $deployment_compute exists, else create it if az ml compute show --name $deployment_compute $workspace_info then echo "Compute cluster $deployment_compute already exists" @@ -65,12 +64,12 @@ else fi # 4. Deploy the model to an endpoint -# create batch endpoint +# Create batch endpoint az ml batch-endpoint create --name $endpoint_name $workspace_info || { echo "endpoint create failed"; exit 1; } -# deploy model from registry to endpoint in workspace +# Deploy model from registry to endpoint in workspace az ml batch-deployment create --file ./deploy-batch.yaml $workspace_info --set \ endpoint_name=$endpoint_name \ model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ @@ -91,14 +90,14 @@ else exit 1 fi -# invoke the endpoint +# Invoke the endpoint folder_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \ --deployment-name $deployment_name --input $sample_request_folder --input-type \ uri_folder $workspace_info --query name --output tsv) || { echo "endpoint invoke failed"; exit 1; } -# wait for the job to complete +# Wait for the job to complete az ml job stream --name $folder_inference_job $workspace_info || { echo "job stream failed"; exit 1; } @@ -116,14 +115,14 @@ else exit 1 fi -# invoke the endpoint +# Invoke the endpoint csv_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \ --deployment-name $deployment_name --input $sample_request_csv --input-type \ uri_file $workspace_info --query name --output tsv) || { echo "endpoint invoke failed"; exit 1; } -# wait for the job to complete +# Wait for the job to complete az ml job stream --name $csv_inference_job $workspace_info || { echo "job stream failed"; exit 1; } diff --git a/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb b/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb index ba049eb01dc..bcb8df226e8 100644 --- a/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb @@ -232,11 +232,11 @@ "- /carton\n", "- /can\n", "\n", - "This is the most common data format for multiclass image classification. Each folder title corresponds to the image label for the images contained inside. \n", + "This is the most common data format for multiclass image classification. Each folder's title corresponds to the image label for the images contained inside. \n", "\n", "#### 4.1 Download the Data\n", "We first download and unzip the data locally. By default, the data would be downloaded in `./data` folder in current directory. \n", - "If you prefer to download the data at a different location, update it in `dataset_parent_dir = ...` in the next cell." + "If you prefer to download the data at a different location, update it in `dataset_parent_dir = ...` in the following cell." 
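+    "\n",
+    "Since each folder name doubles as the class label, the (image, label) pairs can be enumerated straight from the directory tree; a rough sketch, assuming the default `./data/fridgeObjects` download location:\n",
+    "\n",
+    "```python\n",
+    "import os\n",
+    "dataset_dir = \"./data/fridgeObjects\"\n",
+    "for class_name in os.listdir(dataset_dir):\n",
+    "    sub_dir = os.path.join(dataset_dir, class_name)\n",
+    "    if not os.path.isdir(sub_dir):\n",
+    "        continue  # Skip stray files at the top level\n",
+    "    for image in os.listdir(sub_dir):\n",
+    "        print(os.path.join(sub_dir, image), \"->\", class_name)\n",
+    "```"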
] }, { @@ -252,10 +252,10 @@ "# Change to a different location if you prefer\n", "dataset_parent_dir = \"./data\"\n", "\n", - "# create data folder if it doesnt exist.\n", + "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", + "# Download data\n", "download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", @@ -269,12 +269,12 @@ "# Download the dataset\n", "urllib.request.urlretrieve(download_url, filename=data_file)\n", "\n", - "# extract files\n", + "# Extract files\n", "with ZipFile(data_file, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", " print(\"done\")\n", - "# delete zip file\n", + "# Delete zip file\n", "os.remove(data_file)" ] }, @@ -333,9 +333,9 @@ "source": [ "#### 4.3 Convert the downloaded data to JSONL\n", "\n", - "For documentation on preparing the datasets beyond this notebook, please refer to the [documentation on how to prepare datasets](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-prepare-datasets-for-automl-images).\n", + "For documentation on preparing the datasets beyond this notebook, refer to the [documentation on how to prepare datasets](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-prepare-datasets-for-automl-images).\n", "\n", - "In order to use this data to create an AzureML MLTable, we first need to convert it to the required JSONL format. The following script is creating two `.jsonl` files (one for training and one for validation) in the corresponding MLTable folder. The train / validation ratio corresponds to 20% of the data going into the validation file. For further details on jsonl file used for image classification task in automated ml, please refer to the [data schema documentation for multi-class image classification task](https://learn.microsoft.com/en-us/azure/machine-learning/reference-automl-images-schema#image-classification-binarymulti-class)." + "In order to use this data to create an AzureML MLTable, we first need to convert it to the required JSONL format. The following script is creating two `.jsonl` files (one for training and one for validation) in the corresponding MLTable folder. In this example, 20% of the data is kept for validation. For further details on jsonl file used for image classification task in automated ml, please refer to the [data schema documentation for multi-class image classification task](https://learn.microsoft.com/en-us/azure/machine-learning/reference-automl-images-schema#image-classification-binarymulti-class)." 
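+    "\n",
+    "Each emitted line simply pairs an image URI with its label; a sketch of one record (the datastore prefix below is a placeholder for the uploaded data asset's path):\n",
+    "\n",
+    "```python\n",
+    "import json\n",
+    "sample = {\n",
+    "    \"image_url\": \"AmlDatastore://workspaceblobstore/fridgeObjects/milk_bottle/99.jpg\",\n",
+    "    \"label\": \"milk_bottle\",\n",
+    "}\n",
+    "print(json.dumps(sample))\n",
+    "```"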
] }, { @@ -347,11 +347,11 @@ "import json\n", "import os\n", "\n", - "# We'll copy each JSONL file within its related MLTable folder\n", + "# We will copy each JSONL file within its related MLTable folder\n", "training_mltable_path = os.path.join(dataset_parent_dir, \"training-mltable-folder\")\n", "validation_mltable_path = os.path.join(dataset_parent_dir, \"validation-mltable-folder\")\n", "\n", - "# First, let's create the folders if they don't exist\n", + "# Create the folders if they don't exist\n", "os.makedirs(training_mltable_path, exist_ok=True)\n", "os.makedirs(validation_mltable_path, exist_ok=True)\n", "\n", @@ -375,7 +375,6 @@ " sub_dir = os.path.join(dataset_dir, class_name)\n", " if not os.path.isdir(sub_dir):\n", " continue\n", - "\n", " # Scan each sub directary\n", " print(f\"Parsing {sub_dir}\")\n", " for image in os.listdir(sub_dir):\n", @@ -384,10 +383,10 @@ " json_line[\"label\"] = class_name\n", "\n", " if index % train_validation_ratio == 0:\n", - " # validation annotation\n", + " # Validation annotation\n", " validation_f.write(json.dumps(json_line) + \"\\n\")\n", " else:\n", - " # train annotation\n", + " # Train annotation\n", " train_f.write(json.dumps(json_line) + \"\\n\")\n", " index += 1" ] @@ -450,7 +449,7 @@ "source": [ "### 5. Submit the fine tuning job using `transformers_image_classification_pipeline` component\n", " \n", - "Create the job that uses the `transformers_image_classification_pipeline` component for multi-class image-classification task. [Learn more]() about all the parameters supported for fine tuning." + "Create the job that uses the `transformers_image_classification_pipeline` component for multi-class image-classification task. Learn more in 5.2 about all the parameters supported for fine tuning." ] }, { @@ -491,20 +490,25 @@ "metadata": {}, "outputs": [], "source": [ + "deepspeed_config_path = \"./deepspeed_configs/zero1.json\"\n", + "if not os.path.exists(deepspeed_config_path):\n", + " print(\"DeepSpeed config file not found\")\n", + " deepspeed_config_path = None\n", + "\n", "pipeline_component_args = {\n", - " # # model_selection_args\n", + " # # Model import args\n", " \"model_family\": \"HuggingFaceImage\",\n", - " # # specify the model_name instead of mlflow_model if you want to use a model from the huggingface hub\n", - " \"mlflow_model\": foundation_model,\n", - " # \"model_name\": huggingface_model_name,\n", - " # # finetune_args\n", + " \"mlflow_model\": foundation_model.id, # foundation_model.id is provided, only foundation_model gives UserErrorException: only path input is supported now but get: ...\n", + " # \"model_name\": huggingface_model_name, # specify the model_name instead of mlflow_model if you want to use a model from the huggingface hub\n", + "\n", + " # # Finetune_args\n", " \"image_width\": -1, # Default value is -1 which means it would be overwritten by default image width in Hugging Face feature extractor\n", " \"image_height\": -1, # Default value is -1 which means it would be overwritten by default image height in Hugging Face feature extractor\n", " \"task_name\": \"image-classification\",\n", " \"apply_augmentations\": True,\n", " \"number_of_workers\": 8,\n", " \"apply_deepspeed\": False,\n", - " \"deepspeed_config\": \"./deepspeed_configs/zero1.json\",\n", + " \"deepspeed_config\": deepspeed_config_path,\n", " \"apply_ort\": False,\n", " \"auto_find_batch_size\": False,\n", " \"precision\": \"32\",\n", @@ -520,6 +524,20 @@ " \"early_stopping_patience\": 1,\n", " \"resume_from_checkpoint\": False,\n", " 
\"save_as_mlflow_model\": True,\n", + " # # Uncomment one or more lines below to provide specific values, if you wish you override the autoselected default values.\n", + " # \"metric_for_best_model\": \"accuracy\",\n", + " # \"number_of_epochs\": 15,\n", + " # \"max_steps\": -1,\n", + " # \"training_batch_size\": 4,\n", + " # \"validation_batch_size\": 4,\n", + " # \"learning_rate\": 5e-5,\n", + " # \"learning_rate_scheduler\": \"warmup_linear\",\n", + " # \"warmup_steps\": 0,\n", + " # \"optimizer\": \"adamw_hf\",\n", + " # \"weight_decay\": 0.0,\n", + " # \"gradient_accumulation_step\": 1,\n", + " # \"label_smoothing_factor\": 0.0,\n", + " # \"max_grad_norm\": 1.0,\n", "}\n", "process_count_per_instance = 1\n", "instance_count = 1\n", @@ -536,61 +554,6 @@ "print(f\"Finetuning model {use_model_name}\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# One can provide values to training args specified in pipeline component as a dictionary as shown below.\n", - "# In this case, user specified values will be respected.\n", - "\n", - "custom_pipeline_component_args = {\n", - " # model_selection_args\n", - " \"model_family\": \"HuggingFaceImage\",\n", - " # # specify the model_name instead of mlflow_model if you want to use a model from the huggingface hub\n", - " \"mlflow_model\": foundation_model,\n", - " # \"model_name\": huggingface_model_name,\n", - " # finetune_args\n", - " \"image_width\": -1,\n", - " \"image_height\": -1,\n", - " \"task_name\": \"image-classification\",\n", - " \"metric_for_best_model\": \"accuracy\",\n", - " \"apply_augmentations\": True,\n", - " \"number_of_workers\": 8,\n", - " \"apply_deepspeed\": False,\n", - " \"deepspeed_config\": \"./deepspeed_configs/zero1.json\",\n", - " \"apply_ort\": False,\n", - " \"number_of_epochs\": 15,\n", - " \"max_steps\": -1,\n", - " \"training_batch_size\": 4,\n", - " \"validation_batch_size\": 4,\n", - " \"auto_find_batch_size\": False,\n", - " \"learning_rate\": 5e-5,\n", - " \"learning_rate_scheduler\": \"warmup_linear\",\n", - " \"warmup_steps\": 0,\n", - " \"optimizer\": \"adamw_hf\",\n", - " \"weight_decay\": 0.0,\n", - " \"extra_optim_args\": \"\",\n", - " \"gradient_accumulation_step\": 1,\n", - " \"precision\": \"32\",\n", - " \"label_smoothing_factor\": 0.0,\n", - " \"random_seed\": 42,\n", - " \"evaluation_strategy\": \"epoch\",\n", - " \"evaluation_steps\": 500,\n", - " \"logging_strategy\": \"epoch\",\n", - " \"logging_steps\": 500,\n", - " \"save_strategy\": \"epoch\",\n", - " \"save_steps\": 500,\n", - " \"save_total_limit\": -1,\n", - " \"early_stopping\": False,\n", - " \"early_stopping_patience\": 1,\n", - " \"max_grad_norm\": 1.0,\n", - " \"resume_from_checkpoint\": False,\n", - " \"save_as_mlflow_model\": True,\n", - "}" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -625,8 +588,7 @@ " **pipeline_component_args,\n", " )\n", " return {\n", - " # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model\n", - " # registering the model is required to deploy the model to an online or batch endpoint\n", + " # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint.\n", " \"trained_model\": transformers_pipeline_component.outputs.mlflow_model_folder,\n", " }" ] @@ -676,7 +638,7 @@ "source": [ "### 6. 
Get metrics from finetune component\n", "\n", - "The model training happens as part of the finetune component. Please follow below steps to extract validation metrics from the run" + "The model training happens as part of the finetune component. Please follow below steps to extract validation metrics from the run." ] }, { @@ -746,16 +708,17 @@ "metadata": {}, "outputs": [], "source": [ - "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", + "# Concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", "filter = \"tags.mlflow.rootRunId='\" + transformers_pipeline_run.name + \"'\"\n", "runs = mlflow.search_runs(experiment_names=[experiment_name], filter_string=filter, output_format=\"list\")\n", - "# get the training and evaluation runs.\n", - "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", + "\n", + "# Get the training and evaluation runs.\n", + "# Using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", "for run in runs:\n", - " # check if run.data.metrics.epoch exists\n", + " # Check if run.data.metrics.epoch exists\n", " if \"epoch\" in run.data.metrics:\n", " training_run = run\n", - " # else, check if run.data.metrics.accuracy exists\n", + " # Else, check if run.data.metrics.accuracy exists\n", " elif \"accuracy\" in run.data.metrics:\n", " evaluation_run = run" ] @@ -799,7 +762,7 @@ "source": [ "import time\n", "\n", - "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", + "# Generating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time()))" ] }, @@ -888,11 +851,12 @@ " name=deployment_name,\n", " endpoint_name=online_endpoint_name,\n", " model=registered_model.id,\n", - " # use GPU instance type like STANDARD_NC6s_v3 for faster explanations\n", - " instance_type=\"Standard_DS3_V2\", # \"Standard_DS3_V2\",\n", + " instance_type=\"Standard_DS3_V2\", # Use GPU instance type like STANDARD_NC6s_v3 for faster explanations\n", " instance_count=1,\n", " request_settings=OnlineRequestSettings(\n", - " max_concurrent_requests_per_instance=1, request_timeout_ms=5000, max_queue_wait_ms=500 # 90000,\n", + " max_concurrent_requests_per_instance=1,\n", + " request_timeout_ms=5000, # 90000,\n", + " max_queue_wait_ms=500\n", " ),\n", " liveness_probe=ProbeSettings(\n", " failure_threshold=30,\n", @@ -938,8 +902,9 @@ "# Get the details for online endpoint\n", "endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)\n", "\n", - "# existing traffic details\n", + "# Existing traffic details\n", "print(endpoint.traffic)\n", + "\n", "# Get the scoring URI\n", "print(endpoint.scoring_uri)\n", "print(demo_deployment)" @@ -957,12 +922,10 @@ "\n", "sample_image = os.path.join(dataset_dir, \"milk_bottle\", \"99.jpg\")\n", "\n", - "\n", "def read_image(image_path):\n", " with open(image_path, \"rb\") as f:\n", " return f.read()\n", "\n", - "\n", "request_json = {\n", " \"input_data\": {\n", " \"columns\": [\"image\"],\n", @@ -1017,6 +980,7 @@ "x, y = img.size\n", "\n", "fig, ax = plt.subplots(1, figsize=(15, 15))\n", + "\n", "# Display the image\n", "ax.imshow(img_np)\n", "\n", @@ -1040,7 +1004,7 @@ "metadata": {}, "source": [ "### 10. 
Clean up resources - delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" + "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint." ] }, { diff --git a/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb b/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb index d83fbb16daf..18ae8f2c08d 100644 --- a/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb @@ -235,7 +235,7 @@ "\n", "#### 4.1 Download the Data\n", "We first download and unzip the data locally. By default, the data would be downloaded in `./data` folder in current directory. \n", - "If you prefer to download the data at a different location, update it in `dataset_parent_dir = ...` in the next cell." + "If you prefer to download the data at a different location, update it in `dataset_parent_dir = ...` in the following cell." ] }, { @@ -251,10 +251,10 @@ "# Change to a different location if you prefer\n", "dataset_parent_dir = \"./data\"\n", "\n", - "# create data folder if it doesnt exist.\n", + "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", + "# Download data\n", "download_url = (\n", " \"https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip\"\n", ")\n", @@ -270,12 +270,12 @@ "# Download the dataset\n", "urllib.request.urlretrieve(download_url, filename=data_file)\n", "\n", - "# extract files\n", + "# Extract files\n", "with ZipFile(data_file, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", " print(\"done\")\n", - "# delete zip file\n", + "# Delete zip file\n", "os.remove(data_file)" ] }, @@ -336,7 +336,7 @@ "\n", "For documentation on preparing the datasets beyond this notebook, please refer to the [documentation on how to prepare datasets](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-prepare-datasets-for-automl-images).\n", "\n", - "In order to use this data to create an AzureML MLTable, we first need to convert it to the required JSONL format. The following script is creating two `.jsonl` files (one for training and one for validation) in the corresponding MLTable folder. The train / validation ratio corresponds to 20% of the data going into the validation file. For further details on jsonl file used for image classification task in automated ml, please refer to the [data schema documentation for multi-label image classification task](https://learn.microsoft.com/en-us/azure/machine-learning/reference-automl-images-schema#image-classification-multi-label)." + "In order to use this data to create an AzureML MLTable, we first need to convert it to the required JSONL format. The following script is creating two `.jsonl` files (one for training and one for validation) in the corresponding MLTable folder. In this example, 20% of the data is kept for validation. 
For further details on jsonl file used for image classification task in automated ml, please refer to the [data schema documentation for multi-label image classification task](https://learn.microsoft.com/en-us/azure/machine-learning/reference-automl-images-schema#image-classification-multi-label)." ] }, { @@ -348,11 +348,11 @@ "import json\n", "import os\n", "\n", - "# We'll copy each JSONL file within its related MLTable folder\n", + "# We will copy each JSONL file within its related MLTable folder\n", "training_mltable_path = os.path.join(dataset_parent_dir, \"training-mltable-folder\")\n", "validation_mltable_path = os.path.join(dataset_parent_dir, \"validation-mltable-folder\")\n", "\n", - "# First, let's create the folders if they don't exist\n", + "# Create the folders if they don't exist\n", "os.makedirs(training_mltable_path, exist_ok=True)\n", "os.makedirs(validation_mltable_path, exist_ok=True)\n", "\n", @@ -368,7 +368,7 @@ " \"label\": [],\n", "}\n", "\n", - "# Path to the labels file.\n", + "# Path to the labels file\n", "labelFile = os.path.join(dataset_dir, \"labels.csv\")\n", "\n", "# Read each annotation and convert it to jsonl line\n", @@ -388,10 +388,10 @@ " json_line[\"label\"] = line_split[1].strip().split(\" \")\n", "\n", " if i % train_validation_ratio == 0:\n", - " # validation annotation\n", + " # Validation annotation\n", " validation_f.write(json.dumps(json_line) + \"\\n\")\n", " else:\n", - " # train annotation\n", + " # Train annotation\n", " train_f.write(json.dumps(json_line) + \"\\n\")" ] }, @@ -453,7 +453,7 @@ "source": [ "### 5. Submit the fine tuning job using `transformers_image_classification_pipeline` component\n", " \n", - "Create the job that uses the `transformers_image_classification_pipeline` component for multi-label image-classification task. [Learn more]() about all the parameters supported for fine tuning." + "Create the job that uses the `transformers_image_classification_pipeline` component for multi-label image-classification task. Learn more in 5.2 about all the parameters supported for fine tuning." 
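For reference, a minimal sketch of what one line of the `.jsonl` files generated above looks like; the datastore path and class names are illustrative placeholders, and the linked schema documentation is authoritative:

```python
import json

# Hypothetical annotation line as produced by the conversion loop above:
# "image_url" points at the image in the datastore and "label" holds the
# list of class names parsed from labels.csv.
json_line = {
    "image_url": "AmlDatastore://workspaceblobstore/multilabelFridgeObjects/images/1.jpg",
    "label": ["milk_bottle", "carton"],
}
print(json.dumps(json_line))
```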
] }, { @@ -494,20 +494,25 @@
 "metadata": {},
 "outputs": [],
 "source": [
+ "deepspeed_config_path = \"./deepspeed_configs/zero1.json\"\n",
+ "if not os.path.exists(deepspeed_config_path):\n",
+ " print(\"DeepSpeed config file not found\")\n",
+ " deepspeed_config_path = None\n",
+ "\n",
 "pipeline_component_args = {\n",
- " # model_selection_args\n",
+ " # # Model import args\n",
 " \"model_family\": \"HuggingFaceImage\",\n",
- " # # specify the model_name instead of mlflow_model if you want to use a model from the huggingface hub\n",
- " \"mlflow_model\": foundation_model,\n",
- " # \"model_name\": huggingface_model_name,\n",
- " # finetune_args\n",
- " \"image_width\": -1,\n",
- " \"image_height\": -1,\n",
+ " \"mlflow_model\": foundation_model.id, # foundation_model.id is provided; passing foundation_model directly gives UserErrorException: only path input is supported now but get: ...\n",
+ " # \"model_name\": huggingface_model_name, # specify the model_name instead of mlflow_model if you want to use a model from the huggingface hub\n",
+ " \n",
+ " # # Finetune args\n",
+ " \"image_width\": -1, # Default value is -1, which means it will be overwritten by the default image width from the Hugging Face feature extractor\n",
+ " \"image_height\": -1, # Default value is -1, which means it will be overwritten by the default image height from the Hugging Face feature extractor\n",
 " \"task_name\": \"image-classification-multilabel\",\n",
 " \"apply_augmentations\": True,\n",
 " \"number_of_workers\": 8,\n",
 " \"apply_deepspeed\": False,\n",
- " \"deepspeed_config\": \"./deepspeed_configs/zero1.json\",\n",
+ " \"deepspeed_config\": deepspeed_config_path,\n",
 " \"apply_ort\": False,\n",
 " \"auto_find_batch_size\": False,\n",
 " \"precision\": \"32\",\n",
@@ -523,6 +528,20 @@
 " \"early_stopping_patience\": 1,\n",
 " \"resume_from_checkpoint\": False,\n",
 " \"save_as_mlflow_model\": True,\n",
+ " # # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.\n",
+ " # \"metric_for_best_model\": \"iou\",\n",
+ " # \"number_of_epochs\": 15,\n",
+ " # \"max_steps\": -1,\n",
+ " # \"training_batch_size\": 4,\n",
+ " # \"validation_batch_size\": 4,\n",
+ " # \"learning_rate\": 5e-5,\n",
+ " # \"learning_rate_scheduler\": \"warmup_linear\",\n",
+ " # \"warmup_steps\": 0,\n",
+ " # \"optimizer\": \"adamw_hf\",\n",
+ " # \"weight_decay\": 0.0,\n",
+ " # \"gradient_accumulation_step\": 1,\n",
+ " # \"label_smoothing_factor\": 0.0,\n",
+ " # \"max_grad_norm\": 1.0,\n",
 "}\n",
 "process_count_per_instance = 1\n",
 "instance_count = 1\n",
@@ -539,61 +558,6 @@
 "print(f\"Finetuning model {use_model_name}\")"
 ]
 },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# One can provide values to training args specified in pipeline component as a dictionary as shown below.\n",
- "# In this case, user specified values will be respected.\n",
- "\n",
- "custom_pipeline_component_args = {\n",
- " # model_selection_args\n",
- " \"model_family\": \"HuggingFaceImage\",\n",
- " # # specify the model_name instead of mlflow_model if you want to use a model from the huggingface hub\n",
- " \"mlflow_model\": foundation_model,\n",
- " # \"model_name\": huggingface_model_name,\n",
- " # finetune_args\n",
- " \"image_width\": -1,\n",
- " \"image_height\": -1,\n",
- " \"task_name\": \"image-classification-multilabel\",\n",
- " \"metric_for_best_model\": \"iou\",\n",
- " \"apply_augmentations\": True,\n",
- " \"number_of_workers\": 8,\n",
- " 
\"apply_deepspeed\": False,\n", - " \"deepspeed_config\": \"./deepspeed_configs/zero1.json\",\n", - " \"apply_ort\": False,\n", - " \"number_of_epochs\": 15,\n", - " \"max_steps\": -1,\n", - " \"training_batch_size\": 4,\n", - " \"validation_batch_size\": 4,\n", - " \"auto_find_batch_size\": False,\n", - " \"learning_rate\": 5e-5,\n", - " \"learning_rate_scheduler\": \"warmup_linear\",\n", - " \"warmup_steps\": 0,\n", - " \"optimizer\": \"adamw_hf\",\n", - " \"weight_decay\": 0.0,\n", - " \"extra_optim_args\": \"\",\n", - " \"gradient_accumulation_step\": 1,\n", - " \"precision\": \"32\",\n", - " \"label_smoothing_factor\": 0.0,\n", - " \"random_seed\": 42,\n", - " \"evaluation_strategy\": \"epoch\",\n", - " \"evaluation_steps\": 500,\n", - " \"logging_strategy\": \"epoch\",\n", - " \"logging_steps\": 500,\n", - " \"save_strategy\": \"epoch\",\n", - " \"save_steps\": 500,\n", - " \"save_total_limit\": -1,\n", - " \"early_stopping\": False,\n", - " \"early_stopping_patience\": 1,\n", - " \"max_grad_norm\": 1.0,\n", - " \"resume_from_checkpoint\": False,\n", - " \"save_as_mlflow_model\": True,\n", - "}" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -628,8 +592,7 @@ " **pipeline_component_args,\n", " )\n", " return {\n", - " # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model\n", - " # registering the model is required to deploy the model to an online or batch endpoint\n", + " # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint.\n", " \"trained_model\": transformers_pipeline_component.outputs.mlflow_model_folder,\n", " }" ] @@ -679,7 +642,7 @@ "source": [ "### 6. Get metrics from finetune component\n", "\n", - "The model training happens as part of the finetune component. Please follow below steps to extract validation metrics from the run" + "The model training happens as part of the finetune component. Please follow below steps to extract validation metrics from the run." 
] }, { @@ -749,17 +712,18 @@ "metadata": {}, "outputs": [], "source": [ - "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", + "# Concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", "filter = \"tags.mlflow.rootRunId='\" + transformers_pipeline_run.name + \"'\"\n", "runs = mlflow.search_runs(experiment_names=[experiment_name], filter_string=filter, output_format=\"list\")\n", - "# get the training and evaluation runs.\n", - "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", + "\n", + "# Get the training and evaluation runs.\n", + "# Using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", "for run in runs:\n", - " # check if run.data.metrics.epoch exists\n", + " # Check if run.data.metrics.epoch exists\n", " if \"epoch\" in run.data.metrics:\n", " training_run = run\n", - " # else, check if run.data.metrics.accuracy exists\n", - " elif \"accuracy\" in run.data.metrics:\n", + " # Else, check if run.data.metrics.accuracy exists\n", + " elif \"iou\" in run.data.metrics:\n", " evaluation_run = run" ] }, @@ -802,7 +766,7 @@ "source": [ "import time\n", "\n", - "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", + "# Generating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time()))" ] }, @@ -830,7 +794,7 @@ " path=model_path_from_job,\n", " type=AssetTypes.MLFLOW_MODEL,\n", " name=finetuned_model_name,\n", - " version=timestamp, # use timestamp as version to avoid version conflict\n", + " version=timestamp, # Use timestamp as version to avoid version conflict\n", " description=finetuned_model_description,\n", ")\n", "print(f\"Prepare to register model: \\n{prepare_to_register_model}\")\n", @@ -882,16 +846,22 @@ "from azure.ai.ml.entities import OnlineRequestSettings, ProbeSettings\n", "\n", "deployment_name = \"hf-ml-fridge-items-mlflow-deploy\"\n", + "print(registered_model.id)\n", + "print(online_endpoint_name)\n", + "print(deployment_name)\n", + "\n", "# Create a deployment\n", - "req_timeout = OnlineRequestSettings(request_timeout_ms=90000)\n", "demo_deployment = ManagedOnlineDeployment(\n", " name=deployment_name,\n", " endpoint_name=online_endpoint_name,\n", " model=registered_model.id,\n", - " # use GPU instance type like STANDARD_NC6s_v3 for faster explanations\n", - " instance_type=\"Standard_DS3_V2\",\n", + " instance_type=\"Standard_DS3_V2\", # Use GPU instance type like STANDARD_NC6s_v3 for faster explanations\n", " instance_count=1,\n", - " request_settings=req_timeout,\n", + " request_settings=OnlineRequestSettings(\n", + " max_concurrent_requests_per_instance=1,\n", + " request_timeout_ms=5000, # 90000,\n", + " max_queue_wait_ms=500\n", + " ),\n", " liveness_probe=ProbeSettings(\n", " failure_threshold=30,\n", " success_threshold=1,\n", @@ -920,7 +890,7 @@ "source": [ "### 9. Test the endpoint with sample data\n", "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then show the display the scored labels alongside the ground truth labels" + "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then display the scored labels alongside the ground truth labels." 
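Note that invocations only reach the new deployment once traffic is routed to it; a minimal sketch using the names defined in the cells above:

```python
# Send 100% of scoring traffic to the deployment created above, then
# update the endpoint so the traffic rule takes effect.
endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)
endpoint.traffic = {deployment_name: 100}
workspace_ml_client.online_endpoints.begin_create_or_update(endpoint).result()
```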
] }, { @@ -937,8 +907,9 @@ "# Get the details for online endpoint\n", "endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)\n", "\n", - "# existing traffic details\n", + "# Existing traffic details\n", "print(endpoint.traffic)\n", + "\n", "# Get the scoring URI\n", "print(endpoint.scoring_uri)\n", "print(demo_deployment)" @@ -956,12 +927,10 @@ "\n", "sample_image = os.path.join(dataset_dir, \"images\", \"56.jpg\")\n", "\n", - "\n", "def read_image(image_path):\n", " with open(image_path, \"rb\") as f:\n", " return f.read()\n", "\n", - "\n", "request_json = {\n", " \"input_data\": {\n", " \"columns\": [\"image\"],\n", @@ -1042,7 +1011,7 @@ "metadata": {}, "source": [ "### 10. Clean up resources - delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" + "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint." ] }, { diff --git a/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb b/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb index 8c43b962172..05530352d7f 100644 --- a/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb @@ -43,8 +43,8 @@ "metadata": {}, "outputs": [], "source": [ - "! pip install azure-ai-ml==1.0.0\n", - "! pip install azure-identity\n", + "! pip install azure-ai-ml==1.8.0\n", + "! pip install azure-identity==1.13.0\n", "! pip install datasets==2.12.0" ] }, @@ -235,7 +235,7 @@ "\n", "#### 4.1 Download the Data\n", "We first download and unzip the data locally. By default, the data would be downloaded in `./data` folder in current directory. \n", - "If you prefer to download the data at a different location, update it in `dataset_parent_dir = ...` in the next cell." + "If you prefer to download the data at a different location, update it in `dataset_parent_dir = ...` in the following cell." 
] }, { @@ -251,10 +251,10 @@ "# Change to a different location if you prefer\n", "dataset_parent_dir = \"./data\"\n", "\n", - "# create data folder if it doesnt exist.\n", + "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", + "# Download data\n", "download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", @@ -268,12 +268,12 @@ "# Download the dataset\n", "urllib.request.urlretrieve(download_url, filename=data_file)\n", "\n", - "# extract files\n", + "# Extract files\n", "with ZipFile(data_file, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", " print(\"done\")\n", - "# delete zip file\n", + "# Delete zip file\n", "os.remove(data_file)" ] }, @@ -336,7 +336,7 @@ "\n", "For documentation on preparing the datasets beyond this notebook, please refer to the [documentation on how to prepare datasets](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-prepare-datasets-for-automl-images).\n", "\n", - "In order to use this data to create an AzureML MLTable, we first need to convert it to the required JSONL format. The following script is creating two `.jsonl` files (one for training and one for validation) in the corresponding MLTable folder. The train / validation ratio corresponds to 20% of the data going into the validation file. For further details on jsonl file used for image classification task in automated ml, please refer to the [data schema documentation for image instance segmentation task](https://learn.microsoft.com/en-us/azure/machine-learning/reference-automl-images-schema#instance-segmentation)." + "In order to use this data to create an AzureML MLTable, we first need to convert it to the required JSONL format. The following script is creating two `.jsonl` files (one for training and one for validation) in the corresponding MLTable folder. In this example, 20% of the data is kept for validation. For further details on jsonl file used for image classification task in automated ml, please refer to the [data schema documentation for image instance segmentation task](https://learn.microsoft.com/en-us/azure/machine-learning/reference-automl-images-schema#instance-segmentation)." 
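For orientation, a hypothetical single annotation line for instance segmentation; the path, class name, and coordinates are placeholders, and the linked schema documentation is authoritative:

```python
import json

# Illustrative annotation: each object carries a class label, an isCrowd
# flag, and one or more polygons of normalized x, y coordinate pairs.
json_line = {
    "image_url": "AmlDatastore://workspaceblobstore/odFridgeObjectsMask/images/1.jpg",
    "label": [
        {
            "label": "carton",
            "isCrowd": 0,
            "polygon": [[0.27, 0.41, 0.55, 0.41, 0.55, 0.75, 0.27, 0.75]],
        }
    ],
}
print(json.dumps(json_line))
```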
] }, { @@ -403,11 +403,11 @@
 " f.write(mltable_file_contents)\n",
 "\n",
 "\n",
- "# We'll copy each JSONL file within its related MLTable folder\n",
+ "# We will copy each JSONL file within its related MLTable folder\n",
 "training_mltable_path = os.path.join(dataset_parent_dir, \"training-mltable-folder\")\n",
 "validation_mltable_path = os.path.join(dataset_parent_dir, \"validation-mltable-folder\")\n",
 "\n",
- "# First, let's create the folders if they don't exist\n",
+ "# Create the folders if they don't exist\n",
 "os.makedirs(training_mltable_path, exist_ok=True)\n",
 "os.makedirs(validation_mltable_path, exist_ok=True)\n",
 "\n",
@@ -478,19 +478,23 @@
 "metadata": {},
 "outputs": [],
 "source": [
+ "deepspeed_config_path = \"./deepspeed_configs/zero1.json\"\n",
+ "if not os.path.exists(deepspeed_config_path):\n",
+ " print(\"DeepSpeed config file not found\")\n",
+ " deepspeed_config_path = None\n",
+ "\n",
 "pipeline_component_args = {\n",
- " # model_selection_args\n",
+ " # # Model import args\n",
 " \"model_family\": \"MmDetectionImage\",\n",
- " # # specify the model_name instead of mlflow_model if you want to use a model from the mmdetection model zoo\n",
- " \"mlflow_model\": foundation_model,\n",
- " # \"model_name\": mmdetection_model_name,\n",
- " # finetune_args\n",
- " # # \"auto_hyperparameter_selection\": False,\n",
+ " \"mlflow_model\": foundation_model.id, # foundation_model.id is provided; passing foundation_model directly gives UserErrorException: only path input is supported now but get: ...\n",
+ " # \"model_name\": mmdetection_model_name, # specify the model_name instead of mlflow_model if you want to use a model from the mmdetection model zoo\n",
+ "\n",
+ " # Finetune args\n",
 " \"task_name\": \"image-instance-segmentation\",\n",
 " \"apply_augmentations\": True,\n",
 " \"number_of_workers\": 8,\n",
 " \"apply_deepspeed\": False,\n",
- " \"deepspeed_config\": \"./deepspeed_configs/zero1.json\",\n",
+ " \"deepspeed_config\": deepspeed_config_path,\n",
 " \"apply_ort\": False,\n",
 " \"auto_find_batch_size\": False,\n",
 " \"extra_optim_args\": \"\",\n",
@@ -509,6 +513,21 @@
 " \"early_stopping_patience\": 1,\n",
 " \"resume_from_checkpoint\": False,\n",
 " \"save_as_mlflow_model\": True,\n",
+ " # # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.\n",
+ " # \"image_min_size\": -1,\n",
+ " # \"image_max_size\": -1,\n",
+ " # \"metric_for_best_model\": \"mean_average_precision\",\n",
+ " # \"number_of_epochs\": 15,\n",
+ " # \"max_steps\": -1,\n",
+ " # \"training_batch_size\": 4,\n",
+ " # \"validation_batch_size\": 4,\n",
+ " # \"learning_rate\": 5e-5,\n",
+ " # \"learning_rate_scheduler\": \"warmup_linear\",\n",
+ " # \"warmup_steps\": 0,\n",
+ " # \"optimizer\": \"adamw_hf\",\n",
+ " # \"weight_decay\": 0.0,\n",
+ " # \"gradient_accumulation_step\": 1,\n",
+ " # \"max_grad_norm\": 1.0,\n",
 "}\n",
 "instance_count = 1\n",
 "process_count_per_instance = 1\n",
@@ -529,63 +548,6 @@
 "print(f\"Finetuning model {use_model_name}\")"
 ]
 },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# One can provide values to training args specified in pipeline component as a dictionary as shown below.\n",
- "# In this case, user specified values will be respected.\n",
- "\n",
- "custom_pipeline_component_args = {\n",
- " # model_selection_args\n",
- " \"model_family\": \"MmDetectionImage\",\n",
- " # # specify the model_name instead of mlflow_model if you want to use a model from the 
mmdetection model zoo\n", - " \"mlflow_model\": foundation_model,\n", - " # \"model_name\": mmdetection_model_name,\n", - " # finetune_args\n", - " # # \"auto_hyperparameter_selection\": False,\n", - " \"image_min_size\": -1,\n", - " \"image_max_size\": -1,\n", - " \"task_name\": \"image-instance-segmentation\",\n", - " \"metric_for_best_model\": \"mean_average_precision\",\n", - " \"apply_augmentations\": True,\n", - " \"number_of_workers\": 8,\n", - " \"apply_deepspeed\": False,\n", - " \"deepspeed_config\": \"./deepspeed_configs/zero1.json\",\n", - " \"apply_ort\": False,\n", - " \"number_of_epochs\": 15,\n", - " \"max_steps\": -1,\n", - " \"training_batch_size\": 4,\n", - " \"validation_batch_size\": 4,\n", - " \"auto_find_batch_size\": False,\n", - " \"learning_rate\": 5e-5,\n", - " \"learning_rate_scheduler\": \"warmup_linear\",\n", - " \"warmup_steps\": 0,\n", - " \"optimizer\": \"adamw_hf\",\n", - " \"weight_decay\": 0.0,\n", - " \"extra_optim_args\": \"\",\n", - " \"gradient_accumulation_step\": 1,\n", - " \"precision\": \"32\",\n", - " \"iou_threshold\": 0.5,\n", - " \"box_score_threshold\": 0.3,\n", - " \"random_seed\": 42,\n", - " \"evaluation_strategy\": \"epoch\",\n", - " \"evaluation_steps\": 500,\n", - " \"logging_strategy\": \"epoch\",\n", - " \"logging_steps\": 500,\n", - " \"save_strategy\": \"epoch\",\n", - " \"save_steps\": 500,\n", - " \"save_total_limit\": -1,\n", - " \"early_stopping\": False,\n", - " \"early_stopping_patience\": 1,\n", - " \"max_grad_norm\": 1.0,\n", - " \"resume_from_checkpoint\": False,\n", - " \"save_as_mlflow_model\": True,\n", - "}" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -620,8 +582,7 @@ " **pipeline_component_args,\n", " )\n", " return {\n", - " # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model\n", - " # registering the model is required to deploy the model to an online or batch endpoint\n", + " # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint.\n", " \"trained_model\": mmdetection_pipeline_component.outputs.mlflow_model_folder,\n", " }" ] @@ -673,7 +634,7 @@ "source": [ "### 6. Get metrics from finetune component\n", "\n", - "The model training happens as part of the finetune component. Please follow below steps to extract validation metrics from the run" + "The model training happens as part of the finetune component. Please follow below steps to extract validation metrics from the run." ] }, { @@ -747,16 +708,17 @@ "metadata": {}, "outputs": [], "source": [ - "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", + "# Concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", "filter = \"tags.mlflow.rootRunId='\" + mmdetection_pipeline_run.name + \"'\"\n", "runs = mlflow.search_runs(experiment_names=[experiment_name], filter_string = filter, output_format=\"list\")\n", - "# get the training and evaluation runs. \n", - "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", + "\n", + "# Get the training and evaluation runs. 
\n", + "# Using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", "for run in runs:\n", - " # check if run.data.metrics.epoch exists\n", + " # Check if run.data.metrics.epoch exists\n", " if 'epoch' in run.data.metrics:\n", " training_run = run\n", - " # else, check if run.data.metrics.accuracy exists\n", + " # Else, check if run.data.metrics.accuracy exists\n", " elif 'mean_average_precision' in run.data.metrics:\n", " evaluation_run = run" ] @@ -799,7 +761,7 @@ "outputs": [], "source": [ "import time\n", - "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", + "# Generating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time())) " ] }, @@ -825,7 +787,7 @@ " path=model_path_from_job,\n", " type=AssetTypes.MLFLOW_MODEL,\n", " name=finetuned_model_name,\n", - " version=timestamp, # use timestamp as version to avoid version conflict\n", + " version=timestamp, # Use timestamp as version to avoid version conflict\n", " description=finetuned_model_description\n", ")\n", "print(f\"Prepare to register model: \\n{prepare_to_register_model}\")\n", @@ -890,7 +852,7 @@ " instance_count=1,\n", " request_settings=OnlineRequestSettings(\n", " max_concurrent_requests_per_instance=1,\n", - " request_timeout_ms=5000,\n", + " request_timeout_ms=5000, # 9000,\n", " max_queue_wait_ms=500\n", " ),\n", " liveness_probe=ProbeSettings(\n", @@ -920,7 +882,7 @@ "source": [ "### 9. Test the endpoint with sample data\n", "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then show the display the scored labels alongside the ground truth labels" + "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then display the scored labels alongside the ground truth labels." ] }, { @@ -937,8 +899,9 @@ "# Get the details for online endpoint\n", "endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)\n", "\n", - "# existing traffic details\n", + "# Existing traffic details\n", "print(endpoint.traffic)\n", + "\n", "# Get the scoring URI\n", "print(endpoint.scoring_uri)\n", "print(demo_deployment)" @@ -1040,7 +1003,7 @@ " f\"{round(width, 3)}, {round(height, 3)}], {round(conf_score, 3)}\"\n", " )\n", "\n", - " color = np.random.rand(3) #'red'\n", + " color = np.random.rand(3)\n", " rect = patches.Rectangle(\n", " (topleft_x, topleft_y),\n", " width,\n", @@ -1077,7 +1040,7 @@ "metadata": {}, "source": [ "### 10. Clean up resources - delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" + "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint." 
] }, { diff --git a/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb b/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb index 68b40cce51e..2ef8fc0d55e 100644 --- a/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb @@ -43,8 +43,8 @@ "metadata": {}, "outputs": [], "source": [ - "! pip install azure-ai-ml==1.0.0\n", - "! pip install azure-identity\n", + "! pip install azure-ai-ml==1.8.0\n", + "! pip install azure-identity==1.13.0\n", "! pip install datasets==2.12.0" ] }, @@ -196,7 +196,7 @@ "source": [ "### 3. Pick a foundation model to fine tune\n", "\n", - "We will use the `vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco` model in this notebook. If you need to fine tune a model that is available on MMDetection model zoo, but not available in `azureml-staging` system registry, you can either register the model and use the registered model or use the `model_name` parameter to instruct the components to pull the model directly from MMDetection model zoo.\n", + "We will use the `yolof_r50_c5_8x8_1x_coco` model in this notebook. If you need to fine tune a model that is available on MMDetection model zoo, but not available in `azureml-staging` system registry, you can either register the model and use the registered model or use the `model_name` parameter to instruct the components to pull the model directly from MMDetection model zoo.\n", "\n", "Currently following models are supported:\n", "\n", @@ -240,7 +240,7 @@ "\n", "#### 4.1 Download the Data\n", "We first download and unzip the data locally. By default, the data would be downloaded in `./data` folder in current directory. \n", - "If you prefer to download the data at a different location, update it in `dataset_parent_dir = ...` in the next cell." + "If you prefer to download the data at a different location, update it in `dataset_parent_dir = ...` in the following cell." 
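Stepping back to the model pick above, a sketch of fetching it from the system registry; this assumes `registry_ml_client` is the registry-scoped `MLClient` created during setup:

```python
# Pull the latest registered version of the chosen MMDetection model from
# the system registry; its id is later passed to the finetuning component.
mmdetection_model_name = "yolof_r50_c5_8x8_1x_coco"
foundation_model = registry_ml_client.models.get(name=mmdetection_model_name, label="latest")
print(f"Using model {foundation_model.name}, version {foundation_model.version}")
```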
] }, { @@ -256,14 +256,15 @@ "# Change to a different location if you prefer\n", "dataset_parent_dir = \"./data\"\n", "\n", - "# create data folder if it doesnt exist.\n", + "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", + "# Download data\n", "download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", @@ -273,12 +274,13 @@ "# Download the dataset\n", "urllib.request.urlretrieve(download_url, filename=data_file)\n", "\n", - "# extract files\n", + "# Extract files\n", "with ZipFile(data_file, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", " print(\"done\")\n", - "# delete zip file\n", + "\n", + "# Delete zip file\n", "os.remove(data_file)" ] }, @@ -342,7 +344,7 @@ "For documentation on preparing the datasets beyond this notebook, please refer to the [documentation on how to prepare datasets](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-prepare-datasets-for-automl-images).\n", "\n", "\n", - "In order to use this data to create an AzureML MLTable, we first need to convert it to the required JSONL format. The following script is creating two `.jsonl` files (one for training and one for validation) in the corresponding MLTable folder. The train / validation ratio corresponds to 20% of the data going into the validation file. For further details on jsonl file used for image object detection task in automated ml, please refer to the [data schema documentation for image object-detection task](https://learn.microsoft.com/en-us/azure/machine-learning/reference-automl-images-schema#object-detection)." + "In order to use this data to create an AzureML MLTable, we first need to convert it to the required JSONL format. The following script is creating two `.jsonl` files (one for training and one for validation) in the corresponding MLTable folder. In this example, 20% of the data is kept for validation. For further details on jsonl file used for image object detection task in automated ml, please refer to the [data schema documentation for image object-detection task](https://learn.microsoft.com/en-us/azure/machine-learning/reference-automl-images-schema#object-detection)." 
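Likewise, a hypothetical single annotation line for object detection, as built by the XML-to-JSONL conversion in the next cell; all values are placeholders and box coordinates are normalized:

```python
import json

# Illustrative annotation: one record per image, each object described by a
# class label, a normalized bounding box, and an isCrowd flag.
json_line = {
    "image_url": "AmlDatastore://workspaceblobstore/odFridgeObjects/images/1.jpg",
    "label": [
        {
            "label": "carton",
            "topX": 0.23,
            "topY": 0.32,
            "bottomX": 0.57,
            "bottomY": 0.68,
            "isCrowd": 0,
        }
    ],
}
print(json.dumps(json_line))
```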
] }, { @@ -355,11 +357,11 @@
 "import json\n",
 "import os\n",
 "import xml.etree.ElementTree as ET\n",
 "\n",
- "# We'll copy each JSONL file within its related MLTable folder\n",
+ "# We will copy each JSONL file within its related MLTable folder\n",
 "training_mltable_path = os.path.join(dataset_parent_dir, \"training-mltable-folder\")\n",
 "validation_mltable_path = os.path.join(dataset_parent_dir, \"validation-mltable-folder\")\n",
 "\n",
- "# First, let's create the folders if they don't exist\n",
+ "# Create the folders if they don't exist\n",
 "os.makedirs(training_mltable_path, exist_ok=True)\n",
 "os.makedirs(validation_mltable_path, exist_ok=True)\n",
 "\n",
@@ -415,7 +417,7 @@
 " \"isCrowd\": isCrowd,\n",
 " }\n",
 " )\n",
- " # build the jsonl file\n",
+ " # Build the jsonl file\n",
 " image_filename = root.find(\"filename\").text\n",
 " _, file_extension = os.path.splitext(image_filename)\n",
 " json_line = dict(json_line_sample)\n",
@@ -426,10 +428,10 @@
 " json_line[\"label\"] = labels\n",
 "\n",
 " if i % train_validation_ratio == 0:\n",
- " # validation annotation\n",
+ " # Validation annotation\n",
 " validation_f.write(json.dumps(json_line) + \"\\n\")\n",
 " else:\n",
- " # train annotation\n",
+ " # Train annotation\n",
 " train_f.write(json.dumps(json_line) + \"\\n\")"
 ]
 },
@@ -562,19 +564,23 @@
 "metadata": {},
 "outputs": [],
 "source": [
+ "deepspeed_config_path = \"./deepspeed_configs/zero1.json\"\n",
+ "if not os.path.exists(deepspeed_config_path):\n",
+ " print(\"DeepSpeed config file not found\")\n",
+ " deepspeed_config_path = None\n",
+ "\n",
 "pipeline_component_args = {\n",
- " # model_selection_args\n",
+ " # # Model import args\n",
 " \"model_family\": \"MmDetectionImage\",\n",
- " # # specify the model_name instead of mlflow_model if you want to use a model from the mmdetection model zoo\n",
- " \"mlflow_model\": foundation_model,\n",
- " # \"model_name\": mmdetection_model_name,\n",
- " # finetune_args\n",
- " # # \"auto_hyperparameter_selection\": False,\n",
+ " \"mlflow_model\": foundation_model.id, # foundation_model.id is provided; passing foundation_model directly gives UserErrorException: only path input is supported now but get: ...\n",
+ " # \"model_name\": mmdetection_model_name, # specify the model_name instead of mlflow_model if you want to use a model from the mmdetection model zoo\n",
+ "\n",
+ " # Finetune args\n",
 " \"task_name\": \"image-object-detection\",\n",
 " \"apply_augmentations\": True,\n",
 " \"number_of_workers\": 8,\n",
 " \"apply_deepspeed\": False,\n",
- " \"deepspeed_config\": \"./deepspeed_configs/zero1.json\",\n",
+ " \"deepspeed_config\": deepspeed_config_path,\n",
 " \"apply_ort\": False,\n",
 " \"auto_find_batch_size\": False,\n",
 " \"extra_optim_args\": \"\",\n",
@@ -593,6 +599,21 @@
 " \"early_stopping_patience\": 1,\n",
 " \"resume_from_checkpoint\": False,\n",
 " \"save_as_mlflow_model\": True,\n",
+ " # # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.\n",
+ " # \"image_min_size\": -1,\n",
+ " # \"image_max_size\": -1,\n",
+ " # \"metric_for_best_model\": \"mean_average_precision\",\n",
+ " # \"number_of_epochs\": 15,\n",
+ " # \"max_steps\": -1,\n",
+ " # \"training_batch_size\": 4,\n",
+ " # \"validation_batch_size\": 4,\n",
+ " # \"learning_rate\": 5e-5,\n",
+ " # \"learning_rate_scheduler\": \"warmup_linear\",\n",
+ " # \"warmup_steps\": 0,\n",
+ " # \"optimizer\": \"adamw_hf\",\n",
+ " # \"weight_decay\": 0.0,\n",
+ " # \"gradient_accumulation_step\": 1,\n",
+ " # \"max_grad_norm\": 1.0,\n",
 "}\n",
"instance_count = 1\n", "process_count_per_instance = 1\n", @@ -623,21 +644,20 @@ "# In this case, user specified values will be respected.\n", "\n", "custom_pipeline_component_args = {\n", - " # model_selection_args\n", + " # # Model import args\n", " \"model_family\": \"MmDetectionImage\",\n", - " # # specify the model_name instead of mlflow_model if you want to use a model from the mmdetection model zoo\n", " \"mlflow_model\": foundation_model,\n", - " # \"model_name\": mmdetection_model_name,\n", - " # finetune_args\n", - " # # \"auto_hyperparameter_selection\": False,\n", + " # \"model_name\": mmdetection_model_name, # specify the model_name instead of mlflow_model if you want to use a model from the mmdetection model zoo\n", + "\n", + " # Finetune args\n", + " \"task_name\": \"image-object-detection\",\n", " \"image_min_size\": -1,\n", " \"image_max_size\": -1,\n", - " \"task_name\": \"image-object-detection\",\n", " \"metric_for_best_model\": \"mean_average_precision\",\n", " \"apply_augmentations\": True,\n", " \"number_of_workers\": 8,\n", " \"apply_deepspeed\": False,\n", - " \"deepspeed_config\": \"./deepspeed_configs/zero1.json\",\n", + " \"deepspeed_config\": deepspeed_config_path,\n", " \"apply_ort\": False,\n", " \"number_of_epochs\": 15,\n", " \"max_steps\": -1,\n", @@ -704,8 +724,7 @@ " **pipeline_component_args,\n", " )\n", " return {\n", - " # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model\n", - " # registering the model is required to deploy the model to an online or batch endpoint\n", + " # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint.\n", " \"trained_model\": mmdetection_pipeline_component.outputs.mlflow_model_folder,\n", " }" ] @@ -757,7 +776,7 @@ "source": [ "### 6. Get metrics from finetune component\n", "\n", - "The model training happens as part of the finetune component. Please follow below steps to extract validation metrics from the run" + "The model training happens as part of the finetune component. Please follow below steps to extract validation metrics from the run." ] }, { @@ -831,16 +850,17 @@ "metadata": {}, "outputs": [], "source": [ - "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", + "# Concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", "filter = \"tags.mlflow.rootRunId='\" + mmdetection_pipeline_run.name + \"'\"\n", "runs = mlflow.search_runs(experiment_names=[experiment_name], filter_string = filter, output_format=\"list\")\n", - "# get the training and evaluation runs. \n", - "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", + "\n", + "# Get the training and evaluation runs. 
\n", + "# Using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", "for run in runs:\n", - " # check if run.data.metrics.epoch exists\n", + " # Check if run.data.metrics.epoch exists\n", " if 'epoch' in run.data.metrics:\n", " training_run = run\n", - " # else, check if run.data.metrics.accuracy exists\n", + " # Else, check if run.data.metrics.accuracy exists\n", " elif 'mean_average_precision' in run.data.metrics:\n", " evaluation_run = run" ] @@ -883,7 +903,7 @@ "outputs": [], "source": [ "import time\n", - "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", + "# Generating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time())) " ] }, @@ -909,7 +929,7 @@ " path=model_path_from_job,\n", " type=AssetTypes.MLFLOW_MODEL,\n", " name=finetuned_model_name,\n", - " version=timestamp, # use timestamp as version to avoid version conflict\n", + " version=timestamp, # Use timestamp as version to avoid version conflict\n", " description=finetuned_model_description\n", ")\n", "print(f\"Prepare to register model: \\n{prepare_to_register_model}\")\n", @@ -974,7 +994,7 @@ " instance_count=1,\n", " request_settings=OnlineRequestSettings(\n", " max_concurrent_requests_per_instance=1,\n", - " request_timeout_ms=5000,\n", + " request_timeout_ms=5000, # 9000,\n", " max_queue_wait_ms=500\n", " ),\n", " liveness_probe=ProbeSettings(\n", @@ -1004,7 +1024,7 @@ "source": [ "### 9. Test the endpoint with sample data\n", "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then show the display the scored labels alongside the ground truth labels" + "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then display the scored labels alongside the ground truth labels." ] }, { @@ -1122,7 +1142,7 @@ " f\"{round(width, 3)}, {round(height, 3)}], {round(conf_score, 3)}\"\n", " )\n", "\n", - " color = np.random.rand(3) #'red'\n", + " color = np.random.rand(3)\n", " rect = patches.Rectangle(\n", " (topleft_x, topleft_y),\n", " width,\n", @@ -1142,7 +1162,7 @@ "metadata": {}, "source": [ "### 10. Clean up resources - delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" + "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint." ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb index dc2707e9b7b..024a8400464 100644 --- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb @@ -19,18 +19,16 @@ "Models that can perform the `image-classification` task are tagged with `image-classification`. We will use the `microsoft-beit-base-patch16-224-pt22k-ft22k` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. 
If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import_model_into_registry.ipynb) and then use them for inference. \n", "\n", "### Inference data\n", - "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip) dataset.\n", + "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip) dataset for image multi-class classification.\n", "\n", "\n", "### Outline\n", - "* Setup pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference.\n", - " * Using ImageFolder\n", - " * Using CSV file\n", - "* Deploy the model for batch inference.\n", - "* Test the endpoint.\n", - "* Clean up resources." + "1. Setup pre-requisites\n", + "2. Pick a model to deploy\n", + "3. Prepare data for inference - Using a folder of images; Using a csv file with base64 images\n", + "4. Deploy the model to a batch endpoint\n", + "5. Test the endpoint - Using a folder of images; Using a csv file with base64 images\n", + "6. Clean up resources - delete the endpoint" ] }, { @@ -76,14 +74,14 @@ "\n", "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n", "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", + "# The models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", "registry_ml_client = MLClient(\n", " credential,\n", " subscription_id,\n", " resource_group,\n", " registry_name=\"azureml-preview\",\n", ")\n", - "# generating a unique timestamp that can be used for names and versions that need to be unique\n", + "# Generating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time()))" ] }, @@ -130,7 +128,7 @@ "source": [ "### 2. Pick a model to deploy\n", "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-classification` task. In this example, we use the `microsoft-beit-base-patch16-224-pt22k-ft22k ` model. If you have opened this notebook for a different model, replace the model name accordingly. " + "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-classification` task. In this example, we use the `microsoft-beit-base-patch16-224-pt22k-ft22k ` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct prediction for your dataset. We strongly recommend to finetune this model on a down-stream task to be able to use it for predictions and inference. Please refer to the [multi-class classification finetuning notebook](../../finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb)." ] }, { @@ -153,9 +151,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3. Prepare data for inference\n", + "### 3. Prepare data for inference - Using a folder of images; Using a csv file with base64 images\n", "\n", - "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip) dataset. The fridge object dataset is stored in a directory. 
There are four different folders inside:\n", + "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip) dataset for multi-class classification. The fridge object dataset is stored in a directory. There are four different folders inside:\n", "- /water_bottle\n", "- /milk_bottle\n", "- /carton\n", @@ -180,7 +178,7 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", + "# Download data\n", "download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", @@ -197,13 +195,13 @@ "# Download the dataset\n", "urllib.request.urlretrieve(download_url, filename=data_file)\n", "\n", - "# extract files\n", + "# Extract files\n", "with ZipFile(data_file, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", " print(\"done\")\n", "\n", - "# delete zip file\n", + "# Delete zip file\n", "os.remove(data_file)" ] }, @@ -237,7 +235,7 @@ "source": [ "#### 3.2 Prepare CSV file with base64 images for batch inference input\n", "\n", - "We can provide input images to batch inference either in a folder containing images or in a csv file containing \"image\" named column having images in base 64 format.\n", + "We can provide input images to batch inference either in a folder containing images or in a csv file containing \"image\" named column having images in base64 format.\n", "\n", "Note: If job failed with error Assertion Error (`The actual length exceeded max length 100 MB`) then please try with less number of input images or use ImageFolder Input mode." ] @@ -281,7 +279,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 4. Deploy the model to an online endpoint\n", + "### 4. Deploy the model to a batch endpoint\n", "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. 
For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n", "\n", "* Create a batch endpoint.\n", @@ -294,7 +292,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Create a batch endpoint" + "#### Create a batch endpoint" ] }, { @@ -306,9 +304,9 @@ "import time, sys\n", "from azure.ai.ml.entities import BatchEndpoint, BatchDeployment, BatchRetrySettings, AmlCompute\n", "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", + "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", "endpoint_name = \"hf-image-classif-\" + str(timestamp)\n", - "# create a batch endpoint\n", + "# Create a batch endpoint\n", "endpoint = BatchEndpoint(\n", " name=endpoint_name,\n", " description=\"Batch endpoint for \" + foundation_model.name + \", for image-classification task\",\n", @@ -321,7 +319,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Create a batch deployment" + "#### Create a batch deployment" ] }, { @@ -370,12 +368,21 @@ "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Test the endpoint - Using a folder of images; Using a csv file with base64 images\n", + "\n", + "We will fetch some sample data from the test dataset and invoke batch endpoint for inference." + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "### 5.1 Test the endpoint with Image Folder\n", + "#### 5.1 Test the endpoint - Using folder of images from 3.1\n", "\n", "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete." ] @@ -402,7 +409,11 @@ "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n", "\n", "workspace_ml_client.jobs.download(\n", - " name=scoring_job.name, download_path=os.path.join(dataset_parent_dir, \"image-folder-output\"), output_name=\"score\"\n", + " name=scoring_job.name,\n", + " download_path=os.path.join(\n", + " dataset_parent_dir, \"image-folder-output\"\n", + " ),\n", + " output_name=\"score\"\n", ")\n", "\n", "predictions_file = os.path.join(dataset_parent_dir, \"image-folder-output\", \"named-outputs\", \"score\", \"predictions.csv\")\n", @@ -421,7 +432,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 5.2 Test the endpoint with CSV input\n", + "#### 5.2 Test the endpoint - Using CSV input with base64 images from 3.2\n", "\n", "Invoke the batch endpoint with the input parameter pointing to the csv file containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete." 
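A sketch of that csv invocation; the csv file name is a placeholder, and the actual name comes from the cell that wrote it in section 3.2:

```python
import os

from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes

# Invoke the endpoint's default deployment with the csv of base64 images
# and stream the resulting pipeline job until it completes.
csv_input = Input(
    path=os.path.join(dataset_parent_dir, "image_list.csv"),  # placeholder file name
    type=AssetTypes.URI_FILE,
)
job = workspace_ml_client.batch_endpoints.invoke(endpoint_name=endpoint.name, input=csv_input)
workspace_ml_client.jobs.stream(job.name)
```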
] @@ -448,7 +459,11 @@ "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n", "\n", "workspace_ml_client.jobs.download(\n", - " name=scoring_job.name, download_path=os.path.join(dataset_parent_dir, \"csv-output\"), output_name=\"score\"\n", + " name=scoring_job.name,\n", + " download_path=os.path.join(\n", + " dataset_parent_dir, \"csv-output\"\n", + " ),\n", + " output_name=\"score\"\n", ")\n", "\n", "predictions_file = os.path.join(dataset_parent_dir, \"csv-output\", \"named-outputs\", \"score\", \"predictions.csv\")\n", @@ -467,7 +482,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 6. Clean up resources\n", + "### 6. Clean up resources - delete the endpoint\n", "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute." ] }, diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb index b4a720fd7b5..10181705012 100644 --- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb @@ -23,12 +23,12 @@ "\n", "\n", "### Outline\n", - "* Setup pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for real time inference.\n", - "* Test the endpoint\n", - "* Clean up resources." + "1. Setup pre-requisites\n", + "2. Pick a model to deploy\n", + "3. Prepare data for inference\n", + "4. Deploy the model to an online endpoint for real time inference\n", + "5. Test the endpoint\n", + "6. Clean up resources - delete the online endpoint" ] }, { @@ -72,14 +72,14 @@ " workspace_name = \"\"\n", "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n", "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", + "# The models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", "registry_ml_client = MLClient(\n", " credential,\n", " subscription_id,\n", " resource_group,\n", " registry_name=\"azureml-preview\",\n", ")\n", - "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", + "# Generating a unique timestamp that can be used for names and versions that need to be unique\n", "timestamp = str(int(time.time()))" ] }, @@ -90,7 +90,7 @@ "source": [ "### 2. Pick a model to deploy\n", "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-classification` task. In this example, we use the `microsoft-beit-base-patch16-224-pt22k-ft22k ` model. If you have opened this notebook for a different model, replace the model name accordingly. " + "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-classification` task. In this example, we use the `microsoft-beit-base-patch16-224-pt22k-ft22k ` model. If you have opened this notebook for a different model, replace the model name accordingly. 
This is a pre-trained model and may not give correct prediction for your dataset. We strongly recommend to finetune this model on a down-stream task to be able to use it for predictions and inference. Please refer to the [multi-class classification finetuning notebook](../../finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb)." ] }, { @@ -112,9 +112,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3. Prepare multi-class classification data for inference\n", + "### 3. Prepare data for inference\n", "\n", - "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip) dataset. The fridge object dataset is stored in a directory. There are four different folders inside:\n", + "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip) dataset for multi-class classification task. The fridge object dataset is stored in a directory. There are four different folders inside:\n", "- /water_bottle\n", "- /milk_bottle\n", "- /carton\n", @@ -136,10 +136,10 @@ "# Change to a different location if you prefer\n", "dataset_parent_dir = \"./data\"\n", "\n", - "# create data folder if it doesnt exist.\n", + "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", + "# Download data\n", "download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", @@ -153,12 +153,12 @@ "# Download the dataset\n", "urllib.request.urlretrieve(download_url, filename=data_file)\n", "\n", - "# extract files\n", + "# Extract files\n", "with ZipFile(data_file, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", " print(\"done\")\n", - "# delete zip file\n", + "# Delete zip file\n", "os.remove(data_file)" ] }, @@ -179,7 +179,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 4. Deploy the model to an online endpoint\n", + "### 4. Deploy the model to an online endpoint for real time inference\n", "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." 
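To illustrate that REST surface, a sketch of scoring the endpoint over plain HTTP once it exists; key-based auth via `get_keys` is an assumption about the endpoint configuration, and `request_json` is the payload built in section 5:

```python
import requests

# Fetch the scoring URI and an auth key, then POST the same JSON payload
# that the SDK invoke call sends under the hood.
endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)
keys = workspace_ml_client.online_endpoints.get_keys(name=online_endpoint_name)
headers = {
    "Authorization": f"Bearer {keys.primary_key}",
    "Content-Type": "application/json",
}
response = requests.post(endpoint.scoring_uri, headers=headers, json=request_json)
print(response.json())
```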
] }, @@ -192,10 +192,10 @@ "import time, sys\n", "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment, OnlineRequestSettings\n", "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", + "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", "timestamp = int(time.time())\n", "online_endpoint_name = \"hf-image-classif-\" + str(timestamp)\n", - "# create an online endpoint\n", + "# Create an online endpoint\n", "endpoint = ManagedOnlineEndpoint(\n", " name=online_endpoint_name,\n", " description=\"Online endpoint for \" + foundation_model.name + \", for image-classification task\",\n", @@ -223,11 +223,12 @@ " name=deployment_name,\n", " endpoint_name=online_endpoint_name,\n", " model=foundation_model.id,\n", - " # use GPU instance type like Standard_NC6s_v3 for faster explanations\n", - " instance_type=\"Standard_DS3_V2\",\n", + " instance_type=\"Standard_DS3_V2\", # Use GPU instance type like Standard_NC6s_v3 for faster inference\n", " instance_count=1,\n", " request_settings=OnlineRequestSettings(\n", - " max_concurrent_requests_per_instance=1, request_timeout_ms=5000, max_queue_wait_ms=500 # 90000,\n", + " max_concurrent_requests_per_instance=1,\n", + " request_timeout_ms=5000, # 90000,\n", + " max_queue_wait_ms=500\n", " ),\n", " liveness_probe=ProbeSettings(\n", " failure_threshold=30,\n", @@ -254,9 +255,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 5. Test the endpoint with sample data\n", + "### 5. Test the endpoint\n", "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then show the scored labels alongside the ground truth labels." + "We will fetch some sample data from the test dataset and submit to online endpoint for inference." 
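If the invocation below fails, the deployment logs usually show the scoring error directly; a small sketch using the names defined above:

```python
# Pull the last lines of the scoring container's logs for this deployment.
logs = workspace_ml_client.online_deployments.get_logs(
    name=deployment_name,
    endpoint_name=online_endpoint_name,
    lines=50,
)
print(logs)
```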
   ]
  },
  {
@@ -273,8 +274,9 @@
     "# Get the details for online endpoint\n",
     "endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)\n",
     "\n",
-    "# existing traffic details\n",
+    "# Existing traffic details\n",
     "print(endpoint.traffic)\n",
+    "\n",
     "# Get the scoring URI\n",
     "print(endpoint.scoring_uri)\n",
     "print(demo_deployment)"
   ]
  },
  {
@@ -286,27 +288,26 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Create request json\n",
     "import base64\n",
     "import json\n",
     "\n",
     "sample_image = os.path.join(dataset_dir, \"milk_bottle\", \"99.jpg\")\n",
     "\n",
-    "\n",
     "def read_image(image_path):\n",
     "    with open(image_path, \"rb\") as f:\n",
     "        return f.read()\n",
     "\n",
-    "\n",
-    "# {\"inputs\":{\"image\":[\"\"]}}\n",
     "request_json = {\n",
-    "    \"inputs\": {\n",
-    "        \"image\": [base64.encodebytes(read_image(sample_image)).decode(\"utf-8\")],\n",
-    "    }\n",
+    "    \"input_data\":\n",
+    "    {\n",
+    "        \"columns\": [\"image\"],\n",
+    "        \"index\": [0],\n",
+    "        \"data\": [base64.encodebytes(read_image(sample_image)).decode(\"utf-8\")]\n",
+    "    }\n",
     "}\n",
     "\n",
+    "# Create request json\n",
     "request_file_name = \"sample_request_data.json\"\n",
-    "\n",
     "with open(request_file_name, \"w\") as request_file:\n",
     "    json.dump(request_json, request_file)"
   ]
  },
  {
@@ -317,7 +318,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "# Score the sample_request_data.json file using the online endpoint with the azureml endpoint invoke method\n",
     "response = workspace_ml_client.online_endpoints.invoke(\n",
     "    endpoint_name=online_endpoint_name,\n",
     "    deployment_name=demo_deployment.name,\n",
@@ -331,8 +332,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 6. Delete the online endpoint\n",
-    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+    "### 6. Clean up resources - delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint."
   ]
  },
  {
diff --git a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb
index fd6d9798ea6..d43330faefd 100644
--- a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb
@@ -16,18 +16,16 @@
     "Models that can perform the `image-instance-segmentation` task are tagged with `image-instance-segmentation`. We will use the `mask_rcnn_swin-t-p4-w7_fpn_1x_coco` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name.\n",
     "\n",
     "### Inference data\n",
-    "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip) dataset.\n",
+    "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip) dataset for image instance segmentation.\n",
     "\n",
     "\n",
     "### Outline\n",
-    "* Setup pre-requisites.\n",
-    "* Pick a model to deploy.\n",
-    "* Prepare data for inference.\n",
-    "  * Using ImageFolder\n",
-    "  * Using CSV file\n",
-    "* Deploy the model for batch inference.\n",
-    "* Test the endpoint.\n",
-    "* Clean up resources."
+    "1. Setup pre-requisites\n",
+    "2. Pick a model to deploy\n",
+    "3. Prepare data for inference - Using a folder of images; Using a csv file with base64 images\n",
+    "4. Deploy the model to a batch endpoint\n",
+    "5. Test the endpoint - Using a folder of images; Using a csv file with base64 images\n",
+    "6. Clean up resources - delete the endpoint"
   ]
  },
  {
@@ -38,7 +36,7 @@
     "### 1. Setup pre-requisites\n",
     "* Install dependencies\n",
     "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n",
-    "* Connect to `azureml` system registry"
+    "* Connect to `azureml-preview` system registry"
   ]
  },
  {
@@ -73,14 +71,14 @@
     "\n",
     "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n",
     "\n",
-    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "# The models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
     "registry_ml_client = MLClient(\n",
     "    credential,\n",
     "    subscription_id,\n",
     "    resource_group,\n",
     "    registry_name=\"azureml-preview\",\n",
     ")\n",
-    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "# Generating a unique timestamp that can be used for names and versions that need to be unique\n",
     "timestamp = str(int(time.time()))"
   ]
  },
@@ -127,7 +125,7 @@
    "source": [
     "### 2. Pick a model to deploy\n",
     "\n",
-    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-instance-segmentation` task. In this example, we use the `mask_rcnn_swin-t-p4-w7_fpn_1x_coco` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct prediction for your dataset. We strongly recommend to TRAIN this model on a down-stream task to be able to use it for predictions and inference. Please refer to the [notebook.](../../finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb)"
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-segmentation` task or `image-instance-segmentation` finetuning task. In this example, we use the `mask_rcnn_swin-t-p4-w7_fpn_1x_coco` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct predictions for your dataset. We strongly recommend finetuning this model on a downstream task before using it for predictions and inference. Please refer to the [image instance segmentation finetuning notebook](../../finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb)."
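For reference, picking the model reduces to a single registry call; a minimal sketch using the `registry_ml_client` created above (the `label="latest"` argument resolves the newest registered version):

```python
model_name = "mask_rcnn_swin-t-p4-w7_fpn_1x_coco"

# Resolve the newest version of the model from the system registry
foundation_model = registry_ml_client.models.get(name=model_name, label="latest")
print(
    f"Using model name: {foundation_model.name}, version: {foundation_model.version}, "
    f"id: {foundation_model.id} for inferencing"
)
```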
   ]
  },
  {
@@ -149,9 +147,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 3. Prepare data for inference\n",
+    "### 3. Prepare data for inference - Using a folder of images; Using a csv file with base64 images\n",
     "\n",
-    "We will use the [odFridgeObjectsMask](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip) dataset.\n"
+    "We will use the [odFridgeObjectsMask](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip) dataset for image instance segmentation.\n"
   ]
  },
  {
@@ -171,7 +169,7 @@
     "# create data folder if it doesnt exist.\n",
     "os.makedirs(dataset_parent_dir, exist_ok=True)\n",
     "\n",
-    "# download data\n",
+    "# Download data\n",
     "download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip\"\n",
     "\n",
     "# Extract current dataset name from dataset url\n",
@@ -188,13 +186,13 @@
     "# Download the dataset\n",
     "urllib.request.urlretrieve(download_url, filename=data_file)\n",
     "\n",
-    "# extract files\n",
+    "# Extract files\n",
     "with ZipFile(data_file, \"r\") as zip:\n",
     "    print(\"extracting files...\")\n",
     "    zip.extractall(path=dataset_parent_dir)\n",
     "    print(\"done\")\n",
     "\n",
-    "# delete zip file\n",
+    "# Delete zip file\n",
     "os.remove(data_file)"
   ]
  },
  {
@@ -203,7 +201,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### 3.1 Image folder for Batch inference"
+    "#### 3.1 Arrange images in a common folder for batch inference input"
   ]
  },
  {
@@ -222,9 +220,9 @@
    "source": [
     "#### 3.2 Prepare CSV file with base64 images for batch inference input\n",
     "\n",
-    "We can provide input images to batch inference either in a folder containing images or in a csv file containing \"image\" named column having images in base 64 format.\n",
+    "We can provide input images to batch inference either in a folder containing images or in a csv file with a column named \"image\" that holds the images in base64 format.\n",
     "\n",
-    "Note: If job failed with error Assertion Error (The actual length exceeded max length 100 MB) then please try with less number of input images or use ImageFolder Input mode."
+    "Note: If the job fails with an Assertion Error (`The actual length exceeded max length 100 MB`), try with a smaller number of input images or use the ImageFolder input mode."
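A sketch of preparing such a csv file. The "image" column name comes from the note above; the `images` subfolder and the 10-image cap are illustrative assumptions to stay well under the 100 MB limit:

```python
import base64
import glob
import os

import pandas as pd

# Encode a small sample of images as base64 strings (assumed folder layout)
image_paths = glob.glob(os.path.join(dataset_dir, "images", "*.jpg"))[:10]

def image_to_base64(path: str) -> str:
    with open(path, "rb") as f:
        return base64.encodebytes(f.read()).decode("utf-8")

df = pd.DataFrame({"image": [image_to_base64(p) for p in image_paths]})
df.to_csv(os.path.join(dataset_parent_dir, "image_base64.csv"), index=False)
```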
   ]
  },
  {
@@ -261,12 +259,6 @@
     "Image(filename=sample_image)"
   ]
  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
  {
   "attachments": {},
   "cell_type": "markdown",
@@ -285,7 +277,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "##### Create a batch endpoint"
+    "#### Create a batch endpoint"
   ]
  },
  {
@@ -297,9 +289,9 @@
     "import time, sys\n",
     "from azure.ai.ml.entities import BatchEndpoint, BatchDeployment, BatchRetrySettings, AmlCompute\n",
     "\n",
-    "# Create batch endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "# Endpoint names need to be unique in a region, hence we use a timestamp to create a unique endpoint name\n",
     "endpoint_name = \"mmd-image-is-\" + str(timestamp)\n",
-    "# create a batch endpoint\n",
+    "# Create a batch endpoint\n",
     "endpoint = BatchEndpoint(\n",
     "    name=endpoint_name,\n",
     "    description=\"Batch endpoint for \" + foundation_model.name + \", for image-instance-segmentation task\",\n",
@@ -312,7 +304,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "##### Create a batch deployment"
+    "#### Create a batch deployment"
   ]
  },
  {
@@ -361,12 +353,21 @@
     "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Test the endpoint - Using a folder of images; Using a csv file with base64 images\n",
+    "\n",
+    "We will fetch some sample data from the test dataset and invoke the batch endpoint for inference."
+   ]
+  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "### 5.1 Test the endpoint with Image Folder\n",
+    "#### 5.1 Test the endpoint - Using the folder of images from 3.1\n",
     "\n",
     "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete."
   ]
  },
@@ -393,7 +394,9 @@
     "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n",
     "\n",
     "workspace_ml_client.jobs.download(\n",
-    "    name=scoring_job.name, download_path=os.path.join(dataset_parent_dir, \"image-folder-output\"), output_name=\"score\"\n",
+    "    name=scoring_job.name,\n",
+    "    download_path=os.path.join(dataset_parent_dir, \"image-folder-output\"),\n",
+    "    output_name=\"score\"\n",
     ")\n",
     "\n",
     "predictions_file = os.path.join(dataset_parent_dir, \"image-folder-output\", \"named-outputs\", \"score\", \"predictions.csv\")\n",
@@ -412,7 +415,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 5.2 Test the endpoint with CSV input\n",
+    "#### 5.2 Test the endpoint - Using the CSV input with base64 images from 3.2\n",
     "\n",
     "Invoke the batch endpoint with the input parameter pointing to the csv file containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete."
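A sketch of that invocation; the csv path is a stand-in for wherever the file from section 3.2 was written:

```python
from azure.ai.ml import Input

# Point the batch endpoint at the csv file and wait for the pipeline job
job = workspace_ml_client.batch_endpoints.invoke(
    endpoint_name=endpoint.name,
    input=Input(path="./data/image_base64.csv", type="uri_file"),  # illustrative path
)
workspace_ml_client.jobs.stream(job.name)
```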
   ]
@@ -439,7 +442,9 @@
     "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n",
     "\n",
     "workspace_ml_client.jobs.download(\n",
-    "    name=scoring_job.name, download_path=os.path.join(dataset_parent_dir, \"csv-output\"), output_name=\"score\"\n",
+    "    name=scoring_job.name,\n",
+    "    download_path=os.path.join(dataset_parent_dir, \"csv-output\"),\n",
+    "    output_name=\"score\"\n",
     ")\n",
     "\n",
     "predictions_file = os.path.join(dataset_parent_dir, \"csv-output\", \"named-outputs\", \"score\", \"predictions.csv\")\n",
@@ -458,7 +463,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 6. Clean up resources\n",
+    "### 6. Clean up resources - delete the endpoint\n",
     "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute."
   ]
  },
diff --git a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb
index d901e888e71..644dd956c77 100644
--- a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb
@@ -16,18 +16,16 @@
     "Models that can perform the `image-instance-segmentation` task are tagged with `image-instance-segmentation`. We will use the `mask_rcnn_swin-t-p4-w7_fpn_1x_coco` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name.\n",
     "\n",
     "### Inference data\n",
-    "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip) dataset.\n",
+    "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip) dataset for instance segmentation.\n",
     "\n",
     "\n",
     "### Outline\n",
-    "* Setup pre-requisites.\n",
-    "* Pick a model to deploy.\n",
-    "* Prepare data for inference.\n",
-    "  * Using ImageFolder\n",
-    "  * Using CSV file\n",
-    "* Deploy the model for online inference.\n",
-    "* Test the endpoint.\n",
-    "* Clean up resources."
+    "1. Setup pre-requisites\n",
+    "2. Pick a model to deploy\n",
+    "3. Prepare data for inference\n",
+    "4. Deploy the model to an online endpoint for real-time inference\n",
+    "5. Test the endpoint\n",
+    "6. Clean up resources - delete the online endpoint"
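For reference, the cleanup in step 6 of this outline amounts to a single call once `online_endpoint_name` exists (it is defined in section 4 below):

```python
# Delete the endpoint and its deployments to stop the billing meter
workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()
```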
   ]
  },
  {
@@ -71,14 +69,14 @@
     "    workspace_name = \"\"\n",
     "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n",
     "\n",
-    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "# The models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
     "registry_ml_client = MLClient(\n",
     "    credential,\n",
     "    subscription_id,\n",
     "    resource_group,\n",
     "    registry_name=\"azureml-preview\",\n",
     ")\n",
-    "# genrating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "# Generating a unique timestamp that can be used for names and versions that need to be unique\n",
     "timestamp = str(int(time.time()))"
   ]
  },
@@ -89,7 +87,7 @@
    "source": [
     "### 2. Pick a model to deploy\n",
     "\n",
-    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-instance-segmentation` task. In this example, we use the `mask_rcnn_swin-t-p4-w7_fpn_1x_coco` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct prediction for your dataset. We strongly recommend to TRAIN this model on a down-stream task to be able to use it for predictions and inference. Please refer to the [notebook](../../finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb)."
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-instance-segmentation` task. In this example, we use the `mask_rcnn_swin-t-p4-w7_fpn_1x_coco` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct predictions for your dataset. We strongly recommend finetuning this model on a downstream task before using it for predictions and inference. Please refer to the [image instance segmentation finetuning notebook](../../finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb)."
   ]
  },
  {
@@ -111,9 +109,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 3. Prepare instance segmentation data for inference\n",
+    "### 3. Prepare data for inference\n",
     "\n",
-    "We will use the [odFridgeObjectsMask](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip) dataset."
+    "We will use the [odFridgeObjectsMask](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip) dataset for the instance segmentation task."
   ]
  },
  {
@@ -129,10 +127,10 @@
     "# Change to a different location if you prefer\n",
     "dataset_parent_dir = \"./data\"\n",
     "\n",
-    "# create data folder if it doesnt exist.\n",
+    "# Create data folder if it doesn't exist.\n",
     "os.makedirs(dataset_parent_dir, exist_ok=True)\n",
     "\n",
-    "# download data\n",
+    "# Download data\n",
     "download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip\"\n",
     "\n",
     "# Extract current dataset name from dataset url\n",
@@ -146,12 +144,12 @@
     "# Download the dataset\n",
     "urllib.request.urlretrieve(download_url, filename=data_file)\n",
     "\n",
-    "# extract files\n",
+    "# Extract files\n",
     "with ZipFile(data_file, \"r\") as zip:\n",
     "    print(\"extracting files...\")\n",
     "    zip.extractall(path=dataset_parent_dir)\n",
     "    print(\"done\")\n",
-    "# delete zip file\n",
+    "# Delete zip file\n",
     "os.remove(data_file)"
   ]
  },
@@ -172,7 +170,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 4. Deploy the model to an online endpoint\n",
+    "### 4. Deploy the model to an online endpoint for real-time inference\n",
     "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
   ]
  },
  {
@@ -185,10 +183,10 @@
     "import time\n",
     "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n",
     "\n",
-    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "# Endpoint names need to be unique in a region, hence we use a timestamp to create a unique endpoint name\n",
     "timestamp = int(time.time())\n",
     "online_endpoint_name = \"mmd-image-is-\" + str(timestamp)\n",
-    "# create an online endpoint\n",
+    "# Create an online endpoint\n",
     "endpoint = ManagedOnlineEndpoint(\n",
     "    name=online_endpoint_name,\n",
     "    description=\"Online endpoint for \" + foundation_model.name + \", for image-instance-segmentation task\",\n",
@@ -216,7 +214,7 @@
     "    name=deployment_name,\n",
     "    endpoint_name=online_endpoint_name,\n",
     "    model=foundation_model.id,\n",
-    "    instance_type=\"Standard_DS3_v2\",\n",
+    "    instance_type=\"Standard_DS3_v2\",  # Use GPU instance type like Standard_NC6s_v3 for faster inference\n",
     "    instance_count=1,\n",
     "    request_settings=OnlineRequestSettings(\n",
     "        max_concurrent_requests_per_instance=1,\n",
@@ -248,7 +246,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 5. Test the endpoint with sample data\n",
+    "### 5. Test the endpoint\n",
     "\n",
     "We will fetch some sample data from the test dataset and submit to online endpoint for inference."
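Before testing, all scoring traffic has to point at the new deployment if the notebook has not already routed it; a sketch, where the key is assumed to match the `deployment_name` used above:

```python
# Send 100% of scoring traffic to the new deployment, then update the endpoint
endpoint.traffic = {deployment_name: 100}
workspace_ml_client.begin_create_or_update(endpoint).result()
```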
   ]
@@ -267,8 +265,9 @@
     "# Get the details for online endpoint\n",
     "endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)\n",
     "\n",
-    "# existing traffic details\n",
+    "# Existing traffic details\n",
     "print(endpoint.traffic)\n",
+    "\n",
     "# Get the scoring URI\n",
     "print(endpoint.scoring_uri)\n",
     "print(demo_deployment)"
   ]
  },
  {
@@ -280,7 +279,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Create request json\n",
     "import base64\n",
     "import json\n",
     "\n",
@@ -299,8 +297,8 @@
     "    }\n",
     "}\n",
     "\n",
+    "# Create request json\n",
     "request_file_name = \"sample_request_data.json\"\n",
-    "\n",
     "with open(request_file_name, \"w\") as request_file:\n",
     "    json.dump(request_json, request_file)"
   ]
  },
  {
@@ -311,7 +309,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "# Score the sample_request_data.json file using the online endpoint with the azureml endpoint invoke method\n",
     "response = workspace_ml_client.online_endpoints.invoke(\n",
     "    endpoint_name=online_endpoint_name,\n",
     "    deployment_name=demo_deployment.name,\n",
@@ -325,8 +323,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 6. Delete the online endpoint\n",
-    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+    "### 6. Clean up resources - delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint."
   ]
  },
  {
diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb
index dda7b239689..86f03404f73 100644
--- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb
@@ -16,18 +16,16 @@
     "Models that can perform the `image-object-detection` task are tagged with `image-object-detection`. We will use the `yolof_r50_c5_8x8_1x_coco` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name.\n",
     "\n",
     "### Inference data\n",
-    "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset.\n",
+    "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset for image object detection.\n",
     "\n",
     "\n",
     "### Outline\n",
-    "* Setup pre-requisites.\n",
-    "* Pick a model to deploy.\n",
-    "* Prepare data for inference.\n",
-    "  * Using ImageFolder\n",
-    "  * Using CSV file\n",
-    "* Deploy the model for batch inference.\n",
-    "* Test the endpoint.\n",
-    "* Clean up resources."
+    "1. Setup pre-requisites\n",
+    "2. Pick a model to deploy\n",
+    "3. Prepare data for inference - Using a folder of images; Using a csv file with base64 images\n",
+    "4. Deploy the model to a batch endpoint\n",
+    "5. Test the endpoint - Using a folder of images; Using a csv file with base64 images\n",
+    "6. Clean up resources - delete the endpoint"
   ]
  },
  {
@@ -73,14 +71,14 @@
     "\n",
     "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n",
     "\n",
-    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "# The models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
     "registry_ml_client = MLClient(\n",
     "    credential,\n",
     "    subscription_id,\n",
     "    resource_group,\n",
     "    registry_name=\"azureml-preview\",\n",
     ")\n",
-    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "# Generating a unique timestamp that can be used for names and versions that need to be unique\n",
     "timestamp = str(int(time.time()))"
   ]
  },
@@ -127,7 +125,7 @@
    "source": [
     "### 2. Pick a model to deploy\n",
     "\n",
-    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-object-detection` task. In this example, we use the `yolof_r50_c5_8x8_1x_coco ` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct prediction for your dataset. We strongly recommend to TRAIN this model on a down-stream task to be able to use it for predictions and inference. Please refer to the [notebook.](../../finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb)"
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-object-detection` task. In this example, we use the `yolof_r50_c5_8x8_1x_coco` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct predictions for your dataset. We strongly recommend finetuning this model on a downstream task before using it for predictions and inference. Please refer to the [image object detection finetuning notebook](../../finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb)."
   ]
  },
  {
@@ -149,9 +147,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 3. Prepare data for inference\n",
+    "### 3. Prepare data for inference - Using a folder of images; Using a csv file with base64 images\n",
     "\n",
-    "We will use the [odFridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset.\n"
+    "We will use the [odFridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset for image object detection.\n"
   ]
  },
  {
@@ -171,7 +169,7 @@
     "# create data folder if it doesnt exist.\n",
     "os.makedirs(dataset_parent_dir, exist_ok=True)\n",
     "\n",
-    "# download data\n",
+    "# Download data\n",
     "download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip\"\n",
     "\n",
     "# Extract current dataset name from dataset url\n",
@@ -188,13 +186,13 @@
     "# Download the dataset\n",
     "urllib.request.urlretrieve(download_url, filename=data_file)\n",
     "\n",
-    "# extract files\n",
+    "# Extract files\n",
     "with ZipFile(data_file, \"r\") as zip:\n",
     "    print(\"extracting files...\")\n",
     "    zip.extractall(path=dataset_parent_dir)\n",
     "    print(\"done\")\n",
     "\n",
-    "# delete zip file\n",
+    "# Delete zip file\n",
     "os.remove(data_file)"
   ]
  },
  {
@@ -203,7 +201,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### 3.1 Image folder for Batch inference"
+    "#### 3.1 Arrange images in a common folder for batch inference input"
   ]
  },
  {
@@ -222,9 +220,9 @@
    "source": [
     "#### 3.2 Prepare CSV file with base64 images for batch inference input\n",
     "\n",
-    "We can provide input images to batch inference either in a folder containing images or in a csv file containing \"image\" named column having images in base 64 format.\n",
+    "We can provide input images to batch inference either in a folder containing images or in a csv file with a column named \"image\" that holds the images in base64 format.\n",
     "\n",
-    "Note: If job failed with error Assertion Error (The actual length exceeded max length 100 MB) then please try with less number of input images or use ImageFolder Input mode."
+    "Note: If the job fails with an Assertion Error (`The actual length exceeded max length 100 MB`), try with a smaller number of input images or use the ImageFolder input mode."
   ]
  },
  {
@@ -261,12 +259,6 @@
     "Image(filename=sample_image)"
   ]
  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
  {
   "attachments": {},
   "cell_type": "markdown",
@@ -285,7 +277,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "##### Create a batch endpoint"
+    "#### Create a batch endpoint"
   ]
  },
  {
@@ -297,9 +289,9 @@
     "import time, sys\n",
     "from azure.ai.ml.entities import BatchEndpoint, BatchDeployment, BatchRetrySettings, AmlCompute\n",
     "\n",
-    "# Create batch endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "# Endpoint names need to be unique in a region, hence we use a timestamp to create a unique endpoint name\n",
     "endpoint_name = \"mmd-image-od-\" + str(timestamp)\n",
-    "# create a batch endpoint\n",
+    "# Create a batch endpoint\n",
     "endpoint = BatchEndpoint(\n",
     "    name=endpoint_name,\n",
     "    description=\"Batch endpoint for \" + foundation_model.name + \", for image-object-detection task\",\n",
@@ -361,12 +353,21 @@
     "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Test the endpoint - Using a folder of images; Using a csv file with base64 images\n",
+    "\n",
+    "We will fetch some sample data from the test dataset and invoke the batch endpoint for inference."
+   ]
+  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "### 5.1 Test the endpoint with Image Folder\n",
+    "#### 5.1 Test the endpoint - Using the folder of images from 3.1\n",
     "\n",
     "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete."
   ]
  },
@@ -393,7 +394,9 @@
     "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n",
     "\n",
     "workspace_ml_client.jobs.download(\n",
-    "    name=scoring_job.name, download_path=os.path.join(dataset_parent_dir, \"image-folder-output\"), output_name=\"score\"\n",
+    "    name=scoring_job.name,\n",
+    "    download_path=os.path.join(dataset_parent_dir, \"image-folder-output\"),\n",
+    "    output_name=\"score\"\n",
     ")\n",
     "\n",
     "predictions_file = os.path.join(dataset_parent_dir, \"image-folder-output\", \"named-outputs\", \"score\", \"predictions.csv\")\n",
@@ -412,7 +415,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 5.2 Test the endpoint with CSV input\n",
+    "#### 5.2 Test the endpoint - Using the CSV input with base64 images from 3.2\n",
     "\n",
     "Invoke the batch endpoint with the input parameter pointing to the csv file containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete."
   ]
@@ -439,7 +442,9 @@
     "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n",
     "\n",
     "workspace_ml_client.jobs.download(\n",
-    "    name=scoring_job.name, download_path=os.path.join(dataset_parent_dir, \"csv-output\"), output_name=\"score\"\n",
+    "    name=scoring_job.name,\n",
+    "    download_path=os.path.join(dataset_parent_dir, \"csv-output\"),\n",
+    "    output_name=\"score\"\n",
     ")\n",
     "\n",
     "predictions_file = os.path.join(dataset_parent_dir, \"csv-output\", \"named-outputs\", \"score\", \"predictions.csv\")\n",
@@ -458,7 +463,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 6. Clean up resources\n",
+    "### 6. Clean up resources - delete the endpoint\n",
     "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute."
   ]
  },
diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb
index c153480c98e..bc24496959e 100644
--- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb
@@ -16,16 +16,16 @@
     "Models that can perform the `image-object-detection` task are tagged with `image-object-detection`. We will use the `yolof_r50_c5_8x8_1x_coco` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name.\n",
     "\n",
     "### Inference data\n",
-    "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset.\n",
+    "We will use the [fridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset for object detection.\n",
     "\n",
     "\n",
     "### Outline\n",
-    "* Setup pre-requisites.\n",
-    "* Pick a model to deploy.\n",
-    "* Prepare data for inference. \n",
-    "* Deploy the model for real time inference.\n",
-    "* Test the endpoint\n",
-    "* Clean up resources."
+    "1. Setup pre-requisites\n",
+    "2. Pick a model to deploy\n",
+    "3. Prepare data for inference\n",
+    "4. Deploy the model to an online endpoint for real-time inference\n",
+    "5. Test the endpoint\n",
+    "6. Clean up resources - delete the online endpoint"
   ]
  },
  {
@@ -69,14 +69,14 @@
     "    workspace_name = \"\"\n",
     "workspace_ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n",
     "\n",
-    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "# The models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
     "registry_ml_client = MLClient(\n",
     "    credential,\n",
     "    subscription_id,\n",
     "    resource_group,\n",
     "    registry_name=\"azureml-preview\",\n",
     ")\n",
-    "# genrating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "# Generating a unique timestamp that can be used for names and versions that need to be unique\n",
     "timestamp = str(int(time.time()))"
   ]
  },
@@ -87,7 +87,7 @@
    "source": [
     "### 2. Pick a model to deploy\n",
     "\n",
-    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-object-detection` task. In this example, we use the `yolof_r50_c5_8x8_1x_coco ` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct prediction for your dataset. We strongly recommend to TRAIN this model on a down-stream task to be able to use it for predictions and inference. Please refer to the [notebook](../../finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb)."
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `image-object-detection` task. In this example, we use the `yolof_r50_c5_8x8_1x_coco` model. If you have opened this notebook for a different model, replace the model name accordingly. This is a pre-trained model and may not give correct predictions for your dataset. We strongly recommend finetuning this model on a downstream task before using it for predictions and inference. Please refer to the [image object detection finetuning notebook](../../finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb)."
   ]
  },
  {
@@ -109,9 +109,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 3. Prepare object detection data for inference\n",
+    "### 3. Prepare data for inference\n",
     "\n",
-    "We will use the [odFridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset."
+    "We will use the [odFridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset for object detection."
   ]
  },
  {
@@ -127,10 +127,10 @@
     "# Change to a different location if you prefer\n",
     "dataset_parent_dir = \"./data\"\n",
     "\n",
-    "# create data folder if it doesnt exist.\n",
+    "# Create data folder if it doesn't exist.\n",
     "os.makedirs(dataset_parent_dir, exist_ok=True)\n",
     "\n",
-    "# download data\n",
+    "# Download data\n",
     "download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip\"\n",
     "\n",
     "# Extract current dataset name from dataset url\n",
@@ -144,12 +144,12 @@
     "# Download the dataset\n",
     "urllib.request.urlretrieve(download_url, filename=data_file)\n",
     "\n",
-    "# extract files\n",
+    "# Extract files\n",
     "with ZipFile(data_file, \"r\") as zip:\n",
     "    print(\"extracting files...\")\n",
     "    zip.extractall(path=dataset_parent_dir)\n",
     "    print(\"done\")\n",
-    "# delete zip file\n",
+    "# Delete zip file\n",
     "os.remove(data_file)"
   ]
  },
@@ -170,7 +170,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 4. Deploy the model to an online endpoint\n",
+    "### 4. Deploy the model to an online endpoint for real-time inference\n",
     "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
   ]
  },
  {
@@ -183,10 +183,10 @@
     "import time\n",
     "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n",
     "\n",
-    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "# Endpoint names need to be unique in a region, hence we use a timestamp to create a unique endpoint name\n",
     "timestamp = int(time.time())\n",
     "online_endpoint_name = \"mmd-image-od-\" + str(timestamp)\n",
-    "# create an online endpoint\n",
+    "# Create an online endpoint\n",
     "endpoint = ManagedOnlineEndpoint(\n",
     "    name=online_endpoint_name,\n",
     "    description=\"Online endpoint for \" + foundation_model.name + \", for image-object-detection task\",\n",
@@ -214,7 +214,7 @@
     "    name=deployment_name,\n",
     "    endpoint_name=online_endpoint_name,\n",
     "    model=foundation_model.id,\n",
-    "    instance_type=\"Standard_DS3_v2\",\n",
+    "    instance_type=\"Standard_DS3_v2\",  # Use GPU instance type like Standard_NC6s_v3 for faster inference\n",
     "    instance_count=1,\n",
     "    request_settings=OnlineRequestSettings(\n",
     "        max_concurrent_requests_per_instance=1,\n",
@@ -246,9 +246,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 5. Test the endpoint with sample data\n",
+    "### 5. Test the endpoint\n",
     "\n",
-    "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then show the display the scored labels alongside the ground truth labels"
+    "We will fetch some sample data from the test dataset and submit it to the online endpoint for inference."
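Once the invoke cell below returns, `response` is a raw JSON string. A sketch of inspecting it, assuming the boxes/label/score layout that AzureML object-detection models commonly emit (the exact schema depends on the model):

```python
import json

# Parse the raw response and print one line per detected object
detections = json.loads(response)
for det in detections[0].get("boxes", []):  # assumed response layout
    print(det.get("label"), det.get("score"), det.get("box"))
```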
   ]
  },
  {
@@ -265,8 +265,9 @@
     "# Get the details for online endpoint\n",
     "endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)\n",
     "\n",
-    "# existing traffic details\n",
+    "# Existing traffic details\n",
     "print(endpoint.traffic)\n",
+    "\n",
     "# Get the scoring URI\n",
     "print(endpoint.scoring_uri)\n",
     "print(demo_deployment)"
   ]
  },
@@ -309,7 +310,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "# Score the sample_request_data.json file using the online endpoint with the azureml endpoint invoke method\n",
     "response = workspace_ml_client.online_endpoints.invoke(\n",
     "    endpoint_name=online_endpoint_name,\n",
     "    deployment_name=demo_deployment.name,\n",
@@ -323,8 +324,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 6. Delete the online endpoint\n",
-    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+    "### 6. Clean up resources - delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint."
   ]
  },
  {

From a098e5d159c204a26a0fd3281a905d6ad9e46483 Mon Sep 17 00:00:00 2001
From: Rupal Jain
Date: Fri, 4 Aug 2023 19:10:35 +0530
Subject: [PATCH 4/5] updating score col names while fetching batch scoring
 output

---
 .../image-classification-batch-endpoint.ipynb          | 4 ++--
 .../image-instance-segmentation-batch-endpoint.ipynb   | 4 ++--
 .../image-object-detection-batch-endpoint.ipynb        | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb
index 024a8400464..192302aa8cd 100644
--- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb
@@ -422,7 +422,7 @@
     "score_df = pd.read_csv(\n",
     "    predictions_file,\n",
     "    header=None,\n",
-    "    names=[\"row_number_per_file\", \"preds\", \"labels\", \"file_name\"],\n",
+    "    names=[\"row_number_per_file\", \"preds\", \"file_name\"],\n",
     ")\n",
     "score_df.head()"
   ]
@@ -472,7 +472,7 @@
     "score_df = pd.read_csv(\n",
     "    predictions_file,\n",
     "    header=None,\n",
-    "    names=[\"row_number_per_file\", \"preds\", \"labels\", \"file_name\"],\n",
+    "    names=[\"row_number_per_file\", \"preds\", \"file_name\"],\n",
     ")\n",
     "score_df.head()"
   ]
diff --git a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb
index d43330faefd..f65d5690e55 100644
--- a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb
@@ -405,7 +405,7 @@
     "score_df = pd.read_csv(\n",
     "    predictions_file,\n",
     "    header=None,\n",
-    "    names=[\"row_number_per_file\", \"preds\", \"labels\", \"file_name\"],\n",
+    "    names=[\"row_number_per_file\", \"preds\", \"file_name\"],\n",
     ")\n",
     "score_df.head()"
   ]
@@ -453,7 +453,7 @@
     "score_df = pd.read_csv(\n",
     "    predictions_file,\n",
     "    header=None,\n",
-    "    names=[\"row_number_per_file\", \"preds\", \"labels\", \"file_name\"],\n",
+    "    names=[\"row_number_per_file\", \"preds\", \"file_name\"],\n",
     ")\n",
     "score_df.head()"
   ]
diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb
index 86f03404f73..b997061c40d 100644
--- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb
@@ -405,7 +405,7 @@
     "score_df = pd.read_csv(\n",
     "    predictions_file,\n",
     "    header=None,\n",
-    "    names=[\"row_number_per_file\", \"preds\", \"labels\", \"file_name\"],\n",
+    "    names=[\"row_number_per_file\", \"preds\", \"file_name\"],\n",
     ")\n",
     "score_df.head()"
   ]
@@ -453,7 +453,7 @@
     "score_df = pd.read_csv(\n",
     "    predictions_file,\n",
     "    header=None,\n",
-    "    names=[\"row_number_per_file\", \"preds\", \"labels\", \"file_name\"],\n",
+    "    names=[\"row_number_per_file\", \"preds\", \"file_name\"],\n",
     ")\n",
     "score_df.head()"
   ]

From 47e9f03df76522337e607ebb8ee5e782c3e19a40 Mon Sep 17 00:00:00 2001
From: Rupal Jain
Date: Fri, 4 Aug 2023 19:25:40 +0530
Subject: [PATCH 5/5] updating score col names while fetching batch scoring
 output for classification

---
 .../image-classification-batch-endpoint.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb
index 192302aa8cd..024a8400464 100644
--- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb
+++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb
@@ -422,7 +422,7 @@
     "score_df = pd.read_csv(\n",
     "    predictions_file,\n",
     "    header=None,\n",
-    "    names=[\"row_number_per_file\", \"preds\", \"file_name\"],\n",
+    "    names=[\"row_number_per_file\", \"preds\", \"labels\", \"file_name\"],\n",
     ")\n",
     "score_df.head()"
   ]
@@ -472,7 +472,7 @@
     "score_df = pd.read_csv(\n",
     "    predictions_file,\n",
     "    header=None,\n",
-    "    names=[\"row_number_per_file\", \"preds\", \"file_name\"],\n",
+    "    names=[\"row_number_per_file\", \"preds\", \"labels\", \"file_name\"],\n",
     ")\n",
     "score_df.head()"
   ]
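For reference, the column lists above mean the downloaded `predictions.csv` is read with three columns for object detection and instance segmentation, and four (including ground-truth `labels`) for classification; a sketch with illustrative file paths:

```python
import pandas as pd

# Object detection / instance segmentation batch output: no ground-truth column
od_df = pd.read_csv(
    "od-predictions.csv",  # illustrative path
    header=None,
    names=["row_number_per_file", "preds", "file_name"],
)

# Classification batch output keeps the "labels" column alongside "preds"
clf_df = pd.read_csv(
    "classification-predictions.csv",  # illustrative path
    header=None,
    names=["row_number_per_file", "preds", "labels", "file_name"],
)

print(od_df.head(), clf_df.head(), sep="\n")
```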