diff --git a/docs/Explore Algorithms/AI Services/Advanced Usage - Async, Batching, and Multi-Key.ipynb b/docs/Explore Algorithms/AI Services/Advanced Usage - Async, Batching, and Multi-Key.ipynb index a49fc5841b..7b64006026 100644 --- a/docs/Explore Algorithms/AI Services/Advanced Usage - Async, Batching, and Multi-Key.ipynb +++ b/docs/Explore Algorithms/AI Services/Advanced Usage - Async, Batching, and Multi-Key.ipynb @@ -2,88 +2,99 @@ "cells": [ { "cell_type": "markdown", - "source": [ - "# Cognitive Services Advanced Guide: Asynchrony, Batching, Multi-Key" - ], "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "1a39046d-a692-44c3-b673-78dfc1f97e08", "inputWidgets": {}, + "nuid": "1a39046d-a692-44c3-b673-78dfc1f97e08", + "showTitle": false, "title": "" } - } + }, + "source": [ + "# Cognitive Services Advanced Guide: Asynchrony, Batching, Multi-Key" + ] }, { "cell_type": "markdown", - "source": [ - "## Step 1: Imports and Keys" - ], "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "0f8d5274-46d5-4604-be41-1f3f5d481d9a", "inputWidgets": {}, + "nuid": "0f8d5274-46d5-4604-be41-1f3f5d481d9a", + "showTitle": false, "title": "" } - } + }, + "source": [ + "## Step 1: Imports and Keys" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "84829dd7-0e7d-4ee3-aa9e-c3aa6ef96c8d", + "showTitle": false, + "title": "" + } + }, + "outputs": [], "source": [ "from synapse.ml.core.platform import find_secret\n", "\n", "service_key = find_secret(\"cognitive-api-key\")\n", "service_loc = \"eastus\"" - ], + ] + }, + { + "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "84829dd7-0e7d-4ee3-aa9e-c3aa6ef96c8d", "inputWidgets": {}, + "nuid": "8a49dfe2-f00d-4db5-95d5-f119fc09e2ee", + "showTitle": false, "title": "" } }, - "outputs": [], - "execution_count": 0 + "source": [ + "## Step 2: Basic Usage" + ] }, { "cell_type": "markdown", - "source": [ - "## Step 2: Basic Usage" - ], "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "8a49dfe2-f00d-4db5-95d5-f119fc09e2ee", "inputWidgets": {}, + "nuid": "93d1a1d0-96b5-48a2-9248-0d9facdae679", + "showTitle": false, "title": "" } - } - }, - { - "cell_type": "markdown", + }, "source": [ "Image 1 | Image 2 | Image 3 \n", ":-------------------------:|:-------------------------:|:----------------------:|\n", "! | | " - ], + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "93d1a1d0-96b5-48a2-9248-0d9facdae679", "inputWidgets": {}, + "nuid": "9e1933f3-06b3-4dfd-a6a2-30d33d7da845", + "showTitle": false, "title": "" } - } - }, - { - "cell_type": "code", + }, + "outputs": [], "source": [ "from synapse.ml.cognitive.vision import AnalyzeImage\n", "\n", @@ -108,85 +119,83 @@ ")\n", "\n", "image_results = analyzer.transform(image_df).cache()" - ], - "metadata": { - "application/vnd.databricks.v1+cell": { - "showTitle": false, - "cellMetadata": {}, - "nuid": "9e1933f3-06b3-4dfd-a6a2-30d33d7da845", - "inputWidgets": {}, - "title": "" - } - }, - "outputs": [], - "execution_count": 0 + ] }, { "cell_type": "markdown", - "source": [ - "#### First we'll look at the full response objects:" - ], "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "8f759bfa-4b88-4659-a535-d768ddee9e4f", "inputWidgets": {}, + "nuid": "8f759bfa-4b88-4659-a535-d768ddee9e4f", + "showTitle": false, "title": "" } - } + }, + "source": [ + "#### First we'll look at the full response objects:" + ] }, { "cell_type": "code", - "source": [ - "display(image_results)" - ], + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "9fae2ca5-f16a-460b-94ec-e433f24f7fb4", "inputWidgets": {}, + "nuid": "9fae2ca5-f16a-460b-94ec-e433f24f7fb4", + "showTitle": false, "title": "" } }, "outputs": [], - "execution_count": 0 + "source": [ + "display(image_results)" + ] }, { "cell_type": "markdown", - "source": [ - "#### We can select out just what we need:" - ], "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "7b08b439-a505-4de3-a71e-63af30453163", "inputWidgets": {}, + "nuid": "7b08b439-a505-4de3-a71e-63af30453163", + "showTitle": false, "title": "" } - } + }, + "source": [ + "#### We can select out just what we need:" + ] }, { "cell_type": "code", - "source": [ - "display(image_results.select(\"analysis_results.description.captions.text\"))" - ], + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "88e738a6-f1bf-4077-8436-984aac858b1b", "inputWidgets": {}, + "nuid": "88e738a6-f1bf-4077-8436-984aac858b1b", + "showTitle": false, "title": "" } }, "outputs": [], - "execution_count": 0 + "source": [ + "display(image_results.select(\"analysis_results.description.captions.text\"))" + ] }, { "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "c6c2fd12-8c26-4f96-b0a5-0c55c04c182a", + "showTitle": false, + "title": "" + } + }, "source": [ "#### What's going on under the hood\n", "\n", @@ -194,99 +203,101 @@ "\n", "When we call the cognitive service transformer, we start cognitive service clients on each of your spark workers.\n", "These clients send requests to the cloud, and turn the JSON responses into Spark Struct Types so that you can access any field that the service returns." - ], + ] + }, + { + "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "c6c2fd12-8c26-4f96-b0a5-0c55c04c182a", "inputWidgets": {}, + "nuid": "31618622-57db-4973-8ab8-1bab6d7efd2e", + "showTitle": false, "title": "" } - } + }, + "source": [ + "## Step 3: Asynchronous Usage" + ] }, { "cell_type": "markdown", - "source": [ - "## Step 3: Asynchronous Usage" - ], "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "31618622-57db-4973-8ab8-1bab6d7efd2e", "inputWidgets": {}, + "nuid": "8e7e5ace-71c2-4170-8b5d-350297b907db", + "showTitle": false, "title": "" } - } - }, - { - "cell_type": "markdown", + }, "source": [ "\n", "\n", "Apache Spark ordinarily parallelizes a computation to all of it's worker threads. When working with services however this parallelism doesent fully maximize throughput because workers sit idle as requests are processed on the server. The `concurrency` parameter makes sure that each worker can stay busy as they wait for requests to complete." - ], - "metadata": { - "application/vnd.databricks.v1+cell": { - "showTitle": false, - "cellMetadata": {}, - "nuid": "8e7e5ace-71c2-4170-8b5d-350297b907db", - "inputWidgets": {}, - "title": "" - } - } + ] }, { "cell_type": "code", - "source": [ - "display(analyzer.setConcurrency(3).transform(image_df))" - ], + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "f874a63e-f22e-4c6f-9d54-83f93d140721", "inputWidgets": {}, + "nuid": "f874a63e-f22e-4c6f-9d54-83f93d140721", + "showTitle": false, "title": "" } }, "outputs": [], - "execution_count": 0 + "source": [ + "display(analyzer.setConcurrency(3).transform(image_df))" + ] }, { "cell_type": "markdown", - "source": [ - "#### Faster without extra hardware:\n", - "" - ], "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "f82c9d17-77db-44fa-8d1c-b0b7905c0e31", "inputWidgets": {}, + "nuid": "f82c9d17-77db-44fa-8d1c-b0b7905c0e31", + "showTitle": false, "title": "" } - } + }, + "source": [ + "#### Faster without extra hardware:\n", + "" + ] }, { "cell_type": "markdown", - "source": [ - "## Step 4: Batching" - ], "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "d54b3f5e-8d44-486f-97a3-0b8528934e73", "inputWidgets": {}, + "nuid": "d54b3f5e-8d44-486f-97a3-0b8528934e73", + "showTitle": false, "title": "" } - } + }, + "source": [ + "## Step 4: Batching" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "c3092f7b-105b-4171-9649-f04b189d76a0", + "showTitle": false, + "title": "" + } + }, + "outputs": [], "source": [ "from synapse.ml.cognitive.text import TextSentiment\n", "\n", @@ -312,36 +323,36 @@ "\n", "# Show the results of your text query\n", "display(sentiment.transform(text_df).select(\"text\", \"sentiment.document.sentiment\"))" - ], + ] + }, + { + "cell_type": "markdown", "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "c3092f7b-105b-4171-9649-f04b189d76a0", "inputWidgets": {}, + "nuid": "ee4a9f18-d845-4059-9edd-9bd625a75a1a", + "showTitle": false, "title": "" } }, - "outputs": [], - "execution_count": 0 - }, - { - "cell_type": "markdown", "source": [ "## Step 5: Multi-Key" - ], + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "ee4a9f18-d845-4059-9edd-9bd625a75a1a", "inputWidgets": {}, + "nuid": "a6f89d8b-7cd1-42be-8310-62989c80deb2", + "showTitle": false, "title": "" } - } - }, - { - "cell_type": "code", + }, + "outputs": [], "source": [ "from synapse.ml.cognitive.text import TextSentiment\n", "from pyspark.sql.functions import udf\n", @@ -359,71 +370,60 @@ "image_df2 = image_df.withColumn(\"key\", random_key())\n", "\n", "results = analyzer.setSubscriptionKeyCol(\"key\").transform(image_df2)" - ], - "metadata": { - "application/vnd.databricks.v1+cell": { - "showTitle": false, - "cellMetadata": {}, - "nuid": "a6f89d8b-7cd1-42be-8310-62989c80deb2", - "inputWidgets": {}, - "title": "" - } - }, - "outputs": [], - "execution_count": 0 + ] }, { "cell_type": "code", - "source": [ - "display(results.select(\"key\", \"analysis_results.description.captions.text\"))" - ], + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "c2f0ff6f-688e-4ca0-88eb-9eb8bda66786", "inputWidgets": {}, + "nuid": "c2f0ff6f-688e-4ca0-88eb-9eb8bda66786", + "showTitle": false, "title": "" } }, "outputs": [], - "execution_count": 0 + "source": [ + "display(results.select(\"key\", \"analysis_results.description.captions.text\"))" + ] }, { "cell_type": "markdown", - "source": [ - "## Learn More\n", - "- [Explore other cogntive services](https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/CognitiveServices%20-%20Overview/)\n", - "- [Read our paper \"Large-Scale Intelligent Microservices\"](https://arxiv.org/abs/2009.08044)" - ], "metadata": { "application/vnd.databricks.v1+cell": { - "showTitle": false, "cellMetadata": {}, - "nuid": "1ed7401d-28f7-4133-93e3-08e145772502", "inputWidgets": {}, + "nuid": "1ed7401d-28f7-4133-93e3-08e145772502", + "showTitle": false, "title": "" } - } + }, + "source": [ + "## Learn More\n", + "- [Explore other cogntive services](./Overview)\n", + "- [Read our paper \"Large-Scale Intelligent Microservices\"](https://arxiv.org/abs/2009.08044)" + ] } ], "metadata": { "application/vnd.databricks.v1+notebook": { - "notebookName": "CognitiveServices - Advanced Usage: Async, Batching, and Multi-Key", "dashboards": [], + "language": "python", "notebookMetadata": { "pythonIndentUnit": 2 }, - "language": "python", - "widgets": {}, - "notebookOrigID": 3743502060540796 + "notebookName": "CognitiveServices - Advanced Usage: Async, Batching, and Multi-Key", + "notebookOrigID": 3743502060540796, + "widgets": {} }, "kernelspec": { - "name": "python3", + "display_name": "Python 3 (ipykernel)", "language": "python", - "display_name": "Python 3 (ipykernel)" + "name": "python3" } }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +}