From e2205b1f7d6d63bb27474c0f3381dbf6349bb512 Mon Sep 17 00:00:00 2001
From: JessicaXYWang <108437381+JessicaXYWang@users.noreply.github.com>
Date: Mon, 13 May 2024 09:38:27 -0700
Subject: [PATCH 1/7] update resource location
---
.../form/FormOntologyLearnerSuite.scala | 6 +-
.../Quickstart - Create Audiobooks.ipynb | 220 +++++++++---------
...kstart - Understand and Search Forms.ipynb | 4 +-
3 files changed, 115 insertions(+), 115 deletions(-)
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/form/FormOntologyLearnerSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/form/FormOntologyLearnerSuite.scala
index 472c6d8191..95ea0f03d1 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/form/FormOntologyLearnerSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/form/FormOntologyLearnerSuite.scala
@@ -26,9 +26,9 @@ class FormOntologyLearnerSuite extends EstimatorFuzzing[FormOntologyLearner] wit
.setOutputCol("unified_ontology")
lazy val urlDF: DataFrame = Seq(
- "https://mmlsparkdemo.blob.core.windows.net/ignite2021/forms/2017/Invoice115991.pdf",
- "https://mmlsparkdemo.blob.core.windows.net/ignite2021/forms/2018/Invoice119554.pdf",
- "https://mmlsparkdemo.blob.core.windows.net/ignite2021/forms/2009/Invoice12241.pdf"
+ "https://mmlspark.blob.core.windows.net/publicwasb/form_test/Invoice115991.pdf",
+ "https://mmlspark.blob.core.windows.net/publicwasb/form_test/Invoice119554.pdf",
+ "https://mmlspark.blob.core.windows.netpublicwasb/form_test/Invoice12241.pdf"
).toDF("url")
lazy val tableUrlDF: DataFrame = Seq(
diff --git a/docs/Explore Algorithms/AI Services/Quickstart - Create Audiobooks.ipynb b/docs/Explore Algorithms/AI Services/Quickstart - Create Audiobooks.ipynb
index c7c2543e00..b0739c2cae 100644
--- a/docs/Explore Algorithms/AI Services/Quickstart - Create Audiobooks.ipynb
+++ b/docs/Explore Algorithms/AI Services/Quickstart - Create Audiobooks.ipynb
@@ -2,36 +2,47 @@
"cells": [
{
"cell_type": "markdown",
- "source": [
- "# Create audiobooks using neural Text to speech"
- ],
"metadata": {
"application/vnd.databricks.v1+cell": {
- "showTitle": false,
"cellMetadata": {},
- "nuid": "389e4a78-19aa-4c3f-9b7a-92e81f088168",
"inputWidgets": {},
+ "nuid": "389e4a78-19aa-4c3f-9b7a-92e81f088168",
+ "showTitle": false,
"title": ""
}
- }
+ },
+ "source": [
+ "# Create audiobooks using neural Text to speech"
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "## Step 1: Load libraries and add service information"
- ],
"metadata": {
"application/vnd.databricks.v1+cell": {
- "showTitle": false,
"cellMetadata": {},
- "nuid": "f320d6af-b255-4cb5-b60b-da840760713e",
"inputWidgets": {},
+ "nuid": "f320d6af-b255-4cb5-b60b-da840760713e",
+ "showTitle": false,
"title": ""
}
- }
+ },
+ "source": [
+ "## Step 1: Load libraries and add service information"
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "ab422610-0438-4ca4-bd16-b45e90125294",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [],
"source": [
"from synapse.ml.core.platform import *\n",
"\n",
@@ -49,54 +60,54 @@
" secret_name=\"madtest-storage-key\", keyvault=\"mmlspark-build-keys\"\n",
")\n",
"storage_account = \"anomalydetectiontest\""
- ],
- "outputs": [],
- "execution_count": null,
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
- "showTitle": false,
"cellMetadata": {},
- "nuid": "ab422610-0438-4ca4-bd16-b45e90125294",
"inputWidgets": {},
+ "nuid": "10c83d0e-998f-4d72-a351-4ffab15f662c",
+ "showTitle": false,
"title": ""
}
- }
- },
- {
- "cell_type": "markdown",
+ },
"source": [
"## Step 2: Attach the storage account to hold the audio files"
- ],
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
- "showTitle": false,
"cellMetadata": {},
- "nuid": "10c83d0e-998f-4d72-a351-4ffab15f662c",
"inputWidgets": {},
+ "nuid": "55b83038-e907-4101-a914-0a32825a9d03",
+ "showTitle": false,
"title": ""
}
- }
- },
- {
- "cell_type": "code",
+ },
+ "outputs": [],
"source": [
"spark_key_setting = f\"fs.azure.account.key.{storage_account}.blob.core.windows.net\"\n",
"spark.sparkContext._jsc.hadoopConfiguration().set(spark_key_setting, storage_key)"
- ],
- "outputs": [],
+ ]
+ },
+ {
+ "cell_type": "code",
"execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
- "showTitle": false,
"cellMetadata": {},
- "nuid": "55b83038-e907-4101-a914-0a32825a9d03",
"inputWidgets": {},
+ "nuid": "625c7b1d-4034-4df2-b919-3775ac9c271c",
+ "showTitle": false,
"title": ""
}
- }
- },
- {
- "cell_type": "code",
+ },
+ "outputs": [],
"source": [
"import os\n",
"from os.path import exists, join\n",
@@ -115,36 +126,37 @@
" mount_point=f\"/mnt/{storage_container}\",\n",
" extra_configs={spark_key_setting: storage_key},\n",
" )"
- ],
- "outputs": [],
- "execution_count": null,
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
- "showTitle": false,
"cellMetadata": {},
- "nuid": "625c7b1d-4034-4df2-b919-3775ac9c271c",
"inputWidgets": {},
+ "nuid": "381c3af7-e0e8-4a29-ae88-467e86a0e717",
+ "showTitle": false,
"title": ""
}
- }
- },
- {
- "cell_type": "markdown",
+ },
"source": [
"## Step 3: Read in text data"
- ],
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
- "showTitle": false,
"cellMetadata": {},
- "nuid": "381c3af7-e0e8-4a29-ae88-467e86a0e717",
"inputWidgets": {},
+ "nuid": "56c8ebab-567f-4c1d-a2ea-1aeb5aefcf1e",
+ "showTitle": false,
"title": ""
- }
- }
- },
- {
- "cell_type": "code",
+ },
+ "collapsed": false
+ },
+ "outputs": [],
"source": [
"from pyspark.sql.functions import udf\n",
"\n",
@@ -163,41 +175,41 @@
")\n",
"\n",
"display(df)"
- ],
- "outputs": [],
- "execution_count": null,
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
- "showTitle": false,
"cellMetadata": {},
- "nuid": "56c8ebab-567f-4c1d-a2ea-1aeb5aefcf1e",
"inputWidgets": {},
+ "nuid": "9fcb4305-a6d4-4f48-ac6f-cf4f863c7f5f",
+ "showTitle": false,
"title": ""
- },
- "collapsed": false
- }
- },
- {
- "cell_type": "markdown",
+ }
+ },
"source": [
"## Step 4: Synthesize audio from text\n",
"\n",
"
\n",
- "
\n",
+ "
\n",
"
"
- ],
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
- "showTitle": false,
"cellMetadata": {},
- "nuid": "9fcb4305-a6d4-4f48-ac6f-cf4f863c7f5f",
"inputWidgets": {},
+ "nuid": "2730c8cd-616a-4258-909d-912ea66d6446",
+ "showTitle": false,
"title": ""
- }
- }
- },
- {
- "cell_type": "code",
+ },
+ "collapsed": false
+ },
+ "outputs": [],
"source": [
"from synapse.ml.services.speech import TextToSpeech\n",
"\n",
@@ -213,37 +225,36 @@
"\n",
"audio = tts.transform(df).cache()\n",
"display(audio)"
- ],
- "outputs": [],
- "execution_count": null,
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
- "showTitle": false,
"cellMetadata": {},
- "nuid": "2730c8cd-616a-4258-909d-912ea66d6446",
"inputWidgets": {},
+ "nuid": "157a368a-d80b-4bf8-a5cb-c1f266be2f00",
+ "showTitle": false,
"title": ""
- },
- "collapsed": false
- }
- },
- {
- "cell_type": "markdown",
+ }
+ },
"source": [
"## Step 5: Listen to an audio file"
- ],
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
- "showTitle": false,
"cellMetadata": {},
- "nuid": "157a368a-d80b-4bf8-a5cb-c1f266be2f00",
"inputWidgets": {},
+ "nuid": "7a0ad60f-5511-42ba-9882-e93f474f85e9",
+ "showTitle": false,
"title": ""
}
- }
- },
- {
- "cell_type": "code",
+ },
+ "outputs": [],
"source": [
"from IPython.display import Audio\n",
"\n",
@@ -256,36 +267,25 @@
"\n",
"\n",
"Audio(filename=get_audio_file(1))"
- ],
- "outputs": [],
- "execution_count": null,
- "metadata": {
- "application/vnd.databricks.v1+cell": {
- "showTitle": false,
- "cellMetadata": {},
- "nuid": "7a0ad60f-5511-42ba-9882-e93f474f85e9",
- "inputWidgets": {},
- "title": ""
- }
- }
+ ]
}
],
"metadata": {
+ "kernel_info": {
+ "name": "synapse_pyspark"
+ },
"kernelspec": {
- "name": "synapse_pyspark",
+ "display_name": "Synapse PySpark",
"language": "Python",
- "display_name": "Synapse PySpark"
+ "name": "synapse_pyspark"
},
"language_info": {
"name": "python"
},
- "kernel_info": {
- "name": "synapse_pyspark"
- },
"save_output": true,
"synapse_widget": {
- "version": "0.1",
- "state": {}
+ "state": {},
+ "version": "0.1"
}
},
"nbformat": 4,
diff --git a/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb b/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb
index d2b58e36e2..f424554e47 100644
--- a/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb
+++ b/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb
@@ -157,7 +157,7 @@
"\n",
"df2 = (\n",
" spark.read.format(\"binaryFile\")\n",
- " .load(\"wasbs://ignite2021@mmlsparkdemo.blob.core.windows.net/form_subset/*\")\n",
+ " .load(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/form_subset/*\")\n",
" .select(\"path\")\n",
" .limit(10)\n",
" .select(udf(blob_to_url, StringType())(\"path\").alias(\"url\"))\n",
@@ -189,7 +189,7 @@
]
},
"source": [
- ""
+ ""
]
},
{
From bda0e00047a1771df0005f26b1871d0e1c75dc3d Mon Sep 17 00:00:00 2001
From: JessicaXYWang <108437381+JessicaXYWang@users.noreply.github.com>
Date: Mon, 13 May 2024 09:41:25 -0700
Subject: [PATCH 2/7] fix missing slash in FormOntologyLearnerSuite invoice URL
---
.../synapse/ml/services/form/FormOntologyLearnerSuite.scala | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/form/FormOntologyLearnerSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/form/FormOntologyLearnerSuite.scala
index 95ea0f03d1..af55615ec4 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/form/FormOntologyLearnerSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/form/FormOntologyLearnerSuite.scala
@@ -28,7 +28,7 @@ class FormOntologyLearnerSuite extends EstimatorFuzzing[FormOntologyLearner] wit
lazy val urlDF: DataFrame = Seq(
"https://mmlspark.blob.core.windows.net/publicwasb/form_test/Invoice115991.pdf",
"https://mmlspark.blob.core.windows.net/publicwasb/form_test/Invoice119554.pdf",
- "https://mmlspark.blob.core.windows.netpublicwasb/form_test/Invoice12241.pdf"
+ "https://mmlspark.blob.core.windows.net/publicwasb/form_test/Invoice12241.pdf"
).toDF("url")
lazy val tableUrlDF: DataFrame = Seq(
From 5fa190f111bf082b4f3dc8c645373f5db97809bb Mon Sep 17 00:00:00 2001
From: JessicaXYWang <108437381+JessicaXYWang@users.noreply.github.com>
Date: Mon, 13 May 2024 09:59:24 -0700
Subject: [PATCH 3/7] update openai resources
---
.../ml/services/openai/OpenAIChatCompletionSuite.scala | 4 ++--
.../ml/services/openai/OpenAICompletionSuite.scala | 8 ++++----
.../ml/services/openai/OpenAIEmbeddingsSuite.scala | 4 ++--
.../scala/com/microsoft/azure/synapse/ml/Secrets.scala | 2 +-
4 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletionSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletionSuite.scala
index 4516ebcbea..079106493c 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletionSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletionSuite.scala
@@ -15,13 +15,13 @@ class OpenAIChatCompletionSuite extends TransformerFuzzing[OpenAIChatCompletion]
lazy val completion: OpenAIChatCompletion = new OpenAIChatCompletion()
.setDeploymentName(deploymentNameGpt4)
- .setCustomServiceName(openAIServiceNameGpt4)
+ .setCustomServiceName(openAIServiceName)
.setApiVersion("2023-05-15")
.setMaxTokens(5000)
.setOutputCol("out")
.setMessagesCol("messages")
.setTemperature(0)
- .setSubscriptionKey(openAIAPIKeyGpt4)
+ .setSubscriptionKey(openAIAPIKey)
lazy val goodDf: DataFrame = Seq(
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala
index cd14a58498..062144ce12 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala
@@ -12,12 +12,12 @@ import org.apache.spark.sql.{DataFrame, Row}
import org.scalactic.Equality
trait OpenAIAPIKey {
- lazy val openAIAPIKey: String = sys.env.getOrElse("OPENAI_API_KEY", Secrets.OpenAIApiKey)
- lazy val openAIServiceName: String = sys.env.getOrElse("OPENAI_SERVICE_NAME", "synapseml-openai")
+ lazy val openAIAPIKey: String = sys.env.getOrElse("OPENAI_API_KEY_2", Secrets.OpenAIApiKey)
+ lazy val openAIServiceName: String = sys.env.getOrElse("OPENAI_SERVICE_NAME", "synapseml-openai-2")
lazy val deploymentName: String = "gpt-35-turbo"
lazy val modelName: String = "gpt-35-turbo"
- lazy val openAIAPIKeyGpt4: String = sys.env.getOrElse("OPENAI_API_KEY_2", Secrets.OpenAIApiKeyGpt4)
- lazy val openAIServiceNameGpt4: String = sys.env.getOrElse("OPENAI_SERVICE_NAME_2", "synapseml-openai-2")
+ //lazy val openAIAPIKeyGpt4: String = sys.env.getOrElse("OPENAI_API_KEY_2", Secrets.OpenAIApiKeyGpt4)
+ //lazy val openAIServiceNameGpt4: String = sys.env.getOrElse("OPENAI_SERVICE_NAME_2", "synapseml-openai-2")
lazy val deploymentNameGpt4: String = "gpt-4"
lazy val modelNameGpt4: String = "gpt-4"
}
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIEmbeddingsSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIEmbeddingsSuite.scala
index 9d4f71910b..53990ec04e 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIEmbeddingsSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIEmbeddingsSuite.scala
@@ -34,12 +34,12 @@ class OpenAIEmbeddingsSuite extends TransformerFuzzing[OpenAIEmbedding] with Ope
}
lazy val embeddingExtra: OpenAIEmbedding = new OpenAIEmbedding()
- .setSubscriptionKey(openAIAPIKeyGpt4)
+ .setSubscriptionKey(openAIAPIKey)
.setDeploymentName("text-embedding-3-small")
.setApiVersion("2024-03-01-preview")
.setDimensions(100)
.setUser("testUser")
- .setCustomServiceName(openAIServiceNameGpt4)
+ .setCustomServiceName(openAIServiceName)
.setTextCol("text")
.setOutputCol("out")
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala
index d36b0731e2..c78f10f7d2 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala
@@ -55,7 +55,7 @@ object Secrets {
lazy val CognitiveApiKey: String = getSecret("cognitive-api-key")
lazy val OpenAIApiKey: String = getSecret("openai-api-key")
- lazy val OpenAIApiKeyGpt4: String = getSecret("openai-api-key-2")
+ //lazy val OpenAIApiKeyGpt4: String = getSecret("openai-api-key-2")
lazy val CustomSpeechApiKey: String = getSecret("custom-speech-api-key")
lazy val ConversationTranscriptionUrl: String = getSecret("conversation-transcription-url")
From 96d9b2f7a97afacf006ad95bfe8fd7e547340f1a Mon Sep 17 00:00:00 2001
From: JessicaXYWang <108437381+JessicaXYWang@users.noreply.github.com>
Date: Mon, 13 May 2024 10:55:23 -0700
Subject: [PATCH 4/7] use OPENAI_SERVICE_NAME_2 env var and openai-api-key-2 secret in tests
---
.../synapse/ml/services/openai/OpenAICompletionSuite.scala | 2 +-
.../src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala
index 062144ce12..8065ef23c0 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala
@@ -13,7 +13,7 @@ import org.scalactic.Equality
trait OpenAIAPIKey {
lazy val openAIAPIKey: String = sys.env.getOrElse("OPENAI_API_KEY_2", Secrets.OpenAIApiKey)
- lazy val openAIServiceName: String = sys.env.getOrElse("OPENAI_SERVICE_NAME", "synapseml-openai-2")
+ lazy val openAIServiceName: String = sys.env.getOrElse("OPENAI_SERVICE_NAME_2", "synapseml-openai-2")
lazy val deploymentName: String = "gpt-35-turbo"
lazy val modelName: String = "gpt-35-turbo"
//lazy val openAIAPIKeyGpt4: String = sys.env.getOrElse("OPENAI_API_KEY_2", Secrets.OpenAIApiKeyGpt4)
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala
index c78f10f7d2..36123ccc56 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala
@@ -54,7 +54,7 @@ object Secrets {
}
lazy val CognitiveApiKey: String = getSecret("cognitive-api-key")
- lazy val OpenAIApiKey: String = getSecret("openai-api-key")
+ lazy val OpenAIApiKey: String = getSecret("openai-api-key-2")
//lazy val OpenAIApiKeyGpt4: String = getSecret("openai-api-key-2")
lazy val CustomSpeechApiKey: String = getSecret("custom-speech-api-key")
From 88c73b09a67f140aafe96eeaa12b1906d0c240f9 Mon Sep 17 00:00:00 2001
From: JessicaXYWang <108437381+JessicaXYWang@users.noreply.github.com>
Date: Mon, 13 May 2024 23:31:59 -0700
Subject: [PATCH 5/7] update nasaearth data
---
...ent Question and Answering with PDFs.ipynb | 162 +++++++++---------
1 file changed, 81 insertions(+), 81 deletions(-)
diff --git a/docs/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs.ipynb b/docs/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs.ipynb
index 27211d1c08..0579bcc7e9 100644
--- a/docs/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs.ipynb
+++ b/docs/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs.ipynb
@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "markdown",
+ "id": "a8a1541c29383520",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -21,11 +22,11 @@
},
"source": [
"# A Guide to Q&A on PDF Documents"
- ],
- "id": "a8a1541c29383520"
+ ]
},
{
"cell_type": "markdown",
+ "id": "802e72cb91971292",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -52,11 +53,11 @@
"1. Preprocessing PDF Documents: Learn how to load the PDF documents into a Spark DataFrame, read the documents using the [Azure AI Document Intelligence](https://azure.microsoft.com/products/ai-services/ai-document-intelligence) in Azure AI Services, and use SynapseML to split the documents into chunks.\n",
"2. Embedding Generation and Storage: Learn how to generate embeddings for the chunks using SynapseML and [Azure OpenAI Services](https://azure.microsoft.com/products/ai-services/openai-service), store the embeddings in a vector store using [Azure Cognitive Search](https://azure.microsoft.com/products/search), and search the vector store to answer the user’s question.\n",
"3. Question Answering Pipeline: Learn how to retrieve relevant document based on the user’s question and provide the answer using [Langchain](https://python.langchain.com/en/latest/index.html#)."
- ],
- "id": "802e72cb91971292"
+ ]
},
{
"cell_type": "markdown",
+ "id": "2bc3e2b42bff041c",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -71,21 +72,21 @@
},
"source": [
"We start by installing the necessary python libraries."
- ],
- "id": "2bc3e2b42bff041c"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "9e3b7e183bba5bfc",
"metadata": {},
"outputs": [],
"source": [
"%pip install openai==0.28.1 langchain==0.0.331"
- ],
- "id": "9e3b7e183bba5bfc"
+ ]
},
{
"cell_type": "markdown",
+ "id": "fb8f796d1fd622e6",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -105,11 +106,11 @@
},
"source": [
"### Step 1: Provide the keys for Azure AI Services and Azure OpenAI to authenticate the applications."
- ],
- "id": "fb8f796d1fd622e6"
+ ]
},
{
"cell_type": "markdown",
+ "id": "609142905ffbb2d7",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -129,12 +130,12 @@
},
"source": [
"To authenticate Azure AI Services and Azure OpenAI applications, you need to provide the respective API keys. Here is an example of how you can provide the keys in Python code. `find_secret()` function uses Azure Keyvault to get the API keys, however you can directly paste your own keys there."
- ],
- "id": "609142905ffbb2d7"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "8fbc0743f3a0f6ab",
"metadata": {},
"outputs": [],
"source": [
@@ -160,11 +161,11 @@
"cogsearch_api_key = find_secret(\n",
" secret_name=\"azure-search-key\", keyvault=\"mmlspark-build-keys\"\n",
")"
- ],
- "id": "8fbc0743f3a0f6ab"
+ ]
},
{
"cell_type": "markdown",
+ "id": "906c53ccba03db4d",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -184,11 +185,11 @@
},
"source": [
"### Step 2: Load the PDF documents into a Spark DataFrame."
- ],
- "id": "906c53ccba03db4d"
+ ]
},
{
"cell_type": "markdown",
+ "id": "7b64938eebf6a881",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -203,25 +204,25 @@
},
"source": [
"For this tutorial, we will be using NASA's [Earth](https://www.nasa.gov/sites/default/files/atoms/files/earth_book_2019_tagged.pdf) and [Earth at Night](https://www.nasa.gov/sites/default/files/atoms/files/earth_at_night_508.pdf) e-books. To load PDF documents into a Spark DataFrame, you can use the ```spark.read.format(\"binaryFile\")``` method provided by Apache Spark."
- ],
- "id": "7b64938eebf6a881"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "4959c5737781149a",
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql.functions import udf\n",
"from pyspark.sql.types import StringType\n",
"\n",
- "document_path = \"wasbs://public@synapseaisolutionsa.blob.core.windows.net/NASAEarth\" # path to your document\n",
+ "document_path = \"wasbs://publicwasb@mmlspark.blob.core.windows.net/NASAEarth\" # path to your document\n",
"df = spark.read.format(\"binaryFile\").load(document_path).limit(10).cache()"
- ],
- "id": "4959c5737781149a"
+ ]
},
{
"cell_type": "markdown",
+ "id": "fd5b7e549b813d97",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -236,11 +237,11 @@
},
"source": [
"This code will read the PDF documents and create a Spark DataFrame named df with the contents of the PDFs. The DataFrame will have a schema that represents the structure of the PDF documents, including their textual content."
- ],
- "id": "fd5b7e549b813d97"
+ ]
},
{
"cell_type": "markdown",
+ "id": "28aa80718b187897",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -258,11 +259,11 @@
"\n",
"\n",
""
- ],
- "id": "28aa80718b187897"
+ ]
},
{
"cell_type": "markdown",
+ "id": "f0d64237df1354a4",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -282,22 +283,22 @@
},
"source": [
"##### Display the raw data from the PDF documents"
- ],
- "id": "f0d64237df1354a4"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "393470a52e83b607",
"metadata": {},
"outputs": [],
"source": [
"# Show the dataframe without the content\n",
"display(df.drop(\"content\"))"
- ],
- "id": "393470a52e83b607"
+ ]
},
{
"cell_type": "markdown",
+ "id": "d888040fcfdccd0a",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -317,11 +318,11 @@
},
"source": [
"### Step 3: Read the documents using Azure AI Document Intelligence."
- ],
- "id": "d888040fcfdccd0a"
+ ]
},
{
"cell_type": "markdown",
+ "id": "c46bf0a8029196f9",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -343,12 +344,12 @@
"We utilize [SynapseML](https://microsoft.github.io/SynapseML/), an ecosystem of tools designed to enhance the distributed computing framework [Apache Spark](https://github.com/apache/spark). SynapseML introduces advanced networking capabilities to the Spark ecosystem and offers user-friendly SparkML transformers for various [Azure AI Services](https://azure.microsoft.com/products/ai-services).\n",
"\n",
"Additionally, we employ AnalyzeDocument from Azure AI Services to extract the complete document content and present it in the designated columns called \"output_content\" and \"paragraph.\""
- ],
- "id": "c46bf0a8029196f9"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "a198b14e6c20489d",
"metadata": {},
"outputs": [],
"source": [
@@ -372,11 +373,11 @@
" .withColumn(\"output_content\", col(\"result.analyzeResult.content\"))\n",
" .withColumn(\"paragraphs\", col(\"result.analyzeResult.paragraphs\"))\n",
").cache()"
- ],
- "id": "a198b14e6c20489d"
+ ]
},
{
"cell_type": "markdown",
+ "id": "f2a40a2afcf95a9c",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -396,22 +397,22 @@
},
"source": [
"We can observe the analayzed Spark DataFrame named ```analyzed_df``` using the following code. Note that we drop the \"content\" column as it is not needed anymore."
- ],
- "id": "f2a40a2afcf95a9c"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "45d422f7dbc8de6d",
"metadata": {},
"outputs": [],
"source": [
"analyzed_df = analyzed_df.drop(\"content\")\n",
"display(analyzed_df)"
- ],
- "id": "45d422f7dbc8de6d"
+ ]
},
{
"cell_type": "markdown",
+ "id": "6b8e05223a5f0953",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -431,11 +432,11 @@
},
"source": [
"### Step 4: Split the documents into chunks."
- ],
- "id": "6b8e05223a5f0953"
+ ]
},
{
"cell_type": "markdown",
+ "id": "5a5b64272230878c",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -455,12 +456,12 @@
},
"source": [
"After analyzing the document, we leverage SynapseML’s PageSplitter to divide the documents into smaller sections, which are subsequently stored in the “chunks” column. This allows for more granular representation and processing of the document content."
- ],
- "id": "5a5b64272230878c"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "34e3a57e64e81ba0",
"metadata": {},
"outputs": [],
"source": [
@@ -476,11 +477,11 @@
"\n",
"splitted_df = ps.transform(analyzed_df)\n",
"display(splitted_df)"
- ],
- "id": "34e3a57e64e81ba0"
+ ]
},
{
"cell_type": "markdown",
+ "id": "126d4367fa9bf899",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -500,12 +501,12 @@
},
"source": [
"Note that the chunks for each document are presented in a single row inside an array. In order to embed all the chunks in the following cells, we need to have each chunk in a separate row. To accomplish that, we first explode these arrays so there is only one chunk in each row, then filter the Spark DataFrame in order to only keep the path to the document and the chunk in a single row."
- ],
- "id": "126d4367fa9bf899"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "27dbf4dc20efc19e",
"metadata": {},
"outputs": [],
"source": [
@@ -517,11 +518,11 @@
" \"path\", \"chunk\"\n",
")\n",
"display(exploded_df)"
- ],
- "id": "27dbf4dc20efc19e"
+ ]
},
{
"cell_type": "markdown",
+ "id": "12539a7efea29008",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -541,11 +542,11 @@
},
"source": [
"### Step 5: Generate Embeddings."
- ],
- "id": "12539a7efea29008"
+ ]
},
{
"cell_type": "markdown",
+ "id": "a512653b409a31b5",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -560,12 +561,12 @@
},
"source": [
"To produce embeddings for each chunk, we utilize both SynapseML and Azure OpenAI Service. By integrating the Azure OpenAI service with SynapseML, we can leverage the power of the Apache Spark distributed computing framework to process numerous prompts using the OpenAI service. This integration enables the SynapseML embedding client to generate embeddings in a distributed manner, enabling efficient processing of large volumes of data. If you're interested in applying large language models at a distributed scale using Azure OpenAI and Azure Synapse Analytics, you can refer to [this approach](https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/OpenAI/). For more detailed information on generating embeddings with Azure OpenAI, you can look [here]( https://learn.microsoft.com/azure/cognitive-services/openai/how-to/embeddings?tabs=console)."
- ],
- "id": "a512653b409a31b5"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "df686c1b62da8fde",
"metadata": {},
"outputs": [],
"source": [
@@ -584,11 +585,11 @@
"df_embeddings = embedding.transform(exploded_df)\n",
"\n",
"display(df_embeddings)"
- ],
- "id": "df686c1b62da8fde"
+ ]
},
{
"cell_type": "markdown",
+ "id": "35ebf08c47b1a6ff",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -608,11 +609,11 @@
},
"source": [
"### Step 6: Store the embeddings in Azure Cognitive Search Vector Store."
- ],
- "id": "35ebf08c47b1a6ff"
+ ]
},
{
"cell_type": "markdown",
+ "id": "9a5407b73888a5da",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -636,12 +637,12 @@
"Adding Chunked Documents and Embeddings: The second step involves adding the chunked documents, along with their corresponding embeddings, to the vector datastore. This allows for efficient storage and retrieval of the data using vector search capabilities.\n",
"\n",
"By following these steps, you can effectively store your chunked documents and their associated embeddings in the AzureCogSearch vector database, enabling seamless retrieval of relevant information through vector search functionality."
- ],
- "id": "9a5407b73888a5da"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "2a5f68ff786229b7",
"metadata": {},
"outputs": [],
"source": [
@@ -655,12 +656,12 @@
" ) # create index ID for ACS\n",
" .withColumn(\"searchAction\", lit(\"upload\"))\n",
")"
- ],
- "id": "2a5f68ff786229b7"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "924f34c3e1612826",
"metadata": {},
"outputs": [],
"source": [
@@ -675,11 +676,11 @@
" keyCol=\"idx\",\n",
" vectorCols=json.dumps([{\"name\": \"embeddings\", \"dimension\": 1536}]),\n",
")"
- ],
- "id": "924f34c3e1612826"
+ ]
},
{
"cell_type": "markdown",
+ "id": "12c516463ddf41f0",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -699,11 +700,11 @@
},
"source": [
"### Step 7: Ask a Question."
- ],
- "id": "12c516463ddf41f0"
+ ]
},
{
"cell_type": "markdown",
+ "id": "83f34faff00e6a43",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -723,23 +724,23 @@
},
"source": [
"After processing the document, we can proceed to pose a question. We will use [SynapseML](https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/OpenAI/Quickstart%20-%20OpenAI%20Embedding/) to convert the user's question into an embedding and then utilize cosine similarity to retrieve the top K document chunks that closely match the user's question. It's worth mentioning that alternative similarity metrics can also be employed."
- ],
- "id": "83f34faff00e6a43"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "6a8b461103d3c24e",
"metadata": {},
"outputs": [],
"source": [
"user_question = \"What did the astronaut Edgar Mitchell call Earth?\"\n",
"retrieve_k = 2 # Retrieve the top 2 documents from vector database"
- ],
- "id": "6a8b461103d3c24e"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "34400fa63af3ca80",
"metadata": {},
"outputs": [],
"source": [
@@ -789,11 +790,11 @@
"# Generate embeddings for the question and retrieve the top k document chunks\n",
"question_embedding = gen_question_embedding(user_question)\n",
"output = retrieve_k_chunk(retrieve_k, question_embedding)"
- ],
- "id": "34400fa63af3ca80"
+ ]
},
{
"cell_type": "markdown",
+ "id": "7b14d74ade6d19d7",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -813,11 +814,11 @@
},
"source": [
"### Step 8: Respond to a User’s Question."
- ],
- "id": "7b14d74ade6d19d7"
+ ]
},
{
"cell_type": "markdown",
+ "id": "be48fedab0bc8e63",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -837,12 +838,12 @@
},
"source": [
"To provide a response to the user's question, we will utilize the [LangChain](https://python.langchain.com/en/latest/index.html) framework. With the LangChain framework we will augment the retrieved documents with respect to the user's question. Following this, we can request a response to the user's question from our framework."
- ],
- "id": "be48fedab0bc8e63"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "5c22f50db797d61d",
"metadata": {},
"outputs": [],
"source": [
@@ -856,11 +857,11 @@
"openai.api_base = aoai_endpoint\n",
"openai.api_version = \"2022-12-01\"\n",
"openai.api_key = aoai_key"
- ],
- "id": "5c22f50db797d61d"
+ ]
},
{
"cell_type": "markdown",
+ "id": "ca2c56887f7dd034",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -875,12 +876,12 @@
},
"source": [
"We can now wrap up the Q&A journey by asking a question and checking the answer. You will see that Edgar Mitchell called Earth \"a sparkling blue and white jewel\"!"
- ],
- "id": "ca2c56887f7dd034"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "id": "bf9fb76f9bd16298",
"metadata": {},
"outputs": [],
"source": [
@@ -920,8 +921,7 @@
"answer = qa_chain.run({\"context\": context, \"query\": user_question})\n",
"\n",
"print(answer)"
- ],
- "id": "bf9fb76f9bd16298"
+ ]
}
],
"metadata": {
From edef2ae4860f95bafe272174f3d99134a4dd0994 Mon Sep 17 00:00:00 2001
From: JessicaXYWang <108437381+JessicaXYWang@users.noreply.github.com>
Date: Wed, 15 May 2024 16:36:40 -0700
Subject: [PATCH 6/7] update openai resource name in notebooks
---
.../Explore Algorithms/OpenAI/Langchain.ipynb | 4 ++--
docs/Explore Algorithms/OpenAI/OpenAI.ipynb | 4 ++--
...- OpenAI Embedding and GPU based KNN.ipynb | 4 ++--
.../Quickstart - OpenAI Embedding.ipynb | 20 +++++++++----------
...kstart - Understand and Search Forms.ipynb | 4 ++--
5 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/docs/Explore Algorithms/OpenAI/Langchain.ipynb b/docs/Explore Algorithms/OpenAI/Langchain.ipynb
index 64579c2d70..78cebd956a 100644
--- a/docs/Explore Algorithms/OpenAI/Langchain.ipynb
+++ b/docs/Explore Algorithms/OpenAI/Langchain.ipynb
@@ -162,9 +162,9 @@
"outputs": [],
"source": [
"openai_api_key = find_secret(\n",
- " secret_name=\"openai-api-key\", keyvault=\"mmlspark-build-keys\"\n",
+ " secret_name=\"openai-api-key-2\", keyvault=\"mmlspark-build-keys\"\n",
")\n",
- "openai_api_base = \"https://synapseml-openai.openai.azure.com/\"\n",
+ "openai_api_base = \"https://synapseml-openai-2.openai.azure.com/\"\n",
"openai_api_version = \"2022-12-01\"\n",
"openai_api_type = \"azure\"\n",
"deployment_name = \"text-davinci-003\"\n",
diff --git a/docs/Explore Algorithms/OpenAI/OpenAI.ipynb b/docs/Explore Algorithms/OpenAI/OpenAI.ipynb
index 531a376143..efccf13565 100644
--- a/docs/Explore Algorithms/OpenAI/OpenAI.ipynb
+++ b/docs/Explore Algorithms/OpenAI/OpenAI.ipynb
@@ -88,12 +88,12 @@
"\n",
"# Fill in the following lines with your service information\n",
"# Learn more about selecting which embedding model to choose: https://openai.com/blog/new-and-improved-embedding-model\n",
- "service_name = \"synapseml-openai\"\n",
+ "service_name = \"synapseml-openai-2\"\n",
"deployment_name = \"gpt-35-turbo\"\n",
"deployment_name_embeddings = \"text-embedding-ada-002\"\n",
"\n",
"key = find_secret(\n",
- " secret_name=\"openai-api-key\", keyvault=\"mmlspark-build-keys\"\n",
+ " secret_name=\"openai-api-key-2\", keyvault=\"mmlspark-build-keys\"\n",
") # please replace this line with your key as a string\n",
"\n",
"assert key is not None and service_name is not None"
diff --git a/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN.ipynb b/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN.ipynb
index 5b4f9cc3c1..6e90974a48 100644
--- a/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN.ipynb
+++ b/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN.ipynb
@@ -67,11 +67,11 @@
"\n",
"# Fill in the following lines with your service information\n",
"# Learn more about selecting which embedding model to choose: https://openai.com/blog/new-and-improved-embedding-model\n",
- "service_name = \"synapseml-openai\"\n",
+ "service_name = \"synapseml-openai-2\"\n",
"deployment_name_embeddings = \"text-embedding-ada-002\"\n",
"\n",
"key = find_secret(\n",
- " secret_name=\"openai-api-key\", keyvault=\"mmlspark-build-keys\"\n",
+ " secret_name=\"openai-api-key-2\", keyvault=\"mmlspark-build-keys\"\n",
") # please replace this with your key as a string\n",
"\n",
"assert key is not None and service_name is not None"
diff --git a/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding.ipynb b/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding.ipynb
index 96d46b7824..6b973bab22 100644
--- a/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding.ipynb
+++ b/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding.ipynb
@@ -44,7 +44,7 @@
},
{
"cell_type": "code",
- "execution_count": 0,
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -63,11 +63,11 @@
"\n",
"# Fill in the following lines with your service information\n",
"# Learn more about selecting which embedding model to choose: https://openai.com/blog/new-and-improved-embedding-model\n",
- "service_name = \"synapseml-openai\"\n",
+ "service_name = \"synapseml-openai-2\"\n",
"deployment_name_embeddings = \"text-embedding-ada-002\"\n",
"\n",
"key = find_secret(\n",
- " secret_name=\"openai-api-key\", keyvault=\"mmlspark-build-keys\"\n",
+ " secret_name=\"openai-api-key-2\", keyvault=\"mmlspark-build-keys\"\n",
") # please replace this with your key as a string\n",
"\n",
"assert key is not None and service_name is not None"
@@ -95,7 +95,7 @@
},
{
"cell_type": "code",
- "execution_count": 0,
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -148,7 +148,7 @@
},
{
"cell_type": "code",
- "execution_count": 0,
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -200,7 +200,7 @@
},
{
"cell_type": "code",
- "execution_count": 0,
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -250,7 +250,7 @@
},
{
"cell_type": "code",
- "execution_count": 0,
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -307,7 +307,7 @@
},
{
"cell_type": "code",
- "execution_count": 0,
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -357,7 +357,7 @@
},
{
"cell_type": "code",
- "execution_count": 0,
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
@@ -407,7 +407,7 @@
},
{
"cell_type": "code",
- "execution_count": 0,
+ "execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
diff --git a/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb b/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb
index f424554e47..e20786e3e0 100644
--- a/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb
+++ b/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb
@@ -97,9 +97,9 @@
"search_index = \"form-demo-index-5\"\n",
"\n",
"openai_key = find_secret(\n",
- " secret_name=\"openai-api-key\", keyvault=\"mmlspark-build-keys\"\n",
+ " secret_name=\"openai-api-key-2\", keyvault=\"mmlspark-build-keys\"\n",
") # Replace the call to find_secret with your key as a python string.\n",
- "openai_service_name = \"synapseml-openai\"\n",
+ "openai_service_name = \"synapseml-openai-2\"\n",
"openai_deployment_name = \"gpt-35-turbo\"\n",
"openai_url = f\"https://{openai_service_name}.openai.azure.com/\""
]
From 6eed560f27cc31496f4f609fd5c0d07b470056eb Mon Sep 17 00:00:00 2001
From: JessicaXYWang <108437381+JessicaXYWang@users.noreply.github.com>
Date: Thu, 16 May 2024 10:32:03 -0700
Subject: [PATCH 7/7] clean up
---
.../synapse/ml/services/openai/OpenAICompletionSuite.scala | 2 --
.../src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala | 1 -
2 files changed, 3 deletions(-)
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala
index 8065ef23c0..807426c468 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAICompletionSuite.scala
@@ -16,8 +16,6 @@ trait OpenAIAPIKey {
lazy val openAIServiceName: String = sys.env.getOrElse("OPENAI_SERVICE_NAME_2", "synapseml-openai-2")
lazy val deploymentName: String = "gpt-35-turbo"
lazy val modelName: String = "gpt-35-turbo"
- //lazy val openAIAPIKeyGpt4: String = sys.env.getOrElse("OPENAI_API_KEY_2", Secrets.OpenAIApiKeyGpt4)
- //lazy val openAIServiceNameGpt4: String = sys.env.getOrElse("OPENAI_SERVICE_NAME_2", "synapseml-openai-2")
lazy val deploymentNameGpt4: String = "gpt-4"
lazy val modelNameGpt4: String = "gpt-4"
}
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala
index 36123ccc56..17eed8a668 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala
@@ -55,7 +55,6 @@ object Secrets {
lazy val CognitiveApiKey: String = getSecret("cognitive-api-key")
lazy val OpenAIApiKey: String = getSecret("openai-api-key-2")
- //lazy val OpenAIApiKeyGpt4: String = getSecret("openai-api-key-2")
lazy val CustomSpeechApiKey: String = getSecret("custom-speech-api-key")
lazy val ConversationTranscriptionUrl: String = getSecret("conversation-transcription-url")