From 412620a88ceac095cd34f8a73ad90df5b3da6f82 Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Thu, 23 Mar 2023 16:19:33 +0000 Subject: [PATCH] docs: add custom chatbot creation to form demo (#1888) --- .../ml/nbtest/DatabricksUtilities.scala | 3 +- ...ultilingual Search Engine from Forms.ipynb | 727 ++++++++++++++++-- 2 files changed, 668 insertions(+), 62 deletions(-) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala index f28e4a6103..9c4fd67493 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala @@ -56,7 +56,8 @@ object DatabricksUtilities { Map("pypi" -> Map("package" -> "Pillow")), Map("pypi" -> Map("package" -> "onnxmltools==1.7.0")), Map("pypi" -> Map("package" -> "lightgbm")), - Map("pypi" -> Map("package" -> "mlflow")) + Map("pypi" -> Map("package" -> "mlflow")), + Map("pypi" -> Map("package" -> "openai")) ).toJson.compactPrint // TODO: install synapse.ml.dl wheel package here diff --git a/notebooks/features/cognitive_services/CognitiveServices - Create a Multilingual Search Engine from Forms.ipynb b/notebooks/features/cognitive_services/CognitiveServices - Create a Multilingual Search Engine from Forms.ipynb index 1beef9a6e5..28ce1121ed 100644 --- a/notebooks/features/cognitive_services/CognitiveServices - Create a Multilingual Search Engine from Forms.ipynb +++ b/notebooks/features/cognitive_services/CognitiveServices - Create a Multilingual Search Engine from Forms.ipynb @@ -1,9 +1,66 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "566ce889-a458-49c2-a91e-4e5708251916", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Tutorial: Create a custom search engine and question-answering system\n", + "\n", + "In this tutorial, learn how to index and query large data loaded from a Spark cluster. You'll set up a Jupyter Notebook that performs the following actions:\n", + "\n", + "> + Load various forms (invoices) into a data frame in an Apache Spark session\n", + "> + Analyze them to determine their features\n", + "> + Assemble the resulting output into a tabular data structure\n", + "> + Write the output to a search index hosted in Azure Cognitive Search\n", + "> + Explore and query over the content you created" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "e91ccad0-46df-4d49-bad8-99c36dc73d5c", + "showTitle": false, + "title": "" + } + }, + "source": [ + "## 1 - Set up dependencies\n", + "\n", + "We start by importing packages and connecting to the Azure resources used in this workflow." 
+ ]
+ },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "6409e4d9-77cc-433f-aa8c-ccfc0f610de5",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "import os\n",
@@ -14,6 +71,10 @@
 "spark = SparkSession.builder.getOrCreate()\n",
 "if running_on_synapse():\n",
 " from notebookutils.visualization import display\n",
+ " import subprocess\n",
+ " import sys\n",
+ "\n",
+ " subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"openai\"])\n",
 "\n",
 "cognitive_key = find_secret(\"cognitive-api-key\")\n",
 "cognitive_location = \"eastus\"\n",
@@ -23,13 +84,49 @@
 "\n",
 "search_key = find_secret(\"azure-search-key\")\n",
 "search_service = \"mmlspark-azure-search\"\n",
- "search_index = \"form-demo-index-2\""
+ "search_index = \"form-demo-index-5\"\n",
+ "\n",
+ "openai_key = find_secret(\"openai-api-key\")\n",
+ "openai_service_name = \"synapseml-openai\"\n",
+ "openai_deployment_name = \"gpt-35-turbo\"\n",
+ "openai_url = f\"https://{openai_service_name}.openai.azure.com/\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "027c2491-110d-49a7-98ea-7e6d286bc63c",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "source": [
+ "## 2 - Load data into Spark\n",
+ "\n",
+ "This code loads a few external files from an Azure storage account that's used for demo purposes. The files are various invoices, and they're read into a data frame."
 ]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "e2530924-cc39-45fb-9364-fc1365d97301",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "from pyspark.sql.functions import udf\n",
@@ -52,33 +149,67 @@
 " .limit(10)\n",
 " .select(udf(blob_to_url, StringType())(\"path\").alias(\"url\"))\n",
 " .cache()\n",
- ")"
+ ")\n",
+ "\n",
+ "display(df2)"
 ]
 },
 {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "f3cad37b-b020-498e-823e-350da5557d68",
+ "showTitle": false,
+ "title": ""
+ },
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
 "source": [
- "display(df2)"
+ ""
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
- "pycharm": {
- "name": "#%% md\n"
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "f47cf660-27af-42df-8349-3618b1f09478",
+ "showTitle": false,
+ "title": ""
 }
 },
 "source": [
- ""
+ "## 3 - Apply form recognition\n",
+ "\n",
+ "This code loads the [AnalyzeInvoices transformer](https://microsoft.github.io/SynapseML/docs/documentation/transformers/transformers_cognitive/#analyzeinvoices) and passes a reference to the data frame containing the invoices. It calls the pre-built invoice model of Azure Form Recognizer."
]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "c38db874-a1a5-49ae-913e-d55e3593c794",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "from synapse.ml.cognitive import AnalyzeInvoices\n",
@@ -93,22 +224,48 @@
 " .setConcurrency(5)\n",
 " .transform(df2)\n",
 " .cache()\n",
- ")"
+ ")\n",
+ "\n",
+ "display(analyzed_df)"
 ]
 },
 {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "42e8c9f8-2187-4f5e-b067-e271ea383c25",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "source": [
- "display(analyzed_df)"
+ "## 4 - Simplify form recognition output\n",
+ "\n",
+ "This code uses the [FormOntologyLearner](https://mmlspark.blob.core.windows.net/docs/0.10.0/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.FormOntologyTransformer), a transformer that analyzes the output of Form Recognizer transformers and infers a tabular data structure. The output of AnalyzeInvoices is dynamic and varies based on the features detected in your content.\n",
+ "\n",
+ "FormOntologyLearner extends the utility of the AnalyzeInvoices transformer by looking for patterns that can be used to create a tabular data structure. Organizing the output into multiple columns and rows makes for simpler downstream analysis."
 ]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "196165ee-59a9-4332-8fd2-8e9339a1015b",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "from synapse.ml.cognitive import FormOntologyLearner\n",
@@ -121,22 +278,44 @@
 " .transform(analyzed_df)\n",
 " .select(\"url\", \"extracted.*\")\n",
 " .cache()\n",
- ")"
+ ")\n",
+ "\n",
+ "display(organized_df)"
 ]
 },
 {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "39ef62b1-d8cf-411d-a225-67486782ae8e",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "source": [
- "display(organized_df)"
+ "With the data in a tabular dataframe, we can use some Spark SQL to flatten the nested tables found in the forms."
 ]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "f096b5ed-7beb-4b3a-bb25-99dcccedfd9f",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "from pyspark.sql.functions import explode, col\n",
@@ -146,31 +325,46 @@
 " .drop(\"Items\")\n",
 " .select(\"Item.*\", \"*\")\n",
 " .drop(\"Item\")\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
+ ")\n",
+ "\n",
 "display(itemized_df)"
 ]
 },
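 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "As a quick sanity check of the flattened table, we can aggregate the line items per customer. This is a minimal sketch, not part of the original pipeline: it assumes the ontology learner inferred a \"CustomerName\" column, as the double-check cell at the end of this notebook also does."
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {},
 "outputs": [],
 "source": [
 "# Sketch: count the extracted line items per customer.\n",
 "# Assumes FormOntologyLearner inferred a \"CustomerName\" column.\n",
 "display(itemized_df.groupBy(\"CustomerName\").count())"
 ]
 },
 {
 "cell_type": "markdown",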
"metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "eb6997cc-e216-4b54-b2d3-953d5689c7e1", + "showTitle": false, + "title": "" + } + }, "source": [ - "display(itemized_df.where(col(\"ProductCode\") == 48))" + "## 5 - Add translations\n", + "\n", + "This code loads [Translate](https://microsoft.github.io/SynapseML/docs/documentation/transformers/transformers_cognitive/#translate), a transformer that calls the Azure Translator service in Cognitive Services. The original text, which is in English in the \"Description\" column, is machine-translated into various languages. All of the output is consolidated into \"output.translations\" array." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "58e5768b-fa03-4b9d-b71d-881ba2ee7da6", + "showTitle": false, + "title": "" + } + }, "outputs": [], "source": [ "from synapse.ml.cognitive import Translate\n", @@ -188,29 +382,218 @@ " .withColumn(\"Translations\", col(\"output.translations\")[0])\n", " .drop(\"output\", \"TranslationError\")\n", " .cache()\n", + ")\n", + "\n", + "display(translated_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "deb4444f-cc1d-44c7-976c-125b90b5cda6", + "showTitle": false, + "title": "" + } + }, + "source": [ + "## 6 - Translate products to emojis with OpenAI 🤯" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "a9a3e173-1ef0-4c48-885a-aa2e431d361d", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from synapse.ml.cognitive.openai import OpenAIPrompt\n", + "from pyspark.sql.functions import trim, split\n", + "\n", + "emoji_template = \"\"\" \n", + " Your job is to translate item names into emoji. 
Do not add anything but the emoji and end the translation with a comma\n",
 " \n",
 " Two Ducks: 🦆🦆,\n",
 " Light Bulb: 💡,\n",
 " Three Peaches: 🍑🍑🍑,\n",
 " Two kitchen stoves: ♨️♨️,\n",
 " A red car: 🚗,\n",
 " A person and a cat: 🧍🐈,\n",
 " A {Description}: \"\"\"\n",
 "\n",
 "prompter = (\n",
 " OpenAIPrompt()\n",
 " .setSubscriptionKey(openai_key)\n",
 " .setDeploymentName(openai_deployment_name)\n",
 " .setUrl(openai_url)\n",
 " .setMaxTokens(5)\n",
 " .setPromptTemplate(emoji_template)\n",
 " .setErrorCol(\"error\")\n",
 " .setOutputCol(\"Emoji\")\n",
 ")\n",
 "\n",
 "emoji_df = (\n",
 " prompter.transform(translated_df)\n",
 " .withColumn(\"Emoji\", trim(split(col(\"Emoji\"), \",\").getItem(0)))\n",
 " .drop(\"error\", \"prompt\")\n",
 " .cache()\n",
 ")"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "f3bb2538-3eae-475b-bb83-e11023e8bf5b",
 "showTitle": false,
 "title": ""
 }
 },
 "outputs": [],
 "source": [
 "display(emoji_df.select(\"Description\", \"Emoji\"))"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "09ecd5f9-1b77-45fd-b209-4e3c04883bd6",
 "showTitle": false,
 "title": ""
 }
 },
 "source": [
 "## 7 - Infer vendor address continent with OpenAI"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "265cfcf2-1bc2-4705-b021-bec4492b05c7",
 "showTitle": false,
 "title": ""
 }
 },
 "outputs": [],
 "source": [
 "continent_template = \"\"\"\n",
 "Which continent does the following address belong to? \n",
 "\n",
 "Pick one value from Europe, Australia, North America, South America, Asia, Africa, Antarctica. \n",
 "\n",
 "Don't respond with anything but one of the above. If you don't know the answer or cannot figure it out from the text, return None. 
End your answer with a comma.\n",
 "\n",
 "Address: \"6693 Ryan Rd, North Wales\",\n",
 "Continent: Europe,\n",
 "Address: \"6693 Ryan Rd\",\n",
 "Continent: None,\n",
 "Address: \"{VendorAddress}\",\n",
 "Continent:\"\"\"\n",
 "\n",
 "continent_df = (\n",
 " prompter.setOutputCol(\"Continent\")\n",
 " .setPromptTemplate(continent_template)\n",
 " .transform(emoji_df)\n",
 " .withColumn(\"Continent\", trim(split(col(\"Continent\"), \",\").getItem(0)))\n",
 " .drop(\"error\", \"prompt\")\n",
 " .cache()\n",
 ")"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "6ea14531-b44a-4f51-b6b0-3e1b4fbb7fb0",
 "showTitle": false,
 "title": ""
 }
 },
 "outputs": [],
 "source": [
 "display(continent_df.select(\"VendorAddress\", \"Continent\"))"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "f8417058-59a1-4563-bb08-d824719fe01d",
 "showTitle": false,
 "title": ""
 }
 },
 "source": [
 "## 8 - Create an Azure Search Index for the Forms"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "052b7b4e-5b8c-4d55-b97d-80d7b9c1774d",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "from synapse.ml.cognitive import *\n",
 "from pyspark.sql.functions import monotonically_increasing_id, lit\n",
 "\n",
 "(\n",
- " translated_df.withColumn(\"DocID\", monotonically_increasing_id().cast(\"string\"))\n",
+ " continent_df.withColumn(\"DocID\", monotonically_increasing_id().cast(\"string\"))\n",
 " .withColumn(\"SearchAction\", lit(\"upload\"))\n",
 " .writeToAzureSearch(\n",
 " subscriptionKey=search_key,\n",
@@ -222,29 +605,251 @@
 ")"
 ]
 },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "2afbbff7-8c9a-4c4d-a06c-c4e61e4fd7ae",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "source": [
+ "## 9 - Try out a search query"
+ ]
+ },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "8ee97fbb-f037-451f-b23e-ca9cee1d9969",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "import requests\n",
 "\n",
- "url = \"https://{}.search.windows.net/indexes/{}/docs/search?api-version=2019-05-06\".format(\n",
+ "search_url = \"https://{}.search.windows.net/indexes/{}/docs/search?api-version=2019-05-06\".format(\n",
 " search_service, search_index\n",
 ")\n",
- "requests.post(url, json={\"search\": \"door\"}, headers={\"api-key\": search_key}).json()"
+ "requests.post(\n",
+ " search_url, json={\"search\": \"door\"}, headers={\"api-key\": search_key}\n",
+ ").json()"
 ]
 },
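 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "We can also check that all of the uploaded rows landed in the index. The next cell is a minimal sketch that calls the Azure Cognitive Search $count endpoint, reusing the search_service, search_index, and search_key defined above."
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {},
 "outputs": [],
 "source": [
 "# Sketch: count the documents in the index via the $count endpoint,\n",
 "# reusing search_service, search_index, and search_key from above.\n",
 "count_url = \"https://{}.search.windows.net/indexes/{}/docs/$count?api-version=2019-05-06\".format(\n",
 " search_service, search_index\n",
 ")\n",
 "requests.get(count_url, headers={\"api-key\": search_key}).text"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "b69e7da2-f89d-41b2-b08a-8cdc8bec18ed",
 "showTitle": 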
false,
 "title": ""
 }
 },
 "source": [
 "## 10 - Build a simple chatbot that can use Azure Search as a tool 🧠🔧\n",
 "\n",
 "The chatbot works in two stages: it first asks GPT to write a search query for the user's question, and then asks GPT to answer the question using the results of that query."
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "daf5fc16-48c4-451b-a153-5e0d4013cf5c",
 "showTitle": false,
 "title": ""
 }
 },
 "outputs": [],
 "source": [
 "import json\n",
 "import openai\n",
 "\n",
 "openai.api_type = \"azure\"\n",
 "openai.api_base = openai_url\n",
 "openai.api_key = openai_key\n",
 "openai.api_version = \"2023-03-15-preview\"\n",
 "\n",
 "chat_context_prompt = f\"\"\"\n",
 "You are a chatbot designed to answer questions with the help of a search engine that has the following information:\n",
 "\n",
 "{continent_df.columns}\n",
 "\n",
 "If you don't know the answer to a question say \"I don't know\". Do not lie or hallucinate information. Be brief. If you need to use the search engine to solve the problem, please output a JSON object in the form of {{\"query\": \"example_query\"}}\n",
 "\"\"\"\n",
 "\n",
 "\n",
 "def search_query_prompt(question):\n",
 " return f\"\"\"\n",
 "Given the search engine above, what would you search for to answer the following question?\n",
 "\n",
 "Question: \"{question}\"\n",
 "\n",
 "Please output a JSON object in the form of {{\"query\": \"example_query\"}}\n",
 "\"\"\"\n",
 "\n",
 "\n",
 "def search_result_prompt(query):\n",
 " search_results = requests.post(\n",
 " search_url, json={\"search\": query}, headers={\"api-key\": search_key}\n",
 " ).json()\n",
 " return f\"\"\"\n",
 "\n",
 "You previously ran a search for \"{query}\" which returned the following results:\n",
 "\n",
 "{search_results}\n",
 "\n",
 "You should use the results to help you answer questions. If you don't know the answer to a question say \"I don't know\". Do not lie or hallucinate information. Be brief and mention which query you used to solve the problem. 
\n",
 "\"\"\"\n",
 "\n",
 "\n",
 "def prompt_gpt(messages):\n",
 " response = openai.ChatCompletion.create(\n",
 " engine=openai_deployment_name, messages=messages, max_tokens=None, top_p=0.95\n",
 " )\n",
 " return response[\"choices\"][0][\"message\"][\"content\"]\n",
 "\n",
 "\n",
 "def custom_chatbot(question):\n",
 " # Stage 1: ask GPT what to search for to answer the question\n",
 " query = json.loads(\n",
 " prompt_gpt(\n",
 " [\n",
 " {\"role\": \"system\", \"content\": chat_context_prompt},\n",
 " {\"role\": \"user\", \"content\": search_query_prompt(question)},\n",
 " ]\n",
 " )\n",
 " )[\"query\"]\n",
 "\n",
 " # Stage 2: answer the question using the results of that search\n",
 " return prompt_gpt(\n",
 " [\n",
 " {\"role\": \"system\", \"content\": chat_context_prompt},\n",
 " {\"role\": \"system\", \"content\": search_result_prompt(query)},\n",
 " {\"role\": \"user\", \"content\": question},\n",
 " ]\n",
 " )"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "bc4d292d-4782-4993-821a-6c55f382b23c",
 "showTitle": false,
 "title": ""
 }
 },
 "source": [
 "## 11 - Ask our chatbot a question"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "7aeedaf1-d737-4453-baad-3aba02a3d069",
 "showTitle": false,
 "title": ""
 }
 },
 "outputs": [],
 "source": [
 "custom_chatbot(\"What did Luke Diaz buy?\")"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "61d0ed86-c961-4e18-80a8-e404ee6bc511",
 "showTitle": false,
 "title": ""
 }
 },
 "source": [
 "## 12 - A quick double-check"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "48fa0d23-5711-4e3e-b15b-a47e96b6dee0",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
- "source": []
+ "source": [
+ "display(\n",
+ " continent_df.where(col(\"CustomerName\") == \"Luke Diaz\")\n",
+ " .select(\"Description\")\n",
+ " .distinct()\n",
+ ")"
+ ]
 }
 ],
 "metadata": {
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "language": "python",
+ "notebookMetadata": {
+ "pythonIndentUnit": 2
+ },
+ "notebookName": "CognitiveServices - Create a Multilingual Search Engine from Forms",
+ "notebookOrigID": 1242000907990110,
+ "widgets": {}
+ },
 "description": null,
 "kernelspec": {
 "display_name": "Synapse PySpark",