From 412620a88ceac095cd34f8a73ad90df5b3da6f82 Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Thu, 23 Mar 2023 16:19:33 +0000 Subject: [PATCH] docs: add custom chatbot creation to form demo (#1888) --- .../ml/nbtest/DatabricksUtilities.scala | 3 +- ...ultilingual Search Engine from Forms.ipynb | 727 ++++++++++++++++-- 2 files changed, 668 insertions(+), 62 deletions(-) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala index f28e4a6103..9c4fd67493 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala @@ -56,7 +56,8 @@ object DatabricksUtilities { Map("pypi" -> Map("package" -> "Pillow")), Map("pypi" -> Map("package" -> "onnxmltools==1.7.0")), Map("pypi" -> Map("package" -> "lightgbm")), - Map("pypi" -> Map("package" -> "mlflow")) + Map("pypi" -> Map("package" -> "mlflow")), + Map("pypi" -> Map("package" -> "openai")) ).toJson.compactPrint // TODO: install synapse.ml.dl wheel package here diff --git a/notebooks/features/cognitive_services/CognitiveServices - Create a Multilingual Search Engine from Forms.ipynb b/notebooks/features/cognitive_services/CognitiveServices - Create a Multilingual Search Engine from Forms.ipynb index 1beef9a6e5..28ce1121ed 100644 --- a/notebooks/features/cognitive_services/CognitiveServices - Create a Multilingual Search Engine from Forms.ipynb +++ b/notebooks/features/cognitive_services/CognitiveServices - Create a Multilingual Search Engine from Forms.ipynb @@ -1,9 +1,66 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "566ce889-a458-49c2-a91e-4e5708251916", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Tutorial: Create a custom search engine and question-answering system\n", + "\n", + "In this tutorial, learn how to index and query large data loaded from a Spark cluster. You'll set up a Jupyter Notebook that performs the following actions:\n", + "\n", + "> + Load various forms (invoices) into a data frame in an Apache Spark session\n", + "> + Analyze them to determine their features\n", + "> + Assemble the resulting output into a tabular data structure\n", + "> + Write the output to a search index hosted in Azure Cognitive Search\n", + "> + Explore and query over the content you created" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "e91ccad0-46df-4d49-bad8-99c36dc73d5c", + "showTitle": false, + "title": "" + } + }, + "source": [ + "## 1 - Set up dependencies\n", + "\n", + "We start by importing packages and connecting to the Azure resources used in this workflow." 
+ ]
+ },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "6409e4d9-77cc-433f-aa8c-ccfc0f610de5",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "import os\n",
@@ -14,6 +71,10 @@
 "spark = SparkSession.builder.getOrCreate()\n",
 "if running_on_synapse():\n",
 " from notebookutils.visualization import display\n",
+ " import subprocess\n",
+ " import sys\n",
+ "\n",
+ " subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"openai\"])\n",
 "\n",
 "cognitive_key = find_secret(\"cognitive-api-key\")\n",
 "cognitive_location = \"eastus\"\n",
@@ -23,13 +84,49 @@
 "\n",
 "search_key = find_secret(\"azure-search-key\")\n",
 "search_service = \"mmlspark-azure-search\"\n",
- "search_index = \"form-demo-index-2\""
+ "search_index = \"form-demo-index-5\"\n",
+ "\n",
+ "openai_key = find_secret(\"openai-api-key\")\n",
+ "openai_service_name = \"synapseml-openai\"\n",
+ "openai_deployment_name = \"gpt-35-turbo\"\n",
+ "openai_url = f\"https://{openai_service_name}.openai.azure.com/\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "027c2491-110d-49a7-98ea-7e6d286bc63c",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "source": [
+ "## 2 - Load data into Spark\n",
+ "\n",
+ "This code loads a few external files from an Azure storage account that's used for demo purposes. The files are various invoices, and they're read into a data frame."
 ]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "e2530924-cc39-45fb-9364-fc1365d97301",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "from pyspark.sql.functions import udf\n",
@@ -52,33 +149,67 @@
 " .limit(10)\n",
 " .select(udf(blob_to_url, StringType())(\"path\").alias(\"url\"))\n",
 " .cache()\n",
- ")"
+ ")\n",
+ "\n",
+ "display(df2)"
 ]
 },
 {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "f3cad37b-b020-498e-823e-350da5557d68",
+ "showTitle": false,
+ "title": ""
+ },
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
 "source": [
- "display(df2)"
+ ""
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
- "pycharm": {
- "name": "#%% md\n"
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "f47cf660-27af-42df-8349-3618b1f09478",
+ "showTitle": false,
+ "title": ""
 }
 },
 "source": [
- ""
+ "## 3 - Apply form recognition\n",
+ "\n",
+ "This code loads the [AnalyzeInvoices transformer](https://microsoft.github.io/SynapseML/docs/documentation/transformers/transformers_cognitive/#analyzeinvoices) and passes a reference to the data frame containing the invoices. It calls the pre-built invoice model of Azure Form Recognizer."
]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "c38db874-a1a5-49ae-913e-d55e3593c794",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "from synapse.ml.cognitive import AnalyzeInvoices\n",
@@ -93,22 +224,48 @@
 " .setConcurrency(5)\n",
 " .transform(df2)\n",
 " .cache()\n",
- ")"
+ ")\n",
+ "\n",
+ "display(analyzed_df)"
 ]
 },
 {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "42e8c9f8-2187-4f5e-b067-e271ea383c25",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "source": [
- "display(analyzed_df)"
+ "## 4 - Simplify form recognition output\n",
+ "\n",
+ "This code uses the [FormOntologyLearner](https://mmlspark.blob.core.windows.net/docs/0.10.0/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.FormOntologyTransformer), a transformer that analyzes the output of Form Recognizer transformers and infers a tabular data structure. The output of AnalyzeInvoices is dynamic and varies based on the features detected in your content.\n",
+ "\n",
+ "FormOntologyLearner extends the utility of the AnalyzeInvoices transformer by looking for patterns that can be used to create a tabular data structure. Organizing the output into multiple columns and rows makes for simpler downstream analysis."
 ]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "196165ee-59a9-4332-8fd2-8e9339a1015b",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "from synapse.ml.cognitive import FormOntologyLearner\n",
@@ -121,22 +278,44 @@
 " .transform(analyzed_df)\n",
 " .select(\"url\", \"extracted.*\")\n",
 " .cache()\n",
- ")"
+ ")\n",
+ "\n",
+ "display(organized_df)"
 ]
 },
 {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "39ef62b1-d8cf-411d-a225-67486782ae8e",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "source": [
- "display(organized_df)"
+ "With the data in a tabular dataframe, we can use some Spark SQL to flatten the nested tables found in the forms."
 ]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "f096b5ed-7beb-4b3a-bb25-99dcccedfd9f",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "from pyspark.sql.functions import explode, col\n",
@@ -146,31 +325,46 @@
 " .drop(\"Items\")\n",
 " .select(\"Item.*\", \"*\")\n",
 " .drop(\"Item\")\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
+ ")\n",
+ "\n",
 "display(itemized_df)"
 ]
 },
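 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "As a quick sanity check of the flattened table, we can aggregate the line items per customer. This is a minimal sketch, not part of the original pipeline: it assumes the ontology learner inferred a \"CustomerName\" column, as the double-check cell at the end of this notebook also does."
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {},
 "outputs": [],
 "source": [
 "# Sketch: count the extracted line items per customer.\n",
 "# Assumes FormOntologyLearner inferred a \"CustomerName\" column.\n",
 "display(itemized_df.groupBy(\"CustomerName\").count())"
 ]
 },
 {
 "cell_type": "markdown",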
"metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "eb6997cc-e216-4b54-b2d3-953d5689c7e1", + "showTitle": false, + "title": "" + } + }, "source": [ - "display(itemized_df.where(col(\"ProductCode\") == 48))" + "## 5 - Add translations\n", + "\n", + "This code loads [Translate](https://microsoft.github.io/SynapseML/docs/documentation/transformers/transformers_cognitive/#translate), a transformer that calls the Azure Translator service in Cognitive Services. The original text, which is in English in the \"Description\" column, is machine-translated into various languages. All of the output is consolidated into \"output.translations\" array." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "58e5768b-fa03-4b9d-b71d-881ba2ee7da6", + "showTitle": false, + "title": "" + } + }, "outputs": [], "source": [ "from synapse.ml.cognitive import Translate\n", @@ -188,29 +382,218 @@ " .withColumn(\"Translations\", col(\"output.translations\")[0])\n", " .drop(\"output\", \"TranslationError\")\n", " .cache()\n", + ")\n", + "\n", + "display(translated_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "deb4444f-cc1d-44c7-976c-125b90b5cda6", + "showTitle": false, + "title": "" + } + }, + "source": [ + "## 6 - Translate products to emojis with OpenAI 🤯" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "a9a3e173-1ef0-4c48-885a-aa2e431d361d", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from synapse.ml.cognitive.openai import OpenAIPrompt\n", + "from pyspark.sql.functions import trim, split\n", + "\n", + "emoji_template = \"\"\" \n", + " Your job is to translate item names into emoji. 
Do not add anything but the emoji and end the translation with a comma\n",
 " \n",
 " Two Ducks: 🦆🦆,\n",
 " Light Bulb: 💡,\n",
 " Three Peaches: 🍑🍑🍑,\n",
 " Two kitchen stoves: ♨️♨️,\n",
 " A red car: 🚗,\n",
 " A person and a cat: 🧍🐈,\n",
 " A {Description}: \"\"\"\n",
 "\n",
 "prompter = (\n",
 " OpenAIPrompt()\n",
 " .setSubscriptionKey(openai_key)\n",
 " .setDeploymentName(openai_deployment_name)\n",
 " .setUrl(openai_url)\n",
 " .setMaxTokens(5)\n",
 " .setPromptTemplate(emoji_template)\n",
 " .setErrorCol(\"error\")\n",
 " .setOutputCol(\"Emoji\")\n",
 ")\n",
 "\n",
 "emoji_df = (\n",
 " prompter.transform(translated_df)\n",
 " .withColumn(\"Emoji\", trim(split(col(\"Emoji\"), \",\").getItem(0)))\n",
 " .drop(\"error\", \"prompt\")\n",
 " .cache()\n",
 ")"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "f3bb2538-3eae-475b-bb83-e11023e8bf5b",
 "showTitle": false,
 "title": ""
 }
 },
 "outputs": [],
 "source": [
 "display(emoji_df.select(\"Description\", \"Emoji\"))"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "09ecd5f9-1b77-45fd-b209-4e3c04883bd6",
 "showTitle": false,
 "title": ""
 }
 },
 "source": [
 "## 7 - Infer vendor address continent with OpenAI"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "265cfcf2-1bc2-4705-b021-bec4492b05c7",
 "showTitle": false,
 "title": ""
 }
 },
 "outputs": [],
 "source": [
 "continent_template = \"\"\"\n",
 "Which continent does the following address belong to? \n",
 "\n",
 "Pick one value from Europe, Australia, North America, South America, Asia, Africa, Antarctica. \n",
 "\n",
 "Don't respond with anything but one of the above. If you don't know the answer or cannot figure it out from the text, return None. 
End your answer with a comma.\n",
 "\n",
 "Address: \"6693 Ryan Rd, North Wales\",\n",
 "Continent: Europe,\n",
 "Address: \"6693 Ryan Rd\",\n",
 "Continent: None,\n",
 "Address: \"{VendorAddress}\",\n",
 "Continent:\"\"\"\n",
 "\n",
 "continent_df = (\n",
 " prompter.setOutputCol(\"Continent\")\n",
 " .setPromptTemplate(continent_template)\n",
 " .transform(emoji_df)\n",
 " .withColumn(\"Continent\", trim(split(col(\"Continent\"), \",\").getItem(0)))\n",
 " .drop(\"error\", \"prompt\")\n",
 " .cache()\n",
 ")"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "6ea14531-b44a-4f51-b6b0-3e1b4fbb7fb0",
 "showTitle": false,
 "title": ""
 }
 },
 "outputs": [],
 "source": [
 "display(continent_df.select(\"VendorAddress\", \"Continent\"))"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "f8417058-59a1-4563-bb08-d824719fe01d",
 "showTitle": false,
 "title": ""
 }
 },
 "source": [
 "## 8 - Create an Azure Search Index for the Forms"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "052b7b4e-5b8c-4d55-b97d-80d7b9c1774d",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "from synapse.ml.cognitive import *\n",
 "from pyspark.sql.functions import monotonically_increasing_id, lit\n",
 "\n",
 "(\n",
- " translated_df.withColumn(\"DocID\", monotonically_increasing_id().cast(\"string\"))\n",
+ " continent_df.withColumn(\"DocID\", monotonically_increasing_id().cast(\"string\"))\n",
 " .withColumn(\"SearchAction\", lit(\"upload\"))\n",
 " .writeToAzureSearch(\n",
 " subscriptionKey=search_key,\n",
@@ -222,29 +605,251 @@
 ")"
 ]
 },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "2afbbff7-8c9a-4c4d-a06c-c4e61e4fd7ae",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "source": [
+ "## 9 - Try out a search query"
+ ]
+ },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "8ee97fbb-f037-451f-b23e-ca9cee1d9969",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
 "source": [
 "import requests\n",
 "\n",
- "url = \"https://{}.search.windows.net/indexes/{}/docs/search?api-version=2019-05-06\".format(\n",
+ "search_url = \"https://{}.search.windows.net/indexes/{}/docs/search?api-version=2019-05-06\".format(\n",
 " search_service, search_index\n",
 ")\n",
- "requests.post(url, json={\"search\": \"door\"}, headers={\"api-key\": search_key}).json()"
+ "requests.post(\n",
+ " search_url, json={\"search\": \"door\"}, headers={\"api-key\": search_key}\n",
+ ").json()"
 ]
 },
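 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "We can also check that all of the uploaded rows landed in the index. The next cell is a minimal sketch that calls the Azure Cognitive Search $count endpoint, reusing the search_service, search_index, and search_key defined above."
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {},
 "outputs": [],
 "source": [
 "# Sketch: count the documents in the index via the $count endpoint,\n",
 "# reusing search_service, search_index, and search_key from above.\n",
 "count_url = \"https://{}.search.windows.net/indexes/{}/docs/$count?api-version=2019-05-06\".format(\n",
 " search_service, search_index\n",
 ")\n",
 "requests.get(count_url, headers={\"api-key\": search_key}).text"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "b69e7da2-f89d-41b2-b08a-8cdc8bec18ed",
 "showTitle": 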
false,
 "title": ""
 }
 },
 "source": [
 "## 10 - Build a simple chatbot that can use Azure Search as a tool 🧠🔧\n",
 "\n",
 "The chatbot works in two stages: it first asks GPT to write a search query for the user's question, and then asks GPT to answer the question using the results of that query."
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "daf5fc16-48c4-451b-a153-5e0d4013cf5c",
 "showTitle": false,
 "title": ""
 }
 },
 "outputs": [],
 "source": [
 "import json\n",
 "import openai\n",
 "\n",
 "openai.api_type = \"azure\"\n",
 "openai.api_base = openai_url\n",
 "openai.api_key = openai_key\n",
 "openai.api_version = \"2023-03-15-preview\"\n",
 "\n",
 "chat_context_prompt = f\"\"\"\n",
 "You are a chatbot designed to answer questions with the help of a search engine that has the following information:\n",
 "\n",
 "{continent_df.columns}\n",
 "\n",
 "If you don't know the answer to a question say \"I don't know\". Do not lie or hallucinate information. Be brief. If you need to use the search engine to solve the problem, please output a JSON object in the form of {{\"query\": \"example_query\"}}\n",
 "\"\"\"\n",
 "\n",
 "\n",
 "def search_query_prompt(question):\n",
 " return f\"\"\"\n",
 "Given the search engine above, what would you search for to answer the following question?\n",
 "\n",
 "Question: \"{question}\"\n",
 "\n",
 "Please output a JSON object in the form of {{\"query\": \"example_query\"}}\n",
 "\"\"\"\n",
 "\n",
 "\n",
 "def search_result_prompt(query):\n",
 " search_results = requests.post(\n",
 " search_url, json={\"search\": query}, headers={\"api-key\": search_key}\n",
 " ).json()\n",
 " return f\"\"\"\n",
 "\n",
 "You previously ran a search for \"{query}\" which returned the following results:\n",
 "\n",
 "{search_results}\n",
 "\n",
 "You should use the results to help you answer questions. If you don't know the answer to a question say \"I don't know\". Do not lie or hallucinate information. Be brief and mention which query you used to solve the problem. 
\n",
 "\"\"\"\n",
 "\n",
 "\n",
 "def prompt_gpt(messages):\n",
 " response = openai.ChatCompletion.create(\n",
 " engine=openai_deployment_name, messages=messages, max_tokens=None, top_p=0.95\n",
 " )\n",
 " return response[\"choices\"][0][\"message\"][\"content\"]\n",
 "\n",
 "\n",
 "def custom_chatbot(question):\n",
 " # Stage 1: ask GPT what to search for to answer the question\n",
 " query = json.loads(\n",
 " prompt_gpt(\n",
 " [\n",
 " {\"role\": \"system\", \"content\": chat_context_prompt},\n",
 " {\"role\": \"user\", \"content\": search_query_prompt(question)},\n",
 " ]\n",
 " )\n",
 " )[\"query\"]\n",
 "\n",
 " # Stage 2: answer the question using the results of that search\n",
 " return prompt_gpt(\n",
 " [\n",
 " {\"role\": \"system\", \"content\": chat_context_prompt},\n",
 " {\"role\": \"system\", \"content\": search_result_prompt(query)},\n",
 " {\"role\": \"user\", \"content\": question},\n",
 " ]\n",
 " )"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "bc4d292d-4782-4993-821a-6c55f382b23c",
 "showTitle": false,
 "title": ""
 }
 },
 "source": [
 "## 11 - Ask our chatbot a question"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 0,
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "7aeedaf1-d737-4453-baad-3aba02a3d069",
 "showTitle": false,
 "title": ""
 }
 },
 "outputs": [],
 "source": [
 "custom_chatbot(\"What did Luke Diaz buy?\")"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {
 "application/vnd.databricks.v1+cell": {
 "cellMetadata": {
 "byteLimit": 2048000,
 "rowLimit": 10000
 },
 "inputWidgets": {},
 "nuid": "61d0ed86-c961-4e18-80a8-e404ee6bc511",
 "showTitle": false,
 "title": ""
 }
 },
 "source": [
 "## 12 - A quick double-check"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 0,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {
+ "byteLimit": 2048000,
+ "rowLimit": 10000
+ },
+ "inputWidgets": {},
+ "nuid": "48fa0d23-5711-4e3e-b15b-a47e96b6dee0",
+ "showTitle": false,
+ "title": ""
+ }
+ },
 "outputs": [],
- "source": []
+ "source": [
+ "display(\n",
+ " continent_df.where(col(\"CustomerName\") == \"Luke Diaz\")\n",
+ " .select(\"Description\")\n",
+ " .distinct()\n",
+ ")"
+ ]
 }
 ],
 "metadata": {
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "language": "python",
+ "notebookMetadata": {
+ "pythonIndentUnit": 2
+ },
+ "notebookName": "CognitiveServices - Create a Multilingual Search Engine from Forms",
+ "notebookOrigID": 1242000907990110,
+ "widgets": {}
+ },
 "description": null,
 "kernelspec": {
 "display_name": "Synapse PySpark",