Skip to content

Commit

Permalink
fix: modified the search engine in the demo notebook to bing (microso…
Browse files Browse the repository at this point in the history
…ft#2013)

* modified the search engine in the demo notebook to bing

* reformatted

* Update notebooks/features/cognitive_services/CognitiveServices - LangchainTransformer.ipynb

---------

Co-authored-by: Mark Hamilton <[email protected]>
  • Loading branch information
2 people authored and JessicaXYWang committed Sep 14, 2023
1 parent 7ff4c59 commit 54f0b51
Showing 1 changed file with 56 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,11 @@
"source": [
"## Step 1: Prerequisites\n",
"\n",
"The key prerequisites for this quickstart include a working Azure OpenAI resource, and an Apache Spark cluster with SynapseML installed. We suggest creating a Synapse workspace, but an Azure Databricks, HDInsight, or Spark on Kubernetes, or even a python environment with the `pyspark` package will work. If you need to use the last component of the chain - An agent with web searching capabilities, you also need a SerpAPIKey.\n",
"The key prerequisites for this quickstart include a working Azure OpenAI resource, and an Apache Spark cluster with SynapseML installed. We suggest creating a Synapse workspace, but an Azure Databricks, HDInsight, or Spark on Kubernetes, or even a python environment with the `pyspark` package will work. \n",
"\n",
"1. An Azure OpenAI resource – request access [here](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu) before [creating a resource](https://docs.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource)\n",
"1. [Create a Synapse workspace](https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-create-workspace)\n",
"1. [Create a serverless Apache Spark pool](https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-analyze-spark#create-a-serverless-apache-spark-pool)\n",
"1. Get a SerpAPIKey from [SerpApi](https://serpapi.com/)."
"1. [Create a serverless Apache Spark pool](https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-analyze-spark#create-a-serverless-apache-spark-pool)"
]
},
{
Expand Down Expand Up @@ -77,7 +76,10 @@
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "d0642e69-1669-4b18-94a2-258af0fbcf9f",
"showTitle": false,
Expand Down Expand Up @@ -117,7 +119,7 @@
},
"outputs": [],
"source": [
"%pip install langchain openai pdf2image pdfminer.six pytesseract unstructured"
"%pip install openai langchain pdf2image pdfminer.six pytesseract unstructured"
]
},
{
Expand All @@ -140,9 +142,9 @@
"import os, openai, langchain, uuid\n",
"from langchain.llms import AzureOpenAI, OpenAI\n",
"from langchain.agents import load_tools, initialize_agent, AgentType\n",
"from langchain.chat_models import AzureChatOpenAI\n",
"from langchain.chains import TransformChain, LLMChain, SimpleSequentialChain\n",
"from langchain.document_loaders import OnlinePDFLoader\n",
"from langchain.tools.bing_search.tool import BingSearchRun, BingSearchAPIWrapper\n",
"from langchain.prompts import PromptTemplate\n",
"import pyspark.sql.functions as f\n",
"from synapse.ml.cognitive.langchain import LangchainTransformer\n",
Expand All @@ -169,7 +171,7 @@
"\n",
"`openai_api_key = \"99sj2w82o....\"`\n",
"\n",
"Note: If using SerpAPI you'll need to first [create a key](https://serpapi.com/dashboard)"
"`bing_subscription_key = \"...\"`"
]
},
{
Expand All @@ -189,19 +191,24 @@
},
"outputs": [],
"source": [
"os.environ[\"SERPAPI_API_KEY\"] = \"YOURSERPAPIKEY\"\n",
"openai_api_key = find_secret(\"openai-api-key\")\n",
"openai_api_base = \"https://synapseml-openai.openai.azure.com/\"\n",
"openai_api_version = \"2022-12-01\"\n",
"openai_api_type = \"azure\"\n",
"deployment_name = \"text-davinci-003\"\n",
"bing_search_url = \"https://api.bing.microsoft.com/v7.0/search\"\n",
"bing_subscription_key = find_secret(\"bing-search-key\")\n",
"\n",
"os.environ[\"BING_SUBSCRIPTION_KEY\"] = bing_subscription_key\n",
"os.environ[\"BING_SEARCH_URL\"] = bing_search_url\n",
"os.environ[\"OPENAI_API_TYPE\"] = openai_api_type\n",
"os.environ[\"OPENAI_API_VERSION\"] = openai_api_version\n",
"os.environ[\"OPENAI_API_BASE\"] = openai_api_base\n",
"os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
"\n",
"llm = AzureOpenAI(\n",
" deployment_name=\"text-davinci-003\",\n",
" model_name=\"text-davinci-003\",\n",
" deployment_name=deployment_name,\n",
" model_name=deployment_name,\n",
" temperature=0.1,\n",
" verbose=True,\n",
")"
Expand Down Expand Up @@ -390,7 +397,7 @@
"1. **Transform Chain**: Extract Paper Content from arxiv Link **=>**\n",
"1. **LLMChain**: Summarize the Paper, extract paper title and authors **=>**\n",
"1. **Transform Chain**: to generate the prompt **=>**\n",
"1. **Agent with Web Search Tool**: Use Web Search to find the recent papers by the first author (this part is commented out as it needs the SerpAPIKey to run successfully)"
"1. **Agent with Web Search Tool**: Use Web Search to find the recent papers by the first author"
]
},
{
Expand Down Expand Up @@ -434,7 +441,7 @@
" verbose=False,\n",
")\n",
"\n",
"paper_summarizer_template = \"\"\"You are a paper summarizer, given the paper content, it is your job to summarize the paper into a short summary, and extract authors and paper title from the paper content.\n",
"paper_summarizer_template = \"\"\"You are a paper summarizer, given the paper content, it is your job to summarize the paper into a short summary, and extract authors and paper title from the paper content.\n",
"Here is the paper content:\n",
"{paper_content}\n",
"Output:\n",
Expand All @@ -445,19 +452,27 @@
")\n",
"summarize_chain = LLMChain(llm=llm, prompt=prompt, verbose=False)\n",
"\n",
"sequential_chain = SimpleSequentialChain(\n",
" chains=[paper_content_extraction_chain, summarize_chain]\n",
"prompt_generation_chain = TransformChain(\n",
" input_variables=[\"Output\"],\n",
" output_variables=[\"prompt\"],\n",
" transform=prompt_generation,\n",
" verbose=False,\n",
")\n",
"\n",
"\"\"\"\n",
"Uncomment the following when you have a SerpAPIKey to enable the final websearch component of the chain.\n",
"\"\"\"\n",
"# prompt_generation_chain = TransformChain(input_variables=[\"Output\"], output_variables=[\"prompt\"], transform=prompt_generation, verbose=False)\n",
"# tools = load_tools([\"serpapi\"], llm=llm)\n",
"# web_search_agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)\n",
"# sequential_chain = SimpleSequentialChain(chains=[\n",
"# paper_content_extraction_chain, summarize_chain, prompt_generation_chain, web_search_agent\n",
"# ])"
"bing = BingSearchAPIWrapper(k=3)\n",
"tools = [BingSearchRun(api_wrapper=bing)]\n",
"web_search_agent = initialize_agent(\n",
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False\n",
")\n",
"\n",
"sequential_chain = SimpleSequentialChain(\n",
" chains=[\n",
" paper_content_extraction_chain,\n",
" summarize_chain,\n",
" prompt_generation_chain,\n",
" web_search_agent,\n",
" ]\n",
")"
]
},
{
Expand Down Expand Up @@ -531,8 +546,25 @@
"notebookMetadata": {
"pythonIndentUnit": 2
},
"notebookName": "LangchainTransformer",
"notebookName": "CognitiveServices - LangchainTransformer",
"widgets": {}
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 54f0b51

Please sign in to comment.