From 7070116260540a2e2d3df8e76102fb9789901399 Mon Sep 17 00:00:00 2001 From: Shuhei Iitsuka Date: Sat, 14 Dec 2024 01:02:21 +0900 Subject: [PATCH 1/3] fix: Change HEY_LLM's default location from asia-northeast1 to us-central1 (#1529) # Description --------- Co-authored-by: code-review-assist[bot] <182814678+code-review-assist[bot]@users.noreply.github.com> --- vision/use-cases/hey_llm/src/main.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vision/use-cases/hey_llm/src/main.ts b/vision/use-cases/hey_llm/src/main.ts index 4b63eaab3c3..9f83ae341a3 100644 --- a/vision/use-cases/hey_llm/src/main.ts +++ b/vision/use-cases/hey_llm/src/main.ts @@ -19,8 +19,9 @@ import type {GenerateContentResponse} from '@google-cloud/vertexai'; /** * Vertex AI location. Change this const if you want to use another location. + * us-central1 is chosen as default to currently provide the most model availability. See [Vertex AI locations documentation](https://cloud.google.com/vertex-ai/docs/general/locations) for more details. */ -const LOCATION = 'asia-northeast1'; +const LOCATION = 'us-central1'; /** * Default Gemini model to use. 
From 076464885ddf63d57d91cac830ff83f774d5c877 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Fri, 13 Dec 2024 10:06:13 -0600 Subject: [PATCH 2/3] fix: formatting --- ...tro_research_multi_agents_gemini_2_0.ipynb | 5152 +++++++++-------- .../code-execution/intro_code_execution.ipynb | 3336 ++++++----- .../real_time_rag_bank_loans_gemini_2_0.ipynb | 1115 ++-- .../real_time_rag_retail_gemini_2_0.ipynb | 3654 ++++++------ .../tutorial_langgraph_rag_agent.ipynb | 2258 ++++---- ...ning_using_gemini_on_multiple_images.ipynb | 157 +- ...upervised_finetuning_using_gemini_qa.ipynb | 183 +- 7 files changed, 7899 insertions(+), 7956 deletions(-) diff --git a/gemini/agents/research-multi-agents/intro_research_multi_agents_gemini_2_0.ipynb b/gemini/agents/research-multi-agents/intro_research_multi_agents_gemini_2_0.ipynb index 281d745e573..f6201082e09 100644 --- a/gemini/agents/research-multi-agents/intro_research_multi_agents_gemini_2_0.ipynb +++ b/gemini/agents/research-multi-agents/intro_research_multi_agents_gemini_2_0.ipynb @@ -1,2576 +1,2580 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jJZw3h2myqls" - }, - "outputs": [], - "source": [ - "# Copyright 2024 Google LLC\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qGrXr6X4yXG5" - }, - "source": [ - "# Building a Research Multi Agent System - a Design Pattern Overview with Gemini 2.0\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Google
Open in Colab\n", - "
\n", - "
\n", - " \n", - " \"Google
Open in Colab Enterprise\n", - "
\n", - "
\n", - " \n", - " \"Vertex
Open in Vertex AI Workbench\n", - "
\n", - "
\n", - " \n", - " \"GitHub
View on GitHub\n", - "
\n", - "
\n", - "\n", - "
\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KJWqTM-CS0qC" - }, - "source": [ - "Share to:\n", - "\n", - "\n", - " \"LinkedIn\n", - "\n", - "\n", - "\n", - " \"Bluesky\n", - "\n", - "\n", - "\n", - " \"X\n", - "\n", - "\n", - "\n", - " \"Reddit\n", - "\n", - "\n", - "\n", - " \"Facebook\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EnoVKOgny2ZM" - }, - "source": [ - "| | |\n", - "|-|-|\n", - "| Author(s) | [Lavi Nigam](https://github.com/lavinigam-gcp)|" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CSt0qUR2Sg61" - }, - "source": [ - "
\n", - "\n", - "⚠️ Gemini 2.0 Flash (Model ID: gemini-2.0-flash-exp) and the Google Gen AI SDK are currently experimental and output can vary ⚠️\n", - "
\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IDDZhYrClJQK" - }, - "source": [ - "## Overview\n", - "\n", - "In today's rapidly evolving technology landscape, businesses frequently need to conduct comprehensive research and analysis that spans multiple data sources, requires complex reasoning, and demands clear actionable insights. Whether it's market research, competitive analysis, urban planning, or scientific research, the challenges remain similar: how to efficiently gather, process, and synthesize information while ensuring accuracy and scalability.\n", - "\n", - "In this notebook, as a developer, you'll discover how to create intelligent agents and multi-agent systems using Vertex AI Gemini 2.0.\n", - "\n", - "\n", - "### Learning Through Implementation\n", - "\n", - "Rather than using existing frameworks, we'll build our multi-agent system from scratch. This approach offers several benefits:\n", - "\n", - "1. **Core Understanding**: Building from the ground up helps you understand the fundamental principles of multi-agent systems\n", - "2. **Design Pattern Mastery**: Learn reusable patterns that work across different domains and technologies\n", - "3. **Custom Control**: Gain the ability to fine-tune every aspect of your system\n", - "4. 
**Debugging Confidence**: Understanding the internals makes troubleshooting much more straightforward\n", - "\n", - "While there are excellent open-source frameworks available for building multi-agent systems, such as [AutoGen](https://github.com/microsoft/autogen), [CrewAI](https://github.com/crewAIInc/crewAI), [PydanticAI](https://github.com/pydantic/pydantic-ai), and [LangGraph](https://github.com/langchain-ai/langgraph), we believe that a from-scratch approach in this notebook will provide a deeper understanding of the underlying concepts and mechanics.\n", - "\n", - "The open-source frameworks offers many valuable features like conditional routing, annotated global state, checkpointing, and more.\n", - "\n", - "Once you've grasped the fundamentals from this notebook, exploring these frameworks can unlock even more advanced capabilities and streamline your development process.\n", - "\n", - "\n", - "### Key Technical Components\n", - "\n", - "Our implementation showcases essential Vertex AI ***Gemini 2.0*** capabilities:\n", - "\n", - "1. **Function Calling**: Structure agent behaviors and interactions\n", - "2. **Structured Output**: Generate consistent, validatable data\n", - "3. **Async Operations**: Handle parallel agent tasks efficiently\n", - "4. **Google Search Integration**: Ground agent reasoning in real-world data\n", - "\n", - "\n", - "### To get started, let's explore some key questions:\n", - "\n", - "* What exactly is an agent, and how does it differ from a simple LLM call?\n", - "* How can agents use tools to achieve their goals?\n", - "* And what possibilities emerge when multiple agents work together in a multi-agent system?\n", - "\n", - "\n", - "#### **LLM Execution (The Foundation)**\n", - "\n", - "Think of an LLM as a powerful prediction engine. 
Given some input text (a prompt), it predicts what comes next, generating text, translating languages, writing different kinds of creative content, and answering your questions in an informative way. However, on its own, it simply reacts to your input and provides an output. It doesn't have a sense of purpose or the ability to act independently.\n", - "\n", - "**Example:** An LLM is like a super smart travel guidebook. You ask it \"What are some popular attractions in Paris?\" and it gives you a list. It provides information but doesn't actually do anything.\n", - "\n", - "![title](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/img/simple-llm-flow.png)\n", - "\n", - "#### **Agent (LLM with a Purpose)**\n", - "\n", - "Now, imagine giving that prediction engine some goals and the ability to act on them. This is essentially what an agent is. It's an LLM wrapped with extra code that allows it to:\n", - "\n", - "* **Understand the goal:** \"Book a flight to London.\"\n", - "* **Break it down into steps:** Search for flights, compare prices, choose a date, make a booking.\n", - "* **Use tools to achieve those steps:** Access a flight booking API, a web browser, or even interact with a human.\n", - "\n", - "**Example:** An agent is like a personal travel assistant. You tell it \"Plan a trip to Paris for me next month.\" The agent uses its LLM \"brain\" to understand what that means, then uses tools like flight booking websites, hotel search engines, and even weather apps to create an itinerary.\n", - "\n", - "\n", - "![title](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/img/agent-flow.png)\n", - "\n", - "#### **Multi-Agent (Teamwork Makes the Dream Work)**\n", - "\n", - "Now, imagine several of these specialized agents working together, each with its own skills and responsibilities. That's a multi-agent system. 
They can communicate, share information, and coordinate their actions to achieve a complex goal.\n", - "\n", - "**Example:** Now imagine a team of specialized travel agents working together. One agent books the flights, another finds the perfect hotel, a third arranges tours and activities. They communicate and coordinate to create an amazing Paris trip.\n", - "\n", - "![title](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/img/multi-agent-flow.png)\n", - "\n", - "\n", - "---\n", - "\n", - "Now that you have learned the fundamentals, moving forward, you'll learn the core design patterns behind agents and multi-agent systems. We'll demonstrate its capabilities through a practical use case - Electric Vehicle (EV) infrastructure expansion analysis - while keeping the core architecture adaptable for any research-intensive application." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_ItHSSWRBG_D" - }, - "source": [ - "## Objective\n", - "\n", - "This notebook will guide you through building a research-focused multi-agent system. Here's what you'll learn:\n", - "\n", - "* **A design pattern for creating these systems:** We'll introduce a reusable structure for building multi-agent systems geared towards research tasks.\n", - "* **A practical example: EV Research Agent:** See how we applied the design pattern to create an agent specializing in Electronic Vehicle research. This agent can answer complex queries like \"EV Charging Station Expansion in [City Name]\" by planning, researching, and generating a comprehensive report.\n", - "* **Component integration and orchestration:** Understand how individual components within the agent work together seamlessly to produce the final output." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DKeB9J-EPFeQ" - }, - "source": [ - "## Our Use Case: EV Infrastructure Analysis\n", - "\n", - "To demonstrate the power and flexibility of our Research Multi-Agent system, we'll tackle a real-world challenge: analyzing optimal locations for expanding Electric Vehicle (EV) charging infrastructure in cities across the United States.\n", - "\n", - "### The Challenge\n", - "\n", - "Urban planners and EV infrastructure companies face complex decisions when expanding charging networks:\n", - "- Understanding population density and movement patterns\n", - "- Analyzing existing charging infrastructure\n", - "- Evaluating proximity to major highways and transit routes\n", - "- Considering local demographics and economic factors\n", - "- Assessing grid capacity and infrastructure readiness\n", - "\n", - "### Our Solution\n", - "\n", - "We'll build a research system that:\n", - "1. Accepts queries about specific cities or regions\n", - "2. Gathers data from multiple sources (OpenStreetMap, NREL API)\n", - "3. Analyzes infrastructure patterns and gaps\n", - "4. Generates actionable insights with citations\n", - "5. Visualizes findings for better decision-making\n", - "\n", - "Simply, A team of research agents armed with data and search engines, technical know how, coordinated with a common goal across specialized skillsets and tasks.\n", - "\n", - "\n", - "While we focus on EV infrastructure, the patterns and approaches we develop can be applied to any research-intensive domain requiring similar data gathering, analysis, and insight generation capabilities." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fAlzMGTdGhO-" - }, - "source": [ - "## Gemini 2.0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PkgSZ0wTzFU7" - }, - "source": [ - "## Overview\n", - "\n", - "[Gemini 2.0 Flash](https://cloud.google.com/vertex-ai/generative-ai/docs/gemini-v2) is a new multimodal generative ai model from the Gemini family developed by [Google DeepMind](https://deepmind.google/). It now available as an experimental preview release through the Gemini API in Vertex AI and Vertex AI Studio. The model introduces new features and enhanced core capabilities:\n", - "\n", - "- Multimodal Live API: This new API helps you create real-time vision and audio streaming applications with tool use.\n", - "- Speed and performance: Gemini 2.0 Flash is the fastest model in the industry, with a 3x improvement in time to first token (TTFT) over 1.5 Flash.\n", - "- Quality: The model maintains quality comparable to larger models like Gemini 1.5 Pro and GPT-4o.\n", - "- Improved agentic experiences: Gemini 2.0 delivers improvements to multimodal understanding, coding, complex instruction following, and function calling.\n", - "- New Modalities: Gemini 2.0 introduces native image generation and controllable text-to-speech capabilities, enabling image editing, localized artwork creation, and expressive storytelling.\n", - "- To support the new model, we're also shipping an all new SDK that supports simple migration between the Gemini Developer API and the Gemini API in Vertex AI." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "90JzDyyRzRRU" - }, - "source": [ - "## Getting Started" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qE48lDlSzf81" - }, - "source": [ - "### Install Google Gen AI SDK for Python" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "db8O7nh0zw_B" - }, - "outputs": [], - "source": [ - "# Downloading Google Gen AI SDK (experimental)\n", - "%pip install google-genai\n", - "\n", - "# Libraries required for saving markdowns as external files.\n", - "! apt install pandoc\n", - "! apt install libreoffice" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2jCwQQxO0WVx" - }, - "source": [ - "### Restart runtime\n", - "\n", - "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sDXGN26_0Y0R" - }, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "if \"google.colab\" in sys.modules:\n", - " import IPython\n", - "\n", - " app = IPython.Application.instance()\n", - " app.kernel.do_shutdown(True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IGcO4hXDzzuH" - }, - "source": [ - "### Authenticate your notebook environment (Colab only)\n", - "\n", - "If you are running this notebook on Google Colab, run the cell below to authenticate your environment." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rbm_CqxKz1b6" - }, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "if \"google.colab\" in sys.modules:\n", - " from google.colab import auth\n", - "\n", - " auth.authenticate_user()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ES6LwuBr0GSD" - }, - "source": [ - "### Connect to a generative AI API service\n", - "\n", - "Google Gen AI APIs and models including Gemini are available in the following two API services:\n", - "\n", - "- **[Google AI for Developers](https://ai.google.dev/gemini-api/docs)**: Experiment, prototype, and deploy small projects.\n", - "- **[Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/overview)**: Build enterprise-ready projects on Google Cloud.\n", - "\n", - "The Google Gen AI SDK provides a unified interface to these two API services.\n", - "\n", - "This notebook shows how to use the Google Gen AI SDK with the Gemini API in Vertex AI." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pMegXbM90JEk" - }, - "source": [ - "### Set Google Cloud project information\n", - "\n", - "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n", - "\n", - "Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4mrov4hC0OZ-" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "PROJECT_ID = \"[your-project-id]\" # @param {type: \"string\", placeholder: \"[your-project-id]\", isTemplate: true}\n", - "if not PROJECT_ID or PROJECT_ID == \"[your-project-id]\":\n", - " PROJECT_ID = str(os.environ.get(\"GOOGLE_CLOUD_PROJECT\"))\n", - "\n", - "LOCATION = os.environ.get(\"GOOGLE_CLOUD_REGION\", \"us-central1\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Mp0umgC00TMZ" - }, - "source": [ - "### Import libraries\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5FzK2TuA0SYe" - }, - "outputs": [], - "source": [ - "from google import genai\n", - "from rich import print as rich_print" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "C2iaXGH21j_U" - }, - "source": [ - "### Create Gen AI Client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "maA6ZXozxphR" - }, - "outputs": [], - "source": [ - "client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B70wFwV61uiK" - }, - "source": [ - "### Load the Gemini 2.0 Flash model\n", - "\n", - "To learn more about all [Gemini models on Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9qipKyrW1vG9" - }, - "outputs": [], - "source": [ - "MODEL_ID = \"gemini-2.0-flash-exp\" # @param {type: \"string\"}\n", - "MODEL_ID_Flash = \"gemini-1.5-flash-002\" # For control generation for grounding with google search as a Tool" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oMdna9zsJKx7" - }, - "source": [ - "To access comprehensive EV infrastructure data, you'll need an API key from the National Renewable Energy Laboratory (NREL). This key allows you to retrieve detailed information about EV charging stations, which is crucial for the `DataGatherAgent` to function correctly.\n", - "\n", - "**Here's how to get your NREL API key:**\n", - "\n", - "1. **Sign up:** Visit the [NREL Developer Network signup page](https://developer.nrel.gov/signup/).\n", - "2. **Email Confirmation:** You'll receive an email with your API key.\n", - "3. **Wait Time:** It might take some time to receive the email, so please be patient.\n", - "4. **Check Spam:** Make sure to check your spam or junk folder if you don't see the email in your inbox.\n", - "\n", - "**Enter your API key in the following code cell:**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HGKLVzF4JLJS" - }, - "outputs": [], - "source": [ - "NREL_API_KEY = \"[your-nrel-api-key]\" # @param {type: \"string\", placeholder: \"[your-nrel-api-key]\", isTemplate: true}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nscpdicqtHa8" - }, - "source": [ - "### Download utils" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IyWeX1ZqbVMZ" - }, - "source": [ - "To streamline the process and keep our focus on the design, utility, and output of the multi-agent system, we've placed the core code for the `ev_agent` in an external location. This includes both the `agent_handler` and `api_handler`, which contain the main logic. 
However, we're now downloading it to our current environment to ensure we can import the necessary functions for our analysis:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "P4cULOECtKeU" - }, - "outputs": [], - "source": [ - "!git clone https://github.com/GoogleCloudPlatform/generative-ai.git \\\n", - " && cp -r generative-ai/gemini/agents/research-multi-agents/ev_agent ./ \\\n", - " && rm -rf generative-ai" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ul4QvAKDbc6t" - }, - "source": [ - "This makes the code, including all the agents and API handlers, readily available for use. You can always explore the downloaded code and make changes as you see fit. This approach allows us to keep the notebook cleaner and focused on the higher-level aspects of the system while still providing access to the underlying implementation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "UYMXmaWny0JH" - }, - "outputs": [], - "source": [ - "# @title Saving Report (DOCX/PDF) Helper Functions\n", - "\n", - "import os\n", - "import subprocess\n", - "\n", - "\n", - "def convert_markdown(markdown_text, output_path, filename, file_type):\n", - " \"\"\"\n", - " Converts markdown text to DOCX or PDF using pandoc.\n", - "\n", - " Args:\n", - " markdown_text: The markdown text to convert.\n", - " output_path: The directory where the output file should be saved.\n", - " filename: The name of the output file (without extension).\n", - " file_type: The desired output file type ('docx' or 'pdf').\n", - "\n", - " Raises:\n", - " ValueError: If an invalid file type is specified.\n", - " FileNotFoundError: If pandoc is not found in the system's PATH.\n", - " subprocess.CalledProcessError: If the pandoc command fails.\n", - " OSError: If there is an error during file operations.\n", - " \"\"\"\n", - " os.makedirs(output_path, exist_ok=True)\n", - "\n", - " if file_type not in 
[\"docx\", \"pdf\"]:\n", - " raise ValueError(\"Invalid file type specified. Must be 'docx' or 'pdf'.\")\n", - "\n", - " docx_filepath = os.path.join(output_path, f\"{filename}.docx\")\n", - "\n", - " try:\n", - " # Check if pandoc is available\n", - " subprocess.run([\"pandoc\", \"--version\"], capture_output=True, check=True)\n", - "\n", - " # Convert Markdown to DOCX\n", - " subprocess.run(\n", - " [\"pandoc\", \"-f\", \"markdown\", \"-t\", \"docx\", \"-o\", docx_filepath],\n", - " input=markdown_text,\n", - " encoding=\"utf-8\",\n", - " check=True,\n", - " )\n", - " # print(f\"DOCX file saved to: {docx_filepath}\")\n", - "\n", - " if file_type == \"pdf\":\n", - " pdf_filepath = os.path.join(output_path, f\"{filename}.pdf\")\n", - " # Convert DOCX to PDF (using libreoffice on Colab)\n", - " subprocess.run(\n", - " [\n", - " \"libreoffice\",\n", - " \"--headless\",\n", - " \"--convert-to\",\n", - " \"pdf\",\n", - " \"--outdir\",\n", - " output_path,\n", - " docx_filepath,\n", - " ],\n", - " check=True,\n", - " )\n", - " print(f\"PDF file saved to: {pdf_filepath}\")\n", - "\n", - " # Delete the temporary DOCX file\n", - " os.remove(docx_filepath)\n", - " print(f\"Temporary DOCX file deleted: {docx_filepath}\")\n", - "\n", - " except FileNotFoundError:\n", - " raise FileNotFoundError(\n", - " \"pandoc not found. Please ensure it is installed and in your system's PATH.\"\n", - " )\n", - " except subprocess.CalledProcessError as e:\n", - " raise subprocess.CalledProcessError(\n", - " e.returncode, e.cmd, output=e.output, stderr=e.stderr\n", - " )\n", - " except OSError as e:\n", - " raise OSError(f\"Error during file operations: {e}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DUHn-HALvWuB" - }, - "source": [ - "# Gemini-Powered EV Research: A Multi-Agent Approach\n", - "\n", - "This section outlines a powerful multi-agent system designed for in-depth research on Electric Vehicle (EV) charging infrastructure in US cities. 
Built entirely using Gemini 2.0, this system showcases a streamlined approach to complex research tasks.\n", - "\n", - "**Core Idea:** We've assembled a team of specialized AI agents, each using Gemini 2.0, to automate and enhance the research process. This approach leverages Gemini's strengths in:\n", - "\n", - "* **Function Calling:** Enables agents to trigger specific actions and tools facilitating seamless interaction.\n", - "* **Structured Generations:** Ensures consistent, predictable output from each agent, simplifying inter-agent communication.\n", - "* **Async Model Calling:** Allows agents to work concurrently, significantly speeding up research.\n", - "* **Google Search Grounding:** Keeps the research grounded in real-world data and up-to-date information.\n", - "\n", - "## System Architecture\n", - "\n", - "At the heart of our system lies a clear, modular architecture, visualized below:\n", - "\n", - "![research-multi-agent-desing-pattern](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/img/multi-agent-design-pattern.png)\n", - "**Agent Breakdown:**\n", - "\n", - "The diagram illustrates the core components of our system:\n", - "\n", - "* **User (Pink):** Initiates the research process by submitting a query.\n", - "* **ExecutionAgent (Pink):** The central orchestrator, managing the workflow, handling communication between agents, and ensuring smooth execution. It also handles error recovery, such as retries and alternative execution paths, to maintain system robustness.\n", - "* **Core Research Agents (Green):**\n", - " * **PlanningAgent:** The strategist, converting the user's query into a detailed, step-by-step research plan.\n", - " * **QueryAnalysisAgent:** The interpreter, determining the specific data required and the desired output format (e.g., raw data, report, visualization).\n", - " * **DataGatherAgent:** The collector, responsible for fetching data from external APIs. 
It leverages Gemini's search grounding to ensure data accuracy and relevance. This agent is designed to be adaptable to various data sources.\n", - " * **ReportAgent:** The writer, transforming raw data into a comprehensive, well-structured report. It can incorporate search-based grounding for validation and supports multiple output formats.\n", - " * **VisualizeAgent:** The illustrator, creating clear and insightful visualizations (charts, graphs) to represent the findings. It adapts its output based on data types and user requirements.\n", - "* **Research Output (Pink):** The final, comprehensive research product delivered to the user.\n", - "* **External Systems (Blue):**\n", - " * **External APIs:** Data sources for the `DataGatherAgent`.\n", - " * **Visualization Tools:** Libraries used by the `VisualizeAgent`.\n", - " * **Document Tools:** Resources utilized by the `ReportAgent` for formatting and presentation.\n", - "\n", - "\n", - "**Benefits of the Gemini-Powered Approach:**\n", - "\n", - "* **Simplified Development:** Build the entire system using a single, powerful API – Gemini.\n", - "* **Native Functionality:** Leverage Gemini's built-in features for seamless agent interaction and consistent output.\n", - "* **Enhanced Performance:** Async model calling enables parallel processing, accelerating the research process.\n", - "* **Real-World Relevance:** Google Search grounding ensures your research is always based on the latest information.\n", - "* **Scalability and Flexibility:**\n", - " * Easily add new agents for specialized tasks (e.g., sentiment analysis of EV adoption).\n", - " * Modify existing agents to adapt to new data sources or research requirements.\n", - " * The modular design allows independent scaling of different system components." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W9mEtr_yw0vZ" - }, - "source": [ - "## Exploring the EV Agent in Action\n", - "\n", - "Now that you've seen the architecture, let's dive into the practical side and see how our EV Research Agent works. We'll explore two ways to interact with it:\n", - "\n", - "**1. The \"Black Box\" Experience: Witnessing the Magic**\n", - "\n", - "Imagine the entire multi-agent system as a single, powerful unit – the `EVAgent`. In this section, we'll treat it as a \"black box.\" You'll simply send it a research query, and watch as it works behind the scenes, delivering a comprehensive report in about 1-2 minutes.\n", - "\n", - "We'll try two exciting examples:\n", - "\n", - "* **Example 1: Basic Report Generation:** See how the agent generates a structured report with predefined sections based on your query.\n", - "* **Example 2: Google Search Enhanced Report:** Observe how the agent leverages Google Search to enrich the report with citations, deeper insights, and up-to-the-minute information.\n", - "\n", - "**2. Deconstructing the Process: A Step-by-Step Journey**\n", - "\n", - "Ready to peek under the hood? In this section, we'll dissect the agent's inner workings. You'll follow along as your query is processed through each stage of the research pipeline:\n", - "\n", - "* **Planning:** Witness how the `PlanningAgent` crafts the initial research strategy. *We'll briefly touch upon the code behind this, highlighting the input it receives and the plan it outputs, along with the data models that structure this communication.*\n", - "* **Reasoning:** See how the `QueryAnalysisAgent` determines the necessary data and output format. *Again, we'll peek at the underlying code to understand its input, output, and the data models involved.*\n", - "* **Tool Selection:** Observe how the `DataGatherAgent` chooses the right APIs and leverages Google Search. 
*We'll examine the code's role in this selection process, focusing on the data models that guide its choices.*\n", - "* **Coordination:** Understand how the `ExecutionAgent` orchestrates the entire process. *We will shed some light on the code that enables this coordination, emphasizing the data models as the communication backbone between agents.*\n", - "* **Decision-Making:** Learn how the agents make choices at each step, leading to the final output.\n", - "\n", - "You'll see firsthand how these individual steps, powered by their underlying logic and data models, contribute to the final, polished report and visualizations.\n", - "\n", - "**A Note on Code Structure:**\n", - "\n", - "To keep this exploration clear and focused, the detailed code for each agent is neatly organized in separate files. **We are choosing not to put code directly in this notebook as it will make it unnecessarily complex.** So when we go through step by step, think of each agent as a black box. We will, however briefly talk about the design pattern it follows, what the data model it uses behind the scene to produce an output. Once you understand that, you can easily refer to the code from scratch or use any open-source library to implement a similar agent. Think of them as behind-the-scenes appendices you can explore later to dive deep into the implementation details of each agent.\n", - "\n", - "**The primary goal here is to showcase the power of agent collaboration with Gemini 2.0.** You'll witness how our team of Gemini-powered agents works together seamlessly to fulfill your research requests, demonstrating the elegance and efficiency of this multi-agent approach.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4Xh-cZwQFJpz" - }, - "source": [ - "## EV Agent - The \"Black Box\" Experience:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "on2mGosD1WBp" - }, - "source": [ - "The `ExecutionAgent` is the heart of our EV infrastructure analysis system. 
Think of it as the conductor of an orchestra, coordinating a team of specialized agents to perform a comprehensive analysis based on your query.\n", - "\n", - "**Before you start:**\n", - "\n", - "* **What it does:** The `ExecutionAgent` takes your query about EV infrastructure, develops a plan, gathers relevant data, generates reports, and creates insightful visualizations.\n", - "* **How it works:** It delegates tasks to other agents (like a planning agent, data gathering agent, etc.) and manages the overall workflow.\n", - "* **What you get:** You'll receive a structured output containing the analysis plan, gathered data, a detailed report (if requested), and visualizations (if applicable).\n", - "* **Customization:** You can control the level of detail (debug mode), whether to see intermediate outputs (stage\\_output), and the type of output you desire (e.g., raw data, report, text).\n", - "\n", - "Essentially, the `ExecutionAgent` simplifies the complex process of EV infrastructure analysis, providing you with a powerful tool to gain valuable insights." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "K5O-SIBr0g7p" - }, - "outputs": [], - "source": [ - "# Importing ExecutionAgent from our agent_handler\n", - "\n", - "\n", - "from ev_agent.agent_handler.agent_01_ExecutionAgent import ExecutionAgent" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EeXrJaqTIBJv" - }, - "outputs": [], - "source": [ - "# Create the agent\n", - "\n", - "agent = ExecutionAgent.create(\n", - " client=client,\n", - " model_name=MODEL_ID_Flash, # Gemini 2.0 Flash\n", - " api_key=NREL_API_KEY,\n", - " debug=False,\n", - " stage_output=False,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YgUWghr5IG5T" - }, - "source": [ - "### Basic Report Generation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pRvAIJmD2p76" - }, - "source": [ - "In this case, we're treating the `ExecutionAgent` as a **\"black box\"**. We provide the input query (\"I want to understand the EV charging situation in Austin.\") and it will eventually deliver the final report without revealing the inner workings." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lgIT_0Wc2uPK" - }, - "source": [ - "Since we set `debug=False` and `stage_output=False` earlier, the agent is giving us some playful warnings. It's essentially saying, \"Hey, you've turned off all the visibility into the process, so you'll only see the final result! But, just so you know, there are four agents working hard behind the scenes\"." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QI6Lfz6QIek4" - }, - "outputs": [], - "source": [ - "# Execute the analysis\n", - "results = await agent.execute(\n", - " \"I want to understand the EV charging situation in Austin.\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a4I0IohK2wPn" - }, - "source": [ - "You'll notice a humorous warning: `*Deciphering your cryptic commands! It's like translating ancient hieroglyphs, but with more emojis.*` This is a subtle hint that the **QueryAnalysisAgent** is currently at work, interpreting your input query. If you ever want to peek behind the curtain, simply set `debug=True` or `stage_output=True` when creating the agent. But for now, we're embracing the black box experience and eagerly awaiting the final, comprehensive report." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fMWrJ7AyD5FW" - }, - "source": [ - "---\n", - "If you want to save the generated report for later use or sharing, you can easily convert it to PDF or DOCX format. Here's how:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Q3n4cZJj5IjH" - }, - "outputs": [], - "source": [ - "# # You can save the report as PDF or DOCX\n", - "\n", - "markdown_text = (\n", - " results[\"report\"][\"full_text\"] + \"\\n\\n\\n\" + results[\"report\"][\"citations\"]\n", - ")\n", - "\n", - "convert_markdown(\n", - " markdown_text,\n", - " output_path=\"/content/generated_report\",\n", - " filename=\"austin_normal\",\n", - " file_type=\"pdf\", # or \"docx\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZXk8CAHE5pRE" - }, - "source": [ - "This will generate a nicely formatted report file in your chosen location, ready to be viewed or shared. 
You can see an example of a pre-generated report here: [Austin Report with Sections](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/sample_reports/austin_normal.pdf)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7vb7X_RUFIPi" - }, - "source": [ - "The `results` object is a dictionary containing all the data generated from the analysis, including the `plan`, `query_analysis`, `data`, and the final `report` (with `citations`, `full_text`, and `sections`)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UFH-oLQDKAxP" - }, - "outputs": [], - "source": [ - "rich_print(\n", - " \"The result object contains all these internal data points with the reports: \",\n", - " list(results.keys()),\n", - ")\n", - "rich_print(\n", - " \"The Report contains the citations, full text of the report and individual sections: \",\n", - " list(results[\"report\"].keys()),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "R8JZGlBcGBKw" - }, - "source": [ - "We've saved the full report above, but for now, let's just look at one section to see how they're structured. This demonstrates the organized way we store information within the report.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Osqut3cyLFkx" - }, - "outputs": [], - "source": [ - "for section_name, section_text in results[\"report\"][\"sections\"].items():\n", - " if section_name == \"Infrastructure Overview\":\n", - " print(section_name)\n", - " rich_print(section_text)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JrGgCle_GsSr" - }, - "source": [ - "Let's focus on the data structure of each report section, which is crucial for developers to understand, especially in the context of our multi-agent system.\n", - "\n", - "As shown in the output, each section, like \"Infrastructure Overview,\" is represented as a `Section` object. 
This object neatly encapsulates:\n", - "\n", - "* **`title`:** The title of the section (e.g., \"Infrastructure Overview\").\n", - "* **`content`:** The main text of the section, generated by Gemini, providing a detailed analysis. It's important to note that this content is dynamically created based on the data gathered by the `DataGatherAgent` and the insights generated by the language model.\n", - "* **`citations`:** A dictionary containing `CitationData` objects. Each citation provides a `number`, `value`, `data_path`, `raw_value`, and `context`, meticulously linking claims in the content to specific data points retrieved by our `DataGatherAgent` via API calls.\n", - "* **`key_findings`:** A list of key insights extracted from the section's content.\n", - "* **`enhanced_content`:** An optional field for additional data or analysis.\n", - "\n", - "\n", - "In the normal \"Infrastructure Overview\" section, the numbers and facts presented are not manually entered; they are dynamically derived from our structured data model. This model is populated with real-world data fetched from various APIs by our dedicated `DataGatherAgent`. 
Let's see how this works with an example:\n", - "\n", - "**From the \"Infrastructure Overview\" section:**\n", - "\n", - "> \"Austin's total area encompasses 1679.20 sq km [1], with a significant portion dedicated to built areas (644.59 sq km) [1].\"\n", - "\n", - "The numbers \"1679.20\" and \"644.59\" are linked to **Citation 1**:\n", - "\n", - "```\n", - "1: CitationData(\n", - " number=1,\n", - " value='1679.20 sq km total area, 644.59 sq km built area, 42224 service roads, 476 EV charging\n", - "stations',\n", - " data_path='summary.area_metrics.total_area, summary.area_metrics.built_area,\n", - "summary.roads.service_roads, summary.parking.ev_charging',\n", - " raw_value=\"{'total_area': {'value': '1679.20', 'path': 'summary.area_metrics.total_area_sqkm', 'unit':\n", - "'sq km'}, 'built_area': {'value': '644.59', 'path': 'summary.area_metrics.built_area_sqkm', 'unit': 'sq km'},\n", - "'service_roads': {'value': '42224', 'path': 'summary.roads.service_roads', 'unit': 'roads'}, 'ev_charging':\n", - "{'value': '476', 'path': 'summary.parking.ev_charging', 'unit': 'stations'}}\",\n", - " context='Overall Austin metrics and existing EV charging station count'\n", - "),\n", - "```\n", - "\n", - "**Here's the breakdown:**\n", - "\n", - "1. **Data Source:** The `DataGatherAgent` makes API calls to sources like OpenStreetMap to gather data about Austin.\n", - "2. **Structured Data Model:** This fetched data is stored in a structured format. For example, `summary.area_metrics.total_area` is a specific field in our data model that holds Austin's total area.\n", - "3. **Citation Tracing:** Citation 1 clearly links the numbers in the text to their source in the data model. The `data_path` field shows where to find the data (e.g., `summary.area_metrics.total_area`), and the `raw_value` field reveals the exact value fetched from the API (\"1679.20\").\n", - "4. 
**Dynamic Content Generation:** When the report is generated, the system automatically pulls the relevant data from the model, based on the `data_path` specified in the citation, and inserts it into the text.\n", - "\n", - "**Why is this important?**\n", - "\n", - "* **Accuracy:** Our report is based on real data from trusted APIs, not on manual input, minimizing errors.\n", - "* **Traceability:** We can always trace the data back to its source, ensuring transparency and verifiability.\n", - "* **Automation:** The `DataGatherAgent` and our structured data model automate the data retrieval and integration process, making it efficient.\n", - "* **Consistency:** This structured approach ensures consistency across the report, as all agents use the same data model.\n", - "\n", - "In essence, the normal section demonstrates the power of our data-driven approach. The `DataGatherAgent`, our structured data model, and the `CitationData` system work together seamlessly to create a report grounded in accurate, traceable, and automatically updated information. This highlights the core strength of our multi-agent system: its ability to leverage structured data to produce reliable and insightful analysis.\n", - "\n", - "\n", - "\n", - "**Why is this data structure useful for developers and a multi-agent system?**\n", - "\n", - "This structured format promotes modularity, allowing developers to reuse sections and enabling different agents to collaborate seamlessly by contributing to specific parts of the report. The clear link between generated content and underlying data via CitationData ensures data integrity and transparency. Furthermore, the design is extensible, accommodating future growth and new types of analysis without disrupting the core structure, making it ideal for a multi-agent system." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "w7CchYhMItoe" - }, - "source": [ - "While we've focused on the report, you can also explore other parts of the `results` object. This provides a way to delve deeper into the agent's inner workings, but we'll break down each agent's role in more detail in the next section." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "926d0bb80f59" - }, - "outputs": [], - "source": [ - "# You can print the whole text of the report:\n", - "rich_print(results[\"report\"][\"full_text\"])\n", - "\n", - "# You can print the whole citations of the report:\n", - "rich_print(results[\"report\"][\"citations\"])\n", - "\n", - "# You can also check the data it has used to generate the report\n", - "rich_print(results[\"data\"])\n", - "\n", - "# If you want to see the whole plan of the agent that it executed\n", - "rich_print(results[\"plan\"])\n", - "\n", - "# If you want to see the query analysis of the agent\n", - "rich_print(results[\"query_analysis\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kiumiZy46oob" - }, - "source": [ - "---\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8JIQYmPkIL1q" - }, - "source": [ - "### Google Search Enhanced Report" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YKw5q6woJ8pt" - }, - "source": [ - "Now, let's kick it up a notch! We're going to run the analysis again with `results_grounded_plot = await agent.execute(\"\"\"I want to understand the EV charging situation in Austin. I need a report and enhance the sections of report with google. Also add some plots\"\"\")`. 
This time, we've added two new twists to our request: grounding the report sections with Google Search results and adding data plots.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "t-3L1Nz-1nPy" - }, - "source": [ - "**Note on grounding with google search as a Tool with Gemini 2.0:**\n", - "\n", - "Currently, grounding with google search as a Tool on Gemini 2.0 does not support controlled generation. While you can still perform grounding with search, the output format and structure cannot be explicitly controlled at this time. Controlled generation is important for grounding as it allows us to specify the desired format and structure of the output, ensuring that the information retrieved from web search is integrated into the report in a consistent and organized manner. In the meantime, we are utilizing the Gemini 1.5 Flash model to perform grounding with controlled generation capabilities. You can explore examples of grounding with google search as a Tool Gemini 2.0 (without controlled generation) [here](link)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rboGC6pP1EBW" - }, - "outputs": [], - "source": [ - "# Create the agent\n", - "\n", - "agent = ExecutionAgent.create(\n", - " client=client,\n", - " model_name=MODEL_ID_Flash, # Gemini 1.5 Flash\n", - " api_key=NREL_API_KEY,\n", - " debug=False,\n", - " stage_output=False,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bCZ23K5SIMMj" - }, - "outputs": [], - "source": [ - "# Execute the analysis\n", - "results_grounded_plot = await agent.execute(\n", - " \"\"\"I want to understand the EV charging situation in Austin. I need a report and enhance the sections of report with google. 
Also add some plots\"\"\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Jzz2vbC1J_Q8" - }, - "source": [ - "Just like before, you'll see the familiar playful warnings since we're still running in a \"black box\" mode. However, now you'll also notice `DEBUG` messages indicating that sections are being enhanced with new citations, for example: `DEBUG: Enhanced Executive Summary with 17 new citations`. This is where the magic happens! The agent is now smartly integrating information from Google Search to bolster the report.\n", - "\n", - "What can you expect? Not only will the report be more comprehensive and grounded in a wider range of sources, but you'll also get to see insightful visualizations of the data. This is a significant step up from the previous run, showcasing the agent's ability to dynamically adapt to our requests and provide a richer, more visually engaging analysis. Get ready to be impressed by the power of combining AI, data analysis, and web search in a single, seamless process!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "90xiSV6G6Jts" - }, - "outputs": [], - "source": [ - "# Just like before, you can save this enhanced report as a PDF or DOCX using:\n", - "\n", - "convert_markdown(\n", - " markdown_text=results_grounded_plot[\"report\"][\"combined_report\"],\n", - " output_path=\"/content/generated_report\",\n", - " filename=\"austin_grounded\",\n", - " file_type=\"pdf\", # or \"docx\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CVB0HBPP7Rgu" - }, - "source": [ - "This will generate a file with the grounded sections. 
If you're eager to see the complete report right away, you can check out the pre-generated version here: [Austin Report - Sections Grounded with Search](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/sample_reports/austin_grounded.pdf)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H1yg0GWZN17j" - }, - "source": [ - "You've seen the full, enhanced report – now let's take a closer look at how a single grounded section compares to the normal section we saw earlier. We'll examine the \"Infrastructure Overview\" section again:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4nCGfx-NMdel" - }, - "outputs": [], - "source": [ - "for section_name, section_text in results_grounded_plot[\"report\"][\"sections\"].items():\n", - " if section_name == \"Infrastructure Overview\":\n", - " print(section_name)\n", - " rich_print(section_text)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1jLTWQe0N3ZY" - }, - "source": [ - "**Here's the \"aha\" moment:** Notice how the `content` of this section is now significantly richer and more detailed. It's not just stating facts from our initial data; it's weaving in insights and information gathered from the web through Google Search. This demonstrates the power of grounding our analysis in a broader context." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uVlnk6OxQyn1" - }, - "source": [ - "Okay, let's break down how Google Search enhances the report by focusing on a specific example: **Citation 8**.\n", - "\n", - "In the grounded \"Infrastructure Overview\" section, we have:\n", - "\n", - "```\n", - " 8: CitationData(\n", - " number=8,\n", - " value=\"Report on global EV infrastructure trends and best practices. | Context: Informs strategic\n", - "recommendations for improving Austin's EV infrastructure. 
| URL: BloombergNEF\",\n", - " data_path='BloombergNEF',\n", - " raw_value='Report on global EV infrastructure trends and best practices.',\n", - " context=\"Informs strategic recommendations for improving Austin's EV infrastructure.\"\n", - " )\n", - "```\n", - "\n", - "This citation points to a report from **BloombergNEF** on global EV infrastructure trends. Now, let's see how this reference, found through Google Search, contributes to the enhanced content:\n", - "\n", - "**Original Content (Before Search):**\n", - "\n", - "> \"The existing EV charging infrastructure, while growing, needs significant expansion to meet the rising demand for EVs. Currently, there are 78 total EV charging stations [2] across the city. This number is significantly lower than other major cities with similar populations.\"\n", - "\n", - "**Enhanced Content (After Search):**\n", - "\n", - "> \"The existing EV charging infrastructure, while growing, needs significant expansion to meet the rising demand for EVs. Currently, there are 78 total EV charging stations [2] across the city. This number is significantly lower than other major cities with similar populations. **A recent study by BloombergNEF [3] highlights the need for a much higher density of charging stations to support widespread EV adoption.**\"\n", - "\n", - "**Here's the impact:**\n", - "\n", - "1. **External Validation:** The original content stated that Austin's charging station count is low compared to similar cities. The enhanced content, using the BloombergNEF report found via Google Search, adds external validation to this claim. It's no longer just an observation based on our data; it's now supported by a reputable source on global EV trends.\n", - "2. **Strategic Depth:** The BloombergNEF citation adds a layer of strategic depth. 
It's not just about the current number of stations; it connects to the broader concept of \"charging station density\" needed for \"widespread EV adoption\" – a key insight for planning Austin's EV future.\n", - "3. **Credibility Boost:** Referencing a well-known organization like BloombergNEF significantly enhances the credibility of the report. It demonstrates that our analysis is informed by industry experts and best practices.\n", - "\n", - "**In essence, Google Search, through this specific citation, helped us transform a simple observation into a well-supported, strategically relevant insight.** It demonstrates how our system leverages web knowledge to enhance the report's quality, moving beyond the limitations of our initial data and providing a more nuanced and impactful analysis. This dynamic integration of external information is a key strength of our multi-agent approach." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CZLXKarZVcQC" - }, - "source": [ - "---\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hUrIIjc6To8S" - }, - "source": [ - "Now, let's visualize the raw data that underpins our analysis. The following code will generate plots directly from the data fetched by our `DataGatherAgent` from external APIs.\n", - "\n", - "You can also check the data it has used to generate the plots\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7cd5d15ec423" - }, - "outputs": [], - "source": [ - "rich_print(results_grounded_plot[\"data\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CuELS1YtVNVA" - }, - "source": [ - "Let's explore the visualizations generated from the raw API data, which offer a deeper understanding of Austin's EV infrastructure and its urban context. The code uses the `create_comprehensive_city_analysis` function to produce a set of Plotly figures, each shedding light on different aspects of the city:\n", - "\n", - "**1. 
EV Infrastructure Overview Dashboard:**\n", - "\n", - "* **Charging Station Types:** This bar chart breaks down the number of DC Fast, Level 2, and Level 1 charging stations. For Austin, it highlights the dominance of Level 2 chargers and the relative scarcity of DC Fast chargers. This is crucial for understanding the current charging landscape and identifying potential gaps, especially for users requiring faster charging options.\n", - "* **Connector Distribution:** This pie chart reveals the types of connectors available (e.g., CCS, CHAdeMO, Tesla). By examining this chart for Austin, you can assess the compatibility of the existing infrastructure with various EV models.\n", - "* **Network Distribution:** This bar chart displays the number of charging stations associated with different networks (e.g., ChargePoint, Tesla). For Austin, it might reveal a reliance on a particular network, which could influence decisions about network diversification and partnerships.\n", - "* **Access & Payment Methods:** This bar chart shows the percentage of stations offering various access and payment methods (e.g., credit card, mobile pay, 24/7 access). In Austin's case, it can indicate the ease of use and accessibility of the charging infrastructure for different users.\n", - "\n", - "**2. Transportation Infrastructure Analysis:**\n", - "\n", - "* **Public Transport Facilities:** This section visualizes the number of bus stops, train stations, bus stations, and bike rental locations. For Austin, this data helps assess the integration of EV charging with existing public transportation, which is vital for planning intermodal hubs.\n", - "* **Road Network Distribution:** This shows the distribution of motorways, primary, secondary, and residential roads. 
Understanding Austin's road network density and types can inform decisions about optimal charging station placement along major thoroughfares.\n", - "* **Parking Facilities:** This section charts the number of surface parking lots, parking structures, street parking spaces, and designated EV charging spots. For Austin, it helps evaluate the availability of parking spaces that could potentially be equipped with EV charging.\n", - "* **EV vs. Traditional Infrastructure:** This compares the number of EV charging stations, fuel stations, car dealerships, and car repair shops. In Austin's context, it provides insights into the current balance between EV and traditional vehicle infrastructure, indicating the progress of EV adoption.\n", - "\n", - "**3. Urban Amenities and Services:**\n", - "\n", - "* **Retail and Shopping:** This visualizes the distribution of shopping centers, supermarkets, department stores, and convenience stores. For Austin, it helps identify potential locations for charging stations near high-traffic retail areas.\n", - "* **Food and Entertainment:** This section charts restaurants, cafes, bars, and fast-food outlets. Understanding the density of these amenities in Austin can guide the placement of charging stations near popular destinations.\n", - "* **Emergency Services:** This displays the number of police stations, fire stations, hospitals, and clinics. For Austin, this information can be relevant for ensuring the resilience of the EV infrastructure and planning for emergency response related to EVs.\n", - "* **Public Amenities:** This visualizes the number of post offices, banks, ATMs, and public toilets. In Austin's context, it helps assess the availability of essential services near potential charging station locations.\n", - "\n", - "**4. Area Analysis:**\n", - "\n", - "* **Area Distribution:** This pie chart shows the breakdown of Austin's total area into water, green, built, and other areas. 
It provides a quick overview of the city's land use, which can be a factor in determining suitable locations for charging infrastructure.\n", - "\n", - "**Ideally, these charts would be integrated into the report itself, providing a visual complement to the textual analysis.** However, even as standalone visualizations, they offer valuable insights for decision-making related to EV charging station expansion. For example, by examining the distribution of charging types, connector types, and network providers, along with the city's transportation infrastructure and urban amenities, stakeholders can identify strategic locations for new charging stations, optimize the mix of charging options, and ensure that the expansion aligns with the city's overall development and EV adoption trends. By correlating the density of public transportation, road networks, and parking facilities with the location of existing EV charging stations, planners can pinpoint areas where additional infrastructure is most needed. They can also consider factors such as proximity to retail centers, food and entertainment venues, and public amenities to enhance the user experience and maximize the utilization of charging stations." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7Q_MfimBMdbS" - }, - "outputs": [], - "source": [ - "print(\"\\n=== Single City Analysis ===\")\n", - "for name, fig in results_grounded_plot[\"visualizations\"][0].items():\n", - " print(f\"\\nDisplaying: {name.replace('_', ' ').title()}\")\n", - " fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YE6bl8haTPL-" - }, - "source": [ - "The `results` object is a dictionary containing all the data generated from the analysis, includes extra variables to add visualizations and `combined_report`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "K_j2N-_5Kv_D" - }, - "outputs": [], - "source": [ - "rich_print(\n", - " \"The result object contains all these internal data points with the reports: \",\n", - " list(results_grounded_plot.keys()),\n", - ")\n", - "rich_print(\n", - " \"The Report contains the combined reports, citations, full text of the report and individual sections: \",\n", - " list(results_grounded_plot[\"report\"].keys()),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Bz-FBnhJTi-5" - }, - "source": [ - "While we've focused on the report, you can also explore other parts of the `results` object. This provides a way to delve deeper into the agent's inner workings, but we'll break down each agent's role in more detail in the next section." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1192d611f92b" - }, - "outputs": [], - "source": [ - "# You can print the whole report:\n", - "rich_print(results_grounded_plot[\"report\"][\"combined_report\"])\n", - "\n", - "# You can print the whole text of the report:\n", - "rich_print(results_grounded_plot[\"report\"][\"full_text\"])\n", - "\n", - "# You can print the whole citations of the report:\n", - "rich_print(results_grounded_plot[\"report\"][\"citations\"])\n", - "\n", - "# You can also check the data it has used to generate the report\n", - "rich_print(results_grounded_plot[\"data\"])\n", - "\n", - "# If you want to see the whole plan of the agent that it executed\n", - "rich_print(results_grounded_plot[\"plan\"])\n", - "\n", - "# If you want to see the query analysis of the agent\n", - "rich_print(results_grounded_plot[\"query_analysis\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GcW6BJTq8WuT" - }, - "source": [ - "---\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jGdQeo_CRHh_" - }, - "source": [ - "## Deconstructing the Process: A Step-by-Step Journey of Agents" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ruk0__600E3e" - }, - "source": [ - "Before we delve into the inner workings of each agent, let's take a look at the overall flow of our multi-agent system. This sequence diagram provides a visual representation of how the agents interact and collaborate to process your query and generate the final output:\n", - "\n", - "![research-multi-agent-desing-pattern](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/img/ev_agent_simple.png)\n", - "\n", - "\n", - "This sequence diagram serves as a visual roadmap for understanding the flow of our multi-agent system, and you can refer back to it as we explore each agent's inner workings. 
It illustrates how agents like the `ExecutionAgent`, `PlanningAgent`, `QueryAnalysisAgent`, `DataGatherAgent`, `ReportAgent`, and `VisualizeAgent` interact and collaborate to process your query, highlighting their roles, the flow of information, and key decision points. This diagram is crucial for grasping the big picture as we delve into the specifics of each agent, starting with the `PlanningAgent`, which initiates the analysis process based on your query." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hIQNRrF9KWqL" - }, - "source": [ - "### Agent: PlanningAgent" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BHYuwLIgDHyw" - }, - "source": [ - "### Agent: PlanningAgent\n", - "\n", - "The `PlanningAgent` is the first active agent in our sequence, responsible for taking your initial query and crafting a strategic execution plan. As seen in the sequence diagram, the `ExecutionAgent` passes the user's query to the `PlanningAgent`, which then returns a structured plan. 
Let's break down its role:\n", - "\n", - "**Input:**\n", - "\n", - "* **Query:** The user's raw query about EV infrastructure (e.g., \"Analyze EV charging stations in Austin\").\n", - "* **Client:** An instance of the generative AI model client (e.g., `gemini`).\n", - "* **Model Name:** The specific model to be used (e.g., \"gemini-pro\").\n", - "* **Debug:** A boolean flag to enable/disable debug mode.\n", - "* **API Key:** The API key for external services like NREL.\n", - "\n", - "**Output:**\n", - "\n", - "* **ExecutionPlan:** A structured plan containing:\n", - " * **Query:** The original user query.\n", - " * **Timestamp:** When the plan was created.\n", - " * **Validated Query:** Result of query validation, including validity, cities mentioned, missing elements, and suggestions for improvement.\n", - " * **Enable Search:** A boolean flag indicating if enhanced search/grounding is required.\n", - " * **Steps:** A list of `PlanStep` objects, each defining a step in the execution process with details like agent name, description, input/output formats, and status.\n", - " * **Debug:** A boolean flag indicating debug status.\n", - "\n", - "This section will explore five key aspects of the `PlanningAgent`: its setup, the creation of the `ExecutionPlan`, query validation and suggestions, handling of invalid queries, and a glimpse into its internal code structure." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qIj_pscoDmQU" - }, - "source": [ - "Agent Code:\n", - "```\n", - "`/content/ev_agent/agent_handler/agent_02_PlanningAgent.py`\n", - "```\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qLxSAgN3AyyW" - }, - "source": [ - "#### Setting up and Calling the agent\n", - "\n", - "First, we need to set up and call the `PlanningAgent`. 
Here's how we do it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6ZzI1mSbJ3bt" - }, - "outputs": [], - "source": [ - "from ev_agent.agent_handler.agent_02_PlanningAgent import *" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pFrQSpP5E3Kv" - }, - "source": [ - "We start by importing the necessary `PlanningAgent` class. Then, we create an instance of the agent, providing the user's query, the client object, the model name, and setting `debug` to `False` for now. Finally, we call the `create_plan()` method to generate the execution plan. If `debug` is set to `False`, you might see a humorous warning about the complexity of plan creation, which is just a playful way to indicate that the agent is working behind the scenes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "qONvCmwqJ3Yq" - }, - "outputs": [], - "source": [ - "agent = PlanningAgent(\n", - " query=\"I want to understand the EV charging situation in austin and proper vetted information and some plot\",\n", - " client=client,\n", - " model_name=MODEL_ID_Flash,\n", - " debug=False,\n", - ")\n", - "plan = agent.create_plan()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GQjRth99ATj0" - }, - "source": [ - "#### ExecutionPlan" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7BOaeF9aGULR" - }, - "source": [ - "Now, let's examine the `ExecutionPlan` generated by the `PlanningAgent`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VGN-ZitTJ3Vz" - }, - "outputs": [], - "source": [ - "rich_print(plan)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rLe_ptdcGaf1" - }, - "source": [ - "The core of this plan lies in the `steps` list, which contains a sequence of `PlanStep` objects. 
Each `PlanStep` is defined by a structured data model, specifying:\n", - "\n", - "* **`step_id`:** A unique identifier for the step.\n", - "* **`agent_name`:** The name of the agent responsible for this step (e.g., `QueryAnalysisAgent`, `DataGatherAgent`).\n", - "* **`description`:** A brief description of the step's purpose.\n", - "* **`input_requirements`:** The data required for this step (e.g., the output of a previous step).\n", - "* **`output_format`:** The format of the data produced by this step (e.g., a specific data model like `QueryEntity` or `DataGatherAgentOutput`).\n", - "* **`status`:** The current status of the step (e.g., `PENDING`, `COMPLETED`).\n", - "* **`error`:** Any error encountered during the step (initially `None`).\n", - "* **`skip_conditions`:** Conditions under which this step should be skipped (currently `None` for all steps).\n", - "\n", - "**Leveraging Gemini's Function Calling for Planning:**\n", - "\n", - "The `PlanningAgent` intelligently determines the need for steps like visualization and enhanced search (grounding) by utilizing Gemini's function calling capabilities. It analyzes the user's query and calls specific functions (e.g., `_determine_visualization_requirement`, `_determine_search_requirement`) to decide whether these steps are required. This dynamic plan creation based on query analysis demonstrates the power of combining structured planning with advanced language model features.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "s0cyXQBfAel0" - }, - "source": [ - "#### Query Validation and Suggestions\n", - "\n", - "A crucial part of the `PlanningAgent`'s role is to validate the user's query and provide suggestions for improvement. 
Let's see how this works:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "X1AwbwP__pxh" - }, - "outputs": [], - "source": [ - "rich_print(plan.validated_query)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "v9SEDT97HNYk" - }, - "source": [ - "Here, the `PlanningAgent` has determined that the query is valid (`is_valid=True`) and has identified 'Austin' as the city of interest. It also confirms that no essential elements are missing (`missing_elements=[]`)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "53RSiAZjAjiB" - }, - "source": [ - "#### Query Suggestions\n", - "\n", - "Furthermore, the `PlanningAgent` provides suggestions to enhance the query:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rirBCWfS_5Dc" - }, - "outputs": [], - "source": [ - "rich_print(plan.validated_query.suggestions)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B2FxhYZmDA8S" - }, - "source": [ - "#### Failed Query\n", - "\n", - "What happens when the query is not valid? Let's see how the `PlanningAgent` handles such scenarios:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lad587v4J3P3" - }, - "outputs": [], - "source": [ - "agent = PlanningAgent(\n", - " query=\"I want to understand the EV charging situation in Paris and proper vetted information and some plot\",\n", - " client=client,\n", - " model_name=MODEL_ID_Flash,\n", - " debug=False,\n", - ")\n", - "plan = agent.create_plan()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uFEeB2a8IlrJ" - }, - "source": [ - "In this case, the query mentions \"Paris,\" which is not a valid city in our predefined list (in `STATE_MAPPING`). 
The `PlanningAgent` detects this and returns:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "U7YCYqosIz5C" - }, - "outputs": [], - "source": [ - "rich_print(plan.validated_query)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8BC-GeWPI3Ey" - }, - "source": [ - "The `is_valid` flag is now `False`, and the `missing_elements` indicate that a \"valid city\" is required. Importantly, the `suggestions` provide specific guidance on how to correct the query, even suggesting valid city replacements." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4INx_rCJDjtp" - }, - "outputs": [], - "source": [ - "# You can see that it disable enabled search since the query didn't ask for anything \"enhance\" or \"grounding\"\n", - "rich_print(plan.enable_search)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "I9Z11rQwDfEE" - }, - "outputs": [], - "source": [ - "# it also skipped the visualization steps, since we didn't mention that in the query\n", - "rich_print(plan.steps)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dYuYw015I8Du" - }, - "source": [ - "Since the query was invalid, the `PlanningAgent` disables the search functionality (`enable_search=False`) and creates an empty list of steps (`steps=[]`). This effectively halts the execution process, as there's no valid plan to execute. This demonstrates the agent's ability to gracefully handle invalid queries and prevent unnecessary processing." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FG322SGOGnrb" - }, - "source": [ - "### Agent: QueryAnalysisAgent" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lMeMdzKrKJAD" - }, - "source": [ - "### Agent: QueryAnalysisAgent\n", - "\n", - "The `QueryAnalysisAgent` comes right after the `PlanningAgent` in our sequence. 
Its primary role is to dissect the user's query, identify key entities, and determine the type of analysis requested. It then passes this structured information to the next agent in the pipeline.\n", - "\n", - "**Input:**\n", - "\n", - "* **Query:** The user's query about EV infrastructure, validated by the `PlanningAgent` (e.g., \"Analyze EV charging stations in Austin\").\n", - "* **Client:** An instance of the generative AI model client.\n", - "* **Model Name:** The specific model to be used (e.g., \"gemini-pro\").\n", - "\n", - "**Output:**\n", - "\n", - "* **Dictionary:** Containing:\n", - " * `status`: Whether the analysis was successful (\"success\" or \"error\").\n", - " * `entities`: A dictionary representing the extracted entities from the query, based on the `QueryEntities` data model. This includes:\n", - " * `pattern_type`: The type of analysis pattern detected (e.g., \"DISCOVERY\", \"COMPARISON\"). Although identified, these patterns are not yet used downstream in the current version but could be leveraged in future iterations.\n", - " * `cities`: A list of valid cities extracted from the query.\n", - " * `states`: A list of corresponding states for the extracted cities.\n", - " * `research_theme`: The general theme of the query (currently fixed to \"Electronic Vehicle\").\n", - " * `output_type`: The desired output type (e.g., \"Report\", \"Text\", \"Raw Data\").\n", - "\n", - "In essence, the `QueryAnalysisAgent` transforms the user's raw query into a structured format that can be easily understood and processed by the subsequent agents in the system. This section will delve into how the agent extracts these entities, handles different query patterns, and prepares the data for the next stage of the analysis." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SzUjpbDoGn8j" - }, - "outputs": [], - "source": [ - "from ev_agent.agent_handler.agent_03_QueryAnalysisAgent import *" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Uq_s_HCWMVIL" - }, - "source": [ - "Let's see how the `QueryAnalysisAgent` processes different types of queries.\n", - "We'll examine three examples:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yQj--mhuqegX" - }, - "source": [ - "#### Extraction Type 1" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dQM-sR_qMctq" - }, - "source": [ - "Here, the query asks about gaps in Austin's charging network and requests a report format. The agent successfully analyzes the query and returns:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9bYBVxCoHF_S" - }, - "outputs": [], - "source": [ - "query = \"Where are the gaps in Austin charging network? Report format please\"\n", - "query_agent = QueryAnalysisAgent(client, MODEL_ID)\n", - "agent_1_result = query_agent.analyze(query)\n", - "rich_print(agent_1_result)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zGizyMvLMfEF" - }, - "source": [ - "The agent correctly identifies the `pattern_type` as `GAPS`, extracts the city and state, and recognizes the desired `output_type` as `REPORT`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yOsBXcErqkLg" - }, - "source": [ - "#### Extraction Type 2" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gxMepfZGMkYQ" - }, - "source": [ - "In this case, the query requests raw data for Dallas. 
The agent responds with:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "aaRoYZLUHbGL" - }, - "outputs": [], - "source": [ - "query = \"Need some raw data on Dallas for Ev charging stations\"\n", - "agent_1_result = query_agent.analyze(query)\n", - "rich_print(agent_1_result)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kVWU7b4tMmnz" - }, - "source": [ - "The agent identifies the `pattern_type` as `DISCOVERY` (since it's a general inquiry), extracts the city and state, and correctly sets the `output_type` to `RAW`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "C5bWr2alqk8J" - }, - "source": [ - "#### Extraction Type 3" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ubffVGHoNMOY" - }, - "outputs": [], - "source": [ - "query = \"compare Dallas and Austin for EV Charging expansion and give me detail report.\"\n", - "agent_1_result = query_agent.analyze(query)\n", - "rich_print(agent_1_result)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cbM1lY83MvKT" - }, - "source": [ - "These examples demonstrate the `QueryAnalysisAgent`'s ability to understand different query structures, extract relevant entities, and determine the user's intent regarding the analysis type and desired output format. This structured information is then passed on to subsequent agents in the pipeline, ensuring that the analysis stays focused and aligned with the user's needs." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "i-So9XlsE4L-" - }, - "source": [ - "### Agent: DataGatherAgent" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "y4IaUnAqNSRu" - }, - "source": [ - "### Agent: DataGatherAgent\n", - "\n", - "The `DataGatherAgent` is responsible for collecting the necessary data for our analysis by interacting with external APIs. 
It takes the structured output from the `QueryAnalysisAgent` and fetches relevant information about EV infrastructure and city demographics.\n", - "\n", - "**Input:**\n", - "\n", - "* **`api_key`:** Your NREL API key to access EV infrastructure data.\n", - "* **`radius_miles`:** The radius (in miles) around each city for which to gather data.\n", - "* **`debug`:** A boolean flag to enable/disable debug mode.\n", - "\n", - "**Output:**\n", - "\n", - "* **`DataGatherAgentOutput`:** A data object containing:\n", - " * `timestamp`: When the data was gathered.\n", - " * `cities_data`: A list of `CityData` objects, one for each city in the query. Each `CityData` object may contain:\n", - " * `city`: The name of the city.\n", - " * `state`: The state abbreviation.\n", - " * `summary`: A dictionary containing general city data retrieved from the Neighborhood Summary API (e.g., population, area, etc.).\n", - " * `ev_data`: A dictionary containing EV charging station data retrieved from the EV Infrastructure Station Analysis API (e.g., number of stations, charger types, etc.).\n", - " * `error`: Any error encountered while gathering data for the city.\n", - " * `status`: The overall status of the data gathering process (\"success\" or \"error\").\n", - " * `error`: Any general error encountered during the process.\n", - "\n", - "**Functionality:**\n", - "\n", - "The `DataGatherAgent` utilizes asynchronous programming (`asyncio`) to fetch data from two different APIs concurrently for each city:\n", - "\n", - "1. **Neighborhood Summary API:** Retrieves general demographic and infrastructure data about the city.\n", - "2. **EV Infrastructure Station Analysis API:** Retrieves detailed information about EV charging stations within the specified radius.\n", - "\n", - "It handles potential errors during API calls, provides informative debug messages (if enabled), and compiles the gathered data into a structured `DataGatherAgentOutput` object. 
This agent plays a crucial role in bridging the gap between our analytical system and the real-world data needed to generate a meaningful report." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "g9NKTXKHEpkq" - }, - "outputs": [], - "source": [ - "from ev_agent.agent_handler.agent_04_DataGatherAgent import *" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nAMEg6_COMq0" - }, - "outputs": [], - "source": [ - "# The Agent hits the OpenMapStreets API and NREL Developer API to gather data for a given city that can be helpful for Analysis.\n", - "\n", - "data_gather_agent = DataGatherAgent(\n", - " api_key=NREL_API_KEY, radius_miles=100.0, debug=False\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eTVSEplfOi5Y" - }, - "source": [ - "Here, we create an instance of the `DataGatherAgent`, providing our `NREL_API_KEY`, a `radius_miles` of 100.00 miles, and setting `debug` to `True` to see detailed output." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Tnsr3EzYOBqX" - }, - "source": [ - "#### Single City" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tgpy8Q8-N9SM" - }, - "outputs": [], - "source": [ - "# Get the city from the QueryAnalysisAgent\n", - "agent_1_result = query_agent.analyze(\n", - " \"Need some raw data on Dallas for Ev charging stations\"\n", - ")\n", - "\n", - "# Get data from DataGatherAgent of the city\n", - "agent_2_result = await data_gather_agent.process(agent_1_result)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XutOgx13ZRqr" - }, - "outputs": [], - "source": [ - "print(\"Number of cities given by the agent: \", len(agent_2_result.cities_data))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Vj5qPtLJqotV" - }, - "source": [ - "##### Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "15623699cf29" - }, - "outputs": [], - "source": [ - "# You can access the complete NeighborhoodSummary here:\n", - "rich_print(\"NeighborhoodSummary - Complete \\n\", agent_2_result.cities_data[0].summary)\n", - "\n", - "\n", - "# You can access the complete EVInfraSummary here:\n", - "rich_print(\"EV Infra Summary - Complete \\n\", agent_2_result.cities_data[0].ev_data)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "P4Kfv7uHOEsA" - }, - "source": [ - "#### Multi City" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2OXo_jl-Epfz" - }, - "outputs": [], - "source": [ - "# Get the city from the QueryAnalysisAgent\n", - "agent_1_result_multi_city = query_agent.analyze(\n", - " \"compare Dallas and Austin for EV Charging expansion and give me detail report\"\n", - ")\n", - "\n", - "# Get data from DataGatherAgent of the city\n", - "agent_2_result_multi_city = await data_gather_agent.process(agent_1_result_multi_city)" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "metadata": { - "id": "B66HlEAzZL1P" - }, - "outputs": [], - "source": [ - "print(\n", - " \"Number of cities given by the agent: \", len(agent_2_result_multi_city.cities_data)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bJgYsa0pqstF" - }, - "source": [ - "##### Data - NeighborhoodSummary (OpenStreetMap - Overpass API)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wv0LiUHdsbi6" - }, - "source": [ - "This API Handler uses Nomination API and Overpass API (OpenStreetMap). You can find more details [here](https://nominatim.org/), [here](https://nominatim.org/release-docs/develop/api/Overview/), [here](https://wiki.openstreetmap.org/wiki/Overpass_API)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "we3aopWOmiZj" - }, - "outputs": [], - "source": [ - "index = 0 # 0 for Dallas, 1 for Austin\n", - "\n", - "# You can see the NeighborhoodSummary of the city\n", - "rich_print(\"City :\", agent_2_result_multi_city.cities_data[index].summary.city)\n", - "rich_print(\"State :\", agent_2_result_multi_city.cities_data[index].summary.state)\n", - "rich_print(\n", - " \"NeighborhoodSummary - Healthcare \\n\",\n", - " agent_2_result_multi_city.cities_data[index].summary.healthcare,\n", - ")\n", - "rich_print(\n", - " \"NeighborhoodSummary - Education \\n\",\n", - " agent_2_result_multi_city.cities_data[index].summary.education,\n", - ")\n", - "\n", - "# You can see the complete data and all the elements of NeighborhoodSummary:\n", - "rich_print(\n", - " \"NeighborhoodSummary - Complete \\n\",\n", - " agent_2_result_multi_city.cities_data[index].summary,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pVgA-XG2q-Q0" - }, - "source": [ - "##### Data - EVInfraSummary (NREL Developer API)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OW5LGTu2ruGf" - }, - "source": [ - "You can get more details about the API 
[here](https://developer.nrel.gov/) and [here](https://developer.nrel.gov/docs/transportation/alt-fuel-stations-v1/)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OfO4oJOvnK_o" - }, - "outputs": [], - "source": [ - "index = 0 # 0 for Dallas, 1 for Austin\n", - "\n", - "# You can see the EV Infra Summary of the city\n", - "rich_print(\n", - " \"City :\", agent_2_result_multi_city.cities_data[index].ev_data.metadata[\"city\"]\n", - ")\n", - "rich_print(\n", - " \"State :\", agent_2_result_multi_city.cities_data[index].ev_data.metadata[\"state\"]\n", - ")\n", - "rich_print(\n", - " \"EV Infra Summary - Charging Capability \\n\",\n", - " agent_2_result_multi_city.cities_data[index].ev_data.charging_capabilities,\n", - ")\n", - "rich_print(\n", - " \"EV Infra Summary - Accessibility \\n\",\n", - " agent_2_result_multi_city.cities_data[index].ev_data.accessibility,\n", - ")\n", - "\n", - "# You can see the complete data and all the elements of EV Infra Summary:\n", - "# rich_print(\"EV Infra Summary - Complete \\n\",\n", - "# agent_2_result_multi_city.cities_data[index].ev_data)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x1fu8LdFqFR8" - }, - "source": [ - "### Agent: ReportAgent" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "93F5K5cjPjAn" - }, - "source": [ - "### Agent: ReportAgent\n", - "\n", - "The `ReportAgent` takes the structured data gathered by the `DataGatherAgent` and transforms it into a comprehensive, well-formatted report. 
It's responsible for generating individual sections of the report, citing data sources appropriately, and optionally enhancing the content with information from web search.\n", - "\n", - "**Input:**\n", - "\n", - "* **`client`:** An instance of the generative AI model client.\n", - "* **`model_name`:** The specific model to be used (e.g., \"gemini-pro-1.5\").\n", - "* **`enable_search`:** A boolean flag indicating whether to enhance the report with web search results.\n", - "* **`debug`:** A boolean flag to enable/disable debug mode.\n", - "\n", - "**Output:**\n", - "\n", - "* **`Report`:** A data object containing the entire report, structured as follows:\n", - " * `city`: The name of the city.\n", - " * `state`: The state abbreviation.\n", - " * `timestamp`: When the report was generated.\n", - " * `sections`: A dictionary of `Section` objects, each representing a section of the report (e.g., \"Executive Summary\", \"Infrastructure Overview\"). Each `Section` includes:\n", - " * `title`: The section title.\n", - " * `content`: The main text content of the section.\n", - " * `citations`: A dictionary of `CitationData` objects, mapping citation numbers to their corresponding data sources.\n", - " * `key_findings`: A list of key takeaways from the section.\n", - " * `enhanced_content`: Additional content generated through web search (if enabled).\n", - " * `citations_text`: A formatted string containing all citations used in the report.\n", - " * `full_text`: The entire report content as a single string.\n", - " * `combined_report`: The full report content along with formatted citations.\n", - "\n", - "**Functionality:**\n", - "\n", - "The `ReportAgent` performs several key tasks:\n", - "\n", - "1. **Section Generation:** It generates individual report sections based on predefined templates and the gathered data, citing specific data points using a structured `CitationData` model.\n", - "2. 
**Data Mapping:** It utilizes a detailed `_prepare_data_map` function to create a structured representation of the data from the `DataGatherAgent`, making it easier to reference specific data points in the report.\n", - "3. **Asynchronous Processing:** It leverages asynchronous programming to generate multiple sections concurrently, improving efficiency.\n", - "4. **Optional Search Enhancement:** If `enable_search` is set to `True`, it can enhance each section with information retrieved from Google Search, adding citations for the newly found data. This is achieved using the `_enhance_section_with_search` method.\n", - "5. **Report Assembly:** Finally, it assembles the individual sections into a complete `Report` object, generating a formatted string representation of the entire report and its citations.\n", - "\n", - "The `ReportAgent` plays a critical role in synthesizing the raw data into a coherent, insightful, and well-supported analysis of the EV infrastructure. The following subsections will explore how this agent is used to generate reports, either for a single city with search grounding or for multiple cities without grounding." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ibu15GgxJ3G-" - }, - "outputs": [], - "source": [ - "from ev_agent.agent_handler.agent_05_ReportAgent import *" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pUiv3t3M7foo" - }, - "source": [ - "#### Single City with grounding with Google" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ihv4IAfn-5gm" - }, - "outputs": [], - "source": [ - "report_agent_single_grounded = ReportAgent(\n", - " client=client, model_name=MODEL_ID_Flash, enable_search=True, debug=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "TeqzU17t7ozN" - }, - "outputs": [], - "source": [ - "# Get the city from the QueryAnalysisAgent\n", - "agent_1_result = query_agent.analyze(\n", - " \"Need some raw data on Dallas for Ev charging stations\"\n", - ")\n", - "rich_print(agent_1_result)\n", - "\n", - "# Get data from DataGatherAgent of the city\n", - "agent_2_result = await data_gather_agent.process(agent_1_result)\n", - "\n", - "# Get the report built out using ReportAgent\n", - "reports_single_grounded = await report_agent_single_grounded.analyze(\n", - " agent_1_result, agent_2_result\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UCmgBSuU7SQC" - }, - "outputs": [], - "source": [ - "print(\n", - " \"Report is on the city: \",\n", - " reports_single_grounded.city,\n", - " \" and state: \",\n", - " reports_single_grounded.state,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "biVMqgbuAwMY" - }, - "source": [ - "Predefined/Available Section of the Reports:\n", - "\n", - "* Executive Summary\n", - "* Infrastructure Overview\n", - "* Current EV Assessment\n", - "* Demand Analysis\n", - "* Supply Analysis\n", - "* Gap Analysis\n", - "* Location Recommendations\n", - "* Implementation Strategy\n", - "\n", - "You can explore each 
section and see how grounding with Google, enhanced the section with updated text and citations." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-Q7T-w9R7hwd" - }, - "outputs": [], - "source": [ - "for section_name, section_text in reports_single_grounded.sections.items():\n", - " if section_name == \"Infrastructure Overview\":\n", - " print(section_name)\n", - " rich_print(section_text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "c3d5741c77ca" - }, - "outputs": [], - "source": [ - "# You can access other key areas of the report:\n", - "reports_single_grounded.full_text # Full text of the report - without citations\n", - "reports_single_grounded.citations_text # Full text of the citations - without text\n", - "reports_single_grounded.combined_report # Full text of the report combined with citations\n", - "reports_single_grounded.timestamp # Timestamp of report generations" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vt9f7S5v7icz" - }, - "source": [ - "#### Multi City without grounding with Google" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_Ifsrp-G-_T1" - }, - "outputs": [], - "source": [ - "report_agent_multi_city = ReportAgent(\n", - " client=client,\n", - " model_name=MODEL_ID_Flash,\n", - " enable_search=False, # you can enable grounding for both the cities if you want\n", - " debug=True,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "TpRaJx-v7kNm" - }, - "outputs": [], - "source": [ - "# Get the city from the QueryAnalysisAgent\n", - "agent_1_result_multi_city = query_agent.analyze(\n", - " \"compare Dallas and Austin for EV Charging expansion and give me detail report\"\n", - ")\n", - "rich_print(agent_1_result_multi_city)\n", - "\n", - "# Get data from DataGatherAgent of the city\n", - "agent_2_result_multi_city = await 
data_gather_agent.process(agent_1_result_multi_city)\n", - "\n", - "# Get the report built out using ReportAgent\n", - "reports_multi_city = await report_agent_multi_city.analyze(\n", - " agent_1_result_multi_city, agent_2_result_multi_city\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ph40v5rp7wPK" - }, - "outputs": [], - "source": [ - "index = 0\n", - "print(\n", - " \"Report is on the city: \",\n", - " reports_multi_city[index].city,\n", - " \" and state: \",\n", - " reports_multi_city[index].state,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zn1LtOvi7wIU" - }, - "outputs": [], - "source": [ - "index = 1\n", - "print(\n", - " \"Report is on the city: \",\n", - " reports_multi_city[index].city,\n", - " \" and state: \",\n", - " reports_multi_city[index].state,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kmU1fJFB8PTi" - }, - "outputs": [], - "source": [ - "for section_name, section_text in reports_multi_city[index].sections.items():\n", - " if section_name == \"Demand Analysis\":\n", - " print(section_name)\n", - " rich_print(section_text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "582a01dfc15d" - }, - "outputs": [], - "source": [ - "# You can also check all the sections using object\n", - "\n", - "print(\"All the sections in the report\")\n", - "for section_name, section_text in reports_multi_city[index].sections.items():\n", - " print(section_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3e0df2853b20" - }, - "outputs": [], - "source": [ - "# You can access other key areas of the report by passing appropriate indexes:\n", - "\n", - "reports_multi_city[index].full_text # Full text of the report - without citations\n", - "\n", - "reports_multi_city[index].citations_text # Full text of the citations - without 
text\n", - "\n", - "reports_multi_city[\n", - " index\n", - "].combined_report # Full text of the report combined with citations\n", - "\n", - "reports_multi_city[index].timestamp # Timestamp of report generations" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_Yd9DWWLP7T2" - }, - "source": [ - "### Agent: VisualizeAgent\n", - "\n", - "The `VisualizeAgent` is responsible for creating insightful visualizations based on the data gathered by the `DataGatherAgent`. It uses the `plotly` library to generate various charts and graphs that help to understand the EV infrastructure landscape in a more visual and intuitive manner. Although it's called an \"agent\" here, it's important to note that this is essentially a set of helper functions for creating visualizations rather than an autonomous agent with decision-making capabilities.\n", - "\n", - "**Input:**\n", - "\n", - "* **`data`:** The `DataGatherAgentOutput` object, containing structured data for one or more cities.\n", - "\n", - "**Output:**\n", - "\n", - "* A tuple containing two dictionaries:\n", - " * **`single_city_figs`:** A dictionary of `plotly` figure objects, each representing a visualization specific to a single city.\n", - " * **`comparison_figs`:** A dictionary of `plotly` figure objects, each representing a comparative visualization across multiple cities (if applicable).\n", - "\n", - "**Functionality:**\n", - "\n", - "The `VisualizeAgent` performs the following tasks:\n", - "\n", - "1. **Single City Visualizations:** It generates a set of visualizations for each city using the `create_comprehensive_city_analysis` function. These include:\n", - " * **EV Infrastructure Overview:** Bar charts showing charging station types, connector distribution, network distribution, and access & payment methods.\n", - " * **Transportation Infrastructure Analysis:** A multi-panel plot showing public transport facilities, road network distribution, parking facilities, and a comparison of EV vs. 
traditional vehicle infrastructure.\n", - " * **Urban Amenities and Services:** A multi-panel plot showing the distribution of retail and shopping centers, food and entertainment venues, emergency services, and public amenities.\n", - " * **Area Analysis:** A pie chart displaying the distribution of total area, water area, green area, and built area.\n", - "\n", - "2. **Multi-City Comparisons (if applicable):** If the input data contains information for multiple cities, it uses the `plot_multi_city_comparison` function to generate comparative visualizations. These include:\n", - " * **EV Infrastructure Comparisons:** Bar charts comparing the number of EV stations vs. fuel stations, charging station types, and EV station density across cities.\n", - " * **Transportation Infrastructure:** Bar charts comparing public transport infrastructure, road network distribution, and parking facilities across cities.\n", - " * **Area Analysis:** A bar chart comparing area distribution (total, water, green, built) across cities.\n", - " * **Urban Amenities:** A bar chart comparing the prevalence of various urban amenities (e.g., shopping centers, restaurants, hospitals) across cities.\n", - "\n", - "3. **Visualization Organization:** It organizes all generated plots into the `single_city_figs` and `comparison_figs` dictionaries, making it easy to access specific visualizations.\n", - "\n", - "The `VisualizeAgent` plays a crucial role in making the data more accessible and understandable by providing a visual representation of key metrics and trends. These visualizations can aid in identifying patterns, making comparisons, and ultimately supporting decision-making related to EV infrastructure planning and development." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "LqGemlxj-ohw" - }, - "outputs": [], - "source": [ - "from ev_agent.agent_handler.agent_06_VisualizeAgent import *" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ly_pvvoNCKr_" - }, - "source": [ - "#### Single City" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zjveHtOhGzfn" - }, - "outputs": [], - "source": [ - "single_city_figs, comparison_figs = plot_all_visualizations(agent_2_result)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SM9nbe5FHEap" - }, - "outputs": [], - "source": [ - "print(\"\\n=== Single City Analysis ===\")\n", - "for name, fig in single_city_figs.items():\n", - " print(f\"\\nDisplaying: {name.replace('_', ' ').title()}\")\n", - " fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5iF_b6oMCVBL" - }, - "source": [ - "#### Multi City" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "33fASgQGHP6d" - }, - "outputs": [], - "source": [ - "single_city_figs, comparison_figs = plot_all_visualizations(agent_2_result_multi_city)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dZuNdJeoHTVD" - }, - "outputs": [], - "source": [ - "print(\"\\n=== Multi-City Comparisons ===\")\n", - "for name, fig in comparison_figs.items():\n", - " print(f\"\\nDisplaying: {name.replace('_', ' ').title()}\")\n", - " fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "o4cW5bVtQ2Lo" - }, - "source": [ - "## Next Steps and Potential Improvements\n", - "\n", - "We've built a solid foundation for a multi-agent system that analyzes EV infrastructure. However, there's always room for improvement and expansion. Here are some potential next steps, inspired by features found in advanced multi-agent frameworks like AutoGen, CrewAI, and LangGraph:\n", - "\n", - "1. 
**Enhanced Agent Communication:** Implement dynamic inter-agent communication for iterative feedback, dynamic task allocation, and agent specialization.\n", - "2. **Sophisticated Planning:** Develop more advanced planning with conditional logic, sub-planning, and plan repair capabilities.\n", - "3. **Expanded Tool Integration:** Integrate with more APIs, databases, web scraping, and knowledge graphs to broaden the system's knowledge base.\n", - "4. **Interactive User Experience:** Allow for clarification dialogs, progress updates, interactive visualizations, and user feedback mechanisms.\n", - "5. **Robust Error Handling:** Implement comprehensive exception handling, retry mechanisms, and fallback strategies for increased reliability.\n", - "6. **Integrated Visualizations:** Incorporate visualizations directly into the generated reports for a more cohesive and engaging presentation.\n", - "7. **Agent Memory and Learning:** Introduce agent memory for caching, learning from user feedback, and potential model fine-tuning to improve performance over time.\n", - "\n", - "By implementing these enhancements, we can create a more powerful, flexible, and user-friendly multi-agent system for analyzing EV infrastructure and generating actionable insights." 
- ] - } - ], - "metadata": { - "colab": { - "name": "intro_research_multi_agents_gemini_2_0.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jJZw3h2myqls" + }, + "outputs": [], + "source": [ + "# Copyright 2024 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qGrXr6X4yXG5" + }, + "source": [ + "# Building a Research Multi Agent System - a Design Pattern Overview with Gemini 2.0\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Google
Open in Colab\n", + "
\n", + "
\n", + " \n", + " \"Google
Open in Colab Enterprise\n", + "
\n", + "
\n", + " \n", + " \"Vertex
Open in Vertex AI Workbench\n", + "
\n", + "
\n", + " \n", + " \"GitHub
View on GitHub\n", + "
\n", + "
\n", + "\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KJWqTM-CS0qC" + }, + "source": [ + "Share to:\n", + "\n", + "\n", + " \"LinkedIn\n", + "\n", + "\n", + "\n", + " \"Bluesky\n", + "\n", + "\n", + "\n", + " \"X\n", + "\n", + "\n", + "\n", + " \"Reddit\n", + "\n", + "\n", + "\n", + " \"Facebook\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EnoVKOgny2ZM" + }, + "source": [ + "| | |\n", + "|-|-|\n", + "| Author(s) | [Lavi Nigam](https://github.com/lavinigam-gcp)|" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CSt0qUR2Sg61" + }, + "source": [ + "
\n", + "\n", + "⚠️ Gemini 2.0 Flash (Model ID: gemini-2.0-flash-exp) and the Google Gen AI SDK are currently experimental and output can vary ⚠️\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IDDZhYrClJQK" + }, + "source": [ + "## Overview\n", + "\n", + "In today's rapidly evolving technology landscape, businesses frequently need to conduct comprehensive research and analysis that spans multiple data sources, requires complex reasoning, and demands clear actionable insights. Whether it's market research, competitive analysis, urban planning, or scientific research, the challenges remain similar: how to efficiently gather, process, and synthesize information while ensuring accuracy and scalability.\n", + "\n", + "In this notebook, as a developer, you'll discover how to create intelligent agents and multi-agent systems using Vertex AI Gemini 2.0.\n", + "\n", + "\n", + "### Learning Through Implementation\n", + "\n", + "Rather than using existing frameworks, we'll build our multi-agent system from scratch. This approach offers several benefits:\n", + "\n", + "1. **Core Understanding**: Building from the ground up helps you understand the fundamental principles of multi-agent systems\n", + "2. **Design Pattern Mastery**: Learn reusable patterns that work across different domains and technologies\n", + "3. **Custom Control**: Gain the ability to fine-tune every aspect of your system\n", + "4. 
**Debugging Confidence**: Understanding the internals makes troubleshooting much more straightforward\n", + "\n", + "While there are excellent open-source frameworks available for building multi-agent systems, such as [AutoGen](https://github.com/microsoft/autogen), [CrewAI](https://github.com/crewAIInc/crewAI), [PydanticAI](https://github.com/pydantic/pydantic-ai), and [LangGraph](https://github.com/langchain-ai/langgraph), we believe that a from-scratch approach in this notebook will provide a deeper understanding of the underlying concepts and mechanics.\n", + "\n", + "These open-source frameworks offer many valuable features like conditional routing, annotated global state, checkpointing, and more.\n", + "\n", + "Once you've grasped the fundamentals from this notebook, exploring these frameworks can unlock even more advanced capabilities and streamline your development process.\n", + "\n", + "\n", + "### Key Technical Components\n", + "\n", + "Our implementation showcases essential Vertex AI ***Gemini 2.0*** capabilities:\n", + "\n", + "1. **Function Calling**: Structure agent behaviors and interactions\n", + "2. **Structured Output**: Generate consistent, validatable data\n", + "3. **Async Operations**: Handle parallel agent tasks efficiently\n", + "4. **Google Search Integration**: Ground agent reasoning in real-world data\n", + "\n", + "\n", + "### To get started, let's explore some key questions:\n", + "\n", + "* What exactly is an agent, and how does it differ from a simple LLM call?\n", + "* How can agents use tools to achieve their goals?\n", + "* And what possibilities emerge when multiple agents work together in a multi-agent system?\n", + "\n", + "\n", + "#### **LLM Execution (The Foundation)**\n", + "\n", + "Think of an LLM as a powerful prediction engine. 
Given some input text (a prompt), it predicts what comes next, generating text, translating languages, writing different kinds of creative content, and answering your questions in an informative way. However, on its own, it simply reacts to your input and provides an output. It doesn't have a sense of purpose or the ability to act independently.\n", + "\n", + "**Example:** An LLM is like a super smart travel guidebook. You ask it \"What are some popular attractions in Paris?\" and it gives you a list. It provides information but doesn't actually do anything.\n", + "\n", + "![title](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/img/simple-llm-flow.png)\n", + "\n", + "#### **Agent (LLM with a Purpose)**\n", + "\n", + "Now, imagine giving that prediction engine some goals and the ability to act on them. This is essentially what an agent is. It's an LLM wrapped with extra code that allows it to:\n", + "\n", + "* **Understand the goal:** \"Book a flight to London.\"\n", + "* **Break it down into steps:** Search for flights, compare prices, choose a date, make a booking.\n", + "* **Use tools to achieve those steps:** Access a flight booking API, a web browser, or even interact with a human.\n", + "\n", + "**Example:** An agent is like a personal travel assistant. You tell it \"Plan a trip to Paris for me next month.\" The agent uses its LLM \"brain\" to understand what that means, then uses tools like flight booking websites, hotel search engines, and even weather apps to create an itinerary.\n", + "\n", + "\n", + "![title](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/img/agent-flow.png)\n", + "\n", + "#### **Multi-Agent (Teamwork Makes the Dream Work)**\n", + "\n", + "Now, imagine several of these specialized agents working together, each with its own skills and responsibilities. That's a multi-agent system. 
They can communicate, share information, and coordinate their actions to achieve a complex goal.\n", + "\n", + "**Example:** Now imagine a team of specialized travel agents working together. One agent books the flights, another finds the perfect hotel, a third arranges tours and activities. They communicate and coordinate to create an amazing Paris trip.\n", + "\n", + "![title](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/img/multi-agent-flow.png)\n", + "\n", + "\n", + "---\n", + "\n", + "Now that you have learned the fundamentals, moving forward, you'll learn the core design patterns behind agents and multi-agent systems. We'll demonstrate its capabilities through a practical use case - Electric Vehicle (EV) infrastructure expansion analysis - while keeping the core architecture adaptable for any research-intensive application." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_ItHSSWRBG_D" + }, + "source": [ + "## Objective\n", + "\n", + "This notebook will guide you through building a research-focused multi-agent system. Here's what you'll learn:\n", + "\n", + "* **A design pattern for creating these systems:** We'll introduce a reusable structure for building multi-agent systems geared towards research tasks.\n", + "* **A practical example: EV Research Agent:** See how we applied the design pattern to create an agent specializing in Electric Vehicle research. This agent can answer complex queries like \"EV Charging Station Expansion in [City Name]\" by planning, researching, and generating a comprehensive report.\n", + "* **Component integration and orchestration:** Understand how individual components within the agent work together seamlessly to produce the final output." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DKeB9J-EPFeQ" + }, + "source": [ + "## Our Use Case: EV Infrastructure Analysis\n", + "\n", + "To demonstrate the power and flexibility of our Research Multi-Agent system, we'll tackle a real-world challenge: analyzing optimal locations for expanding Electric Vehicle (EV) charging infrastructure in cities across the United States.\n", + "\n", + "### The Challenge\n", + "\n", + "Urban planners and EV infrastructure companies face complex decisions when expanding charging networks:\n", + "- Understanding population density and movement patterns\n", + "- Analyzing existing charging infrastructure\n", + "- Evaluating proximity to major highways and transit routes\n", + "- Considering local demographics and economic factors\n", + "- Assessing grid capacity and infrastructure readiness\n", + "\n", + "### Our Solution\n", + "\n", + "We'll build a research system that:\n", + "1. Accepts queries about specific cities or regions\n", + "2. Gathers data from multiple sources (OpenStreetMap, NREL API)\n", + "3. Analyzes infrastructure patterns and gaps\n", + "4. Generates actionable insights with citations\n", + "5. Visualizes findings for better decision-making\n", + "\n", + "Simply put: a team of research agents armed with data, search engines, and technical know-how, coordinating toward a common goal across specialized skill sets and tasks.\n", + "\n", + "\n", + "While we focus on EV infrastructure, the patterns and approaches we develop can be applied to any research-intensive domain requiring similar data gathering, analysis, and insight generation capabilities." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fAlzMGTdGhO-" + }, + "source": [ + "## Gemini 2.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PkgSZ0wTzFU7" + }, + "source": [ + "## Overview\n", + "\n", + "[Gemini 2.0 Flash](https://cloud.google.com/vertex-ai/generative-ai/docs/gemini-v2) is a new multimodal generative AI model from the Gemini family developed by [Google DeepMind](https://deepmind.google/). It is now available as an experimental preview release through the Gemini API in Vertex AI and Vertex AI Studio. The model introduces new features and enhanced core capabilities:\n", + "\n", + "- Multimodal Live API: This new API helps you create real-time vision and audio streaming applications with tool use.\n", + "- Speed and performance: Gemini 2.0 Flash is the fastest model in the industry, with a 3x improvement in time to first token (TTFT) over 1.5 Flash.\n", + "- Quality: The model maintains quality comparable to larger models like Gemini 1.5 Pro and GPT-4o.\n", + "- Improved agentic experiences: Gemini 2.0 delivers improvements to multimodal understanding, coding, complex instruction following, and function calling.\n", + "- New Modalities: Gemini 2.0 introduces native image generation and controllable text-to-speech capabilities, enabling image editing, localized artwork creation, and expressive storytelling.\n", + "- To support the new model, we're also shipping an all-new SDK that supports simple migration between the Gemini Developer API and the Gemini API in Vertex AI." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "90JzDyyRzRRU" + }, + "source": [ + "## Getting Started" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qE48lDlSzf81" + }, + "source": [ + "### Install Google Gen AI SDK for Python" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "db8O7nh0zw_B" + }, + "outputs": [], + "source": [ + "# Downloading Google Gen AI SDK (experimental)\n", + "%pip install google-genai\n", + "\n", + "# Libraries required for saving markdowns as external files.\n", + "! apt install pandoc\n", + "! apt install libreoffice" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2jCwQQxO0WVx" + }, + "source": [ + "### Restart runtime\n", + "\n", + "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sDXGN26_0Y0R" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "if \"google.colab\" in sys.modules:\n", + " import IPython\n", + "\n", + " app = IPython.Application.instance()\n", + " app.kernel.do_shutdown(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IGcO4hXDzzuH" + }, + "source": [ + "### Authenticate your notebook environment (Colab only)\n", + "\n", + "If you are running this notebook on Google Colab, run the cell below to authenticate your environment." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rbm_CqxKz1b6" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "if \"google.colab\" in sys.modules:\n", + " from google.colab import auth\n", + "\n", + " auth.authenticate_user()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ES6LwuBr0GSD" + }, + "source": [ + "### Connect to a generative AI API service\n", + "\n", + "Google Gen AI APIs and models including Gemini are available in the following two API services:\n", + "\n", + "- **[Google AI for Developers](https://ai.google.dev/gemini-api/docs)**: Experiment, prototype, and deploy small projects.\n", + "- **[Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/overview)**: Build enterprise-ready projects on Google Cloud.\n", + "\n", + "The Google Gen AI SDK provides a unified interface to these two API services.\n", + "\n", + "This notebook shows how to use the Google Gen AI SDK with the Gemini API in Vertex AI." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pMegXbM90JEk" + }, + "source": [ + "### Set Google Cloud project information\n", + "\n", + "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n", + "\n", + "Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4mrov4hC0OZ-" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "PROJECT_ID = \"[your-project-id]\" # @param {type: \"string\", placeholder: \"[your-project-id]\", isTemplate: true}\n", + "if not PROJECT_ID or PROJECT_ID == \"[your-project-id]\":\n", + " PROJECT_ID = str(os.environ.get(\"GOOGLE_CLOUD_PROJECT\"))\n", + "\n", + "LOCATION = os.environ.get(\"GOOGLE_CLOUD_REGION\", \"us-central1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Mp0umgC00TMZ" + }, + "source": [ + "### Import libraries\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5FzK2TuA0SYe" + }, + "outputs": [], + "source": [ + "from google import genai\n", + "from rich import print as rich_print" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C2iaXGH21j_U" + }, + "source": [ + "### Create Gen AI Client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "maA6ZXozxphR" + }, + "outputs": [], + "source": [ + "client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B70wFwV61uiK" + }, + "source": [ + "### Load the Gemini 2.0 Flash model\n", + "\n", + "To learn more about all [Gemini models on Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9qipKyrW1vG9" + }, + "outputs": [], + "source": [ + "MODEL_ID = \"gemini-2.0-flash-exp\" # @param {type: \"string\"}\n", + "MODEL_ID_Flash = \"gemini-1.5-flash-002\" # For control generation for grounding with google search as a Tool" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oMdna9zsJKx7" + }, + "source": [ + "To access comprehensive EV infrastructure data, you'll need an API key from the National Renewable Energy Laboratory (NREL). This key allows you to retrieve detailed information about EV charging stations, which is crucial for the `DataGatherAgent` to function correctly.\n", + "\n", + "**Here's how to get your NREL API key:**\n", + "\n", + "1. **Sign up:** Visit the [NREL Developer Network signup page](https://developer.nrel.gov/signup/).\n", + "2. **Email Confirmation:** You'll receive an email with your API key.\n", + "3. **Wait Time:** It might take some time to receive the email, so please be patient.\n", + "4. **Check Spam:** Make sure to check your spam or junk folder if you don't see the email in your inbox.\n", + "\n", + "**Enter your API key in the following code cell:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HGKLVzF4JLJS" + }, + "outputs": [], + "source": [ + "NREL_API_KEY = \"[your-nrel-api-key]\" # @param {type: \"string\", placeholder: \"[your-nrel-api-key]\", isTemplate: true}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nscpdicqtHa8" + }, + "source": [ + "### Download utils" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IyWeX1ZqbVMZ" + }, + "source": [ + "To streamline the process and keep our focus on the design, utility, and output of the multi-agent system, we've placed the core code for the `ev_agent` in an external location. This includes both the `agent_handler` and `api_handler`, which contain the main logic. 
However, we're now downloading it to our current environment to ensure we can import the necessary functions for our analysis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P4cULOECtKeU" + }, + "outputs": [], + "source": [ + "!git clone https://github.com/GoogleCloudPlatform/generative-ai.git \\\n", + " && cp -r generative-ai/gemini/agents/research-multi-agents/ev_agent ./ \\\n", + " && rm -rf generative-ai" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ul4QvAKDbc6t" + }, + "source": [ + "This makes the code, including all the agents and API handlers, readily available for use. You can always explore the downloaded code and make changes as you see fit. This approach allows us to keep the notebook cleaner and focused on the higher-level aspects of the system while still providing access to the underlying implementation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "UYMXmaWny0JH" + }, + "outputs": [], + "source": [ + "# @title Saving Report (DOCX/PDF) Helper Functions\n", + "\n", + "import os\n", + "import subprocess\n", + "\n", + "\n", + "def convert_markdown(markdown_text, output_path, filename, file_type):\n", + " \"\"\"\n", + " Converts markdown text to DOCX or PDF using pandoc.\n", + "\n", + " Args:\n", + " markdown_text: The markdown text to convert.\n", + " output_path: The directory where the output file should be saved.\n", + " filename: The name of the output file (without extension).\n", + " file_type: The desired output file type ('docx' or 'pdf').\n", + "\n", + " Raises:\n", + " ValueError: If an invalid file type is specified.\n", + " FileNotFoundError: If pandoc is not found in the system's PATH.\n", + " subprocess.CalledProcessError: If the pandoc command fails.\n", + " OSError: If there is an error during file operations.\n", + " \"\"\"\n", + " os.makedirs(output_path, exist_ok=True)\n", + "\n", + " if file_type not in 
[\"docx\", \"pdf\"]:\n", + " raise ValueError(\"Invalid file type specified. Must be 'docx' or 'pdf'.\")\n", + "\n", + " docx_filepath = os.path.join(output_path, f\"{filename}.docx\")\n", + "\n", + " try:\n", + " # Check if pandoc is available\n", + " subprocess.run([\"pandoc\", \"--version\"], capture_output=True, check=True)\n", + "\n", + " # Convert Markdown to DOCX\n", + " subprocess.run(\n", + " [\"pandoc\", \"-f\", \"markdown\", \"-t\", \"docx\", \"-o\", docx_filepath],\n", + " input=markdown_text,\n", + " encoding=\"utf-8\",\n", + " check=True,\n", + " )\n", + " # print(f\"DOCX file saved to: {docx_filepath}\")\n", + "\n", + " if file_type == \"pdf\":\n", + " pdf_filepath = os.path.join(output_path, f\"{filename}.pdf\")\n", + " # Convert DOCX to PDF (using libreoffice on Colab)\n", + " subprocess.run(\n", + " [\n", + " \"libreoffice\",\n", + " \"--headless\",\n", + " \"--convert-to\",\n", + " \"pdf\",\n", + " \"--outdir\",\n", + " output_path,\n", + " docx_filepath,\n", + " ],\n", + " check=True,\n", + " )\n", + " print(f\"PDF file saved to: {pdf_filepath}\")\n", + "\n", + " # Delete the temporary DOCX file\n", + " os.remove(docx_filepath)\n", + " print(f\"Temporary DOCX file deleted: {docx_filepath}\")\n", + "\n", + " except FileNotFoundError:\n", + " raise FileNotFoundError(\n", + " \"pandoc not found. Please ensure it is installed and in your system's PATH.\"\n", + " )\n", + " except subprocess.CalledProcessError as e:\n", + " raise subprocess.CalledProcessError(\n", + " e.returncode, e.cmd, output=e.output, stderr=e.stderr\n", + " )\n", + " except OSError as e:\n", + " raise OSError(f\"Error during file operations: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DUHn-HALvWuB" + }, + "source": [ + "# Gemini-Powered EV Research: A Multi-Agent Approach\n", + "\n", + "This section outlines a powerful multi-agent system designed for in-depth research on Electric Vehicle (EV) charging infrastructure in US cities. 
Built entirely using Gemini 2.0, this system showcases a streamlined approach to complex research tasks.\n", + "\n", + "**Core Idea:** We've assembled a team of specialized AI agents, each using Gemini 2.0, to automate and enhance the research process. This approach leverages Gemini's strengths in:\n", + "\n", + "* **Function Calling:** Enables agents to trigger specific actions and tools facilitating seamless interaction.\n", + "* **Structured Generations:** Ensures consistent, predictable output from each agent, simplifying inter-agent communication.\n", + "* **Async Model Calling:** Allows agents to work concurrently, significantly speeding up research.\n", + "* **Google Search Grounding:** Keeps the research grounded in real-world data and up-to-date information.\n", + "\n", + "## System Architecture\n", + "\n", + "At the heart of our system lies a clear, modular architecture, visualized below:\n", + "\n", + "![research-multi-agent-desing-pattern](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/img/multi-agent-design-pattern.png)\n", + "**Agent Breakdown:**\n", + "\n", + "The diagram illustrates the core components of our system:\n", + "\n", + "* **User (Pink):** Initiates the research process by submitting a query.\n", + "* **ExecutionAgent (Pink):** The central orchestrator, managing the workflow, handling communication between agents, and ensuring smooth execution. It also handles error recovery, such as retries and alternative execution paths, to maintain system robustness.\n", + "* **Core Research Agents (Green):**\n", + " * **PlanningAgent:** The strategist, converting the user's query into a detailed, step-by-step research plan.\n", + " * **QueryAnalysisAgent:** The interpreter, determining the specific data required and the desired output format (e.g., raw data, report, visualization).\n", + " * **DataGatherAgent:** The collector, responsible for fetching data from external APIs. 
It leverages Gemini's search grounding to ensure data accuracy and relevance. This agent is designed to be adaptable to various data sources.\n", + " * **ReportAgent:** The writer, transforming raw data into a comprehensive, well-structured report. It can incorporate search-based grounding for validation and supports multiple output formats.\n", + " * **VisualizeAgent:** The illustrator, creating clear and insightful visualizations (charts, graphs) to represent the findings. It adapts its output based on data types and user requirements.\n", + "* **Research Output (Pink):** The final, comprehensive research product delivered to the user.\n", + "* **External Systems (Blue):**\n", + " * **External APIs:** Data sources for the `DataGatherAgent`.\n", + " * **Visualization Tools:** Libraries used by the `VisualizeAgent`.\n", + " * **Document Tools:** Resources utilized by the `ReportAgent` for formatting and presentation.\n", + "\n", + "\n", + "**Benefits of the Gemini-Powered Approach:**\n", + "\n", + "* **Simplified Development:** Build the entire system using a single, powerful API – Gemini.\n", + "* **Native Functionality:** Leverage Gemini's built-in features for seamless agent interaction and consistent output.\n", + "* **Enhanced Performance:** Async model calling enables parallel processing, accelerating the research process.\n", + "* **Real-World Relevance:** Google Search grounding ensures your research is always based on the latest information.\n", + "* **Scalability and Flexibility:**\n", + " * Easily add new agents for specialized tasks (e.g., sentiment analysis of EV adoption).\n", + " * Modify existing agents to adapt to new data sources or research requirements.\n", + " * The modular design allows independent scaling of different system components." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W9mEtr_yw0vZ" + }, + "source": [ + "## Exploring the EV Agent in Action\n", + "\n", + "Now that you've seen the architecture, let's dive into the practical side and see how our EV Research Agent works. We'll explore two ways to interact with it:\n", + "\n", + "**1. The \"Black Box\" Experience: Witnessing the Magic**\n", + "\n", + "Imagine the entire multi-agent system as a single, powerful unit – the `EVAgent`. In this section, we'll treat it as a \"black box.\" You'll simply send it a research query, and watch as it works behind the scenes, delivering a comprehensive report in about 1-2 minutes.\n", + "\n", + "We'll try two exciting examples:\n", + "\n", + "* **Example 1: Basic Report Generation:** See how the agent generates a structured report with predefined sections based on your query.\n", + "* **Example 2: Google Search Enhanced Report:** Observe how the agent leverages Google Search to enrich the report with citations, deeper insights, and up-to-the-minute information.\n", + "\n", + "**2. Deconstructing the Process: A Step-by-Step Journey**\n", + "\n", + "Ready to peek under the hood? In this section, we'll dissect the agent's inner workings. You'll follow along as your query is processed through each stage of the research pipeline:\n", + "\n", + "* **Planning:** Witness how the `PlanningAgent` crafts the initial research strategy. *We'll briefly touch upon the code behind this, highlighting the input it receives and the plan it outputs, along with the data models that structure this communication.*\n", + "* **Reasoning:** See how the `QueryAnalysisAgent` determines the necessary data and output format. *Again, we'll peek at the underlying code to understand its input, output, and the data models involved.*\n", + "* **Tool Selection:** Observe how the `DataGatherAgent` chooses the right APIs and leverages Google Search. 
*We'll examine the code's role in this selection process, focusing on the data models that guide its choices.*\n", + "* **Coordination:** Understand how the `ExecutionAgent` orchestrates the entire process. *We will shed some light on the code that enables this coordination, emphasizing the data models as the communication backbone between agents.*\n", + "* **Decision-Making:** Learn how the agents make choices at each step, leading to the final output.\n", + "\n", + "You'll see firsthand how these individual steps, powered by their underlying logic and data models, contribute to the final, polished report and visualizations.\n", + "\n", + "**A Note on Code Structure:**\n", + "\n", + "To keep this exploration clear and focused, the detailed code for each agent is neatly organized in separate files. **We are choosing not to put code directly in this notebook as it will make it unnecessarily complex.** So when we go through step by step, think of each agent as a black box. We will, however, briefly talk about the design pattern each agent follows and the data model it uses behind the scenes to produce its output. Once you understand that, you can easily refer to the code, write your own from scratch, or use any open-source library to implement a similar agent. Think of them as behind-the-scenes appendices you can explore later to dive deep into the implementation details of each agent.\n", + "\n", + "**The primary goal here is to showcase the power of agent collaboration with Gemini 2.0.** You'll witness how our team of Gemini-powered agents works together seamlessly to fulfill your research requests, demonstrating the elegance and efficiency of this multi-agent approach.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4Xh-cZwQFJpz" + }, + "source": [ + "## EV Agent - The \"Black Box\" Experience:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "on2mGosD1WBp" + }, + "source": [ + "The `ExecutionAgent` is the heart of our EV infrastructure analysis system. 
Think of it as the conductor of an orchestra, coordinating a team of specialized agents to perform a comprehensive analysis based on your query.\n", + "\n", + "**Before you start:**\n", + "\n", + "* **What it does:** The `ExecutionAgent` takes your query about EV infrastructure, develops a plan, gathers relevant data, generates reports, and creates insightful visualizations.\n", + "* **How it works:** It delegates tasks to other agents (like a planning agent, data gathering agent, etc.) and manages the overall workflow.\n", + "* **What you get:** You'll receive a structured output containing the analysis plan, gathered data, a detailed report (if requested), and visualizations (if applicable).\n", + "* **Customization:** You can control the level of detail (debug mode), whether to see intermediate outputs (stage\\_output), and the type of output you desire (e.g., raw data, report, text).\n", + "\n", + "Essentially, the `ExecutionAgent` simplifies the complex process of EV infrastructure analysis, providing you with a powerful tool to gain valuable insights." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K5O-SIBr0g7p" + }, + "outputs": [], + "source": [ + "# Importing ExecutionAgent from our agent_handler\n", + "\n", + "\n", + "from ev_agent.agent_handler.agent_01_ExecutionAgent import ExecutionAgent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EeXrJaqTIBJv" + }, + "outputs": [], + "source": [ + "# Create the agent\n", + "\n", + "agent = ExecutionAgent.create(\n", + " client=client,\n", + " model_name=MODEL_ID_Flash, # Gemini 2.0 Flash\n", + " api_key=NREL_API_KEY,\n", + " debug=False,\n", + " stage_output=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YgUWghr5IG5T" + }, + "source": [ + "### Basic Report Generation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pRvAIJmD2p76" + }, + "source": [ + "In this case, we're treating the `ExecutionAgent` as a **\"black box\"**. We provide the input query (\"I want to understand the EV charging situation in Austin.\") and it will eventually deliver the final report without revealing the inner workings." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lgIT_0Wc2uPK" + }, + "source": [ + "Since we set `debug=False` and `stage_output=False` earlier, the agent is giving us some playful warnings. It's essentially saying, \"Hey, you've turned off all the visibility into the process, so you'll only see the final result! But, just so you know, there are four agents working hard behind the scenes\"." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QI6Lfz6QIek4" + }, + "outputs": [], + "source": [ + "# Execute the analysis\n", + "results = await agent.execute(\n", + " \"I want to understand the EV charging situation in Austin.\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a4I0IohK2wPn" + }, + "source": [ + "You'll notice a humorous warning: `*Deciphering your cryptic commands! It's like translating ancient hieroglyphs, but with more emojis.*` This is a subtle hint that the **QueryAnalysisAgent** is currently at work, interpreting your input query. If you ever want to peek behind the curtain, simply set `debug=True` or `stage_output=True` when creating the agent. But for now, we're embracing the black box experience and eagerly awaiting the final, comprehensive report." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fMWrJ7AyD5FW" + }, + "source": [ + "---\n", + "If you want to save the generated report for later use or sharing, you can easily convert it to PDF or DOCX format. Here's how:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q3n4cZJj5IjH" + }, + "outputs": [], + "source": [ + "# # You can save the report as PDF or DOCX\n", + "\n", + "markdown_text = (\n", + " results[\"report\"][\"full_text\"] + \"\\n\\n\\n\" + results[\"report\"][\"citations\"]\n", + ")\n", + "\n", + "convert_markdown(\n", + " markdown_text,\n", + " output_path=\"/content/generated_report\",\n", + " filename=\"austin_normal\",\n", + " file_type=\"pdf\", # or \"docx\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZXk8CAHE5pRE" + }, + "source": [ + "This will generate a nicely formatted report file in your chosen location, ready to be viewed or shared. 
You can see an example of a pre-generated report here: [Austin Report with Sections](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/sample_reports/austin_normal.pdf)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7vb7X_RUFIPi" + }, + "source": [ + "The `results` object is a dictionary containing all the data generated from the analysis, including the `plan`, `query_analysis`, `data`, and the final `report` (with `citations`, `full_text`, and `sections`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UFH-oLQDKAxP" + }, + "outputs": [], + "source": [ + "rich_print(\n", + " \"The result object contains all these internal data points with the reports: \",\n", + " list(results.keys()),\n", + ")\n", + "rich_print(\n", + " \"The Report contains the citations, full text of the report and individual sections: \",\n", + " list(results[\"report\"].keys()),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R8JZGlBcGBKw" + }, + "source": [ + "We've saved the full report above, but for now, let's just look at one section to see how they're structured. This demonstrates the organized way we store information within the report.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Osqut3cyLFkx" + }, + "outputs": [], + "source": [ + "for section_name, section_text in results[\"report\"][\"sections\"].items():\n", + " if section_name == \"Infrastructure Overview\":\n", + " print(section_name)\n", + " rich_print(section_text)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JrGgCle_GsSr" + }, + "source": [ + "Let's focus on the data structure of each report section, which is crucial for developers to understand, especially in the context of our multi-agent system.\n", + "\n", + "As shown in the output, each section, like \"Infrastructure Overview,\" is represented as a `Section` object. 
This object neatly encapsulates:\n", + "\n", + "* **`title`:** The title of the section (e.g., \"Infrastructure Overview\").\n", + "* **`content`:** The main text of the section, generated by Gemini, providing a detailed analysis. It's important to note that this content is dynamically created based on the data gathered by the `DataGatherAgent` and the insights generated by the language model.\n", + "* **`citations`:** A dictionary containing `CitationData` objects. Each citation provides a `number`, `value`, `data_path`, `raw_value`, and `context`, meticulously linking claims in the content to specific data points retrieved by our `DataGatherAgent` via API calls.\n", + "* **`key_findings`:** A list of key insights extracted from the section's content.\n", + "* **`enhanced_content`:** An optional field for additional data or analysis.\n", + "\n", + "\n", + "In the normal \"Infrastructure Overview\" section, the numbers and facts presented are not manually entered; they are dynamically derived from our structured data model. This model is populated with real-world data fetched from various APIs by our dedicated `DataGatherAgent`. 
Let's see how this works with an example:\n", + "\n", + "**From the \"Infrastructure Overview\" section:**\n", + "\n", + "> \"Austin's total area encompasses 1679.20 sq km [1], with a significant portion dedicated to built areas (644.59 sq km) [1].\"\n", + "\n", + "The numbers \"1679.20\" and \"644.59\" are linked to **Citation 1**:\n", + "\n", + "```\n", + "1: CitationData(\n", + " number=1,\n", + " value='1679.20 sq km total area, 644.59 sq km built area, 42224 service roads, 476 EV charging\n", + "stations',\n", + " data_path='summary.area_metrics.total_area, summary.area_metrics.built_area,\n", + "summary.roads.service_roads, summary.parking.ev_charging',\n", + " raw_value=\"{'total_area': {'value': '1679.20', 'path': 'summary.area_metrics.total_area_sqkm', 'unit':\n", + "'sq km'}, 'built_area': {'value': '644.59', 'path': 'summary.area_metrics.built_area_sqkm', 'unit': 'sq km'},\n", + "'service_roads': {'value': '42224', 'path': 'summary.roads.service_roads', 'unit': 'roads'}, 'ev_charging':\n", + "{'value': '476', 'path': 'summary.parking.ev_charging', 'unit': 'stations'}}\",\n", + " context='Overall Austin metrics and existing EV charging station count'\n", + "),\n", + "```\n", + "\n", + "**Here's the breakdown:**\n", + "\n", + "1. **Data Source:** The `DataGatherAgent` makes API calls to sources like OpenStreetMap to gather data about Austin.\n", + "2. **Structured Data Model:** This fetched data is stored in a structured format. For example, `summary.area_metrics.total_area` is a specific field in our data model that holds Austin's total area.\n", + "3. **Citation Tracing:** Citation 1 clearly links the numbers in the text to their source in the data model. The `data_path` field shows where to find the data (e.g., `summary.area_metrics.total_area`), and the `raw_value` field reveals the exact value fetched from the API (\"1679.20\").\n", + "4. 
**Dynamic Content Generation:** When the report is generated, the system automatically pulls the relevant data from the model, based on the `data_path` specified in the citation, and inserts it into the text.\n", + "\n", + "**Why is this important?**\n", + "\n", + "* **Accuracy:** Our report is based on real data from trusted APIs, not on manual input, minimizing errors.\n", + "* **Traceability:** We can always trace the data back to its source, ensuring transparency and verifiability.\n", + "* **Automation:** The `DataGatherAgent` and our structured data model automate the data retrieval and integration process, making it efficient.\n", + "* **Consistency:** This structured approach ensures consistency across the report, as all agents use the same data model.\n", + "\n", + "In essence, the normal section demonstrates the power of our data-driven approach. The `DataGatherAgent`, our structured data model, and the `CitationData` system work together seamlessly to create a report grounded in accurate, traceable, and automatically updated information. This highlights the core strength of our multi-agent system: its ability to leverage structured data to produce reliable and insightful analysis.\n", + "\n", + "\n", + "\n", + "**Why is this data structure useful for developers and a multi-agent system?**\n", + "\n", + "This structured format promotes modularity, allowing developers to reuse sections and enabling different agents to collaborate seamlessly by contributing to specific parts of the report. The clear link between generated content and underlying data via CitationData ensures data integrity and transparency. Furthermore, the design is extensible, accommodating future growth and new types of analysis without disrupting the core structure, making it ideal for a multi-agent system." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w7CchYhMItoe" + }, + "source": [ + "While we've focused on the report, you can also explore other parts of the `results` object. This provides a way to delve deeper into the agent's inner workings, but we'll break down each agent's role in more detail in the next section." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "926d0bb80f59" + }, + "outputs": [], + "source": [ + "# You can print the whole text of the report:\n", + "rich_print(results[\"report\"][\"full_text\"])\n", + "\n", + "# You can print the whole citations of the report:\n", + "rich_print(results[\"report\"][\"citations\"])\n", + "\n", + "# You can also check the data it has used to generate the report\n", + "rich_print(results[\"data\"])\n", + "\n", + "# If you want to see the whole plan of the agent that it executed\n", + "rich_print(results[\"plan\"])\n", + "\n", + "# If you want to see the query analysis of the agent\n", + "rich_print(results[\"query_analysis\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kiumiZy46oob" + }, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8JIQYmPkIL1q" + }, + "source": [ + "### Google Search Enhanced Report" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YKw5q6woJ8pt" + }, + "source": [ + "Now, let's kick it up a notch! We're going to run the analysis again with `results_grounded_plot = await agent.execute(\"\"\"I want to understand the EV charging situation in Austin. I need a report and enhance the sections of report with google. Also add some plots\"\"\")`. 
This time, we've added two new twists to our request: grounding the report sections with Google Search results and adding data plots.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t-3L1Nz-1nPy" + }, + "source": [ + "**Note on grounding with Google Search as a tool in Gemini 2.0:**\n", + "\n", + "Currently, grounding with Google Search as a tool in Gemini 2.0 does not support controlled generation. While you can still perform grounding with search, the output format and structure cannot be explicitly controlled at this time. Controlled generation is important for grounding as it allows us to specify the desired format and structure of the output, ensuring that the information retrieved from web search is integrated into the report in a consistent and organized manner. In the meantime, we are utilizing the Gemini 1.5 Flash model to perform grounding with controlled generation capabilities. You can explore examples of grounding with Google Search as a tool in Gemini 2.0 (without controlled generation) [here](link)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rboGC6pP1EBW" + }, + "outputs": [], + "source": [ + "# Create the agent\n", + "\n", + "agent = ExecutionAgent.create(\n", + " client=client,\n", + " model_name=MODEL_ID_Flash, # Gemini 1.5 Flash\n", + " api_key=NREL_API_KEY,\n", + " debug=False,\n", + " stage_output=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bCZ23K5SIMMj" + }, + "outputs": [], + "source": [ + "# Execute the analysis\n", + "results_grounded_plot = await agent.execute(\n", + " \"\"\"I want to understand the EV charging situation in Austin. I need a report and enhance the sections of report with google. 
Also add some plots\"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Jzz2vbC1J_Q8" + }, + "source": [ + "Just like before, you'll see the familiar playful warnings since we're still running in a \"black box\" mode. However, now you'll also notice `DEBUG` messages indicating that sections are being enhanced with new citations, for example: `DEBUG: Enhanced Executive Summary with 17 new citations`. This is where the magic happens! The agent is now smartly integrating information from Google Search to bolster the report.\n", + "\n", + "What can you expect? Not only will the report be more comprehensive and grounded in a wider range of sources, but you'll also get to see insightful visualizations of the data. This is a significant step up from the previous run, showcasing the agent's ability to dynamically adapt to our requests and provide a richer, more visually engaging analysis. Get ready to be impressed by the power of combining AI, data analysis, and web search in a single, seamless process!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "90xiSV6G6Jts" + }, + "outputs": [], + "source": [ + "# Just like before, you can save this enhanced report as a PDF or DOCX using:\n", + "\n", + "convert_markdown(\n", + " markdown_text=results_grounded_plot[\"report\"][\"combined_report\"],\n", + " output_path=\"/content/generated_report\",\n", + " filename=\"austin_grounded\",\n", + " file_type=\"pdf\", # or \"docx\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CVB0HBPP7Rgu" + }, + "source": [ + "This will generate a file with the grounded sections. 
If you're eager to see the complete report right away, you can check out the pre-generated version here: [Austin Report - Sections Grounded with Search](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/sample_reports/austin_grounded.pdf)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H1yg0GWZN17j" + }, + "source": [ + "You've seen the full, enhanced report – now let's take a closer look at how a single grounded section compares to the normal section we saw earlier. We'll examine the \"Infrastructure Overview\" section again:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4nCGfx-NMdel" + }, + "outputs": [], + "source": [ + "for section_name, section_text in results_grounded_plot[\"report\"][\"sections\"].items():\n", + " if section_name == \"Infrastructure Overview\":\n", + " print(section_name)\n", + " rich_print(section_text)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1jLTWQe0N3ZY" + }, + "source": [ + "**Here's the \"aha\" moment:** Notice how the `content` of this section is now significantly richer and more detailed. It's not just stating facts from our initial data; it's weaving in insights and information gathered from the web through Google Search. This demonstrates the power of grounding our analysis in a broader context." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uVlnk6OxQyn1" + }, + "source": [ + "Okay, let's break down how Google Search enhances the report by focusing on a specific example: **Citation 8**.\n", + "\n", + "In the grounded \"Infrastructure Overview\" section, we have:\n", + "\n", + "```\n", + " 8: CitationData(\n", + " number=8,\n", + " value=\"Report on global EV infrastructure trends and best practices. | Context: Informs strategic\n", + "recommendations for improving Austin's EV infrastructure. 
| URL: BloombergNEF\",\n", + " data_path='BloombergNEF',\n", + " raw_value='Report on global EV infrastructure trends and best practices.',\n", + " context=\"Informs strategic recommendations for improving Austin's EV infrastructure.\"\n", + " )\n", + "```\n", + "\n", + "This citation points to a report from **BloombergNEF** on global EV infrastructure trends. Now, let's see how this reference, found through Google Search, contributes to the enhanced content:\n", + "\n", + "**Original Content (Before Search):**\n", + "\n", + "> \"The existing EV charging infrastructure, while growing, needs significant expansion to meet the rising demand for EVs. Currently, there are 78 total EV charging stations [2] across the city. This number is significantly lower than other major cities with similar populations.\"\n", + "\n", + "**Enhanced Content (After Search):**\n", + "\n", + "> \"The existing EV charging infrastructure, while growing, needs significant expansion to meet the rising demand for EVs. Currently, there are 78 total EV charging stations [2] across the city. This number is significantly lower than other major cities with similar populations. **A recent study by BloombergNEF [8] highlights the need for a much higher density of charging stations to support widespread EV adoption.**\"\n", + "\n", + "**Here's the impact:**\n", + "\n", + "1. **External Validation:** The original content stated that Austin's charging station count is low compared to similar cities. The enhanced content, using the BloombergNEF report found via Google Search, adds external validation to this claim. It's no longer just an observation based on our data; it's now supported by a reputable source on global EV trends.\n", + "2. **Strategic Depth:** The BloombergNEF citation adds a layer of strategic depth. 
It's not just about the current number of stations; it connects to the broader concept of \"charging station density\" needed for \"widespread EV adoption\" – a key insight for planning Austin's EV future.\n", + "3. **Credibility Boost:** Referencing a well-known organization like BloombergNEF significantly enhances the credibility of the report. It demonstrates that our analysis is informed by industry experts and best practices.\n", + "\n", + "**In essence, Google Search, through this specific citation, helped us transform a simple observation into a well-supported, strategically relevant insight.** It demonstrates how our system leverages web knowledge to enhance the report's quality, moving beyond the limitations of our initial data and providing a more nuanced and impactful analysis. This dynamic integration of external information is a key strength of our multi-agent approach." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CZLXKarZVcQC" + }, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hUrIIjc6To8S" + }, + "source": [ + "Now, let's visualize the raw data that underpins our analysis. The following code will generate plots directly from the data fetched by our `DataGatherAgent` from external APIs.\n", + "\n", + "You can also check the data it has used to generate the plots\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7cd5d15ec423" + }, + "outputs": [], + "source": [ + "rich_print(results_grounded_plot[\"data\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CuELS1YtVNVA" + }, + "source": [ + "Let's explore the visualizations generated from the raw API data, which offer a deeper understanding of Austin's EV infrastructure and its urban context. The code uses the `create_comprehensive_city_analysis` function to produce a set of Plotly figures, each shedding light on different aspects of the city:\n", + "\n", + "**1. 
EV Infrastructure Overview Dashboard:**\n", + "\n", + "* **Charging Station Types:** This bar chart breaks down the number of DC Fast, Level 2, and Level 1 charging stations. For Austin, it highlights the dominance of Level 2 chargers and the relative scarcity of DC Fast chargers. This is crucial for understanding the current charging landscape and identifying potential gaps, especially for users requiring faster charging options.\n", + "* **Connector Distribution:** This pie chart reveals the types of connectors available (e.g., CCS, CHAdeMO, Tesla). By examining this chart for Austin, you can assess the compatibility of the existing infrastructure with various EV models.\n", + "* **Network Distribution:** This bar chart displays the number of charging stations associated with different networks (e.g., ChargePoint, Tesla). For Austin, it might reveal a reliance on a particular network, which could influence decisions about network diversification and partnerships.\n", + "* **Access & Payment Methods:** This bar chart shows the percentage of stations offering various access and payment methods (e.g., credit card, mobile pay, 24/7 access). In Austin's case, it can indicate the ease of use and accessibility of the charging infrastructure for different users.\n", + "\n", + "**2. Transportation Infrastructure Analysis:**\n", + "\n", + "* **Public Transport Facilities:** This section visualizes the number of bus stops, train stations, bus stations, and bike rental locations. For Austin, this data helps assess the integration of EV charging with existing public transportation, which is vital for planning intermodal hubs.\n", + "* **Road Network Distribution:** This shows the distribution of motorways, primary, secondary, and residential roads. 
Understanding Austin's road network density and types can inform decisions about optimal charging station placement along major thoroughfares.\n", + "* **Parking Facilities:** This section charts the number of surface parking lots, parking structures, street parking spaces, and designated EV charging spots. For Austin, it helps evaluate the availability of parking spaces that could potentially be equipped with EV charging.\n", + "* **EV vs. Traditional Infrastructure:** This compares the number of EV charging stations, fuel stations, car dealerships, and car repair shops. In Austin's context, it provides insights into the current balance between EV and traditional vehicle infrastructure, indicating the progress of EV adoption.\n", + "\n", + "**3. Urban Amenities and Services:**\n", + "\n", + "* **Retail and Shopping:** This visualizes the distribution of shopping centers, supermarkets, department stores, and convenience stores. For Austin, it helps identify potential locations for charging stations near high-traffic retail areas.\n", + "* **Food and Entertainment:** This section charts restaurants, cafes, bars, and fast-food outlets. Understanding the density of these amenities in Austin can guide the placement of charging stations near popular destinations.\n", + "* **Emergency Services:** This displays the number of police stations, fire stations, hospitals, and clinics. For Austin, this information can be relevant for ensuring the resilience of the EV infrastructure and planning for emergency response related to EVs.\n", + "* **Public Amenities:** This visualizes the number of post offices, banks, ATMs, and public toilets. In Austin's context, it helps assess the availability of essential services near potential charging station locations.\n", + "\n", + "**4. Area Analysis:**\n", + "\n", + "* **Area Distribution:** This pie chart shows the breakdown of Austin's total area into water, green, built, and other areas. 
It provides a quick overview of the city's land use, which can be a factor in determining suitable locations for charging infrastructure.\n", + "\n", + "**Ideally, these charts would be integrated into the report itself, providing a visual complement to the textual analysis.** However, even as standalone visualizations, they offer valuable insights for decision-making related to EV charging station expansion. For example, by examining the distribution of charging types, connector types, and network providers, along with the city's transportation infrastructure and urban amenities, stakeholders can identify strategic locations for new charging stations, optimize the mix of charging options, and ensure that the expansion aligns with the city's overall development and EV adoption trends. By correlating the density of public transportation, road networks, and parking facilities with the location of existing EV charging stations, planners can pinpoint areas where additional infrastructure is most needed. They can also consider factors such as proximity to retail centers, food and entertainment venues, and public amenities to enhance the user experience and maximize the utilization of charging stations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7Q_MfimBMdbS" + }, + "outputs": [], + "source": [ + "print(\"\\n=== Single City Analysis ===\")\n", + "for name, fig in results_grounded_plot[\"visualizations\"][0].items():\n", + " print(f\"\\nDisplaying: {name.replace('_', ' ').title()}\")\n", + " fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YE6bl8haTPL-" + }, + "source": [ + "The `results_grounded_plot` object is a dictionary containing all the data generated from the analysis. In addition to the keys seen earlier, it includes `visualizations` and, inside `report`, the `combined_report`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K_j2N-_5Kv_D" + }, + "outputs": [], + "source": [ + "rich_print(\n", + " \"The result object contains all these internal data points with the reports: \",\n", + " list(results_grounded_plot.keys()),\n", + ")\n", + "rich_print(\n", + " \"The Report contains the combined reports, citations, full text of the report and individual sections: \",\n", + " list(results_grounded_plot[\"report\"].keys()),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bz-FBnhJTi-5" + }, + "source": [ + "While we've focused on the report, you can also explore other parts of the `results_grounded_plot` object. This provides a way to delve deeper into the agent's inner workings, but we'll break down each agent's role in more detail in the next section." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1192d611f92b" + }, + "outputs": [], + "source": [ + "# You can print the whole report:\n", + "rich_print(results_grounded_plot[\"report\"][\"combined_report\"])\n", + "\n", + "# You can print the whole text of the report:\n", + "rich_print(results_grounded_plot[\"report\"][\"full_text\"])\n", + "\n", + "# You can print the whole citations of the report:\n", + "rich_print(results_grounded_plot[\"report\"][\"citations\"])\n", + "\n", + "# You can also check the data it has used to generate the report\n", + "rich_print(results_grounded_plot[\"data\"])\n", + "\n", + "# If you want to see the whole plan of the agent that it executed\n", + "rich_print(results_grounded_plot[\"plan\"])\n", + "\n", + "# If you want to see the query analysis of the agent\n", + "rich_print(results_grounded_plot[\"query_analysis\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GcW6BJTq8WuT" + }, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jGdQeo_CRHh_" + }, + "source": [ + "## Deconstructing the Process: A Step-by-Step Journey of Agents" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ruk0__600E3e" + }, + "source": [ + "Before we delve into the inner workings of each agent, let's take a look at the overall flow of our multi-agent system. This sequence diagram provides a visual representation of how the agents interact and collaborate to process your query and generate the final output:\n", + "\n", + "![research-multi-agent-design-pattern](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/research_multi_agent_ev/img/ev_agent_simple.png)\n", + "\n", + "\n", + "This sequence diagram serves as a visual roadmap for understanding the flow of our multi-agent system, and you can refer back to it as we explore each agent's inner workings. 
It illustrates how agents like the `ExecutionAgent`, `PlanningAgent`, `QueryAnalysisAgent`, `DataGatherAgent`, `ReportAgent`, and `VisualizeAgent` interact and collaborate to process your query, highlighting their roles, the flow of information, and key decision points. This diagram is crucial for grasping the big picture as we delve into the specifics of each agent, starting with the `PlanningAgent`, which initiates the analysis process based on your query." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hIQNRrF9KWqL" + }, + "source": [ + "### Agent: PlanningAgent" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BHYuwLIgDHyw" + }, + "source": [ + "### Agent: PlanningAgent\n", + "\n", + "The `PlanningAgent` is the first active agent in our sequence, responsible for taking your initial query and crafting a strategic execution plan. As seen in the sequence diagram, the `ExecutionAgent` passes the user's query to the `PlanningAgent`, which then returns a structured plan. 
Let's break down its role:\n", + "\n", + "**Input:**\n", + "\n", + "* **Query:** The user's raw query about EV infrastructure (e.g., \"Analyze EV charging stations in Austin\").\n", + "* **Client:** An instance of the generative AI model client (e.g., `gemini`).\n", + "* **Model Name:** The specific model to be used (e.g., \"gemini-pro\").\n", + "* **Debug:** A boolean flag to enable/disable debug mode.\n", + "* **API Key:** The API key for external services like NREL.\n", + "\n", + "**Output:**\n", + "\n", + "* **ExecutionPlan:** A structured plan containing:\n", + " * **Query:** The original user query.\n", + " * **Timestamp:** When the plan was created.\n", + " * **Validated Query:** Result of query validation, including validity, cities mentioned, missing elements, and suggestions for improvement.\n", + " * **Enable Search:** A boolean flag indicating if enhanced search/grounding is required.\n", + " * **Steps:** A list of `PlanStep` objects, each defining a step in the execution process with details like agent name, description, input/output formats, and status.\n", + " * **Debug:** A boolean flag indicating debug status.\n", + "\n", + "This section will explore five key aspects of the `PlanningAgent`: its setup, the creation of the `ExecutionPlan`, query validation and suggestions, handling of invalid queries, and a glimpse into its internal code structure." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qIj_pscoDmQU" + }, + "source": [ + "Agent Code:\n", + "```\n", + "`/content/ev_agent/agent_handler/agent_02_PlanningAgent.py`\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qLxSAgN3AyyW" + }, + "source": [ + "#### Setting up and Calling the agent\n", + "\n", + "First, we need to set up and call the `PlanningAgent`. 
Here's how we do it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6ZzI1mSbJ3bt" + }, + "outputs": [], + "source": [ + "from ev_agent.agent_handler.agent_02_PlanningAgent import *" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pFrQSpP5E3Kv" + }, + "source": [ + "We start by importing the necessary `PlanningAgent` class. Then, we create an instance of the agent, providing the user's query, the client object, the model name, and setting `debug` to `False` for now. Finally, we call the `create_plan()` method to generate the execution plan. If `debug` is set to `False`, you might see a humorous warning about the complexity of plan creation, which is just a playful way to indicate that the agent is working behind the scenes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qONvCmwqJ3Yq" + }, + "outputs": [], + "source": [ + "agent = PlanningAgent(\n", + " query=\"I want to understand the EV charging situation in austin and proper vetted information and some plot\",\n", + " client=client,\n", + " model_name=MODEL_ID_Flash,\n", + " debug=False,\n", + ")\n", + "plan = agent.create_plan()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GQjRth99ATj0" + }, + "source": [ + "#### ExecutionPlan" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7BOaeF9aGULR" + }, + "source": [ + "Now, let's examine the `ExecutionPlan` generated by the `PlanningAgent`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VGN-ZitTJ3Vz" + }, + "outputs": [], + "source": [ + "rich_print(plan)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rLe_ptdcGaf1" + }, + "source": [ + "The core of this plan lies in the `steps` list, which contains a sequence of `PlanStep` objects. 
Each `PlanStep` is defined by a structured data model, specifying:\n", + "\n", + "* **`step_id`:** A unique identifier for the step.\n", + "* **`agent_name`:** The name of the agent responsible for this step (e.g., `QueryAnalysisAgent`, `DataGatherAgent`).\n", + "* **`description`:** A brief description of the step's purpose.\n", + "* **`input_requirements`:** The data required for this step (e.g., the output of a previous step).\n", + "* **`output_format`:** The format of the data produced by this step (e.g., a specific data model like `QueryEntity` or `DataGatherAgentOutput`).\n", + "* **`status`:** The current status of the step (e.g., `PENDING`, `COMPLETED`).\n", + "* **`error`:** Any error encountered during the step (initially `None`).\n", + "* **`skip_conditions`:** Conditions under which this step should be skipped (currently `None` for all steps).\n", + "\n", + "**Leveraging Gemini's Function Calling for Planning:**\n", + "\n", + "The `PlanningAgent` intelligently determines the need for steps like visualization and enhanced search (grounding) by utilizing Gemini's function calling capabilities. It analyzes the user's query and calls specific functions (e.g., `_determine_visualization_requirement`, `_determine_search_requirement`) to decide whether these steps are required. This dynamic plan creation based on query analysis demonstrates the power of combining structured planning with advanced language model features.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s0cyXQBfAel0" + }, + "source": [ + "#### Query Validation and Suggestions\n", + "\n", + "A crucial part of the `PlanningAgent`'s role is to validate the user's query and provide suggestions for improvement. 
Let's see how this works:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "X1AwbwP__pxh" + }, + "outputs": [], + "source": [ + "rich_print(plan.validated_query)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v9SEDT97HNYk" + }, + "source": [ + "Here, the `PlanningAgent` has determined that the query is valid (`is_valid=True`) and has identified 'Austin' as the city of interest. It also confirms that no essential elements are missing (`missing_elements=[]`)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "53RSiAZjAjiB" + }, + "source": [ + "#### Query Suggestions\n", + "\n", + "Furthermore, the `PlanningAgent` provides suggestions to enhance the query:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rirBCWfS_5Dc" + }, + "outputs": [], + "source": [ + "rich_print(plan.validated_query.suggestions)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B2FxhYZmDA8S" + }, + "source": [ + "#### Failed Query\n", + "\n", + "What happens when the query is not valid? Let's see how the `PlanningAgent` handles such scenarios:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lad587v4J3P3" + }, + "outputs": [], + "source": [ + "agent = PlanningAgent(\n", + " query=\"I want to understand the EV charging situation in Paris and proper vetted information and some plot\",\n", + " client=client,\n", + " model_name=MODEL_ID_Flash,\n", + " debug=False,\n", + ")\n", + "plan = agent.create_plan()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uFEeB2a8IlrJ" + }, + "source": [ + "In this case, the query mentions \"Paris,\" which is not a valid city in our predefined list (in `STATE_MAPPING`). 
The `PlanningAgent` detects this and returns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "U7YCYqosIz5C" + }, + "outputs": [], + "source": [ + "rich_print(plan.validated_query)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8BC-GeWPI3Ey" + }, + "source": [ + "The `is_valid` flag is now `False`, and the `missing_elements` indicate that a \"valid city\" is required. Importantly, the `suggestions` provide specific guidance on how to correct the query, even suggesting valid city replacements." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4INx_rCJDjtp" + }, + "outputs": [], + "source": [ + "# You can see that search is disabled, since the query failed validation (Paris is not a supported city)\n", + "rich_print(plan.enable_search)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I9Z11rQwDfEE" + }, + "outputs": [], + "source": [ + "# The plan also contains no steps, since there is nothing to execute for an invalid query\n", + "rich_print(plan.steps)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dYuYw015I8Du" + }, + "source": [ + "Since the query was invalid, the `PlanningAgent` disables the search functionality (`enable_search=False`) and creates an empty list of steps (`steps=[]`). This effectively halts the execution process, as there's no valid plan to execute. This demonstrates the agent's ability to gracefully handle invalid queries and prevent unnecessary processing." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FG322SGOGnrb" + }, + "source": [ + "### Agent: QueryAnalysisAgent" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lMeMdzKrKJAD" + }, + "source": [ + "### Agent: QueryAnalysisAgent\n", + "\n", + "The `QueryAnalysisAgent` comes right after the `PlanningAgent` in our sequence. 
Its primary role is to dissect the user's query, identify key entities, and determine the type of analysis requested. It then passes this structured information to the next agent in the pipeline.\n", + "\n", + "**Input:**\n", + "\n", + "* **Query:** The user's query about EV infrastructure, validated by the `PlanningAgent` (e.g., \"Analyze EV charging stations in Austin\").\n", + "* **Client:** An instance of the generative AI model client.\n", + "* **Model Name:** The specific model to be used (e.g., \"gemini-pro\").\n", + "\n", + "**Output:**\n", + "\n", + "* **Dictionary:** Containing:\n", + " * `status`: Whether the analysis was successful (\"success\" or \"error\").\n", + " * `entities`: A dictionary representing the extracted entities from the query, based on the `QueryEntities` data model. This includes:\n", + " * `pattern_type`: The type of analysis pattern detected (e.g., \"DISCOVERY\", \"COMPARISON\"). Although identified, these patterns are not yet used downstream in the current version but could be leveraged in future iterations.\n", + " * `cities`: A list of valid cities extracted from the query.\n", + " * `states`: A list of corresponding states for the extracted cities.\n", + " * `research_theme`: The general theme of the query (currently fixed to \"Electronic Vehicle\").\n", + " * `output_type`: The desired output type (e.g., \"Report\", \"Text\", \"Raw Data\").\n", + "\n", + "In essence, the `QueryAnalysisAgent` transforms the user's raw query into a structured format that can be easily understood and processed by the subsequent agents in the system. This section will delve into how the agent extracts these entities, handles different query patterns, and prepares the data for the next stage of the analysis." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SzUjpbDoGn8j" + }, + "outputs": [], + "source": [ + "from ev_agent.agent_handler.agent_03_QueryAnalysisAgent import *" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uq_s_HCWMVIL" + }, + "source": [ + "Let's see how the `QueryAnalysisAgent` processes different types of queries.\n", + "We'll examine three examples:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yQj--mhuqegX" + }, + "source": [ + "#### Extraction Type 1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dQM-sR_qMctq" + }, + "source": [ + "Here, the query asks about gaps in Austin's charging network and requests a report format. The agent successfully analyzes the query and returns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9bYBVxCoHF_S" + }, + "outputs": [], + "source": [ + "query = \"Where are the gaps in Austin charging network? Report format please\"\n", + "query_agent = QueryAnalysisAgent(client, MODEL_ID)\n", + "agent_1_result = query_agent.analyze(query)\n", + "rich_print(agent_1_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zGizyMvLMfEF" + }, + "source": [ + "The agent correctly identifies the `pattern_type` as `GAPS`, extracts the city and state, and recognizes the desired `output_type` as `REPORT`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yOsBXcErqkLg" + }, + "source": [ + "#### Extraction Type 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gxMepfZGMkYQ" + }, + "source": [ + "In this case, the query requests raw data for Dallas. 
The agent responds with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aaRoYZLUHbGL" + }, + "outputs": [], + "source": [ + "query = \"Need some raw data on Dallas for Ev charging stations\"\n", + "agent_1_result = query_agent.analyze(query)\n", + "rich_print(agent_1_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kVWU7b4tMmnz" + }, + "source": [ + "The agent identifies the `pattern_type` as `DISCOVERY` (since it's a general inquiry), extracts the city and state, and correctly sets the `output_type` to `RAW`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C5bWr2alqk8J" + }, + "source": [ + "#### Extraction Type 3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ubffVGHoNMOY" + }, + "outputs": [], + "source": [ + "query = \"compare Dallas and Austin for EV Charging expansion and give me detail report.\"\n", + "agent_1_result = query_agent.analyze(query)\n", + "rich_print(agent_1_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cbM1lY83MvKT" + }, + "source": [ + "These examples demonstrate the `QueryAnalysisAgent`'s ability to understand different query structures, extract relevant entities, and determine the user's intent regarding the analysis type and desired output format. This structured information is then passed on to subsequent agents in the pipeline, ensuring that the analysis stays focused and aligned with the user's needs." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i-So9XlsE4L-" + }, + "source": [ + "### Agent: DataGatherAgent" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y4IaUnAqNSRu" + }, + "source": [ + "### Agent: DataGatherAgent\n", + "\n", + "The `DataGatherAgent` is responsible for collecting the necessary data for our analysis by interacting with external APIs. 
It takes the structured output from the `QueryAnalysisAgent` and fetches relevant information about EV infrastructure and city demographics.\n", + "\n", + "**Input:**\n", + "\n", + "* **`api_key`:** Your NREL API key to access EV infrastructure data.\n", + "* **`radius_miles`:** The radius (in miles) around each city for which to gather data.\n", + "* **`debug`:** A boolean flag to enable/disable debug mode.\n", + "\n", + "**Output:**\n", + "\n", + "* **`DataGatherAgentOutput`:** A data object containing:\n", + " * `timestamp`: When the data was gathered.\n", + " * `cities_data`: A list of `CityData` objects, one for each city in the query. Each `CityData` object may contain:\n", + " * `city`: The name of the city.\n", + " * `state`: The state abbreviation.\n", + " * `summary`: A dictionary containing general city data retrieved from the Neighborhood Summary API (e.g., population, area, etc.).\n", + " * `ev_data`: A dictionary containing EV charging station data retrieved from the EV Infrastructure Station Analysis API (e.g., number of stations, charger types, etc.).\n", + " * `error`: Any error encountered while gathering data for the city.\n", + " * `status`: The overall status of the data gathering process (\"success\" or \"error\").\n", + " * `error`: Any general error encountered during the process.\n", + "\n", + "**Functionality:**\n", + "\n", + "The `DataGatherAgent` utilizes asynchronous programming (`asyncio`) to fetch data from two different APIs concurrently for each city:\n", + "\n", + "1. **Neighborhood Summary API:** Retrieves general demographic and infrastructure data about the city.\n", + "2. **EV Infrastructure Station Analysis API:** Retrieves detailed information about EV charging stations within the specified radius.\n", + "\n", + "It handles potential errors during API calls, provides informative debug messages (if enabled), and compiles the gathered data into a structured `DataGatherAgentOutput` object. 
This agent plays a crucial role in bridging the gap between our analytical system and the real-world data needed to generate a meaningful report." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "g9NKTXKHEpkq" + }, + "outputs": [], + "source": [ + "from ev_agent.agent_handler.agent_04_DataGatherAgent import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nAMEg6_COMq0" + }, + "outputs": [], + "source": [ + "# The agent calls the OpenStreetMap APIs and the NREL Developer API to gather data for a given city that can be helpful for analysis.\n", + "\n", + "data_gather_agent = DataGatherAgent(\n", + " api_key=NREL_API_KEY, radius_miles=100.0, debug=False\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eTVSEplfOi5Y" + }, + "source": [ + "Here, we create an instance of the `DataGatherAgent`, providing our `NREL_API_KEY`, a `radius_miles` of 100.0 miles, and setting `debug` to `False` (set it to `True` to see detailed output)."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Tnsr3EzYOBqX" + }, + "source": [ + "#### Single City" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tgpy8Q8-N9SM" + }, + "outputs": [], + "source": [ + "# Get the city from the QueryAnalysisAgent\n", + "agent_1_result = query_agent.analyze(\n", + " \"Need some raw data on Dallas for Ev charging stations\"\n", + ")\n", + "\n", + "# Get data from DataGatherAgent of the city\n", + "agent_2_result = await data_gather_agent.process(agent_1_result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XutOgx13ZRqr" + }, + "outputs": [], + "source": [ + "print(\"Number of cities given by the agent: \", len(agent_2_result.cities_data))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Vj5qPtLJqotV" + }, + "source": [ + "##### Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "15623699cf29" + }, + "outputs": [], + "source": [ + "# You can access the complete NeighborhoodSummary here:\n", + "rich_print(\"NeighborhoodSummary - Complete \\n\", agent_2_result.cities_data[0].summary)\n", + "\n", + "\n", + "# You can access the complete EVInfraSummary here:\n", + "rich_print(\"EV Infra Summary - Complete \\n\", agent_2_result.cities_data[0].ev_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P4Kfv7uHOEsA" + }, + "source": [ + "#### Multi City" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2OXo_jl-Epfz" + }, + "outputs": [], + "source": [ + "# Get the city from the QueryAnalysisAgent\n", + "agent_1_result_multi_city = query_agent.analyze(\n", + " \"compare Dallas and Austin for EV Charging expansion and give me detail report\"\n", + ")\n", + "\n", + "# Get data from DataGatherAgent of the city\n", + "agent_2_result_multi_city = await data_gather_agent.process(agent_1_result_multi_city)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": { + "id": "B66HlEAzZL1P" + }, + "outputs": [], + "source": [ + "print(\n", + " \"Number of cities given by the agent: \", len(agent_2_result_multi_city.cities_data)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bJgYsa0pqstF" + }, + "source": [ + "##### Data - NeighborhoodSummary (OpenStreetMap - Overpass API)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wv0LiUHdsbi6" + }, + "source": [ + "This API handler uses the Nominatim API and the Overpass API (both OpenStreetMap services). You can find more details on the [Nominatim homepage](https://nominatim.org/), in the [Nominatim API overview](https://nominatim.org/release-docs/develop/api/Overview/), and on the [Overpass API wiki page](https://wiki.openstreetmap.org/wiki/Overpass_API)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "we3aopWOmiZj" + }, + "outputs": [], + "source": [ + "index = 0 # 0 for Dallas, 1 for Austin\n", + "\n", + "# You can see the NeighborhoodSummary of the city\n", + "rich_print(\"City :\", agent_2_result_multi_city.cities_data[index].summary.city)\n", + "rich_print(\"State :\", agent_2_result_multi_city.cities_data[index].summary.state)\n", + "rich_print(\n", + " \"NeighborhoodSummary - Healthcare \\n\",\n", + " agent_2_result_multi_city.cities_data[index].summary.healthcare,\n", + ")\n", + "rich_print(\n", + " \"NeighborhoodSummary - Education \\n\",\n", + " agent_2_result_multi_city.cities_data[index].summary.education,\n", + ")\n", + "\n", + "# You can see the complete data and all the elements of NeighborhoodSummary:\n", + "rich_print(\n", + " \"NeighborhoodSummary - Complete \\n\",\n", + " agent_2_result_multi_city.cities_data[index].summary,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pVgA-XG2q-Q0" + }, + "source": [ + "##### Data - EVInfraSummary (NREL Developer API)" + ] + }, + { + "cell_type": "markdown", + "metadata": { 
+ "id": "OW5LGTu2ruGf" + }, + "source": [ + "You can get more details about the API 
[here](https://developer.nrel.gov/) and [here](https://developer.nrel.gov/docs/transportation/alt-fuel-stations-v1/)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OfO4oJOvnK_o" + }, + "outputs": [], + "source": [ + "index = 0 # 0 for Dallas, 1 for Austin\n", + "\n", + "# You can see the EV Infra Summary of the city\n", + "rich_print(\n", + " \"City :\", agent_2_result_multi_city.cities_data[index].ev_data.metadata[\"city\"]\n", + ")\n", + "rich_print(\n", + " \"State :\", agent_2_result_multi_city.cities_data[index].ev_data.metadata[\"state\"]\n", + ")\n", + "rich_print(\n", + " \"EV Infra Summary - Charging Capability \\n\",\n", + " agent_2_result_multi_city.cities_data[index].ev_data.charging_capabilities,\n", + ")\n", + "rich_print(\n", + " \"EV Infra Summary - Accessibility \\n\",\n", + " agent_2_result_multi_city.cities_data[index].ev_data.accessibility,\n", + ")\n", + "\n", + "# You can see the complete data and all the elements of EV Infra Summary:\n", + "# rich_print(\"EV Infra Summary - Complete \\n\",\n", + "# agent_2_result_multi_city.cities_data[index].ev_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x1fu8LdFqFR8" + }, + "source": [ + "### Agent: ReportAgent" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "93F5K5cjPjAn" + }, + "source": [ + "### Agent: ReportAgent\n", + "\n", + "The `ReportAgent` takes the structured data gathered by the `DataGatherAgent` and transforms it into a comprehensive, well-formatted report. 
It's responsible for generating individual sections of the report, citing data sources appropriately, and optionally enhancing the content with information from web search.\n", + "\n", + "**Input:**\n", + "\n", + "* **`client`:** An instance of the generative AI model client.\n", + "* **`model_name`:** The specific model to be used (e.g., \"gemini-pro-1.5\").\n", + "* **`enable_search`:** A boolean flag indicating whether to enhance the report with web search results.\n", + "* **`debug`:** A boolean flag to enable/disable debug mode.\n", + "\n", + "**Output:**\n", + "\n", + "* **`Report`:** A data object containing the entire report, structured as follows:\n", + " * `city`: The name of the city.\n", + " * `state`: The state abbreviation.\n", + " * `timestamp`: When the report was generated.\n", + " * `sections`: A dictionary of `Section` objects, each representing a section of the report (e.g., \"Executive Summary\", \"Infrastructure Overview\"). Each `Section` includes:\n", + " * `title`: The section title.\n", + " * `content`: The main text content of the section.\n", + " * `citations`: A dictionary of `CitationData` objects, mapping citation numbers to their corresponding data sources.\n", + " * `key_findings`: A list of key takeaways from the section.\n", + " * `enhanced_content`: Additional content generated through web search (if enabled).\n", + " * `citations_text`: A formatted string containing all citations used in the report.\n", + " * `full_text`: The entire report content as a single string.\n", + " * `combined_report`: The full report content along with formatted citations.\n", + "\n", + "**Functionality:**\n", + "\n", + "The `ReportAgent` performs several key tasks:\n", + "\n", + "1. **Section Generation:** It generates individual report sections based on predefined templates and the gathered data, citing specific data points using a structured `CitationData` model.\n", + "2. 
**Data Mapping:** It utilizes a detailed `_prepare_data_map` function to create a structured representation of the data from the `DataGatherAgent`, making it easier to reference specific data points in the report.\n", + "3. **Asynchronous Processing:** It leverages asynchronous programming to generate multiple sections concurrently, improving efficiency.\n", + "4. **Optional Search Enhancement:** If `enable_search` is set to `True`, it can enhance each section with information retrieved from Google Search, adding citations for the newly found data. This is achieved using the `_enhance_section_with_search` method.\n", + "5. **Report Assembly:** Finally, it assembles the individual sections into a complete `Report` object, generating a formatted string representation of the entire report and its citations.\n", + "\n", + "The `ReportAgent` plays a critical role in synthesizing the raw data into a coherent, insightful, and well-supported analysis of the EV infrastructure. The following subsections will explore how this agent is used to generate reports, either for a single city with search grounding or for multiple cities without grounding." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ibu15GgxJ3G-" + }, + "outputs": [], + "source": [ + "from ev_agent.agent_handler.agent_05_ReportAgent import *" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pUiv3t3M7foo" + }, + "source": [ + "#### Single City with grounding with Google" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ihv4IAfn-5gm" + }, + "outputs": [], + "source": [ + "report_agent_single_grounded = ReportAgent(\n", + " client=client, model_name=MODEL_ID_Flash, enable_search=True, debug=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TeqzU17t7ozN" + }, + "outputs": [], + "source": [ + "# Get the city from the QueryAnalysisAgent\n", + "agent_1_result = query_agent.analyze(\n", + " \"Need some raw data on Dallas for Ev charging stations\"\n", + ")\n", + "rich_print(agent_1_result)\n", + "\n", + "# Get data from DataGatherAgent of the city\n", + "agent_2_result = await data_gather_agent.process(agent_1_result)\n", + "\n", + "# Get the report built out using ReportAgent\n", + "reports_single_grounded = await report_agent_single_grounded.analyze(\n", + " agent_1_result, agent_2_result\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UCmgBSuU7SQC" + }, + "outputs": [], + "source": [ + "print(\n", + " \"Report is on the city: \",\n", + " reports_single_grounded.city,\n", + " \" and state: \",\n", + " reports_single_grounded.state,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "biVMqgbuAwMY" + }, + "source": [ + "Predefined/Available Section of the Reports:\n", + "\n", + "* Executive Summary\n", + "* Infrastructure Overview\n", + "* Current EV Assessment\n", + "* Demand Analysis\n", + "* Supply Analysis\n", + "* Gap Analysis\n", + "* Location Recommendations\n", + "* Implementation Strategy\n", + "\n", + "You can explore each 
section and see how grounding with Google enhanced the section with updated text and citations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-Q7T-w9R7hwd" + }, + "outputs": [], + "source": [ + "for section_name, section_text in reports_single_grounded.sections.items():\n", + " if section_name == \"Infrastructure Overview\":\n", + " print(section_name)\n", + " rich_print(section_text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c3d5741c77ca" + }, + "outputs": [], + "source": [ + "# You can access other key areas of the report:\n", + "reports_single_grounded.full_text # Full text of the report - without citations\n", + "reports_single_grounded.citations_text # Full text of the citations - without text\n", + "reports_single_grounded.combined_report # Full text of the report combined with citations\n", + "reports_single_grounded.timestamp # Timestamp of report generations" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vt9f7S5v7icz" + }, + "source": [ + "#### Multi City without grounding with Google" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_Ifsrp-G-_T1" + }, + "outputs": [], + "source": [ + "report_agent_multi_city = ReportAgent(\n", + " client=client,\n", + " model_name=MODEL_ID_Flash,\n", + " enable_search=False, # you can enable grounding for both the cities if you want\n", + " debug=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TpRaJx-v7kNm" + }, + "outputs": [], + "source": [ + "# Get the city from the QueryAnalysisAgent\n", + "agent_1_result_multi_city = query_agent.analyze(\n", + " \"compare Dallas and Austin for EV Charging expansion and give me detail report\"\n", + ")\n", + "rich_print(agent_1_result_multi_city)\n", + "\n", + "# Get data from DataGatherAgent of the city\n", + "agent_2_result_multi_city = await 
data_gather_agent.process(agent_1_result_multi_city)\n", + "\n", + "# Get the report built out using ReportAgent\n", + "reports_multi_city = await report_agent_multi_city.analyze(\n", + " agent_1_result_multi_city, agent_2_result_multi_city\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ph40v5rp7wPK" + }, + "outputs": [], + "source": [ + "index = 0\n", + "print(\n", + " \"Report is on the city: \",\n", + " reports_multi_city[index].city,\n", + " \" and state: \",\n", + " reports_multi_city[index].state,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zn1LtOvi7wIU" + }, + "outputs": [], + "source": [ + "index = 1\n", + "print(\n", + " \"Report is on the city: \",\n", + " reports_multi_city[index].city,\n", + " \" and state: \",\n", + " reports_multi_city[index].state,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kmU1fJFB8PTi" + }, + "outputs": [], + "source": [ + "for section_name, section_text in reports_multi_city[index].sections.items():\n", + " if section_name == \"Demand Analysis\":\n", + " print(section_name)\n", + " rich_print(section_text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "582a01dfc15d" + }, + "outputs": [], + "source": [ + "# You can also check all the sections using object\n", + "\n", + "print(\"All the sections in the report\")\n", + "for section_name, section_text in reports_multi_city[index].sections.items():\n", + " print(section_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3e0df2853b20" + }, + "outputs": [], + "source": [ + "# You can access other key areas of the report by passing appropriate indexes:\n", + "\n", + "reports_multi_city[index].full_text # Full text of the report - without citations\n", + "\n", + "reports_multi_city[index].citations_text # Full text of the citations - without 
text\n", + "\n", + "reports_multi_city[\n", + " index\n", + "].combined_report # Full text of the report combined with citations\n", + "\n", + "reports_multi_city[index].timestamp # Timestamp of report generation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_Yd9DWWLP7T2" + }, + "source": [ + "### Agent: VisualizeAgent\n", + "\n", + "The `VisualizeAgent` is responsible for creating insightful visualizations based on the data gathered by the `DataGatherAgent`. It uses the `plotly` library to generate various charts and graphs that help to understand the EV infrastructure landscape in a more visual and intuitive manner. Although it's called an \"agent\" here, it's important to note that this is essentially a set of helper functions for creating visualizations rather than an autonomous agent with decision-making capabilities.\n", + "\n", + "**Input:**\n", + "\n", + "* **`data`:** The `DataGatherAgentOutput` object, containing structured data for one or more cities.\n", + "\n", + "**Output:**\n", + "\n", + "* A tuple containing two dictionaries:\n", + " * **`single_city_figs`:** A dictionary of `plotly` figure objects, each representing a visualization specific to a single city.\n", + " * **`comparison_figs`:** A dictionary of `plotly` figure objects, each representing a comparative visualization across multiple cities (if applicable).\n", + "\n", + "**Functionality:**\n", + "\n", + "The `VisualizeAgent` performs the following tasks:\n", + "\n", + "1. **Single City Visualizations:** It generates a set of visualizations for each city using the `create_comprehensive_city_analysis` function. These include:\n", + " * **EV Infrastructure Overview:** Bar charts showing charging station types, connector distribution, network distribution, and access & payment methods.\n", + " * **Transportation Infrastructure Analysis:** A multi-panel plot showing public transport facilities, road network distribution, parking facilities, and a comparison of EV vs. 
traditional vehicle infrastructure.\n", + " * **Urban Amenities and Services:** A multi-panel plot showing the distribution of retail and shopping centers, food and entertainment venues, emergency services, and public amenities.\n", + " * **Area Analysis:** A pie chart displaying the distribution of total area, water area, green area, and built area.\n", + "\n", + "2. **Multi-City Comparisons (if applicable):** If the input data contains information for multiple cities, it uses the `plot_multi_city_comparison` function to generate comparative visualizations. These include:\n", + " * **EV Infrastructure Comparisons:** Bar charts comparing the number of EV stations vs. fuel stations, charging station types, and EV station density across cities.\n", + " * **Transportation Infrastructure:** Bar charts comparing public transport infrastructure, road network distribution, and parking facilities across cities.\n", + " * **Area Analysis:** A bar chart comparing area distribution (total, water, green, built) across cities.\n", + " * **Urban Amenities:** A bar chart comparing the prevalence of various urban amenities (e.g., shopping centers, restaurants, hospitals) across cities.\n", + "\n", + "3. **Visualization Organization:** It organizes all generated plots into the `single_city_figs` and `comparison_figs` dictionaries, making it easy to access specific visualizations.\n", + "\n", + "The `VisualizeAgent` plays a crucial role in making the data more accessible and understandable by providing a visual representation of key metrics and trends. These visualizations can aid in identifying patterns, making comparisons, and ultimately supporting decision-making related to EV infrastructure planning and development." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LqGemlxj-ohw" + }, + "outputs": [], + "source": [ + "from ev_agent.agent_handler.agent_06_VisualizeAgent import *" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ly_pvvoNCKr_" + }, + "source": [ + "#### Single City" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zjveHtOhGzfn" + }, + "outputs": [], + "source": [ + "single_city_figs, comparison_figs = plot_all_visualizations(agent_2_result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SM9nbe5FHEap" + }, + "outputs": [], + "source": [ + "print(\"\\n=== Single City Analysis ===\")\n", + "for name, fig in single_city_figs.items():\n", + " print(f\"\\nDisplaying: {name.replace('_', ' ').title()}\")\n", + " fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5iF_b6oMCVBL" + }, + "source": [ + "#### Multi City" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "33fASgQGHP6d" + }, + "outputs": [], + "source": [ + "single_city_figs, comparison_figs = plot_all_visualizations(agent_2_result_multi_city)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dZuNdJeoHTVD" + }, + "outputs": [], + "source": [ + "print(\"\\n=== Multi-City Comparisons ===\")\n", + "for name, fig in comparison_figs.items():\n", + " print(f\"\\nDisplaying: {name.replace('_', ' ').title()}\")\n", + " fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o4cW5bVtQ2Lo" + }, + "source": [ + "## Next Steps and Potential Improvements\n", + "\n", + "We've built a solid foundation for a multi-agent system that analyzes EV infrastructure. However, there's always room for improvement and expansion. Here are some potential next steps, inspired by features found in advanced multi-agent frameworks like AutoGen, CrewAI, and LangGraph:\n", + "\n", + "1. 
**Enhanced Agent Communication:** Implement dynamic inter-agent communication for iterative feedback, dynamic task allocation, and agent specialization.\n", + "2. **Sophisticated Planning:** Develop more advanced planning with conditional logic, sub-planning, and plan repair capabilities.\n", + "3. **Expanded Tool Integration:** Integrate with more APIs, databases, web scraping, and knowledge graphs to broaden the system's knowledge base.\n", + "4. **Interactive User Experience:** Allow for clarification dialogs, progress updates, interactive visualizations, and user feedback mechanisms.\n", + "5. **Robust Error Handling:** Implement comprehensive exception handling, retry mechanisms, and fallback strategies for increased reliability.\n", + "6. **Integrated Visualizations:** Incorporate visualizations directly into the generated reports for a more cohesive and engaging presentation.\n", + "7. **Agent Memory and Learning:** Introduce agent memory for caching, learning from user feedback, and potential model fine-tuning to improve performance over time.\n", + "\n", + "By implementing these enhancements, we can create a more powerful, flexible, and user-friendly multi-agent system for analyzing EV infrastructure and generating actionable insights." 
+ ] + } + ], + "metadata": { + "colab": { + "name": "intro_research_multi_agents_gemini_2_0.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/gemini/code-execution/intro_code_execution.ipynb b/gemini/code-execution/intro_code_execution.ipynb index a91fab135e4..1dbdca00180 100644 --- a/gemini/code-execution/intro_code_execution.ipynb +++ b/gemini/code-execution/intro_code_execution.ipynb @@ -1,1845 +1,1803 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ur8xi4C7S06n" - }, - "outputs": [], - "source": [ - "# Copyright 2024 Google LLC\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JAPoU8Sm5E6e" - }, - "source": [ - "# Intro to Generating and Executing Python Code with Gemini 2.0\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Google
Open in Colab\n", - "
\n", - "
\n", - " \n", - " \"Google
Open in Colab Enterprise\n", - "
\n", - "
\n", - " \n", - " \"Vertex
Open in Vertex AI Workbench\n", - "
\n", - "
\n", - " \n", - " \"BigQuery
Open in BigQuery Studio\n", - "
\n", - "
\n", - " \n", - " \"GitHub
View on GitHub\n", - "
\n", - "
\n", - "\n", - "
\n", - "\n", - "Share to:\n", - "\n", - "\n", - " \"LinkedIn\n", - "\n", - "\n", - "\n", - " \"Bluesky\n", - "\n", - "\n", - "\n", - " \"X\n", - "\n", - "\n", - "\n", - " \"Reddit\n", - "\n", - "\n", - "\n", - " \"Facebook\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "84f0f73a0f76" - }, - "source": [ - "| | |\n", - "|-|-|\n", - "| Author(s) | [Kristopher Overholt](https://github.com/koverholt/) |" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tvgnzT1CKxrO" - }, - "source": [ - "## Overview\n", - "\n", - "This notebook introduces the code execution capabilities of the [Gemini 2.0 Flash model](https://cloud.google.com/vertex-ai/generative-ai/docs/gemini-v2), a new multimodal generative AI model from Google [DeepMind](https://deepmind.google/). Gemini 2.0 Flash offers improvements in speed, quality, and advanced reasoning capabilities including enhanced understanding, coding, and instruction following.\n", - "\n", - "## Code Execution\n", - "\n", - "A key feature of this model is [code execution](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/code-execution), which is the ability to generate and execute Python code directly within the API. If you want the API to generate and run Python code and return the results, you can use code execution as demonstrated in this notebook.\n", - "\n", - "This code execution capability enables the model to generate code, execute and observe the results, correct the code if needed, and learn iteratively from the results until it produces a final output. 
This is particularly useful for applications that involve code-based reasoning such as solving mathematical equations or processing text.\n", - "\n", - "## Objectives\n", - "\n", - "In this tutorial, you will learn how to generate and execute code using the Gemini API in Vertex AI and the Google Gen AI SDK for Python with the Gemini 2.0 Flash model.\n", - "\n", - "You will complete the following tasks:\n", - "\n", - "- Generating and running sample Python code from text prompts\n", - "- Exploring data using code execution in multi-turn chats\n", - "- Using code execution in streaming sessions" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "61RBz8LLbxCR" - }, - "source": [ - "## Getting started" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "No17Cw5hgx12" - }, - "source": [ - "### Install Google Gen AI SDK for Python\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "tFy3H3aPgx12" - }, - "outputs": [], - "source": [ - "%pip install --upgrade --quiet google-genai" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dmWOrTJ3gx13" - }, - "source": [ - "### Authenticate your notebook environment (Colab only)\n", - "\n", - "If you're running this notebook on Google Colab, run the cell below to authenticate your environment." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "NyKGtVQjgx13" - }, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "if \"google.colab\" in sys.modules:\n", - " from google.colab import auth\n", - "\n", - " auth.authenticate_user()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0fggiCx13zxX" - }, - "source": [ - "### Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "JbrnA9yv3zMC" - }, - "outputs": [], - "source": [ - "import os\n", - "from IPython.display import display, Markdown\n", - "\n", - "from google import genai\n", - "from google.genai.types import (\n", - " Content,\n", - " GenerateContentConfig,\n", - " Part,\n", - " Tool,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vXiC1rOE3gSZ" - }, - "source": [ - "### Connect to a generative AI API service\n", - "\n", - "Google Gen AI APIs and models including Gemini are available in the following two API services:\n", - "\n", - "- [Google AI for Developers](https://ai.google.dev/gemini-api/docs): Experiment, prototype, and deploy small projects.\n", - "- [Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/overview): Build enterprise-ready projects on Google Cloud.\n", - "The Google Gen AI SDK provides a unified interface to these two API services.\n", - "\n", - "This notebook shows how to use the Google Gen AI SDK with the Gemini API in Vertex AI." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DF4l8DTdWgPY" - }, - "source": [ - "### Set Google Cloud project information and create client\n", - "\n", - "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n", - "\n", - "Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "Nqwi-5ufWp_B" - }, - "outputs": [], - "source": [ - "PROJECT_ID = \"[your-project-id]\" # @param {type: \"string\", placeholder: \"[your-project-id]\", isTemplate: true}\n", - "if not PROJECT_ID or PROJECT_ID == \"[your-project-id]\":\n", - " PROJECT_ID = str(os.environ.get(\"GOOGLE_CLOUD_PROJECT\"))\n", - "\n", - "LOCATION = os.environ.get(\"GOOGLE_CLOUD_REGION\", \"us-central1\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "3Ab5NQwr4B8j" - }, - "outputs": [], - "source": [ - "client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YZNpgtKJDdPZ" - }, - "source": [ - "### Improve code rendering in cell outputs" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "Y2e1lK_f_YWN" - }, - "outputs": [], - "source": [ - "from IPython.display import HTML, Markdown\n", - "\n", - "\n", - "# Modify CSS to display the results more clearly in Colab\n", - "def set_css_in_cell_output(unused):\n", - " display(HTML(\"\"\"\"\"\"))\n", - "\n", - "get_ipython().events.register('pre_run_cell', set_css_in_cell_output)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x1vpnyk-q-fz" - }, - "source": [ - "## Working with code execution in Gemini 2.0\n", - "\n", - "### Load the Gemini model\n", - "\n", - "The following code 
loads the Gemini 2.0 Flash model. You can learn about all Gemini models on Vertex AI by visiting the [documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models):" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 - }, - "id": "L8gLWcOFqqF2", - "outputId": "1b29d0fd-92d4-4cbb-a7bc-2d6f201069c5" - }, - "outputs": [ + "cells": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ur8xi4C7S06n" + }, + "outputs": [], + "source": [ + "# Copyright 2024 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "MODEL_ID = \"gemini-2.0-flash-exp\" # @param {type: \"string\"}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "q-jdBwXlM67j" - }, - "source": [ - "### Define the code execution tool\n", - "\n", - "The following code initializes the code execution tool by passing `code_execution` in a `Tool` definition.\n", - "\n", - "Later we'll register this tool with the model that it can use to generate and run Python code:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 }, - "id": "BFxIcGkxbq3_", - "outputId": "e63a2531-1e94-4216-b440-6b3230c0773a" - }, - "outputs": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "JAPoU8Sm5E6e" + }, + "source": [ + "# Intro to Generating and Executing Python Code with Gemini 2.0\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Google
Open in Colab\n", + "
\n", + "
\n", + " \n", + " \"Google
Open in Colab Enterprise\n", + "
\n", + "
\n", + " \n", + " \"Vertex
Open in Vertex AI Workbench\n", + "
\n", + "
\n", + " \n", + " \"BigQuery
Open in BigQuery Studio\n", + "
\n", + "
\n", + " \n", + " \"GitHub
View on GitHub\n", + "
\n", + "
\n", + "\n", + "
\n", + "\n", + "Share to:\n", + "\n", + "\n", + " \"LinkedIn\n", + "\n", + "\n", + "\n", + " \"Bluesky\n", + "\n", + "\n", + "\n", + " \"X\n", + "\n", + "\n", + "\n", + " \"Reddit\n", + "\n", + "\n", + "\n", + " \"Facebook\n", + "" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "code_execution_tool = Tool(\n", - " code_execution={}\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mZgn5tm-NCfH" - }, - "source": [ - "### Generate and execute code\n", - "\n", - "The following code sends a prompt to the Gemini model, asking it to generate and execute Python code to calculate the sum of the first 50 prime numbers. The code execution tool is passed in so the model can generate and run the code:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 }, - "id": "b52qMx0IGA0K", - "outputId": "27176edc-11d8-44e1-ff6d-34b2939f5fcb" - }, - "outputs": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "84f0f73a0f76" + }, + "source": [ + "| | |\n", + "|-|-|\n", + "| Author(s) | [Kristopher Overholt](https://github.com/koverholt/) |" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "PROMPT = \"\"\"\n", - "What is the sum of the first 50 prime numbers?\n", - "Generate and run code for the calculation.\n", - "\"\"\"\n", - "\n", - "response = client.models.generate_content(\n", - " model=MODEL_ID,\n", - " contents=PROMPT,\n", - " config=GenerateContentConfig(\n", - " tools=[code_execution_tool],\n", - " temperature=0,\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l-mfiMNasgqH" - }, - "source": [ - "### View the generated code\n", - "\n", - "The following code iterates through the response and displays any generated Python code by checking for `part.executable_code` in the response parts:" - ] - }, 
- { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 557 }, - "id": "J5mcXw6ZraLS", - "outputId": "32b45048-e529-439d-e7ea-d20dcd032a40" - }, - "outputs": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "tvgnzT1CKxrO" + }, + "source": [ + "## Overview\n", + "\n", + "This notebook introduces the code execution capabilities of the [Gemini 2.0 Flash model](https://cloud.google.com/vertex-ai/generative-ai/docs/gemini-v2), a new multimodal generative AI model from Google [DeepMind](https://deepmind.google/). Gemini 2.0 Flash offers improvements in speed, quality, and advanced reasoning capabilities including enhanced understanding, coding, and instruction following.\n", + "\n", + "## Code Execution\n", + "\n", + "A key feature of this model is [code execution](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/code-execution), which is the ability to generate and execute Python code directly within the API. If you want the API to generate and run Python code and return the results, you can use code execution as demonstrated in this notebook.\n", + "\n", + "This code execution capability enables the model to generate code, execute and observe the results, correct the code if needed, and learn iteratively from the results until it produces a final output. 
This is particularly useful for applications that involve code-based reasoning such as solving mathematical equations or processing text.\n", + "\n", + "## Objectives\n", + "\n", + "In this tutorial, you will learn how to generate and execute code using the Gemini API in Vertex AI and the Google Gen AI SDK for Python with the Gemini 2.0 Flash model.\n", + "\n", + "You will complete the following tasks:\n", + "\n", + "- Generating and running sample Python code from text prompts\n", + "- Exploring data using code execution in multi-turn chats\n", + "- Using code execution in streaming sessions" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "\n", - "```\n", - "\n", - "def is_prime(n):\n", - " if n <= 1:\n", - " return False\n", - " if n <= 3:\n", - " return True\n", - " if n % 2 == 0 or n % 3 == 0:\n", - " return False\n", - " i = 5\n", - " while i * i <= n:\n", - " if n % i == 0 or n % (i + 2) == 0:\n", - " return False\n", - " i += 6\n", - " return True\n", - "\n", - "primes = []\n", - "num = 2\n", - "while len(primes) < 50:\n", - " if is_prime(num):\n", - " primes.append(num)\n", - " num += 1\n", - "\n", - "sum_of_primes = sum(primes)\n", - "print(f'{sum_of_primes=}')\n", - "\n", - "```\n" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "61RBz8LLbxCR" + }, + "source": [ + "## Getting started" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "for part in response.candidates[0].content.parts:\n", - " if part.executable_code:\n", - " display(Markdown(\n", - "f\"\"\"\n", - "```\n", - "{part.executable_code.code}\n", - "```\n", - "\"\"\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ppumif-94xTF" - }, - "source": [ - "### View the code execution results\n", - "\n", - "The following code iterates through the response and displays the execution result and outcome by checking for `part.code_execution_result` in the response parts:" - ] - 
}, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 136 }, - "id": "J891OBjc4xn9", - "outputId": "9df01f46-295f-407d-b79c-60704aa4f0d9" - }, - "outputs": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "No17Cw5hgx12" + }, + "source": [ + "### Install Google Gen AI SDK for Python\n" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "sum_of_primes=5117\n" - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "tFy3H3aPgx12" + }, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet google-genai" ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Outcome: OUTCOME_OK\n" - ] - } - ], - "source": [ - "for part in response.candidates[0].content.parts:\n", - " if part.code_execution_result:\n", - " display(Markdown(part.code_execution_result.output))\n", - " print(\"\\nOutcome:\", part.code_execution_result.outcome)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5u_XuZlMnH9S" - }, - "source": [ - "Great! Now you have the answer (5117) as well as the generated (and verified via execution!) Python code.\n", - "\n", - "At this point in your application, you would save the output code, result, or outcome and display it to the end-user or use it downstream in your application." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8uJ-Fk1I_AH8" - }, - "source": [ - "### Code execution in a chat session\n", - "\n", - "This section shows how to use code execution in an interactive chat with history using the Gemini API.\n", - "\n", - "You can use `client.chats.create` to create a chat session and passes in the code execution tool, enabling the model to generate and run code:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 }, - "id": "puL91bq7tirC", - "outputId": "3ff3d89f-6153-46f9-96cc-37d1119d13de" - }, - "outputs": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "dmWOrTJ3gx13" + }, + "source": [ + "### Authenticate your notebook environment (Colab only)\n", + "\n", + "If you're running this notebook on Google Colab, run the cell below to authenticate your environment." ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "chat = client.chats.create(model=MODEL_ID,\n", - " config=GenerateContentConfig(\n", - " tools=[code_execution_tool],\n", - " temperature=0,\n", - "))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Bmu4bSApoECT" - }, - "source": [ - "You'll start the chat by asking the model to generate sample time series data with noise and then output a sample of 10 data points:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 }, - "id": "8iyq5sKCtstH", - "outputId": "7a896abc-a4ad-4b5f-eed7-8f417a6b523b" - }, - "outputs": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "NyKGtVQjgx13" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "if \"google.colab\" in sys.modules:\n", + " from google.colab import auth\n", + 
"\n", + " auth.authenticate_user()" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "response = chat.send_message(\"\"\"Generate code that creates sample time series\n", - "data of temperature vs. time in a test furnace. Add noise to the data. Output\n", - "a sample of 10 data points from the time series data.\"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vVhCKKBioJga" - }, - "source": [ - "Now you can iterate through the response to display any generated Python code and execution results by checking for `part.executable_code` and `part.code_execution_result` in the response parts:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 660 }, - "id": "8pjwEGzft29N", - "outputId": "d5b25483-b48c-4e67-b9a7-82ebc0a50640" - }, - "outputs": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "0fggiCx13zxX" + }, + "source": [ + "### Import libraries" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "\n", - "```\n", - "\n", - "import numpy as np\n", - "\n", - "# 1. Define Time Range\n", - "time = np.linspace(0, 10, 100) # 100 points from 0 to 10 seconds\n", - "\n", - "# 2. Generate Base Temperature Data (linear increase)\n", - "base_temp = 20 + 5 * time # Start at 20 degrees, increase by 5 degrees per second\n", - "\n", - "# 3. Add Noise\n", - "noise = np.random.normal(0, 2, len(time)) # Gaussian noise with mean 0, std dev 2\n", - "noisy_temp = base_temp + noise\n", - "\n", - "# 4. 
Output Sample\n", - "sample_indices = np.linspace(0, len(time) - 1, 10, dtype=int)\n", - "sample_time = time[sample_indices]\n", - "sample_temp = noisy_temp[sample_indices]\n", - "\n", - "print(\"Sample Time Series Data (Time, Temperature):\")\n", - "for t, temp in zip(sample_time, sample_temp):\n", - " print(f\"Time: {t:.2f} s, Temperature: {temp:.2f} °C\")\n", - "\n", - "```\n" - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "JbrnA9yv3zMC" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from IPython.display import Markdown, display\n", + "from google import genai\n", + "from google.genai.types import GenerateContentConfig, Tool" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "Sample Time Series Data (Time, Temperature):\n", - "Time: 0.00 s, Temperature: 21.10 °C\n", - "Time: 1.11 s, Temperature: 27.38 °C\n", - "Time: 2.22 s, Temperature: 32.54 °C\n", - "Time: 3.33 s, Temperature: 35.23 °C\n", - "Time: 4.44 s, Temperature: 44.09 °C\n", - "Time: 5.56 s, Temperature: 49.99 °C\n", - "Time: 6.67 s, Temperature: 52.68 °C\n", - "Time: 7.78 s, Temperature: 59.13 °C\n", - "Time: 8.89 s, Temperature: 64.07 °C\n", - "Time: 10.00 s, Temperature: 66.55 °C\n" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "vXiC1rOE3gSZ" + }, + "source": [ + "### Connect to a generative AI API service\n", + "\n", + "Google Gen AI APIs and models including Gemini are available in the following two API services:\n", + "\n", + "- [Google AI for Developers](https://ai.google.dev/gemini-api/docs): Experiment, prototype, and deploy small projects.\n", + "- [Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/overview): Build enterprise-ready projects on Google Cloud.\n", + "The Google Gen AI SDK provides a unified interface to these two API services.\n", + "\n", + "This notebook shows how to use the Google Gen AI SDK with the Gemini API in 
Vertex AI." ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Outcome: OUTCOME_OK\n" - ] - } - ], - "source": [ - "for part in response.candidates[0].content.parts:\n", - " if part.executable_code:\n", - " display(Markdown(\n", - "f\"\"\"\n", - "```\n", - "{part.executable_code.code}\n", - "```\n", - "\"\"\"))\n", - " if part.code_execution_result:\n", - " display(Markdown(part.code_execution_result.output))\n", - " print(\"\\nOutcome:\", part.code_execution_result.outcome)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4AHoGmDBQuxn" - }, - "source": [ - "Now you can ask the model to add a smoothed data series to the time series data:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 - }, - "id": "alR_tq3pss7j", - "outputId": "77f1acd6-e45f-4b8f-cea6-eafb3848af4c" - }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "DF4l8DTdWgPY" + }, + "source": [ + "### Set Google Cloud project information and create client\n", + "\n", + "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n", + "\n", + "Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)." 
] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "response = chat.send_message(\"\"\"Now add a data series that smooths the data using an appropriate method.\"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MnSlnA5FQ9UH" - }, - "source": [ - "And then display the generated Python code and execution results:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 }, - "id": "uMXRpE0NtRYC", - "outputId": "8b4674b3-c6fe-4118-f6ed-1f7a8d73fd6a" - }, - "outputs": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "Nqwi-5ufWp_B" + }, + "outputs": [], + "source": [ + "PROJECT_ID = \"[your-project-id]\" # @param {type: \"string\", placeholder: \"[your-project-id]\", isTemplate: true}\n", + "if not PROJECT_ID or PROJECT_ID == \"[your-project-id]\":\n", + " PROJECT_ID = str(os.environ.get(\"GOOGLE_CLOUD_PROJECT\"))\n", + "\n", + "LOCATION = os.environ.get(\"GOOGLE_CLOUD_REGION\", \"us-central1\")" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "\n", - "```\n", - "\n", - "import numpy as np\n", - "\n", - "def moving_average(data, window_size):\n", - " \"\"\"Calculates the moving average of a 1D array.\"\"\"\n", - " if window_size > len(data):\n", - " raise ValueError(\"Window size cannot be larger than the data length.\")\n", - " \n", - " weights = np.repeat(1.0, window_size) / window_size\n", - " return np.convolve(data, weights, 'valid')\n", - "\n", - "# 1. Define Time Range\n", - "time = np.linspace(0, 10, 100) # 100 points from 0 to 10 seconds\n", - "\n", - "# 2. Generate Base Temperature Data (linear increase)\n", - "base_temp = 20 + 5 * time # Start at 20 degrees, increase by 5 degrees per second\n", - "\n", - "# 3. 
Add Noise\n", - "noise = np.random.normal(0, 2, len(time)) # Gaussian noise with mean 0, std dev 2\n", - "noisy_temp = base_temp + noise\n", - "\n", - "# 4. Calculate Moving Average\n", - "window_size = 5\n", - "smoothed_temp = moving_average(noisy_temp, window_size)\n", - "\n", - "# Adjust time array to match the length of smoothed data\n", - "smoothed_time = time[window_size - 1:]\n", - "\n", - "# 5. Output Sample\n", - "sample_indices = np.linspace(0, len(smoothed_time) - 1, 10, dtype=int)\n", - "sample_time = smoothed_time[sample_indices]\n", - "sample_noisy_temp = noisy_temp[window_size - 1:][sample_indices]\n", - "sample_smoothed_temp = smoothed_temp[sample_indices]\n", - "\n", - "\n", - "print(\"Sample Time Series Data (Time, Noisy Temp, Smoothed Temp):\")\n", - "for t, noisy_temp, smoothed_temp in zip(sample_time, sample_noisy_temp, sample_smoothed_temp):\n", - " print(f\"Time: {t:.2f} s, Noisy Temp: {noisy_temp:.2f} °C, Smoothed Temp: {smoothed_temp:.2f} °C\")\n", - "\n", - "```\n" - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "3Ab5NQwr4B8j" + }, + "outputs": [], + "source": [ + "client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "Sample Time Series Data (Time, Noisy Temp, Smoothed Temp):\n", - "Time: 0.40 s, Noisy Temp: 24.24 °C, Smoothed Temp: 20.87 °C\n", - "Time: 1.41 s, Noisy Temp: 25.06 °C, Smoothed Temp: 25.35 °C\n", - "Time: 2.53 s, Noisy Temp: 35.70 °C, Smoothed Temp: 31.72 °C\n", - "Time: 3.54 s, Noisy Temp: 37.72 °C, Smoothed Temp: 37.53 °C\n", - "Time: 4.65 s, Noisy Temp: 42.47 °C, Smoothed Temp: 41.59 °C\n", - "Time: 5.66 s, Noisy Temp: 46.74 °C, Smoothed Temp: 47.52 °C\n", - "Time: 6.77 s, Noisy Temp: 51.56 °C, Smoothed Temp: 52.91 °C\n", - "Time: 7.78 s, Noisy Temp: 59.30 °C, Smoothed Temp: 57.67 °C\n", - "Time: 8.89 s, Noisy Temp: 63.41 °C, Smoothed Temp: 62.32 °C\n", 
- "Time: 10.00 s, Noisy Temp: 69.06 °C, Smoothed Temp: 69.32 °C\n" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "YZNpgtKJDdPZ" + }, + "source": [ + "### Improve code rendering in cell outputs" ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Outcome: OUTCOME_OK\n" - ] - } - ], - "source": [ - "for part in response.candidates[0].content.parts:\n", - " if part.executable_code:\n", - " display(Markdown(\n", - "f\"\"\"\n", - "```\n", - "{part.executable_code.code}\n", - "```\n", - "\"\"\"))\n", - " if part.code_execution_result:\n", - " display(Markdown(part.code_execution_result.output))\n", - " print(\"\\nOutcome:\", part.code_execution_result.outcome)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "I4VacTEyQ4lD" - }, - "source": [ - "Finally, you can ask the model to generate descriptive statistics for the time series data:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 }, - "id": "dmhPzmP8tywL", - "outputId": "6c3d6f7c-f937-4a83-9ba1-82e91ce96664" - }, - "outputs": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "Y2e1lK_f_YWN" + }, + "outputs": [], + "source": [ + "from IPython.display import HTML, Markdown\n", + "\n", + "\n", + "# Modify CSS to display the results more clearly in Colab\n", + "def set_css_in_cell_output(unused):\n", + " display(\n", + " HTML(\n", + " \"\"\"\"\"\"\n", + " )\n", + " )\n", + "\n", + "\n", + "get_ipython().events.register(\"pre_run_cell\", set_css_in_cell_output)" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "response = chat.send_message(\"\"\"Now generate and output descriptive statistics on the time series data.\"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { 
- "id": "I1t_zA5jRHsB" - }, - "source": [ - "And then display the generated Python code and execution results:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 }, - "id": "hIsMH3fPuKr5", - "outputId": "a56cf8bd-b65e-4913-a48e-bb4adb15962a" - }, - "outputs": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "x1vpnyk-q-fz" + }, + "source": [ + "## Working with code execution in Gemini 2.0\n", + "\n", + "### Load the Gemini model\n", + "\n", + "The following code loads the Gemini 2.0 Flash model. You can learn about all Gemini models on Vertex AI by visiting the [documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models):" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "\n", - "```\n", - "\n", - "import numpy as np\n", - "\n", - "def moving_average(data, window_size):\n", - " \"\"\"Calculates the moving average of a 1D array.\"\"\"\n", - " if window_size > len(data):\n", - " raise ValueError(\"Window size cannot be larger than the data length.\")\n", - " \n", - " weights = np.repeat(1.0, window_size) / window_size\n", - " return np.convolve(data, weights, 'valid')\n", - "\n", - "# 1. Define Time Range\n", - "time = np.linspace(0, 10, 100) # 100 points from 0 to 10 seconds\n", - "\n", - "# 2. Generate Base Temperature Data (linear increase)\n", - "base_temp = 20 + 5 * time # Start at 20 degrees, increase by 5 degrees per second\n", - "\n", - "# 3. Add Noise\n", - "noise = np.random.normal(0, 2, len(time)) # Gaussian noise with mean 0, std dev 2\n", - "noisy_temp = base_temp + noise\n", - "\n", - "# 4. 
Calculate Moving Average\n", - "window_size = 5\n", - "smoothed_temp = moving_average(noisy_temp, window_size)\n", - "\n", - "# Adjust time array to match the length of smoothed data\n", - "smoothed_time = time[window_size - 1:]\n", - "\n", - "# 5. Calculate Descriptive Statistics\n", - "noisy_mean = np.mean(noisy_temp)\n", - "noisy_std = np.std(noisy_temp)\n", - "noisy_min = np.min(noisy_temp)\n", - "noisy_max = np.max(noisy_temp)\n", - "\n", - "smoothed_mean = np.mean(smoothed_temp)\n", - "smoothed_std = np.std(smoothed_temp)\n", - "smoothed_min = np.min(smoothed_temp)\n", - "smoothed_max = np.max(smoothed_temp)\n", - "\n", - "\n", - "# 6. Output Statistics\n", - "print(\"Descriptive Statistics:\")\n", - "print(\"--------------------------------------------------\")\n", - "print(\"Noisy Temperature Data:\")\n", - "print(f\" Mean: {noisy_mean:.2f} °C\")\n", - "print(f\" Standard Deviation: {noisy_std:.2f} °C\")\n", - "print(f\" Minimum: {noisy_min:.2f} °C\")\n", - "print(f\" Maximum: {noisy_max:.2f} °C\")\n", - "print(\"--------------------------------------------------\")\n", - "print(\"Smoothed Temperature Data:\")\n", - "print(f\" Mean: {smoothed_mean:.2f} °C\")\n", - "print(f\" Standard Deviation: {smoothed_std:.2f} °C\")\n", - "print(f\" Minimum: {smoothed_min:.2f} °C\")\n", - "print(f\" Maximum: {smoothed_max:.2f} °C\")\n", - "print(\"--------------------------------------------------\")\n", - "\n", - "```\n" + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "L8gLWcOFqqF2" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - "" + "source": [ + "MODEL_ID = \"gemini-2.0-flash-exp\" # @param {type: \"string\"}" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "Descriptive Statistics:\n", - "--------------------------------------------------\n", - "Noisy Temperature Data:\n", - 
" Mean: 44.80 °C\n", - " Standard Deviation: 14.48 °C\n", - " Minimum: 17.34 °C\n", - " Maximum: 70.30 °C\n", - "--------------------------------------------------\n", - "Smoothed Temperature Data:\n", - " Mean: 44.84 °C\n", - " Standard Deviation: 13.84 °C\n", - " Minimum: 21.89 °C\n", - " Maximum: 67.61 °C\n", - "--------------------------------------------------\n" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "q-jdBwXlM67j" + }, + "source": [ + "### Define the code execution tool\n", + "\n", + "The following code initializes the code execution tool by passing `code_execution` in a `Tool` definition.\n", + "\n", + "Later we'll register this tool with the model that it can use to generate and run Python code:" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Outcome: OUTCOME_OK\n" - ] - } - ], - "source": [ - "for part in response.candidates[0].content.parts:\n", - " if part.executable_code:\n", - " display(Markdown(\n", - "f\"\"\"\n", - "```\n", - "{part.executable_code.code}\n", - "```\n", - "\"\"\"))\n", - " if part.code_execution_result:\n", - " display(Markdown(part.code_execution_result.output))\n", - " print(\"\\nOutcome:\", part.code_execution_result.outcome)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TBbNyWtDRZto" - }, - "source": [ - "This chat example demonstrates how you can use the Gemini API with code execution as a powerful tool for exploratory data analysis and more. Go forth and adapt this approach to your own projects and use cases!" 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Bl6KG5Ufu5XQ" - }, - "source": [ - "### Code execution in a streaming session\n", - "\n", - "You can also use the code execution functionality with streaming output from the Gemini API.\n", - "\n", - "The following code demonstrates how the Gemini API can generate and execute code while streaming the results:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "gTNMMLkNu5JH", - "outputId": "b2dd9780-297d-402e-9300-3254705540d3" - }, - "outputs": [ - { - "data": { - "text/html": [ - "" + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "BFxIcGkxbq3_" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - "" + "source": [ + "code_execution_tool = Tool(code_execution={})" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "#### Natural language stream" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "mZgn5tm-NCfH" + }, + "source": [ + "### Generate and execute code\n", + "\n", + "The following code sends a prompt to the Gemini model, asking it to generate and execute Python code to calculate the sum of the first 50 prime numbers. 
The code execution tool is passed in so the model can generate and run the code:" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "Okay" + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "b52qMx0IGA0K" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - "" + "source": [ + "PROMPT = \"\"\"\n", + "What is the sum of the first 50 prime numbers?\n", + "Generate and run code for the calculation.\n", + "\"\"\"\n", + "\n", + "response = client.models.generate_content(\n", + " model=MODEL_ID,\n", + " contents=PROMPT,\n", + " config=GenerateContentConfig(\n", + " tools=[code_execution_tool],\n", + " temperature=0,\n", + " ),\n", + ")" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "---" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "l-mfiMNasgqH" + }, + "source": [ + "### View the generated code\n", + "\n", + "The following code iterates through the response and displays any generated Python code by checking for `part.executable_code` in the response parts:" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "#### Natural language stream" + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "J5mcXw6ZraLS" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "\n", + "```\n", + "\n", + "def is_prime(n):\n", + " if n <= 1:\n", + " return False\n", + " if n <= 3:\n", + " return True\n", + " if n % 2 == 0 or n % 3 == 0:\n", + " return False\n", + " i = 5\n", + " while i * i <= n:\n", + " if n % i == 0 or n % (i + 2) == 0:\n", + " return False\n", + " i += 6\n", + " return True\n", + "\n", + "primes = []\n", + "num 
= 2\n", + "while len(primes) < 50:\n", + " if is_prime(num):\n", + " primes.append(num)\n", + " num += 1\n", + "\n", + "sum_of_primes = sum(primes)\n", + "print(f'{sum_of_primes=}')\n", + "\n", + "```\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - "" + "source": [ + "for part in response.candidates[0].content.parts:\n", + " if part.executable_code:\n", + " display(\n", + " Markdown(\n", + " f\"\"\"\n", + "```\n", + "{part.executable_code.code}\n", + "```\n", + "\"\"\"\n", + " )\n", + " )" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - ", I can do that. Here's how I'll approach this:" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "ppumif-94xTF" + }, + "source": [ + "### View the code execution results\n", + "\n", + "The following code iterates through the response and displays the execution result and outcome by checking for `part.code_execution_result` in the response parts:" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "---" + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "J891OBjc4xn9" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "sum_of_primes=5117\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Outcome: OUTCOME_OK\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "for part in response.candidates[0].content.parts:\n", + " if part.code_execution_result:\n", + " display(Markdown(part.code_execution_result.output))\n", + " print(\"\\nOutcome:\", part.code_execution_result.outcome)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - 
"#### Natural language stream" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "5u_XuZlMnH9S" + }, + "source": [ + "Great! Now you have the answer (5117) as well as the generated (and verified via execution!) Python code.\n", + "\n", + "At this point in your application, you would save the output code, result, or outcome and display it to the end-user or use it downstream in your application." ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "\n", - "\n", - "1. **Generate 20 random names:** I'll use" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "8uJ-Fk1I_AH8" + }, + "source": [ + "### Code execution in a chat session\n", + "\n", + "This section shows how to use code execution in an interactive chat with history using the Gemini API.\n", + "\n", + "You can use `client.chats.create` to create a chat session and passes in the code execution tool, enabling the model to generate and run code:" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "---" + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "puL91bq7tirC" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - "" + "source": [ + "chat = client.chats.create(\n", + " model=MODEL_ID,\n", + " config=GenerateContentConfig(\n", + " tools=[code_execution_tool],\n", + " temperature=0,\n", + " ),\n", + ")" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "#### Natural language stream" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "Bmu4bSApoECT" + }, + "source": [ + "You'll start the chat by asking the model to generate sample time series data with noise and then output a sample of 10 data points:" ] - }, - "metadata": {}, - "output_type": 
"display_data" }, { - "data": { - "text/markdown": [ - " Python's `random` module to generate a list of 20 random names. For simplicity, I'll use a combination of common first names." + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "8iyq5sKCtstH" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - "" + "source": [ + "response = chat.send_message(\n", + " \"\"\"Generate code that creates sample time series\n", + "data of temperature vs. time in a test furnace. Add noise to the data. Output\n", + "a sample of 10 data points from the time series data.\"\"\"\n", + ")" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "---" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "vVhCKKBioJga" + }, + "source": [ + "Now you can iterate through the response to display any generated Python code and execution results by checking for `part.executable_code` and `part.code_execution_result` in the response parts:" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "#### Natural language stream" + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "8pjwEGzft29N" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "\n", + "```\n", + "\n", + "import numpy as np\n", + "\n", + "# 1. Define Time Range\n", + "time = np.linspace(0, 10, 100) # 100 points from 0 to 10 seconds\n", + "\n", + "# 2. Generate Base Temperature Data (linear increase)\n", + "base_temp = 20 + 5 * time # Start at 20 degrees, increase by 5 degrees per second\n", + "\n", + "# 3. 
Add Noise\n", + "noise = np.random.normal(0, 2, len(time)) # Gaussian noise with mean 0, std dev 2\n", + "noisy_temp = base_temp + noise\n", + "\n", + "# 4. Output Sample\n", + "sample_indices = np.linspace(0, len(time) - 1, 10, dtype=int)\n", + "sample_time = time[sample_indices]\n", + "sample_temp = noisy_temp[sample_indices]\n", + "\n", + "print(\"Sample Time Series Data (Time, Temperature):\")\n", + "for t, temp in zip(sample_time, sample_temp):\n", + " print(f\"Time: {t:.2f} s, Temperature: {temp:.2f} °C\")\n", + "\n", + "```\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Sample Time Series Data (Time, Temperature):\n", + "Time: 0.00 s, Temperature: 21.10 °C\n", + "Time: 1.11 s, Temperature: 27.38 °C\n", + "Time: 2.22 s, Temperature: 32.54 °C\n", + "Time: 3.33 s, Temperature: 35.23 °C\n", + "Time: 4.44 s, Temperature: 44.09 °C\n", + "Time: 5.56 s, Temperature: 49.99 °C\n", + "Time: 6.67 s, Temperature: 52.68 °C\n", + "Time: 7.78 s, Temperature: 59.13 °C\n", + "Time: 8.89 s, Temperature: 64.07 °C\n", + "Time: 10.00 s, Temperature: 66.55 °C\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Outcome: OUTCOME_OK\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "for part in response.candidates[0].content.parts:\n", + " if part.executable_code:\n", + " display(\n", + " Markdown(\n", + " f\"\"\"\n", + "```\n", + "{part.executable_code.code}\n", + "```\n", + "\"\"\"\n", + " )\n", + " )\n", + " if part.code_execution_result:\n", + " display(Markdown(part.code_execution_result.output))\n", + " print(\"\\nOutcome:\", part.code_execution_result.outcome)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "\n", - "2. 
**Filter for names with 'a':** I'll iterate through the list and create a new list containing only the names that include the" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "4AHoGmDBQuxn" + }, + "source": [ + "Now you can ask the model to add a smoothed data series to the time series data:" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "---" + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "alR_tq3pss7j" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - "" + "source": [ + "response = chat.send_message(\n", + " \"\"\"Now add a data series that smooths the data using an appropriate method.\"\"\"\n", + ")" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "#### Natural language stream" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "MnSlnA5FQ9UH" + }, + "source": [ + "And then display the generated Python code and execution results:" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - " letter 'a' (case-insensitive).\n", - "3. 
**Count and output:** I'll count the number of names in the filtered list and output that count, along with the filtered list itself.\n", - "\n", - "Here's the code" + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "uMXRpE0NtRYC" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "\n", + "```\n", + "\n", + "import numpy as np\n", + "\n", + "def moving_average(data, window_size):\n", + " \"\"\"Calculates the moving average of a 1D array.\"\"\"\n", + " if window_size > len(data):\n", + " raise ValueError(\"Window size cannot be larger than the data length.\")\n", + " \n", + " weights = np.repeat(1.0, window_size) / window_size\n", + " return np.convolve(data, weights, 'valid')\n", + "\n", + "# 1. Define Time Range\n", + "time = np.linspace(0, 10, 100) # 100 points from 0 to 10 seconds\n", + "\n", + "# 2. Generate Base Temperature Data (linear increase)\n", + "base_temp = 20 + 5 * time # Start at 20 degrees, increase by 5 degrees per second\n", + "\n", + "# 3. Add Noise\n", + "noise = np.random.normal(0, 2, len(time)) # Gaussian noise with mean 0, std dev 2\n", + "noisy_temp = base_temp + noise\n", + "\n", + "# 4. Calculate Moving Average\n", + "window_size = 5\n", + "smoothed_temp = moving_average(noisy_temp, window_size)\n", + "\n", + "# Adjust time array to match the length of smoothed data\n", + "smoothed_time = time[window_size - 1:]\n", + "\n", + "# 5. 
Output Sample\n", + "sample_indices = np.linspace(0, len(smoothed_time) - 1, 10, dtype=int)\n", + "sample_time = smoothed_time[sample_indices]\n", + "sample_noisy_temp = noisy_temp[window_size - 1:][sample_indices]\n", + "sample_smoothed_temp = smoothed_temp[sample_indices]\n", + "\n", + "\n", + "print(\"Sample Time Series Data (Time, Noisy Temp, Smoothed Temp):\")\n", + "for t, noisy_temp, smoothed_temp in zip(sample_time, sample_noisy_temp, sample_smoothed_temp):\n", + " print(f\"Time: {t:.2f} s, Noisy Temp: {noisy_temp:.2f} °C, Smoothed Temp: {smoothed_temp:.2f} °C\")\n", + "\n", + "```\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Sample Time Series Data (Time, Noisy Temp, Smoothed Temp):\n", + "Time: 0.40 s, Noisy Temp: 24.24 °C, Smoothed Temp: 20.87 °C\n", + "Time: 1.41 s, Noisy Temp: 25.06 °C, Smoothed Temp: 25.35 °C\n", + "Time: 2.53 s, Noisy Temp: 35.70 °C, Smoothed Temp: 31.72 °C\n", + "Time: 3.54 s, Noisy Temp: 37.72 °C, Smoothed Temp: 37.53 °C\n", + "Time: 4.65 s, Noisy Temp: 42.47 °C, Smoothed Temp: 41.59 °C\n", + "Time: 5.66 s, Noisy Temp: 46.74 °C, Smoothed Temp: 47.52 °C\n", + "Time: 6.77 s, Noisy Temp: 51.56 °C, Smoothed Temp: 52.91 °C\n", + "Time: 7.78 s, Noisy Temp: 59.30 °C, Smoothed Temp: 57.67 °C\n", + "Time: 8.89 s, Noisy Temp: 63.41 °C, Smoothed Temp: 62.32 °C\n", + "Time: 10.00 s, Noisy Temp: 69.06 °C, Smoothed Temp: 69.32 °C\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Outcome: OUTCOME_OK\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "for part in response.candidates[0].content.parts:\n", + " if part.executable_code:\n", + " display(\n", + " Markdown(\n", + " f\"\"\"\n", + "```\n", + "{part.executable_code.code}\n", + "```\n", + "\"\"\"\n", + " )\n", + " )\n", + " if part.code_execution_result:\n", + " 
display(Markdown(part.code_execution_result.output))\n", + " print(\"\\nOutcome:\", part.code_execution_result.outcome)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "---" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "I4VacTEyQ4lD" + }, + "source": [ + "Finally, you can ask the model to generate descriptive statistics for the time series data:" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "#### Natural language stream" + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "dmhPzmP8tywL" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - "" + "source": [ + "response = chat.send_message(\n", + " \"\"\"Now generate and output descriptive statistics on the time series data.\"\"\"\n", + ")" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - ":\n", - "\n" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "I1t_zA5jRHsB" + }, + "source": [ + "And then display the generated Python code and execution results:" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "---" + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "hIsMH3fPuKr5" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "\n", + "```\n", + "\n", + "import numpy as np\n", + "\n", + "def moving_average(data, window_size):\n", + " \"\"\"Calculates the moving average of a 1D array.\"\"\"\n", + " if window_size > len(data):\n", + " raise ValueError(\"Window size cannot be larger than the data length.\")\n", + " \n", + " weights = np.repeat(1.0, window_size) / 
window_size\n", + " return np.convolve(data, weights, 'valid')\n", + "\n", + "# 1. Define Time Range\n", + "time = np.linspace(0, 10, 100) # 100 points from 0 to 10 seconds\n", + "\n", + "# 2. Generate Base Temperature Data (linear increase)\n", + "base_temp = 20 + 5 * time # Start at 20 degrees, increase by 5 degrees per second\n", + "\n", + "# 3. Add Noise\n", + "noise = np.random.normal(0, 2, len(time)) # Gaussian noise with mean 0, std dev 2\n", + "noisy_temp = base_temp + noise\n", + "\n", + "# 4. Calculate Moving Average\n", + "window_size = 5\n", + "smoothed_temp = moving_average(noisy_temp, window_size)\n", + "\n", + "# Adjust time array to match the length of smoothed data\n", + "smoothed_time = time[window_size - 1:]\n", + "\n", + "# 5. Calculate Descriptive Statistics\n", + "noisy_mean = np.mean(noisy_temp)\n", + "noisy_std = np.std(noisy_temp)\n", + "noisy_min = np.min(noisy_temp)\n", + "noisy_max = np.max(noisy_temp)\n", + "\n", + "smoothed_mean = np.mean(smoothed_temp)\n", + "smoothed_std = np.std(smoothed_temp)\n", + "smoothed_min = np.min(smoothed_temp)\n", + "smoothed_max = np.max(smoothed_temp)\n", + "\n", + "\n", + "# 6. 
Output Statistics\n", + "print(\"Descriptive Statistics:\")\n", + "print(\"--------------------------------------------------\")\n", + "print(\"Noisy Temperature Data:\")\n", + "print(f\" Mean: {noisy_mean:.2f} °C\")\n", + "print(f\" Standard Deviation: {noisy_std:.2f} °C\")\n", + "print(f\" Minimum: {noisy_min:.2f} °C\")\n", + "print(f\" Maximum: {noisy_max:.2f} °C\")\n", + "print(\"--------------------------------------------------\")\n", + "print(\"Smoothed Temperature Data:\")\n", + "print(f\" Mean: {smoothed_mean:.2f} °C\")\n", + "print(f\" Standard Deviation: {smoothed_std:.2f} °C\")\n", + "print(f\" Minimum: {smoothed_min:.2f} °C\")\n", + "print(f\" Maximum: {smoothed_max:.2f} °C\")\n", + "print(\"--------------------------------------------------\")\n", + "\n", + "```\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Descriptive Statistics:\n", + "--------------------------------------------------\n", + "Noisy Temperature Data:\n", + " Mean: 44.80 °C\n", + " Standard Deviation: 14.48 °C\n", + " Minimum: 17.34 °C\n", + " Maximum: 70.30 °C\n", + "--------------------------------------------------\n", + "Smoothed Temperature Data:\n", + " Mean: 44.84 °C\n", + " Standard Deviation: 13.84 °C\n", + " Minimum: 21.89 °C\n", + " Maximum: 67.61 °C\n", + "--------------------------------------------------\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Outcome: OUTCOME_OK\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "for part in response.candidates[0].content.parts:\n", + " if part.executable_code:\n", + " display(\n", + " Markdown(\n", + " f\"\"\"\n", + "```\n", + "{part.executable_code.code}\n", + "```\n", + "\"\"\"\n", + " )\n", + " )\n", + " if part.code_execution_result:\n", + " display(Markdown(part.code_execution_result.output))\n", + " 
print(\"\\nOutcome:\", part.code_execution_result.outcome)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "#### Code stream" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "TBbNyWtDRZto" + }, + "source": [ + "This chat example demonstrates how you can use the Gemini API with code execution as a powerful tool for exploratory data analysis and more. Go forth and adapt this approach to your own projects and use cases!" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "\n", - "```\n", - "\n", - "import random\n", - "\n", - "def generate_random_names(num_names):\n", - " first_names = [\"Alice\", \"Bob\", \"Charlie\", \"David\", \"Eve\", \"Frank\", \"Grace\", \"Henry\", \"Ivy\", \"Jack\", \"Kate\", \"Liam\", \"Mia\", \"Noah\", \"Olivia\", \"Peter\", \"Quinn\", \"Ryan\", \"Sophia\", \"Tom\"]\n", - " return random.choices(first_names, k=num_names)\n", - "\n", - "def filter_names_with_a(names):\n", - " return [name for name in names if 'a' in name.lower()]\n", - "\n", - "# Generate 20 random names\n", - "random_names = generate_random_names(20)\n", - "\n", - "# Filter names containing 'a'\n", - "names_with_a = filter_names_with_a(random_names)\n", - "\n", - "# Count the names with 'a'\n", - "count_of_names_with_a = len(names_with_a)\n", - "\n", - "# Output the results\n", - "print(f'{random_names=}')\n", - "print(f'{count_of_names_with_a=}')\n", - "print(f'{names_with_a=}')\n", - "\n", - "```\n" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "Bl6KG5Ufu5XQ" + }, + "source": [ + "### Code execution in a streaming session\n", + "\n", + "You can also use the code execution functionality with streaming output from the Gemini API.\n", + "\n", + "The following code demonstrates how the Gemini API can generate and execute code while streaming the results:" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - 
"data": { - "text/markdown": [ - "---" + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "gTNMMLkNu5JH" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "#### Natural language stream" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Okay" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "---" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "#### Natural language stream" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + ", I can do that. Here's how I'll approach this:" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "---" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "#### Natural language stream" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "\n", + "\n", + "1. **Generate 20 random names:** I'll use" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "---" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "#### Natural language stream" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + " Python's `random` module to generate a list of 20 random names. 
For simplicity, I'll use a combination of common first names." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "---" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "#### Natural language stream" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "\n", + "2. **Filter for names with 'a':** I'll iterate through the list and create a new list containing only the names that include the" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "---" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "#### Natural language stream" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + " letter 'a' (case-insensitive).\n", + "3. 
**Count and output:** I'll count the number of names in the filtered list and output that count, along with the filtered list itself.\n", + "\n", + "Here's the code" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "---" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "#### Natural language stream" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + ":\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "---" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "#### Code stream" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "\n", + "```\n", + "\n", + "import random\n", + "\n", + "def generate_random_names(num_names):\n", + " first_names = [\"Alice\", \"Bob\", \"Charlie\", \"David\", \"Eve\", \"Frank\", \"Grace\", \"Henry\", \"Ivy\", \"Jack\", \"Kate\", \"Liam\", \"Mia\", \"Noah\", \"Olivia\", \"Peter\", \"Quinn\", \"Ryan\", \"Sophia\", \"Tom\"]\n", + " return random.choices(first_names, k=num_names)\n", + "\n", + "def filter_names_with_a(names):\n", + " return [name for name in names if 'a' in name.lower()]\n", + "\n", + "# Generate 20 random names\n", + "random_names = generate_random_names(20)\n", + "\n", + "# Filter names containing 'a'\n", + "names_with_a = filter_names_with_a(random_names)\n", + "\n", + "# Count the names with 'a'\n", + "count_of_names_with_a = len(names_with_a)\n", + "\n", + "# Output the results\n", + "print(f'{random_names=}')\n", + "print(f'{count_of_names_with_a=}')\n", + "print(f'{names_with_a=}')\n", + "\n", + "```\n" + ], + "text/plain": [ + "" + 
] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "---" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "#### Code result" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "\n", + "```\n", + "random_names=['Noah', 'Bob', 'Tom', 'Quinn', 'Jack', 'Ryan', 'Henry', 'Eve', 'Kate', 'Liam', 'Ivy', 'Ivy', 'Eve', 'Henry', 'Liam', 'Jack', 'Bob', 'Frank', 'Grace', 'Kate']\n", + "count_of_names_with_a=10\n", + "names_with_a=['Noah', 'Jack', 'Ryan', 'Kate', 'Liam', 'Liam', 'Jack', 'Frank', 'Grace', 'Kate']\n", + "\n", + "```\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "---" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - "" + "source": [ + "PROMPT = \"\"\"\n", + "Generate a list of 20 random names, then create a new list with just the names\n", + "containing the letter 'a', then output the number of names that contain 'a' and\n", + "finally show me that new list.\n", + "\"\"\"\n", + "\n", + "for chunk in client.models.generate_content_stream(\n", + " model=MODEL_ID,\n", + " contents=PROMPT,\n", + " config=GenerateContentConfig(\n", + " tools=[code_execution_tool],\n", + " temperature=0,\n", + " ),\n", + "):\n", + " for part in chunk.candidates[0].content.parts:\n", + " if part.text:\n", + " display(Markdown(\"#### Natural language stream\"))\n", + " display(Markdown(part.text))\n", + " display(Markdown(\"---\"))\n", + " if part.executable_code:\n", + " display(Markdown(\"#### Code stream\"))\n", + " display(\n", + " Markdown(\n", + " f\"\"\"\n", + "```\n", + "{part.executable_code.code}\n", + "```\n", + "\"\"\"\n", + " )\n", + " )\n", + " display(Markdown(\"---\"))\n", + " if part.code_execution_result:\n", + " 
display(Markdown(\"#### Code result\"))\n", + " display(\n", + " Markdown(\n", + " f\"\"\"\n", + "```\n", + "{part.code_execution_result.output}\n", + "```\n", + "\"\"\"\n", + " )\n", + " )\n", + " display(Markdown(\"---\"))" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "#### Code result" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "2a4e033321ad" + }, + "source": [ + "This streaming example demonstrated how the Gemini API can generate, execute code, and provide results within a streaming session.\n", + "\n", + "## Summary\n", + "\n", + "Refer to the [documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/code-execution) for more details about code execution, and in particular, the [recommendations](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/code-execution#code-execution-vs-function-calling) regarding differences between code execution and [function calling](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/function-calling).\n", + "\n", + "### Next steps\n", + "\n", + "- See the [Google Gen AI SDK reference docs](https://googleapis.github.io/python-genai/)\n", + "- Explore other notebooks in the [Google Cloud Generative AI GitHub repository](https://github.com/GoogleCloudPlatform/generative-ai)\n", + "- Explore AI models in [Model Garden](https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/explore-models)" ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "\n", - "```\n", - "random_names=['Noah', 'Bob', 'Tom', 'Quinn', 'Jack', 'Ryan', 'Henry', 'Eve', 'Kate', 'Liam', 'Ivy', 'Ivy', 'Eve', 'Henry', 'Liam', 'Jack', 'Bob', 'Frank', 'Grace', 'Kate']\n", - "count_of_names_with_a=10\n", - "names_with_a=['Noah', 'Jack', 'Ryan', 'Kate', 'Liam', 'Liam', 'Jack', 'Frank', 'Grace', 'Kate']\n", - "\n", - "```\n" + } + ], + "metadata": { + "colab": { + 
"collapsed_sections": [ + "YZNpgtKJDdPZ" ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" + "name": "intro_code_execution.ipynb", + "toc_visible": true }, - { - "data": { - "text/markdown": [ - "---" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" + "kernelspec": { + "display_name": "Python 3", + "name": "python3" } - ], - "source": [ - "PROMPT = \"\"\"\n", - "Generate a list of 20 random names, then create a new list with just the names\n", - "containing the letter 'a', then output the number of names that contain 'a' and\n", - "finally show me that new list.\n", - "\"\"\"\n", - "\n", - "for chunk in client.models.generate_content_stream(\n", - " model=MODEL_ID,\n", - " contents=PROMPT,\n", - "\n", - " config=GenerateContentConfig(\n", - " tools=[code_execution_tool],\n", - " temperature=0,\n", - " )\n", - "):\n", - " for part in chunk.candidates[0].content.parts:\n", - " if part.text:\n", - " display(Markdown(\"#### Natural language stream\"))\n", - " display(Markdown(part.text))\n", - " display(Markdown(\"---\"))\n", - " if part.executable_code:\n", - " display(Markdown(\"#### Code stream\"))\n", - " display(Markdown(\n", - "f\"\"\"\n", - "```\n", - "{part.executable_code.code}\n", - "```\n", - "\"\"\"))\n", - " display(Markdown(\"---\"))\n", - " if part.code_execution_result:\n", - " display(Markdown(\"#### Code result\"))\n", - " display(Markdown(\n", - "f\"\"\"\n", - "```\n", - "{part.code_execution_result.output}\n", - "```\n", - "\"\"\"))\n", - " display(Markdown(\"---\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2a4e033321ad" - }, - "source": [ - "This streaming example demonstrated how the Gemini API can generate, execute code, and provide results within a streaming session.\n", - "\n", - "## Summary\n", - "\n", - "Refer to the [documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/code-execution) for more details about code 
execution, and in particular, the [recommendations](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/code-execution#code-execution-vs-function-calling) regarding differences between code execution and [function calling](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/function-calling).\n", - "\n", - "### Next steps\n", - "\n", - "- See the [Google Gen AI SDK reference docs](https://googleapis.github.io/python-genai/)\n", - "- Explore other notebooks in the [Google Cloud Generative AI GitHub repository](https://github.com/GoogleCloudPlatform/generative-ai)\n", - "- Explore AI models in [Model Garden](https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/explore-models)" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "YZNpgtKJDdPZ" - ], - "provenance": [], - "toc_visible": true }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/gemini/multimodal-live-api/real_time_rag_bank_loans_gemini_2_0.ipynb b/gemini/multimodal-live-api/real_time_rag_bank_loans_gemini_2_0.ipynb index 0b2cf287082..7ec5d2d82c1 100644 --- a/gemini/multimodal-live-api/real_time_rag_bank_loans_gemini_2_0.ipynb +++ b/gemini/multimodal-live-api/real_time_rag_bank_loans_gemini_2_0.ipynb @@ -158,8 +158,7 @@ " * For longer documents, the entire content can be analyzed at once.\n", " * This might be slower, but can provide more comprehensive answers.\n", "\n", - "**More in depth techincal details in the code below**\n", - "\n" + "**More in depth techincal details in the code below**\n" ] }, { @@ -194,19 +193,20 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mqkS0MFt53rB" + }, + "outputs": [], "source": [ "%%capture\n", "\n", "from google.colab import auth\n", + "\n", "auth.authenticate_user()\n", "\n", "!pip3 install PyPDF2" - ], - "metadata": { - "id": "mqkS0MFt53rB" - }, - 
"execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -291,26 +291,28 @@ }, "outputs": [], "source": [ + "import os\n", + "import subprocess\n", + "from typing import Any\n", + "\n", + "from IPython.display import Audio, Markdown, display\n", + "import PyPDF2\n", + "import gcsfs\n", "from google import genai\n", - "from google.genai.types import Tool, GenerateContentConfig, Retrieval, VertexAISearch, VertexRagStore, LiveConnectConfig, EmbedContentConfig\n", "from google.cloud import storage\n", - "\n", + "from google.genai.types import (\n", + " EmbedContentConfig,\n", + " GenerateContentConfig,\n", + " LiveConnectConfig,\n", + " Retrieval,\n", + " Tool,\n", + " VertexAISearch,\n", + " VertexRagStore,\n", + ")\n", "import numpy as np\n", - "import PyPDF2\n", - "from sklearn.metrics.pairwise import cosine_similarity\n", "import pandas as pd\n", - "import time\n", - "from tenacity import retry, wait_random_exponential, stop_after_attempt\n", - "from typing import List, Dict, Any\n", - "\n", - "import asyncio\n", - "import base64\n", - "import json\n", - "import os\n", - "import subprocess\n", - "from IPython.display import display, Audio, Markdown\n", - "\n", - "import gcsfs" + "from sklearn.metrics.pairwise import cosine_similarity\n", + "from tenacity import retry, stop_after_attempt, wait_random_exponential" ] }, { @@ -330,65 +332,72 @@ }, "outputs": [], "source": [ - "MODEL_ID = \"gemini-2.0-flash-exp\" # @param {type: \"string\"}\n", + "MODEL_ID = \"gemini-2.0-flash-exp\" # @param {type: \"string\"}\n", "\n", - "MODEL = f\"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{MODEL_ID}\"\n", - "text_embedding_model = \"text-embedding-004\" # @param {type:\"string\", isTemplate: true}" + "MODEL = (\n", + " f\"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{MODEL_ID}\"\n", + ")\n", + "text_embedding_model = \"text-embedding-004\" # @param {type:\"string\", isTemplate: true}" ] }, { "cell_type": 
"markdown", + "metadata": { + "id": "ibJJK_4ZfWhf" + }, "source": [ "### Initialize GenAi Client\n", "\n", "* Client for calling the Vertex AI GenAI APIs.\n", "* `vertexai=True`, indicates the client should communicate with the Vertex AI API endpoints." - ], - "metadata": { - "id": "ibJJK_4ZfWhf" - } + ] }, { "cell_type": "code", - "source": [ - "client = genai.Client(\n", - " vertexai=True, project=PROJECT_ID, location=LOCATION,\n", - ")\n" - ], + "execution_count": null, "metadata": { "id": "t7Jq0XZUnDz7" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "client = genai.Client(\n", + " vertexai=True,\n", + " project=PROJECT_ID,\n", + " location=LOCATION,\n", + ")" + ] }, { "cell_type": "markdown", - "source": [ - "## Multimodal Live API Implementation\n", - "\n", - "\n" - ], "metadata": { "id": "IOsTjQqBER70" - } + }, + "source": [ + "## Multimodal Live API Implementation\n" + ] }, { "cell_type": "markdown", - "source": [ - "### Authentication and token setup" - ], "metadata": { "id": "wqCyzisyF6nK" - } + }, + "source": [ + "### Authentication and token setup" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eBeVELQzF5-O" + }, + "outputs": [], "source": [ "def get_access_token():\n", " \"\"\"Fetches the Google Cloud access token.\"\"\"\n", " try:\n", " return subprocess.check_output(\n", - " [\"gcloud\", \"auth\", \"print-access-token\"], universal_newlines=True\n", + " [\"gcloud\", \"auth\", \"print-access-token\"], text=True\n", " ).strip()\n", " except subprocess.CalledProcessError as e:\n", " print(f\"Error getting access token: {e}\")\n", @@ -402,24 +411,24 @@ " print(\"Error: API_ENDPOINT environment variable not set.\")\n", " return None\n", " return api_endpoint" - ], - "metadata": { - "id": "eBeVELQzF5-O" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "### Multimodal Live API - Text in text out implementation" - ], "metadata": { "id": 
"BuL_GUXiIGSc" - } + }, + "source": [ + "### Multimodal Live API - Text in text out implementation" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mco9ix1iIJhe" + }, + "outputs": [], "source": [ "def generate_text(prompt: str) -> str:\n", " \"\"\"Generates text using the specified model and prompt.\n", @@ -436,137 +445,136 @@ " \"\"\"\n", " modality = \"TEXT\"\n", " response = client.models.generate_content(\n", - " model=MODEL, contents=f\"{prompt}\",\n", + " model=MODEL,\n", + " contents=f\"{prompt}\",\n", " config=GenerateContentConfig(\n", - " response_modalities=[modality],\n", - " )\n", + " response_modalities=[modality],\n", + " ),\n", " )\n", " return response.text" - ], - "metadata": { - "id": "Mco9ix1iIJhe" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "### Multimodel Live API - Text to Audio implementation" - ], "metadata": { "id": "A0xNxpkevK8V" - } + }, + "source": [ + "### Multimodel Live API - Text to Audio implementation" + ] }, { "cell_type": "code", - "source": [ - "async def generate_n_play_audio(client, prompt):\n", - " \"\"\"Generates audio from text using Gemini and plays it.\n", - "\n", - " Args:\n", - " client: The GenAI client instance.\n", - " prompt: The text to convert to audio.\n", - " model_id: The ID of the Gemini model to use (default: 'gemini-2.0-flash-exp').\n", - "\n", - " Returns:\n", - " None. 
Plays the generated audio directly.\n", - " \"\"\"\n", - " config = LiveConnectConfig(response_modalities=[\"AUDIO\"])\n", - " async with client.aio.live.connect(\n", - " model=MODEL_ID,\n", - " config=config,\n", - " ) as session:\n", - " text_input = prompt\n", - " display(Markdown(f\"**Input:** {text_input}\"))\n", - "\n", - " await session.send(input=text_input, end_of_turn=True)\n", - "\n", - " audio_data = []\n", - " async for message in session.receive():\n", - " if message.server_content.model_turn:\n", - " for part in message.server_content.model_turn.parts:\n", - " if part.inline_data:\n", - " audio_data.append(\n", - " np.frombuffer(part.inline_data.data, dtype=np.int16)\n", - " )\n", - "\n", - " if audio_data:\n", - " display(Audio(np.concatenate(audio_data), rate=24000, autoplay=True))\n" - ], + "execution_count": null, "metadata": { "id": "ARidhpu0vIre" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "async def generate_n_play_audio(client, prompt):\n", + " \"\"\"Generates audio from text using Gemini and plays it.\n", + "\n", + " Args:\n", + " client: The GenAI client instance.\n", + " prompt: The text to convert to audio.\n", + " model_id: The ID of the Gemini model to use (default: 'gemini-2.0-flash-exp').\n", + "\n", + " Returns:\n", + " None. 
Plays the generated audio directly.\n", + " \"\"\"\n", + " config = LiveConnectConfig(response_modalities=[\"AUDIO\"])\n", + " async with client.aio.live.connect(\n", + " model=MODEL_ID,\n", + " config=config,\n", + " ) as session:\n", + " text_input = prompt\n", + " display(Markdown(f\"**Input:** {text_input}\"))\n", + "\n", + " await session.send(input=text_input, end_of_turn=True)\n", + "\n", + " audio_data = []\n", + " async for message in session.receive():\n", + " if message.server_content.model_turn:\n", + " for part in message.server_content.model_turn.parts:\n", + " if part.inline_data:\n", + " audio_data.append(\n", + " np.frombuffer(part.inline_data.data, dtype=np.int16)\n", + " )\n", + "\n", + " if audio_data:\n", + " display(Audio(np.concatenate(audio_data), rate=24000, autoplay=True))" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "7thHmNHkkJ3x" + }, "source": [ "## Quick Usages\n", "\n", "Verify the initilisation with simple question" - ], - "metadata": { - "id": "7thHmNHkkJ3x" - } + ] }, { "cell_type": "code", - "source": [ - "test_prompt = \"How many days are there in year 2025?\"" - ], + "execution_count": null, "metadata": { "id": "c3T0HI02Nf3G" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "test_prompt = \"How many days are there in year 2025?\"" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "HFPYjSi-Fu_C" + }, "source": [ "### Text In Text Out\n", "\n", "Quick verification of all setup before further proceeding" - ], - "metadata": { - "id": "HFPYjSi-Fu_C" - } + ] }, { "cell_type": "code", - "source": [ - "output = generate_text(test_prompt)\n", - "print(output)" - ], + "execution_count": null, "metadata": { "id": "aLXgIDu5FS7-" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "output = generate_text(test_prompt)\n", + "print(output)" + ] }, { "cell_type": "markdown", - "source": [ - "### Text in Audio Out" - ], "metadata": { "id": "_z5XFF55valp" - } + }, + 
"source": [ + "### Text in Audio Out" + ] }, { "cell_type": "code", - "source": [ - "await generate_n_play_audio(client, test_prompt)" - ], + "execution_count": null, "metadata": { "id": "RuMEPbrjvkvQ" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "await generate_n_play_audio(client, test_prompt)" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "0sO_veps6Xt0" + }, "source": [ "## Option 1: Custom RAG (Retrieval-Augmented Generation)\n", "\n", @@ -607,49 +615,49 @@ " 2. Use Gemini to extract and generate an answer based on those sections.\n", " 3. Present the answer to you in a clear and concise format.\n", "\n", - "This approach significantly streamlines the process of extracting information from complex documents, enabling more efficient and informed decision-making.\n", - "\n" - ], - "metadata": { - "id": "0sO_veps6Xt0" - } + "This approach significantly streamlines the process of extracting information from complex documents, enabling more efficient and informed decision-making.\n" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "cx1ALgNtnMUn" + }, "source": [ "#### Get your documents\n", "\n", "\n", "\n", "1. Local Content\n", - "2. GCS Files\n", - "\n" - ], - "metadata": { - "id": "cx1ALgNtnMUn" - } + "2. 
GCS Files\n" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aJclTv_l3llP" + }, + "outputs": [], "source": [ - "#GCS path for the demo document, please use this for only refrence.\n", + "# GCS path for the demo document, please use this for only refrence.\n", "\n", - "#gs://github-repo/generative-ai/gemini2/use-cases/loan_example_documents/DEMO-BANK-LOAN-DETAILS.pdf\n", - "#gs://github-repo/generative-ai/gemini2/use-cases/loan_example_documents/Demo-bank-home-loan-agreement.pdf\n", + "# gs://github-repo/generative-ai/gemini2/use-cases/loan_example_documents/DEMO-BANK-LOAN-DETAILS.pdf\n", + "# gs://github-repo/generative-ai/gemini2/use-cases/loan_example_documents/Demo-bank-home-loan-agreement.pdf\n", "\n", "document = [\n", " \"gs://github-repo/generative-ai/gemini2/use-cases/loan_example_documents/DEMO-BANK-LOAN-DETAILS.pdf\",\n", - " \"gs://github-repo/generative-ai/gemini2/use-cases/loan_example_documents/Demo-bank-home-loan-agreement.pdf\"\n", + " \"gs://github-repo/generative-ai/gemini2/use-cases/loan_example_documents/Demo-bank-home-loan-agreement.pdf\",\n", "]" - ], - "metadata": { - "id": "aJclTv_l3llP" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tlB48y1IXj8e" + }, + "outputs": [], "source": [ "## Use and modify the below example code if you have the local document\n", "\n", @@ -657,43 +665,44 @@ "# \"/content/DEMO-BANK-LOAN-DETAILS.pdf\",\n", "# \"/content/Demo-bank-home-loan-agreement.pdf\"\n", "# ]" - ], - "metadata": { - "id": "tlB48y1IXj8e" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aEbT4aCXSFF9" + }, + "outputs": [], "source": [ "# Document read from GCS\n", "\n", + "\n", "def extract_text_from_gcs(gcs_path):\n", - " \"\"\"Extracts text from a PDF file.\"\"\"\n", - " bucket_name = gcs_path.split(\"/\")[2]\n", - " file_name = 
\"/\".join(gcs_path.split(\"/\")[3:])\n", + " \"\"\"Extracts text from a PDF file.\"\"\"\n", + " bucket_name = gcs_path.split(\"/\")[2]\n", + " file_name = \"/\".join(gcs_path.split(\"/\")[3:])\n", "\n", - " storage_client = storage.Client()\n", - " bucket = storage_client.bucket(bucket_name)\n", - " blob = bucket.blob(file_name)\n", + " storage_client = storage.Client()\n", + " bucket = storage_client.bucket(bucket_name)\n", + " blob = bucket.blob(file_name)\n", "\n", - " document_content = blob.download_as_bytes()\n", - " return document_content" - ], - "metadata": { - "id": "aEbT4aCXSFF9" - }, - "execution_count": null, - "outputs": [] + " document_content = blob.download_as_bytes()\n", + " return document_content" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mbidBvbOZRGo" + }, + "outputs": [], "source": [ "def extract_text_from_pdf(pdf_path):\n", " \"\"\"Extracts text from a PDF file, skipping blanks and handling empty PDFs.\"\"\"\n", " try:\n", - " with open(pdf_path, 'rb') as pdf_file:\n", + " with open(pdf_path, \"rb\") as pdf_file:\n", " pdf_reader = PyPDF2.PdfReader(pdf_file)\n", " text = []\n", "\n", @@ -712,40 +721,41 @@ "\n", " # Join the extracted text from all pages into a single string\n", " final_text = \"\\n\".join(text)\n", - " return final_text if final_text else \"Error: No readable text found in the PDF.\"\n", + " return (\n", + " final_text\n", + " if final_text\n", + " else \"Error: No readable text found in the PDF.\"\n", + " )\n", "\n", " except FileNotFoundError:\n", " return \"Error: PDF file not found.\"\n", " except PyPDF2.errors.PdfReadError:\n", - " return \"Error: Could not read the PDF file. It may be corrupted or encrypted.\"\n" - ], - "metadata": { - "id": "mbidBvbOZRGo" - }, - "execution_count": null, - "outputs": [] + " return \"Error: Could not read the PDF file. 
It may be corrupted or encrypted.\"" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "BPoVUUQ2WBbI" + }, "source": [ "#### RAG Creation\n", "\n", "RAG based on the large files chunking and embedding using text-embedding-004 with vector db" - ], - "metadata": { - "id": "BPoVUUQ2WBbI" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "87dsljf4WFAH" + }, + "outputs": [], "source": [ "@retry(wait=wait_random_exponential(multiplier=1, max=120), stop=stop_after_attempt(4))\n", "def get_embeddings(\n", - " embedding_client: Any,\n", - " embedding_model: str,\n", - " text: str,\n", - " output_dim: int = 768\n", - ") -> List[float]:\n", + " embedding_client: Any, embedding_model: str, text: str, output_dim: int = 768\n", + ") -> list[float]:\n", " \"\"\"\n", " Generate embeddings for text with retry logic for API quota management.\n", " \"\"\"\n", @@ -753,7 +763,7 @@ " response = embedding_client.models.embed_content(\n", " model=embedding_model,\n", " contents=[text],\n", - " config=EmbedContentConfig(output_dimensionality=output_dim)\n", + " config=EmbedContentConfig(output_dimensionality=output_dim),\n", " )\n", " return [response.embeddings[0].values]\n", " except Exception as e:\n", @@ -761,21 +771,21 @@ " return None\n", " print(f\"Error generating embeddings: {str(e)}\")\n", " raise" - ], - "metadata": { - "id": "87dsljf4WFAH" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FeKzRYQSWF7a" + }, + "outputs": [], "source": [ "def build_index(\n", - " document_paths: List[str],\n", + " document_paths: list[str],\n", " embedding_client: Any,\n", " embedding_model: str,\n", - " chunk_size: int = 500\n", + " chunk_size: int = 500,\n", ") -> pd.DataFrame:\n", " \"\"\"\n", " Build searchable index from documents with page-wise processing.\n", @@ -793,28 +803,28 @@ " page_text = page.extract_text()\n", "\n", " chunks = [\n", - " 
page_text[i:i+chunk_size]\n", + " page_text[i : i + chunk_size]\n", " for i in range(0, len(page_text), chunk_size)\n", " ]\n", "\n", " for chunk_num, chunk_text in enumerate(chunks):\n", " embeddings = get_embeddings(\n", - " embedding_client,\n", - " embedding_model,\n", - " chunk_text\n", + " embedding_client, embedding_model, chunk_text\n", " )\n", "\n", " if embeddings is None:\n", - " print(f\"Warning: Could not generate embeddings for chunk {chunk_num} on page {page_num + 1}\")\n", + " print(\n", + " f\"Warning: Could not generate embeddings for chunk {chunk_num} on page {page_num + 1}\"\n", + " )\n", " continue\n", "\n", " chunk_info = {\n", - " 'document_name': doc_path,\n", - " 'page_number': page_num + 1,\n", - " 'page_text': page_text,\n", - " 'chunk_number': chunk_num,\n", - " 'chunk_text': chunk_text,\n", - " 'embeddings': embeddings\n", + " \"document_name\": doc_path,\n", + " \"page_number\": page_num + 1,\n", + " \"page_text\": page_text,\n", + " \"chunk_number\": chunk_num,\n", + " \"chunk_text\": chunk_text,\n", + " \"embeddings\": embeddings,\n", " }\n", " all_chunks.append(chunk_info)\n", "\n", @@ -826,22 +836,22 @@ " raise ValueError(\"No chunks were created from the documents\")\n", "\n", " return pd.DataFrame(all_chunks)" - ], - "metadata": { - "id": "FeKzRYQSWF7a" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "817nRd1kWQ8q" + }, + "outputs": [], "source": [ "def get_relevant_chunks(\n", " query: str,\n", " vector_db: pd.DataFrame,\n", " embedding_client: Any,\n", " embedding_model: str,\n", - " top_k: int = 3\n", + " top_k: int = 3,\n", ") -> str:\n", " \"\"\"\n", " Retrieve most relevant document chunks for a query using similarity search.\n", @@ -854,7 +864,7 @@ "\n", " similarities = [\n", " cosine_similarity(query_embedding, chunk_emb)[0][0]\n", - " for chunk_emb in vector_db['embeddings']\n", + " for chunk_emb in vector_db[\"embeddings\"]\n", " ]\n", 
"\n", " top_indices = np.argsort(similarities)[-top_k:]\n", @@ -862,44 +872,46 @@ "\n", " context = []\n", " for _, row in relevant_chunks.iterrows():\n", - " context.append({\n", - " 'document_name': row['document_name'],\n", - " 'page_number': row['page_number'],\n", - " 'chunk_number': row['chunk_number'],\n", - " 'chunk_text': row['chunk_text']\n", - " })\n", - "\n", - " return \"\\n\\n\".join([\n", - " f\"[Page {chunk['page_number']}, Chunk {chunk['chunk_number']}]: {chunk['chunk_text']}\"\n", - " for chunk in context\n", - " ])\n", + " context.append(\n", + " {\n", + " \"document_name\": row[\"document_name\"],\n", + " \"page_number\": row[\"page_number\"],\n", + " \"chunk_number\": row[\"chunk_number\"],\n", + " \"chunk_text\": row[\"chunk_text\"],\n", + " }\n", + " )\n", + "\n", + " return \"\\n\\n\".join(\n", + " [\n", + " f\"[Page {chunk['page_number']}, Chunk {chunk['chunk_number']}]: {chunk['chunk_text']}\"\n", + " for chunk in context\n", + " ]\n", + " )\n", "\n", " except Exception as e:\n", " print(f\"Error getting relevant chunks: {str(e)}\")\n", " return \"Error retrieving relevant chunks\"" - ], - "metadata": { - "id": "817nRd1kWQ8q" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6MWtTrJ-WXs7" + }, + "outputs": [], "source": [ "@retry(wait=wait_random_exponential(multiplier=1, max=120), stop=stop_after_attempt(4))\n", - "def generate_answer(\n", - " query: str,\n", - " context: str,\n", - " llm_client: Any,\n", - " llm_model: str\n", - ") -> str:\n", + "def generate_answer(query: str, context: str, llm_client: Any, llm_model: str) -> str:\n", " \"\"\"\n", " Generate answer using LLM with retry logic for API quota management.\n", " \"\"\"\n", " try:\n", " # If context indicates earlier quota issues, return early\n", - " if context in [\"Could not process query due to quota issues\", \"Error retrieving relevant chunks\"]:\n", + " if context in [\n", + " \"Could not 
process query due to quota issues\",\n", + " \"Error retrieving relevant chunks\",\n", + " ]:\n", " return \"Can't Process, Quota Issues\"\n", "\n", " prompt = f\"\"\"Based on the following context, please answer the question.\n", @@ -912,10 +924,7 @@ "\n", " Answer:\"\"\"\n", "\n", - " response = llm_client.models.generate_content(\n", - " model=llm_model,\n", - " contents=prompt\n", - " )\n", + " response = llm_client.models.generate_content(model=llm_model, contents=prompt)\n", " return response.text\n", "\n", " except Exception as e:\n", @@ -923,25 +932,25 @@ " return \"Can't Process, Quota Issues\"\n", " print(f\"Error generating answer: {str(e)}\")\n", " return \"Error generating answer\"" - ], - "metadata": { - "id": "6MWtTrJ-WXs7" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "def rag(\n", - " document_name: str,\n", - " question_set: List[Dict],\n", + "execution_count": null, + "metadata": { + "id": "zXDUqw9IWYey" + }, + "outputs": [], + "source": [ + "def rag(\n", + " document_name: str,\n", + " question_set: list[dict],\n", " vector_db: pd.DataFrame,\n", " embedding_client: Any,\n", " embedding_model: str,\n", " llm_client: Any,\n", " top_k: int,\n", - " llm_model: str\n", + " llm_model: str,\n", ") -> pd.DataFrame:\n", " \"\"\"\n", " RAG Pipeline.\n", @@ -964,82 +973,102 @@ " try:\n", " # Get relevant context for question\n", " relevant_context = get_relevant_chunks(\n", - " question['question'],\n", + " question[\"question\"],\n", " vector_db,\n", " embedding_client,\n", " embedding_model,\n", - " top_k=top_k\n", + " top_k=top_k,\n", " )\n", "\n", " # Generate answer using LLM\n", " generated_answer = generate_answer(\n", - " question['question'],\n", - " relevant_context,\n", - " llm_client,\n", - " llm_model\n", + " question[\"question\"], relevant_context, llm_client, llm_model\n", " )\n", "\n", " # Store results\n", - " results.append({\n", - " 'document_name': document_name,\n", - " 'question': 
question['question'],\n", - " 'source_page_num': question['page'],\n", - " 'answer': question['answer'],\n", - " 'generated_answer': generated_answer\n", - " })\n", + " results.append(\n", + " {\n", + " \"document_name\": document_name,\n", + " \"question\": question[\"question\"],\n", + " \"source_page_num\": question[\"page\"],\n", + " \"answer\": question[\"answer\"],\n", + " \"generated_answer\": generated_answer,\n", + " }\n", + " )\n", "\n", " except Exception as e:\n", " print(f\"Error processing question '{question['question']}': {str(e)}\")\n", - " results.append({\n", - " 'document_name': document_name,\n", - " 'question': question['question'],\n", - " 'source_page_num': question['page'],\n", - " 'answer': question['answer'],\n", - " 'generated_answer': \"Error processing question\"\n", - " })\n", + " results.append(\n", + " {\n", + " \"document_name\": document_name,\n", + " \"question\": question[\"question\"],\n", + " \"source_page_num\": question[\"page\"],\n", + " \"answer\": question[\"answer\"],\n", + " \"generated_answer\": \"Error processing question\",\n", + " }\n", + " )\n", "\n", - " return pd.DataFrame(results)\n" - ], - "metadata": { - "id": "zXDUqw9IWYey" - }, - "execution_count": null, - "outputs": [] + " return pd.DataFrame(results)" + ] }, { "cell_type": "code", - "source": [ - "vector_db_mini_vertex = build_index(document,\n", - " embedding_client=client,\n", - " embedding_model=text_embedding_model\n", - " )\n", - "vector_db_mini_vertex.head()" - ], + "execution_count": null, "metadata": { "id": "iCn8euT3Wh0r" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "vector_db_mini_vertex = build_index(\n", + " document, embedding_client=client, embedding_model=text_embedding_model\n", + ")\n", + "vector_db_mini_vertex.head()" + ] }, { "cell_type": "code", - "source": [ - "question_set_1 = [\n", - " {\"question\": \"What are the loan products avaliable?\", \"answer\": \"Home Loan, Smart Loan, Loan Against 
Property, Smart loan againg property\", \"page\": 6},\n", - " {\"question\": \"How much is the Processing fee for the loan?\", \"answer\": \"1% of the sanctioned loan amount or 10000 INR, which ever is higher\", \"page\": 7},\n", - " {\"question\": \"Documents to submit as proof od identity?\", \"answer\": \"Passport, Election/voters IDs, Permanent Driving license, permanent account number, Adhaar card\", \"page\": 2},\n", - " {\"question\": \"How many days it take for Loan Pay Order?\", \"answer\": \"1 day\", \"page\": 5},\n", - " {\"question\": \"Phone number for phone banking service?\", \"answer\": \"+91-49-3111-1111\", \"page\": 16 },\n", - " ]" - ], + "execution_count": null, "metadata": { "id": "5yiwVFC_Z0Vj" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "question_set_1 = [\n", + " {\n", + " \"question\": \"What are the loan products avaliable?\",\n", + " \"answer\": \"Home Loan, Smart Loan, Loan Against Property, Smart loan againg property\",\n", + " \"page\": 6,\n", + " },\n", + " {\n", + " \"question\": \"How much is the Processing fee for the loan?\",\n", + " \"answer\": \"1% of the sanctioned loan amount or 10000 INR, which ever is higher\",\n", + " \"page\": 7,\n", + " },\n", + " {\n", + " \"question\": \"Documents to submit as proof od identity?\",\n", + " \"answer\": \"Passport, Election/voters IDs, Permanent Driving license, permanent account number, Adhaar card\",\n", + " \"page\": 2,\n", + " },\n", + " {\n", + " \"question\": \"How many days it take for Loan Pay Order?\",\n", + " \"answer\": \"1 day\",\n", + " \"page\": 5,\n", + " },\n", + " {\n", + " \"question\": \"Phone number for phone banking service?\",\n", + " \"answer\": \"+91-49-3111-1111\",\n", + " \"page\": 16,\n", + " },\n", + "]" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gc3TwXS8Wks6" + }, + "outputs": [], "source": [ "%%time\n", "\n", @@ -1047,49 +1076,44 @@ " document_name=document[0].split(\"/\")[-1],\n", " 
question_set=question_set_1,\n", " vector_db=vector_db_mini_vertex,\n", - " embedding_client=client, # For embedding generation\n", - " embedding_model=text_embedding_model, # For embedding model\n", - " llm_client=client, # For answer generation,\n", + " embedding_client=client, # For embedding generation\n", + " embedding_model=text_embedding_model, # For embedding model\n", + " llm_client=client, # For answer generation,\n", " top_k=10,\n", - " llm_model=MODEL\n", + " llm_model=MODEL,\n", ")" - ], - "metadata": { - "id": "gc3TwXS8Wks6", - "collapsed": true - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "### Loan QnA with Gemini 2.0 Model - RAG" - ], "metadata": { "id": "NsLTh_J2RznY" - } + }, + "source": [ + "### Loan QnA with Gemini 2.0 Model - RAG" + ] }, { "cell_type": "code", - "source": [ - "question = \"What are different types of home loan?\"" - ], + "execution_count": null, "metadata": { "id": "tfB3BQIrSDXv" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "question = \"What are different types of home loan?\"" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mOFE7VFTa9R3" + }, + "outputs": [], "source": [ "relevant_context = get_relevant_chunks(\n", - " question,\n", - " vector_db_mini_vertex,\n", - " client,\n", - " text_embedding_model,\n", - " top_k=10\n", + " question, vector_db_mini_vertex, client, text_embedding_model, top_k=10\n", ")\n", "rag_prompt = f\"\"\"Based on the following context, please answer the question.\n", "\n", @@ -1098,58 +1122,55 @@ "\n", "Question: {question}\n", "\n", - "Answer:\"\"\"\n", - "\n" - ], - "metadata": { - "id": "mOFE7VFTa9R3" - }, - "execution_count": null, - "outputs": [] + "Answer:\"\"\"" + ] }, { "cell_type": "markdown", - "source": [ - "#### Text output - RAG" - ], "metadata": { "id": "vcmD3qWJniMv" - } + }, + "source": [ + "#### Text output - RAG" + ] }, { "cell_type": "code", - "source": [ - 
"response = generate_text(prompt=rag_prompt)\n", - "print(response)" - ], + "execution_count": null, "metadata": { "id": "dQ2e3ZXEm1D7" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "response = generate_text(prompt=rag_prompt)\n", + "print(response)" + ] }, { "cell_type": "markdown", - "source": [ - "#### Audio Output" - ], "metadata": { "id": "1NVC8L0nLmRB" - } + }, + "source": [ + "#### Audio Output" + ] }, { "cell_type": "code", - "source": [ - "await generate_n_play_audio(client, rag_prompt)" - ], + "execution_count": null, "metadata": { "id": "1hQ7JBwtLlMj" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "await generate_n_play_audio(client, rag_prompt)" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "R5KLDaAOWGge" + }, "source": [ "## Option 2 : Large Context Window\n", "\n", @@ -1208,49 +1229,51 @@ " * You need deeper insights and comprehensive answers.\n", " * Your questions involve understanding information spread across the entire document.\n", " * Accuracy and detailed analysis are critical for your task." 
- ], - "metadata": { - "id": "R5KLDaAOWGge" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hIfczQlbhG4y" + }, + "outputs": [], "source": [ "# Taking document from the GCS path\n", "# document_path = \"gs://github-repo/generative-ai/gemini2/use-cases/loan_example_documents/Demo-bank-home-loan-agreement.pdf\"\n", "# document_content = extract_text_from_gcs(document_path)" - ], - "metadata": { - "id": "hIfczQlbhG4y" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "e7GNNxptVVHk" + }, + "outputs": [], "source": [ "# Taking document from the local path\n", "# download the example file and keep in the colab files.\n", "document_path = \"/content/DEMO-BANK-LOAN-DETAILS.pdf\"\n", "document_content = extract_text_from_pdf(document_path)" - ], - "metadata": { - "id": "e7GNNxptVVHk" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "### Text Output - Large Context" - ], "metadata": { "id": "7wuk15mxEfxE" - } + }, + "source": [ + "### Text Output - Large Context" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Teg4LiNqEPxh" + }, + "outputs": [], "source": [ "query = \"what are the type of loans?\"\n", "\n", @@ -1261,58 +1284,46 @@ "\n", " Question: {query}\n", "\n", - " Answer:\"\"\"\n", - "\n" - ], - "metadata": { - "id": "Teg4LiNqEPxh" - }, - "execution_count": null, - "outputs": [] + " Answer:\"\"\"" + ] }, { "cell_type": "code", - "source": [ - "response = client.models.generate_content(\n", - " model=MODEL, contents= large_context_prompt\n", - ")\n", - "display(Markdown(response.text))" - ], + "execution_count": null, "metadata": { "id": "ogwQmErQPb-g" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "response = client.models.generate_content(model=MODEL, contents=large_context_prompt)\n", + "display(Markdown(response.text))" + ] }, { 
"cell_type": "markdown", - "source": [ - "### Text In Audio Out, Multimodal Live API" - ], "metadata": { "id": "JggCQgsdv16Q" - } + }, + "source": [ + "### Text In Audio Out, Multimodal Live API" + ] }, { "cell_type": "code", - "source": [ - "await generate_n_play_audio(client, large_context_prompt)" - ], + "execution_count": null, "metadata": { "id": "rLjZeApGvy4d" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "await generate_n_play_audio(client, large_context_prompt)" + ] }, { "cell_type": "markdown", - "source": [], "metadata": { - "id": "H95OvWs6zW6J" - } - }, - { - "cell_type": "markdown", + "id": "0ZShwrOt0KSi" + }, "source": [ "## Option 3 : With Vertex Datastore\n", "\n", @@ -1369,79 +1380,73 @@ "* You have a large dataset of documents.\n", "* You need high performance and scalability.\n", "* You want to integrate with other Google Cloud services.\n" - ], - "metadata": { - "id": "0ZShwrOt0KSi" - } + ] }, { "cell_type": "markdown", - "source": [ - "### Initlise the vertex datatstore" - ], "metadata": { "id": "SSHYTbIq5Q7N" - } + }, + "source": [ + "### Initlise the vertex datatstore" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-aH8OrA_0VMr" + }, + "outputs": [], "source": [ "## Vertex datastore\n", "\n", - "datastore_id = \"Your-datastore\" # @param {type: \"string\", isTemplate: true}\n", + "datastore_id = \"Your-datastore\" # @param {type: \"string\", isTemplate: true}\n", "\n", "datastore_path = f\"projects/{PROJECT_ID}/locations/global/collections/default_collection/dataStores/{datastore_id}\"\n", "\n", "vertext_ai_search_tool = Tool(\n", - " retrieval=Retrieval(\n", - " vertex_ai_search=VertexAISearch(\n", - " datastore=datastore_path\n", - " )\n", - " )\n", + " retrieval=Retrieval(vertex_ai_search=VertexAISearch(datastore=datastore_path))\n", ")" - ], - "metadata": { - "id": "-aH8OrA_0VMr" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": 
[ - "### Get your QnA with vertex datastore and Vertex Multimodel API\n", - "\n" - ], "metadata": { "id": "EU__4wc65Zhi" - } + }, + "source": [ + "### Get your QnA with vertex datastore and Vertex Multimodel API\n" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MT_SK9iC0ZnF" + }, + "outputs": [], "source": [ - "query = \"what are the types of loans?\" # @param {type: \"string\", isTemplate: true, label: \"Select Modality\"}\n", + "query = \"what are the types of loans?\" # @param {type: \"string\", isTemplate: true, label: \"Select Modality\"}\n", "\n", "modality = \"TEXT\"\n", "\n", "response = client.models.generate_content(\n", " model=MODEL_ID,\n", - " contents = query,\n", - " config = GenerateContentConfig(\n", - " tools=[vertext_ai_search_tool],\n", - " response_modalities=[modality]\n", - " ),\n", + " contents=query,\n", + " config=GenerateContentConfig(\n", + " tools=[vertext_ai_search_tool], response_modalities=[modality]\n", + " ),\n", ")\n", "\n", "display(Markdown(response.text))" - ], - "metadata": { - "id": "MT_SK9iC0ZnF" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "iiEE2Brm5MkU" + }, "source": [ "## Option 4: with Vertex AI Search\n", "\n", @@ -1494,34 +1499,35 @@ "* You need high performance and scalability.\n", "* You want to integrate with other Google Cloud services.\n", "* You want to improve search result diversity, quality, and ranking through ranking and recall tuning features of vector search." 
- ], - "metadata": { - "id": "iiEE2Brm5MkU" - } + ] }, { "cell_type": "markdown", - "source": [ - "### Import for api platform" - ], "metadata": { "id": "Zc14X-p31sci" - } + }, + "source": [ + "### Import for api platform" + ] }, { "cell_type": "code", - "source": [ - "from google.cloud import aiplatform\n", - "aiplatform.init(project=PROJECT_ID, location=LOCATION)" - ], + "execution_count": null, "metadata": { "id": "l_uQrCum1U2O" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "from google.cloud import aiplatform\n", + "\n", + "aiplatform.init(project=PROJECT_ID, location=LOCATION)" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "etYrU8Zq1xEC" + }, "source": [ "### Setup Vertex AI Vector Search index and index endpoint\n", "\n", @@ -1542,13 +1548,15 @@ " to use in the RAG corpus. Other parameters can be tuned based on\n", " your choices, which determine whether the additional parameters can be\n", " tuned." - ], - "metadata": { - "id": "etYrU8Zq1xEC" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1M_dnU9P1gvQ" + }, + "outputs": [], "source": [ "# create the index\n", "my_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(\n", @@ -1562,38 +1570,36 @@ " feature_norm_type=\"UNIT_L2_NORM\",\n", " index_update_method=\"STREAM_UPDATE\",\n", ")" - ], - "metadata": { - "id": "1M_dnU9P1gvQ" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "### Vertex ai search public endpoint [public endpoints](https://cloud.google.com/vertex-ai/docs/vector-search/deploy-index-public)." - ], "metadata": { "id": "gnPusIuK1mpD" - } + }, + "source": [ + "### Vertex ai search public endpoint [public endpoints](https://cloud.google.com/vertex-ai/docs/vector-search/deploy-index-public)." 
+ ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "If92NcCT1mDD" + }, + "outputs": [], "source": [ "# create IndexEndpoint\n", "my_index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(\n", " display_name=\"loanDemoRag\", public_endpoint_enabled=True\n", ")" - ], - "metadata": { - "id": "If92NcCT1mDD" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "0VCAa0qe2X6F" + }, "source": [ "Deploying the Index to the Index Endpoint\n", "When deploying an index to an index endpoint for the first time, it takes approximately 30 minutes to automatically build and initialize the backend. Subsequent deployments are significantly faster, with the index becoming ready in seconds.\n", @@ -1611,50 +1617,52 @@ "\n", "\n", "If you're unsure of the resource names, use the following command to retrieve them:" - ], - "metadata": { - "id": "0VCAa0qe2X6F" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CAB9Sj0p2YUH" + }, + "outputs": [], "source": [ "print(my_index_endpoint.resource_name)\n", "print(my_index.resource_name)\n", "print(my_index.name)\n", "print(my_index)" - ], - "metadata": { - "id": "CAB9Sj0p2YUH" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "# Deploy Index\n", - "my_index_endpoint.deploy_index(index=my_index, deployed_index_id=\"loanDemoRag\")" - ], + "execution_count": null, "metadata": { "id": "IteN67ln20cm" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Deploy Index\n", + "my_index_endpoint.deploy_index(index=my_index, deployed_index_id=\"loanDemoRag\")" + ] }, { "cell_type": "code", - "source": [ - "from vertexai.preview import rag" - ], + "execution_count": null, "metadata": { "id": "JqggwGu721AT" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "from vertexai.preview import rag" + ] }, { "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "caa_Vprs237T" + }, + "outputs": [], "source": [ "vector_db = rag.VertexVectorSearch(\n", " index=my_index.resource_name, index_endpoint=my_index_endpoint.resource_name\n", @@ -1666,24 +1674,24 @@ "# Create RAG Corpus\n", "rag_corpus = rag.create_corpus(display_name=DISPLAY_NAME, vector_db=vector_db)\n", "print(f\"Created RAG Corpus resource: {rag_corpus.name}\")" - ], - "metadata": { - "id": "caa_Vprs237T" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "### Import the files from the GCS" - ], "metadata": { "id": "XuTDZP7c28yi" - } + }, + "source": [ + "### Import the files from the GCS" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_V_S8aC_29Mj" + }, + "outputs": [], "source": [ "GCS_BUCKET = \"gs://demo-loan-documents/\" # @param {type:\"string\", \"placeholder\": \"your-gs-bucket\"}\n", "\n", @@ -1693,36 +1701,36 @@ " chunk_size=512,\n", " chunk_overlap=50,\n", ")" - ], - "metadata": { - "id": "_V_S8aC_29Mj" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "### Check the files just imported. It may take a few seconds to process the imported files.\n", - "rag.list_files(corpus_name=rag_corpus.name)" - ], + "execution_count": null, "metadata": { "id": "8U_TcPZU3C0j" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "### Check the files just imported. 
It may take a few seconds to process the imported files.\n", + "rag.list_files(corpus_name=rag_corpus.name)" + ] }, { "cell_type": "markdown", - "source": [ - "### Add Rag corpus to the context" - ], "metadata": { "id": "HCpBI2DZ3F_Z" - } + }, + "source": [ + "### Add Rag corpus to the context" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jwjcsMjx3Gem" + }, + "outputs": [], "source": [ "rag_resource = rag.RagResource(\n", " rag_corpus=rag_corpus.name,\n", @@ -1737,40 +1745,37 @@ " ),\n", " )\n", ")" - ], - "metadata": { - "id": "jwjcsMjx3Gem" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jqljXg1f3VzU" + }, + "outputs": [], "source": [ - "query = \"what are the types of loans?\" # @param {type: \"string\", isTemplate: true, label: \"Select Modality\"}\n", + "query = \"what are the types of loans?\" # @param {type: \"string\", isTemplate: true, label: \"Select Modality\"}\n", "\n", "modality = \"TEXT\"\n", "\n", "\n", "response = client.models.generate_content(\n", " model=MODEL_ID,\n", - " contents = query,\n", - " config = GenerateContentConfig(\n", - " tools=[vertext_ai_rag_tool],\n", - " response_modalities=[modality]\n", - " ),\n", + " contents=query,\n", + " config=GenerateContentConfig(\n", + " tools=[vertext_ai_rag_tool], response_modalities=[modality]\n", + " ),\n", ")\n", "\n", "display(Markdown(response.text))" - ], - "metadata": { - "id": "jqljXg1f3VzU" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "4PWWqy4J_lTm" + }, "source": [ "## Conclusion\n", "\n", @@ -1795,17 +1800,13 @@ "\n", "* Developing Q&A Applications: Build a Question-and-Answer application powered by Gemini 2.0.\n", "\n", - "This comprehensive guide equips you with practical knowledge for utilizing Gemini 2.0 in diverse scenarios, from multimodal data handling to advanced AI-powered application development.\n", - 
"\n" - ], - "metadata": { - "id": "4PWWqy4J_lTm" - } + "This comprehensive guide equips you with practical knowledge for utilizing Gemini 2.0 in diverse scenarios, from multimodal data handling to advanced AI-powered application development.\n" + ] } ], "metadata": { "colab": { - "provenance": [], + "name": "real_time_rag_bank_loans_gemini_2_0.ipynb", "toc_visible": true }, "kernelspec": { @@ -1815,4 +1816,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/gemini/multimodal-live-api/real_time_rag_retail_gemini_2_0.ipynb b/gemini/multimodal-live-api/real_time_rag_retail_gemini_2_0.ipynb index 90aaa0b666e..6537a42a049 100644 --- a/gemini/multimodal-live-api/real_time_rag_retail_gemini_2_0.ipynb +++ b/gemini/multimodal-live-api/real_time_rag_retail_gemini_2_0.ipynb @@ -1,1827 +1,1831 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ur8xi4C7S06n" - }, - "outputs": [], - "source": [ - "# Copyright 2024 Google LLC\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JAPoU8Sm5E6e" - }, - "source": [ - "# Real-time Retrieval Augmented Generation (RAG) using the Multimodal Live API with Gemini 2.0\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Google
Open in Colab\n", - "
\n", - "
\n", - " \n", - " \"Google
Open in Colab Enterprise\n", - "
\n", - "
\n", - " \n", - " \"Vertex
Open in Vertex AI Workbench\n", - "
\n", - "
\n", - " \n", - " \"GitHub
View on GitHub\n", - "
\n", - "
\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "
\n", - "
\n", - "Share to:\n", - "\n", - "\n", - " \"LinkedIn\n", - "\n", - "\n", - "\n", - " \"Bluesky\n", - "\n", - "\n", - "\n", - " \"X\n", - "\n", - "\n", - "\n", - " \"Reddit\n", - "\n", - "\n", - "\n", - " \"Facebook\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "84f0f73a0f76" - }, - "source": [ - "| | |\n", - "|-|-|\n", - "| Author(s) | [Deepak Moonat](https://github.com/dmoonat/) |" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-MDW_A-nBksi" - }, - "source": [ - "
\n", - "\n", - "⚠️ Gemini 2.0 Flash (Model ID: gemini-2.0-flash-exp) and the Google Gen AI SDK are currently experimental and output can vary ⚠️\n", - "
\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tvgnzT1CKxrO" - }, - "source": [ - "## Overview\n", - "\n", - "This notebook provides a comprehensive demonstration of the Vertex AI Gemini and Multimodal Live APIs, showcasing text and audio generation capabilities. Users will learn to develop a real-time Retrieval Augmented Generation (RAG) system leveraging the Multimodal Live API for a retail use-case. This system will generate audio and text responses grounded in provided documents. The tutorial covers the following:\n", - "\n", - "- **Gemini API:** Text output generation.\n", - "- **Multimodal Live API:** Text and audio output generation.\n", - "- **Retrieval Augmented Generation (RAG):** Text and audio output generation grounded in provided documents for a retail use-case." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xKVzRJhgJ4EZ" - }, - "source": [ - "### Gemini 2.0\n", - "\n", - "[Gemini 2.0 Flash](https://cloud.google.com/vertex-ai/generative-ai/docs/gemini-v2) is a new multimodal generative ai model from the Gemini family developed by [Google DeepMind](https://deepmind.google/). It now available as an experimental preview release through the Gemini API in Vertex AI and Vertex AI Studio. 
The model introduces new features and enhanced core capabilities:\n", - "\n", - "- Multimodal Live API: This new API helps you create real-time vision and audio streaming applications with tool use.\n", - "- Speed and performance: Gemini 2.0 Flash is the fastest model in the industry, with a 3x improvement in time to first token (TTFT) over 1.5 Flash.\n", - "- Quality: The model maintains quality comparable to larger models like Gemini 1.5 Pro and GPT-4o.\n", - "- Improved agentic experiences: Gemini 2.0 delivers improvements to multimodal understanding, coding, complex instruction following, and function calling.\n", - "- New Modalities: Gemini 2.0 introduces native image generation and controllable text-to-speech capabilities, enabling image editing, localized artwork creation, and expressive storytelling.\n", - "- To support the new model, we're also shipping an all new SDK that supports simple migration between the Gemini Developer API and the Gemini API in Vertex AI.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "61RBz8LLbxCR" - }, - "source": [ - "## Get started" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "No17Cw5hgx12" - }, - "source": [ - "### Install Dependencies\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ue_G9ZU80ON0" - }, - "source": [ - "- `google-genai`: Google Gen AI python library\n", - "- `PyPDF2`: To read PDFs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tFy3H3aPgx12" - }, - "outputs": [], - "source": [ - "%%capture\n", - "\n", - "%pip install --upgrade --quiet google-genai PyPDF2" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "R5Xep4W9lq-Z" - }, - "source": [ - "### Restart runtime\n", - "\n", - "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.\n", - "\n", - "The restart might take a minute or longer. 
After it's restarted, continue to the next step." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XRvKdaPDTznN" - }, - "outputs": [], - "source": [ - "import IPython\n", - "\n", - "app = IPython.Application.instance()\n", - "app.kernel.do_shutdown(True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SbmM4z7FOBpM" - }, - "source": [ - "
\n", - "⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️\n", - "
\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dmWOrTJ3gx13" - }, - "source": [ - "### Authenticate your notebook environment (Colab only)\n", - "\n", - "If you're running this notebook on Google Colab, run the cell below to authenticate your environment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NyKGtVQjgx13" - }, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "if \"google.colab\" in sys.modules:\n", - " from google.colab import auth\n", - "\n", - " auth.authenticate_user()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DF4l8DTdWgPY" - }, - "source": [ - "### Set Google Cloud project information\n", - "\n", - "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n", - "\n", - "Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Nqwi-5ufWp_B" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "PROJECT_ID = \"[your-project-id]\" # @param {type: \"string\", placeholder: \"[your-project-id]\", isTemplate: true}\n", - "if not PROJECT_ID or PROJECT_ID == \"[your-project-id]\":\n", - " PROJECT_ID = str(os.environ.get(\"GOOGLE_CLOUD_PROJECT\"))\n", - "\n", - "LOCATION = os.environ.get(\"GOOGLE_CLOUD_REGION\", \"us-central1\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5303c05f7aa6" - }, - "source": [ - "### Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6fc324893334" - }, - "outputs": [], - "source": [ - "# For asynchronous operations\n", - "import asyncio\n", - "\n", - "# For data processing\n", - "import glob\n", - "from typing import Any\n", - "\n", - "from IPython.display import Audio, Markdown, display\n", - "import PyPDF2\n", - "\n", - "# For GenerativeAI\n", - "from google import genai\n", - "from google.genai import types\n", - "from google.genai.types import LiveConnectConfig\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "# For similarity score\n", - "from sklearn.metrics.pairwise import cosine_similarity\n", - "\n", - "# For retry mechanism\n", - "from tenacity import retry, stop_after_attempt, wait_random_exponential" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OV5bFDTVE3oX" - }, - "source": [ - "#### Initialize Gen AI client" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3pjBP_V7JqhD" - }, - "source": [ - "- Client for calling the Gemini API in Vertex AI\n", - "- `vertexai=True`, indicates the client should communicate with the Vertex AI API endpoints." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bEhq_4GBEW2a" - }, - "outputs": [], - "source": [ - "# Vertex AI API\n", - "client = genai.Client(\n", - " vertexai=True,\n", - " project=PROJECT_ID,\n", - " location=LOCATION,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "e43229f3ad4f" - }, - "source": [ - "### Initialize model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cf93d5f0ce00" - }, - "outputs": [], - "source": [ - "MODEL_ID = \"gemini-2.0-flash-exp\" # @param {type:\"string\", isTemplate: true}\n", - "MODEL = (\n", - " f\"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{MODEL_ID}\"\n", - ")\n", - "\n", - "text_embedding_model = \"text-embedding-004\" # @param {type:\"string\", isTemplate: true}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H4TDOc3aqwuz" - }, - "source": [ - "## Sample Use Case - Retail Customer Support Assistance" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cH6zJeecq6SU" - }, - "source": [ - "Let's imagine a bicycle shop called `Cymbal Bikes` that offers various services like brake repair, chain replacement, and more. Our goal is to create a straightforward support system that can answer customer questions based on the shop's policies and service offerings." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uA3X24j86uE7" - }, - "source": [ - "Having a customer support assistance offers numerous advantages for businesses, ultimately leading to improved customer satisfaction and loyalty, as well as increased profitability. 
Here are some key benefits:\n", - "\n", - "- Faster Resolution of Issues: Users can quickly find answers to their questions without having to search through store's website.\n", - "- Improved Efficiency: The assistant can handle simple, repetitive questions, freeing up human agents to focus on more complex or strategic tasks.\n", - "- 24/7 Availability: Unlike human colleagues, the assistant is available around the clock, providing immediate support regardless of time zones or working hours.\n", - "- Consistent Information: The assistant provides standardized answers, ensuring consistency and accuracy." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mZZLuCecsp0e" - }, - "source": [ - "#### Context Documents" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nWrK7HHjssqB" - }, - "source": [ - "- Download the documents from Google Cloud Storage bucket\n", - "- These documents are specific to `Cymbal Bikes` store\n", - " - [`Cymbal Bikes Return Policy`](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/documents/CymbalBikesReturnPolicy.pdf): Contains information about return policy\n", - " - [`Cymbal Bikes Services`](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/documents/CymbalBikesServices.pdf): Contains information about services provided by Cymbal Bikes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "iLhNfYfYspnC" - }, - "outputs": [], - "source": [ - "!gsutil cp \"gs://github-repo/generative-ai/gemini2/use-cases/retail_rag/documents/CymbalBikesReturnPolicy.pdf\" \"documents/CymbalBikesReturnPolicy.pdf\"\n", - "!gsutil cp \"gs://github-repo/generative-ai/gemini2/use-cases/retail_rag/documents/CymbalBikesServices.pdf\" \"documents/CymbalBikesServices.pdf\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GOFNGNGjjEzD" - }, - "source": [ - "### Text" - ] - }, - { - "cell_type": "markdown", - 
"metadata": { - "id": "QlcEVrUtP9TI" - }, - "source": [ - "- Let's check a specific query to our retail use-case" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "eLqbaZjoCzng" - }, - "outputs": [], - "source": [ - "query = \"What is the price of a basic tune-up at Cymbal Bikes?\"\n", - "\n", - "response = client.models.generate_content(\n", - " model=MODEL_ID,\n", - " contents=query,\n", - ")\n", - "\n", - "display(Markdown(response.text))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-D6q7KUDuH-E" - }, - "source": [ - "> The correct answer to the query is `A basic tune-up costs $100.`\n", - "\n", - "![BasicTuneUp](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/images/BasicTuneUp.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uoigEKWkQjwi" - }, - "source": [ - "- You can see, the model is unable to answer it correctly, as it's very specific to our hypothetical use-case. However, it does provide some details to get the answer from the internet.\n", - "\n", - "- Without the necessary context, the model's response is essentially a guess and may not align with the desired information.\n", - "\n", - "- LLM is trained on vast amount of data, which leads to hallucinations. To overcome this challenge, in coming sections we'll look into how to ground the answers using Retrieval Augmented Generation (RAG)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nhzKqZdunwYJ" - }, - "source": [ - "## Grounding" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kzNcDkRevJi3" - }, - "source": [ - "Grounding is crucial in this scenario because the model needs to access and process relevant information from external sources (the \"Cymbal Bikes Return Policy\" and \"Cymbal Bikes Services\" documents) to answer specific queries accurately. 
Without grounding, the model relies solely on its pre-trained knowledge, which may not contain the specific details about the bike store's policies.\n", - "\n", - "In the example, the question about the return policy for bike helmets at Cymbal Bikes cannot be answered correctly without accessing the provided documents. The model's general knowledge of return policies is insufficient. Grounding allows the model to:\n", - "\n", - "1. **Retrieve relevant information:** The system must first locate the pertinent sections within the provided documents that address the user's question about bike helmet returns.\n", - "\n", - "2. **Process and synthesize information:** After retrieving relevant passages, the model must then understand and synthesize the information to construct an accurate answer.\n", - "\n", - "3. **Generate a grounded response:** Finally, the response needs to be directly derived from the factual content of the documents. This ensures accuracy and avoids hallucinations – generating incorrect or nonsensical information not present in the source documents.\n", - "\n", - "Without grounding, the model is forced to guess or extrapolate from its general knowledge, which can lead to inaccurate or misleading responses. The grounding process makes the model's responses more reliable and trustworthy, especially for domain-specific knowledge like store policies or procedures.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-SyokS1pUR9O" - }, - "source": [ - "## Multimodal Live API" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pwZeOc5-UXKD" - }, - "source": [ - "The multimodal live API enables you to build low-latency, multi-modal applications. 
It currently supports text as input and text & audio as output.\n", - "\n", - "- Low Latency, where audio output is required, where the Text-to-Speech step can be skipped\n", - "- Provides a more interactive user experience.\n", - "- Suitable for applications requiring immediate audio feedback" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "See the [Multimodal Live API](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live) page for more details." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aS1zTjSMcij2" - }, - "source": [ - "#### Asynchronous (async) operation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iH9CBOpncnK8" - }, - "source": [ - "When to use async calls:\n", - "1. **I/O-bound operations**: When your code spends a significant amount of time waiting for external resources\n", - " (e.g., network requests, file operations, database queries). Async allows other tasks to run while waiting. \n", - " This is especially beneficial for real-time applications or when dealing with multiple concurrent requests.\n", - " \n", - " Example:\n", - " - Fetching data from a remote server.\n", - "\n", - "2. **Parallel tasks**: When you have independent tasks that can run concurrently without blocking each other. Async\n", - " allows you to efficiently utilize multiple CPU cores or network connections.\n", - " \n", - " Example:\n", - " - Processing a large number of prompts and generating audio for each.\n", - "\n", - "\n", - "3. **User interfaces**: In applications with graphical user interfaces (GUIs), async operations prevent the UI from\n", - " freezing while performing long-running tasks. 
Users can interact with the interface even when background\n", - " operations are active.\n", - " \n", - " Example: \n", - " - A chatbot interacting in real time, where an audio response is generated in the background.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aB4U6s1-UlFw" - }, - "source": [ - "### Text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YvUJzbgPM26m" - }, - "source": [ - "For text generation, you need to set the `response_modalities` to `TEXT`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "YQOurRs5UU9p" - }, - "outputs": [], - "source": [ - "async def generate_content(query: str) -> str:\n", - " \"\"\"Function to generate text content using Gemini live API.\n", - "\n", - " Args:\n", - " query: The query to generate content for.\n", - "\n", - " Returns:\n", - " The generated content.\n", - " \"\"\"\n", - " config = LiveConnectConfig(response_modalities=[\"TEXT\"])\n", - "\n", - " async with client.aio.live.connect(model=MODEL, config=config) as session:\n", - "\n", - " await session.send(input=query, end_of_turn=True)\n", - "\n", - " response = []\n", - " async for message in session.receive():\n", - " try:\n", - " if message.text:\n", - " response.append(message.text)\n", - " except AttributeError:\n", - " pass\n", - "\n", - " if message.server_content.turn_complete:\n", - " response = \"\".join(str(x) for x in response)\n", - " return response" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ye1TwWVaVSxF" - }, - "source": [ - "- Try a specific query" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gGqsp6nFDNsG" - }, - "outputs": [], - "source": [ - "query = \"What is the price of a basic tune-up at Cymbal Bikes?\"\n", - "\n", - "response = await generate_content(query)\n", - "display(Markdown(response))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "roXuCp_cXE9q" - }, - "source": [ - "### 
Audio" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lBnz34QaakVM" - }, - "source": [ - "- For audio generation, you need to set the `response_modalities` to `AUDIO`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BmLuvxnFbC4Z" - }, - "outputs": [], - "source": [ - "async def generate_audio_content(query: str):\n", - " \"\"\"Function to generate audio response for provided query using Gemini Multimodal Live API.\n", - "\n", - " Args:\n", - " query: The query to generate audio response for.\n", - "\n", - " Returns:\n", - " The audio response.\n", - " \"\"\"\n", - " config = LiveConnectConfig(response_modalities=[\"AUDIO\"])\n", - " async with client.aio.live.connect(model=MODEL, config=config) as session:\n", - "\n", - " await session.send(input=query, end_of_turn=True)\n", - "\n", - " audio_parts = []\n", - " async for message in session.receive():\n", - " if message.server_content.model_turn:\n", - " for part in message.server_content.model_turn.parts:\n", - " if part.inline_data:\n", - " audio_parts.append(\n", - " np.frombuffer(part.inline_data.data, dtype=np.int16)\n", - " )\n", - "\n", - " if message.server_content.turn_complete:\n", - " if audio_parts:\n", - " audio_data = np.concatenate(audio_parts, axis=0)\n", - " await asyncio.sleep(0.4)\n", - " display(Audio(audio_data, rate=24000, autoplay=True))\n", - " break" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xKQ_l6wiLH_w" - }, - "source": [ - "In this example, you send a text prompt and request the model response in audio." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rXJRoxUAcFVB" - }, - "source": [ - "- Let's check the same query as before" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CfZy_XZeDUtS" - }, - "outputs": [], - "source": [ - "query = \"What is the price of a basic tune-up at Cymbal Bikes?\"\n", - "\n", - "await generate_audio_content(query)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "clfXp2PZmxDZ" - }, - "source": [ - "- Model is unable to answer the query, but with the Multimodal Live API, it doesn't hallucinate, which is pretty good!!" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wT2oB1BOqDYP" - }, - "source": [ - "### Continuous Audio Interaction (Not multiturn)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "T4iAJCstqR5s" - }, - "source": [ - " - Below function generates audio output based on the provided text prompt.\n", - " - The generated audio is displayed using `IPython.display.Audio`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bZntNTPiYLA8" - }, - "source": [ - "- Input your prompts (type `q` or `quit` or `exit` to exit).\n", - "- Example prompts:\n", - " - Hello\n", - " - Who are you?\n", - " - What's the largest planet in our solar system?\n", - " - Tell me 3 fun facts about the universe?" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7M0zkHNrOBQf" - }, - "outputs": [], - "source": [ - "async def continuous_audio_generation():\n", - " \"\"\"Continuously generates audio responses for the asked queries.\"\"\"\n", - " while True:\n", - " query = input(\"Your query > \")\n", - " if any(query.lower() in s for s in [\"q\", \"quit\", \"exit\"]):\n", - " break\n", - " await generate_audio_content(query)\n", - "\n", - "\n", - "await continuous_audio_generation()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QX9k92TlJ864" - }, - "source": [ - "## Enhancing LLM Accuracy with RAG" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oOJ-Wx18hpju" - }, - "source": [ - "We'll be showcasing the design pattern for how to implement Real-time Retrieval Augmented Generation (RAG) using Gemini 2.0 multimodal live API.\n", - "\n", - "- Multimodal live API uses websockets to communicate over the internet\n", - "- It maintains a continuous connection\n", - "- Ideal for real-time applications which require persistent communication\n", - "\n", - "\n", - "> Note: Replicating real-life scenarios with Python can be challenging within the constraints of a Colab environment.\n", - "\n", - "\n", - "However, the flow shown in this section can be modified for streaming audio input and output.\n", - "\n", - "
\n", - "\n", - "We'll build the RAG pipeline from scratch to help you understand each and every components of the pipeline.\n", - "\n", - "There are other ways to build the RAG pipeline using open source tools such as [LangChain](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/retrieval-augmented-generation/multimodal_rag_langchain.ipynb), [LlamaIndex](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/retrieval-augmented-generation/llamaindex_rag.ipynb) etc." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "u5CXTtsPEyJ0" - }, - "source": [ - "### Context Documents" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vvdcw1AOg4se" - }, - "source": [ - "- Documents are the building blocks of any RAG pipeline, as it provides the relevant context needed to ground the LLM responses\n", - "- We'll be using the documents already downloaded at the start of the notebook\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "M22BSDb2Xxpb" - }, - "outputs": [], - "source": [ - "documents = glob.glob(\"documents/*\")\n", - "documents" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zNpUL7t0e054" - }, - "source": [ - "### Retrieval Augmented Generation Architecture" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vV5Et4YHbqqE" - }, - "source": [ - "In general, RAG architecture consists of the following components\n", - "\n", - "**Data Preparation**\n", - "1. Chunking: Dividing the document into smaller, manageable pieces for processing.\n", - "2. Embedding: Transforming text chunks into numerical vectors representing semantic meaning.\n", - "3. Indexing: Organizing embeddings for efficient similarity search." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "563756fa3b7f" - }, - "source": [ - "![RAGArchitecture](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/images/RAGArchitecture.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pf4sXzYUby57" - }, - "source": [ - "**Inference**\n", - "1. Retrieval: Finding the most relevant chunks based on the query embedding.\n", - "2. Query Augmentation: Enhancing the query with retrieved context for improved generation.\n", - "3. Generation: Synthesizing a coherent and informative answer based on the augmented query." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1a30b41b63f1" - }, - "source": [ - "![LiveAPI](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/images/LiveAPI.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "M-0zlJ3_FRfa" - }, - "source": [ - "#### Document Embedding and Indexing" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0fY3xLaFKBIS" - }, - "source": [ - "Following blocks of code shows how to process unstructured data(PDFs), extract text, and divide them into smaller chunks for efficient embedding and retrieval." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JTTOQ35Ia-V2" - }, - "source": [ - "- Embeddings:\n", - " - Numerical representations of text\n", - " - It capture the semantic meaning and context of the text\n", - " - We'll use Vertex AI's text embedding model to generate embeddings\n", - " - Error handling (like the retry mechanism) during embedding generation due to potential API quota limits.\n", - "\n", - "- Indexing:\n", - " - Build a searchable index from embeddings, enabling efficient similarity search.\n", - " - For example, the index is like a detailed table of contents for a massive reference book.\n", - "\n", - "\n", - "Check out the Google Cloud Platform [documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings) for detailed understanding and example use-cases." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Vun69x23FWiw" - }, - "outputs": [], - "source": [ - "@retry(wait=wait_random_exponential(multiplier=1, max=120), stop=stop_after_attempt(4))\n", - "def get_embeddings(\n", - " embedding_client: Any, embedding_model: str, text: str, output_dim: int = 768\n", - ") -> list[float]:\n", - " \"\"\"\n", - " Generate embeddings for text with retry logic for API quota management.\n", - "\n", - " Args:\n", - " embedding_client: The client object used to generate embeddings.\n", - " embedding_model: The name of the embedding model to use.\n", - " text: The text for which to generate embeddings.\n", - " output_dim: The desired dimensionality of the output embeddings (default is 768).\n", - "\n", - " Returns:\n", - " A list of floats representing the generated embeddings. 
Returns None if a \"RESOURCE_EXHAUSTED\" error occurs.\n", - "\n", - " Raises:\n", - " Exception: Any exception encountered during embedding generation, excluding \"RESOURCE_EXHAUSTED\" errors.\n", - " \"\"\"\n", - " try:\n", - " response = embedding_client.models.embed_content(\n", - " model=embedding_model,\n", - " contents=[text],\n", - " config=types.EmbedContentConfig(output_dimensionality=output_dim),\n", - " )\n", - " return [response.embeddings[0].values]\n", - " except Exception as e:\n", - " if \"RESOURCE_EXHAUSTED\" in str(e):\n", - " return None\n", - " print(f\"Error generating embeddings: {str(e)}\")\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2csDY5NsswwJ" - }, - "source": [ - "- The code block executes the following steps:\n", - "\n", - " - Extracts text from PDF documents and segments it into smaller chunks for processing.\n", - " - Employs a Vertex AI model to transform each text chunk into a numerical embedding vector, facilitating semantic representation and search.\n", - " - Constructs a Pandas DataFrame to store the embeddings, enriched with metadata such as document name and page number, effectively creating a searchable index for efficient retrieval.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9TJlvdIsRfmX" - }, - "outputs": [], - "source": [ - "def build_index(\n", - " document_paths: list[str],\n", - " embedding_client: Any,\n", - " embedding_model: str,\n", - " chunk_size: int = 512,\n", - ") -> pd.DataFrame:\n", - " \"\"\"\n", - " Build searchable index from a list of PDF documents with page-wise processing.\n", - "\n", - " Args:\n", - " document_paths: A list of file paths to PDF documents.\n", - " embedding_client: The client object used to generate embeddings.\n", - " embedding_model: The name of the embedding model to use.\n", - " chunk_size: The maximum size (in characters) of each text chunk. 
Defaults to 512.\n", - "\n", - " Returns:\n", - " A Pandas DataFrame where each row represents a text chunk. The DataFrame includes columns for:\n", - " - 'document_name': The path to the source PDF document.\n", - " - 'page_number': The page number within the document.\n", - " - 'page_text': The full text of the page.\n", - " - 'chunk_number': The chunk number within the page.\n", - " - 'chunk_text': The text content of the chunk.\n", - " - 'embeddings': The embedding vector for the chunk.\n", - "\n", - " Raises:\n", - " ValueError: If no chunks are created from the input documents.\n", - " Exception: Any exceptions encountered during file processing are printed to the console and the function continues to the next document.\n", - " \"\"\"\n", - " all_chunks = []\n", - "\n", - " for doc_path in document_paths:\n", - " try:\n", - " with open(doc_path, \"rb\") as file:\n", - " pdf_reader = PyPDF2.PdfReader(file)\n", - "\n", - " for page_num in range(len(pdf_reader.pages)):\n", - " page = pdf_reader.pages[page_num]\n", - " page_text = page.extract_text()\n", - "\n", - " chunks = [\n", - " page_text[i : i + chunk_size]\n", - " for i in range(0, len(page_text), chunk_size)\n", - " ]\n", - "\n", - " for chunk_num, chunk_text in enumerate(chunks):\n", - " embeddings = get_embeddings(\n", - " embedding_client, embedding_model, chunk_text\n", - " )\n", - "\n", - " if embeddings is None:\n", - " print(\n", - " f\"Warning: Could not generate embeddings for chunk {chunk_num} on page {page_num + 1}\"\n", - " )\n", - " continue\n", - "\n", - " chunk_info = {\n", - " \"document_name\": doc_path,\n", - " \"page_number\": page_num + 1,\n", - " \"page_text\": page_text,\n", - " \"chunk_number\": chunk_num,\n", - " \"chunk_text\": chunk_text,\n", - " \"embeddings\": embeddings,\n", - " }\n", - " all_chunks.append(chunk_info)\n", - "\n", - " except Exception as e:\n", - " print(f\"Error processing document {doc_path}: {str(e)}\")\n", - " continue\n", - "\n", - " if not 
all_chunks:\n", - " raise ValueError(\"No chunks were created from the documents\")\n", - "\n", - " return pd.DataFrame(all_chunks)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yFGsl-Zvlej6" - }, - "source": [ - "Let's create embeddings and an index using the provided documents" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hjl5FDQckDcO" - }, - "outputs": [], - "source": [ - "vector_db_mini_vertex = build_index(\n", - " documents, embedding_client=client, embedding_model=text_embedding_model\n", - ")\n", - "vector_db_mini_vertex" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pZLX5ozMlxTX" - }, - "outputs": [], - "source": [ - "# Index size\n", - "vector_db_mini_vertex.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cvNVn3kT9FiB" - }, - "outputs": [], - "source": [ - "# Example of how a chunk looks like\n", - "vector_db_mini_vertex.loc[0, \"chunk_text\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Hul4bjAkBkg0" - }, - "source": [ - "To enhance the performance of retrieval systems, consider the following:\n", - "\n", - "- Optimize chunk size selection to balance granularity and context.\n", - "- Evaluate various chunking strategies to identify the most effective approach for your data.\n", - "- Explore managed services and scalable indexing solutions, such as [Vertex AI Search](https://cloud.google.com/generative-ai-app-builder/docs/create-datastore-ingest), to enhance performance and efficiency." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "43txjyVlHT6v" - }, - "source": [ - "#### Retrieval" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "y92jM-v8KBfV" - }, - "source": [ - "The below code demonstrates how to query the index and uses a cosine similarity measure for comparing query vectors against the index. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bI1YsFoKtyxY" - }, - "source": [ - "* **Input:** Accepts a query string and parameters like the number of relevant chunks to return.\n", - "* **Embedding Generation:** Generates an embedding for the input query using the same model used to embed the document chunks.\n", - "* **Similarity Search:** Compares the query embedding to the embeddings of all indexed document chunks, using cosine similarity. Could use other distance metrics as well.\n", - "* **Ranking:** Ranks the chunks based on their similarity scores to the query.\n", - "* **Top-k Retrieval:** Returns the top *k* most similar chunks, where *k* is specified by the input parameters. This could be configurable.\n", - "* **Output:** Returns a list of relevant chunks, potentially including the original chunk text, similarity score, document source (filename, page number), and chunk metadata.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "88ndL_2wJ5ZD" - }, - "outputs": [], - "source": [ - "def get_relevant_chunks(\n", - " query: str,\n", - " vector_db: pd.DataFrame,\n", - " embedding_client: Any,\n", - " embedding_model: str,\n", - " top_k: int = 3,\n", - ") -> str:\n", - " \"\"\"\n", - " Retrieve the most relevant document chunks for a query using similarity search.\n", - "\n", - " Args:\n", - " query: The search query string.\n", - " vector_db: A pandas DataFrame containing the vectorized document chunks.\n", - " It must contain columns named 'embeddings', 'document_name',\n", - " 'page_number', and 'chunk_text'.\n", - " The 'embeddings' column should contain lists or numpy arrays\n", - " representing the embeddings.\n", - " embedding_client: The client object used to generate embeddings.\n", - " embedding_model: The name of the embedding model to use.\n", - " top_k: The number of most similar chunks to retrieve. 
Defaults to 3.\n", - "\n", - " Returns:\n", - " A formatted string containing the top_k most relevant chunks. Each chunk is\n", - " presented with its page number and chunk number. Returns an error message if\n", - " the query processing fails or if an error occurs during chunk retrieval.\n", - "\n", - " Raises:\n", - " Exception: If any error occurs during the process (e.g., issues with the embedding client,\n", - " data format problems in the vector database).\n", - " The specific error is printed to the console.\n", - " \"\"\"\n", - " try:\n", - " query_embedding = get_embeddings(embedding_client, embedding_model, query)\n", - "\n", - " if query_embedding is None:\n", - " return \"Could not process query due to quota issues\"\n", - "\n", - " similarities = [\n", - " cosine_similarity(query_embedding, chunk_emb)[0][0]\n", - " for chunk_emb in vector_db[\"embeddings\"]\n", - " ]\n", - "\n", - " top_indices = np.argsort(similarities)[-top_k:]\n", - " relevant_chunks = vector_db.iloc[top_indices]\n", - "\n", - " context = []\n", - " for _, row in relevant_chunks.iterrows():\n", - " context.append(\n", - " {\n", - " \"document_name\": row[\"document_name\"],\n", - " \"page_number\": row[\"page_number\"],\n", - " \"chunk_number\": row[\"chunk_number\"],\n", - " \"chunk_text\": row[\"chunk_text\"],\n", - " }\n", - " )\n", - "\n", - " return \"\\n\\n\".join(\n", - " [\n", - " f\"[Page {chunk['page_number']}, Chunk {chunk['chunk_number']}]: {chunk['chunk_text']}\"\n", - " for chunk in context\n", - " ]\n", - " )\n", - "\n", - " except Exception as e:\n", - " print(f\"Error getting relevant chunks: {str(e)}\")\n", - " return \"Error retrieving relevant chunks\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3hxyLlTjsstI" - }, - "source": [ - "Let's test out our retrieval component" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ek4aF0Esck2H" - }, - "source": [ - "- Let's try the same query for which the model was not able to answer 
earlier, due to lack of context" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lSd8ZeH6D7m4" - }, - "outputs": [], - "source": [ - "query = \"What is the price of a basic tune-up at Cymbal Bikes?\"\n", - "relevant_context = get_relevant_chunks(\n", - " query, vector_db_mini_vertex, client, text_embedding_model, top_k=3\n", - ")\n", - "relevant_context" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YBxnXReUn8Iy" - }, - "source": [ - "- You can see, with the help of the relevant context we can derive the answer as it contains the chunks specific to the asked query.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "25eb6422c9cf" - }, - "source": [ - "![Context](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/images/Context.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kHzw7_UwzutC" - }, - "source": [ - "For optimal performance, consider these points:\n", - "\n", - "* **Context Window:** Considers a context window around the retrieved chunks to provide more comprehensive context. This could involve returning neighboring chunks or a specified window size.\n", - "* **Filtering:** Option to filter retrieved chunks based on criteria like minimum similarity score or source document.\n", - "* **Efficiency:** Designed for efficient retrieval, especially for large indexes, potentially using optimized search algorithms or data structures." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZEfJkwSqJ5KR" - }, - "source": [ - "### Generation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b7OZpv33KBx_" - }, - "source": [ - "* **Contextual Answer Synthesis:** The core function of the generation component is to synthesize a coherent and informative answer based on the retrieved context. 
It takes the user's query and the relevant document chunks as input.\n", - "* **Large Language Model (LLM) Integration:** It leverages a large language model (LLM) to generate the final answer. The LLM processes both the query and the retrieved context to produce a response. The quality of the answer heavily relies on the capabilities of the chosen LLM.\n", - "* **Coherence and Relevance:** A good generation function ensures the generated answer is coherent, factually accurate, and directly addresses the user's query, using only the provided context. It avoids hallucinations (generating information not present in the context).\n", - "* **Prompt Engineering:** The effectiveness of the LLM is heavily influenced by the prompt. The generation function likely incorporates prompt engineering techniques to guide the LLM towards generating the desired output. This may involve carefully crafting instructions for the LLM or providing examples.\n", - "\n", - "For more details on prompt engineering, check out the [documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-design-strategies)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0xs-AQmqm03l" - }, - "source": [ - "Let's see two use-cases, `Text-In-Text-Out` and `Text-In-Audio-Out`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xp7doymTJ7Iu" - }, - "outputs": [], - "source": [ - "@retry(wait=wait_random_exponential(multiplier=1, max=120), stop=stop_after_attempt(4))\n", - "async def generate_answer(\n", - " query: str, context: str, llm_client: Any, modality: str = \"text\"\n", - ") -> str:\n", - " \"\"\"\n", - " Generate answer using LLM with retry logic for API quota management.\n", - "\n", - " Args:\n", - " query: User query.\n", - " context: Relevant text providing context for the query.\n", - " llm_client: Client for accessing LLM API.\n", - " modality: Output modality (text or audio).\n", - "\n", - " Returns:\n", - " Generated answer.\n", - "\n", - " Raises:\n", - " Exception: If an unexpected error occurs during the LLM call (after retry attempts are exhausted).\n", - " \"\"\"\n", - " try:\n", - " # If context indicates earlier quota issues, return early\n", - " if context in [\n", - " \"Could not process query due to quota issues\",\n", - " \"Error retrieving relevant chunks\",\n", - " ]:\n", - " return \"Can't Process, Quota Issues\"\n", - "\n", - " prompt = f\"\"\"Based on the following context, please answer the question.\n", - "\n", - " Context:\n", - " {context}\n", - "\n", - " Question: {query}\n", - "\n", - " Answer:\"\"\"\n", - "\n", - " if modality == \"text\":\n", - " # Generate text answer using LLM\n", - " response = await generate_content(prompt)\n", - " return response\n", - "\n", - " elif modality == \"audio\":\n", - " # Generate audio answer using LLM\n", - " await generate_audio_content(prompt)\n", - "\n", - " except Exception as e:\n", - " if \"RESOURCE_EXHAUSTED\" in str(e):\n", - " return \"Can't Process, Quota Issues\"\n", - " print(f\"Error generating answer: {str(e)}\")\n", - " return \"Error generating 
answer\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "11q0Sf0oJ7wL" - }, - "source": [ - "Let's test our `Generation` component" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S-iesR2BEHnI" - }, - "outputs": [], - "source": [ - "query = \"What is the price of a basic tune-up at Cymbal Bikes?\"\n", - "\n", - "generated_answer = await generate_answer(\n", - " query, relevant_context, client, modality=\"text\"\n", - ")\n", - "display(Markdown(generated_answer))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "W7EHYeP-EMpN" - }, - "outputs": [], - "source": [ - "await generate_answer(query, relevant_context, client, modality=\"audio\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CbQB5PbMrrsB" - }, - "source": [ - "> And the answer is... CORRECT !! 🎉" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1gnr-j-ocxlx" - }, - "source": [ - "- The accuracy of the generated answer is attributed to the provision of relevant context to the Large Language Model (LLM), enabling it to effectively comprehend the query and produce an appropriate response." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2MNlAoAHR0Do" - }, - "source": [ - "### Pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8LemsW6WrOfm" - }, - "source": [ - "Let's put `Retrieval` and `Generation` components together in a pipeline." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yoOeqxETR2G_" - }, - "outputs": [], - "source": [ - "async def rag(\n", - " question: str,\n", - " vector_db: pd.DataFrame,\n", - " embedding_client: Any,\n", - " embedding_model: str,\n", - " llm_client: Any,\n", - " top_k: int,\n", - " llm_model: str,\n", - " modality: str = \"text\",\n", - ") -> str | None:\n", - " \"\"\"\n", - " RAG Pipeline.\n", - "\n", - " Args:\n", - " question: User query.\n", - " vector_db: DataFrame containing document chunks and embeddings.\n", - " embedding_client: Client for accessing embedding API.\n", - " embedding_model: Name of the embedding model.\n", - " llm_client: Client for accessing LLM API.\n", - " top_k: The number of top relevant chunks to retrieve from the vector database.\n", - " llm_model: Name of the LLM model.\n", - " modality: Output modality (text or audio).\n", - "\n", - " Returns:\n", - " For text modality, generated answer.\n", - " For audio modality, audio playback widget.\n", - "\n", - " Raises:\n", - " Exception: Catches and prints any exceptions during processing. 
Returns an error message.\n", - " \"\"\"\n", - "\n", - " try:\n", - " # Get relevant context for question\n", - " relevant_context = get_relevant_chunks(\n", - " question, vector_db, embedding_client, embedding_model, top_k=top_k\n", - " )\n", - "\n", - " if modality == \"text\":\n", - " # Generate text answer using LLM\n", - " generated_answer = await generate_answer(\n", - " question,\n", - " relevant_context,\n", - " llm_client,\n", - " )\n", - " return generated_answer\n", - "\n", - " elif modality == \"audio\":\n", - " # Generate audio answer using LLM\n", - " await generate_answer(\n", - " question, relevant_context, llm_client, modality=modality\n", - " )\n", - " return\n", - "\n", - " except Exception as e:\n", - " print(f\"Error processing question '{question}': {str(e)}\")\n", - " return {\"question\": question, \"generated_answer\": \"Error processing question\"}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Q8bNzUvbVJcx" - }, - "source": [ - "Our Retrieval Augmented Generation (RAG) architecture allows for flexible output modality(text and audio) selection. By modifying only the generation component, we can produce both text and audio output while maintaining the same retrieval mechanism. This highlights the adaptability of RAG in catering to diverse content presentation needs." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Pkn75-1cFW1J" - }, - "source": [ - "### Inference" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QMGtlPWcVXT0" - }, - "source": [ - "Let's test our simple RAG pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0vwfQbodn89Y" - }, - "source": [ - "#### Sample Queries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Zx_GwXESk9aP" - }, - "outputs": [], - "source": [ - "question_set = [\n", - " {\n", - " \"question\": \"What is the price of a basic tune-up at Cymbal Bikes?\",\n", - " \"answer\": \"A basic tune-up costs $100.\",\n", - " },\n", - " {\n", - " \"question\": \"How much does it cost to replace a tire at Cymbal Bikes?\",\n", - " \"answer\": \"Replacing a tire at Cymbal Bikes costs $50 per tire.\",\n", - " },\n", - " {\n", - " \"question\": \"What does gear repair at Cymbal Bikes include?\",\n", - " \"answer\": \"Gear repair includes inspection and repair of the gears, including replacement of chainrings, cogs, and cables as needed.\",\n", - " },\n", - " {\n", - " \"question\": \"What is the cost of replacing a tube at Cymbal Bikes?\",\n", - " \"answer\": \"Replacing a tube at Cymbal Bikes costs $20.\",\n", - " },\n", - " {\n", - " \"question\": \"Can I return clothing items to Cymbal Bikes?\",\n", - " \"answer\": \"Clothing can only be returned if it is unworn and in the original packaging.\",\n", - " },\n", - " {\n", - " \"question\": \"What is the time frame for returning items to Cymbal Bikes?\",\n", - " \"answer\": \"Cymbal Bikes offers a 30-day return policy on all items.\",\n", - " },\n", - " {\n", - " \"question\": \"Can I return edible items like energy gels?\",\n", - " \"answer\": \"No, edible items are not returnable.\",\n", - " },\n", - " {\n", - " \"question\": \"How can I return an item purchased online from Cymbal Bikes?\",\n", - " \"answer\": \"Items purchased online can be returned to any Cymbal 
Bikes store or mailed back.\",\n", - " },\n", - " {\n", - " \"question\": \"What should I include when returning an item to Cymbal Bikes?\",\n", - " \"answer\": \"Please include the original receipt and a copy of your shipping confirmation when returning an item.\",\n", - " },\n", - " {\n", - " \"question\": \"Does Cymbal Bikes offer refunds for shipping charges?\",\n", - " \"answer\": \"Cymbal Bikes does not offer refunds for shipping charges, except for defective items.\",\n", - " },\n", - " {\n", - " \"question\": \"How do I process a return for a defective item at Cymbal Bikes?\",\n", - " \"answer\": \"To process a return for a defective item, please contact Cymbal Bikes first.\",\n", - " },\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZUo_fcNzoAp3" - }, - "source": [ - "#### Text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "y1RC5-djV0-r" - }, - "source": [ - "First we will try, `modality='text'`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dmyN-h18EZdT" - }, - "outputs": [], - "source": [ - "question_set[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-f3hsHqBEbwc" - }, - "outputs": [], - "source": [ - "response = await rag(\n", - " question=question_set[0][\"question\"],\n", - " vector_db=vector_db_mini_vertex,\n", - " embedding_client=client, # For embedding generation\n", - " embedding_model=text_embedding_model, # For embedding model\n", - " llm_client=client, # For answer generation,\n", - " top_k=3,\n", - " llm_model=MODEL,\n", - " modality=\"text\",\n", - ")\n", - "display(Markdown(response))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Nb3VytmIyo-1" - }, - "source": [ - "#### Audio" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kEl80N8VV_6E" - }, - "source": [ - "Now, let's try `modality='audio'` to get audio response." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "STdO_EtxEhFA" - }, - "outputs": [], - "source": [ - "await rag(\n", - " question=question_set[0][\"question\"],\n", - " vector_db=vector_db_mini_vertex,\n", - " embedding_client=client, # For embedding generation\n", - " embedding_model=text_embedding_model, # For embedding model\n", - " llm_client=client, # For answer generation,\n", - " top_k=3,\n", - " llm_model=MODEL,\n", - " modality=\"audio\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l9NMyJm-_0lM" - }, - "source": [ - "Evaluating Retrieval Augmented Generation (RAG) applications before production is crucial for identifying areas for improvement and ensuring optimal performance.\n", - "Check out the Vertex AI [Gen AI evaluation service](https://cloud.google.com/vertex-ai/generative-ai/docs/models/evaluation-overview)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Erp1ImX9Lu1Y" - }, - "source": [ - "## Conclusion" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W2A4xXWP1EB4" - }, - "source": [ - "Congratulations on making it through this notebook!" 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Uyc3uq1uYHEN" - }, - "source": [ - "- We have seen how to use the Gemini API in Vertex AI to generate text and Multimodal Live API to generate text and audio output.\n", - "- Developed a fully functional Retrieval Augmented Generation (RAG) pipeline capable of answering questions based on provided documents.\n", - "- Demonstrated the versatility of the RAG architecture by enabling both text and audio output modalities.\n", - "- Ensured the adaptability of the RAG pipeline to various use cases by enabling seamless integration of different context documents.\n", - "- Established a foundation for building more advanced RAG systems leveraging larger document sets and sophisticated indexing/retrieval services like Vertex AI Datastore/Agent Builder and Vertex AI Multimodal Live API." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## What's next\n", - "\n", - "- Learn how to [build a web application that enables you to use your voice and camera to talk to Gemini 2.0 through the Multimodal Live API.](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/gemini/multimodal-live-api/websocket-demo-app)\n", - "- See the [Multimodal Live API reference docs](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live).\n", - "- See the [Google Gen AI SDK reference docs](https://googleapis.github.io/python-genai/).\n", - "- Explore other notebooks in the [Google Cloud Generative AI GitHub repository](https://github.com/GoogleCloudPlatform/generative-ai)." 
- ] - } - ], - "metadata": { - "colab": { - "name": "real_time_rag_retail_gemini_2_0.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ur8xi4C7S06n" + }, + "outputs": [], + "source": [ + "# Copyright 2024 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JAPoU8Sm5E6e" + }, + "source": [ + "# Real-time Retrieval Augmented Generation (RAG) using the Multimodal Live API with Gemini 2.0\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Google
Open in Colab\n", + "
\n", + "
\n", + " \n", + " \"Google
Open in Colab Enterprise\n", + "
\n", + "
\n", + " \n", + " \"Vertex
Open in Vertex AI Workbench\n", + "
\n", + "
\n", + " \n", + " \"GitHub
View on GitHub\n", + "
\n", + "
\n", + "\n", + "
\n", + "\n", + "
\n", + "\n", + "
\n", + "
\n", + "Share to:\n", + "\n", + "\n", + " \"LinkedIn\n", + "\n", + "\n", + "\n", + " \"Bluesky\n", + "\n", + "\n", + "\n", + " \"X\n", + "\n", + "\n", + "\n", + " \"Reddit\n", + "\n", + "\n", + "\n", + " \"Facebook\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "84f0f73a0f76" + }, + "source": [ + "| | |\n", + "|-|-|\n", + "| Author(s) | [Deepak Moonat](https://github.com/dmoonat/) |" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-MDW_A-nBksi" + }, + "source": [ + "
\n", + "\n", + "⚠️ Gemini 2.0 Flash (Model ID: gemini-2.0-flash-exp) and the Google Gen AI SDK are currently experimental and output can vary ⚠️\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tvgnzT1CKxrO" + }, + "source": [ + "## Overview\n", + "\n", + "This notebook provides a comprehensive demonstration of the Vertex AI Gemini and Multimodal Live APIs, showcasing text and audio generation capabilities. Users will learn to develop a real-time Retrieval Augmented Generation (RAG) system leveraging the Multimodal Live API for a retail use-case. This system will generate audio and text responses grounded in provided documents. The tutorial covers the following:\n", + "\n", + "- **Gemini API:** Text output generation.\n", + "- **Multimodal Live API:** Text and audio output generation.\n", + "- **Retrieval Augmented Generation (RAG):** Text and audio output generation grounded in provided documents for a retail use-case." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xKVzRJhgJ4EZ" + }, + "source": [ + "### Gemini 2.0\n", + "\n", + "[Gemini 2.0 Flash](https://cloud.google.com/vertex-ai/generative-ai/docs/gemini-v2) is a new multimodal generative AI model from the Gemini family developed by [Google DeepMind](https://deepmind.google/). It is now available as an experimental preview release through the Gemini API in Vertex AI and Vertex AI Studio. 
The model introduces new features and enhanced core capabilities:\n", + "\n", + "- Multimodal Live API: This new API helps you create real-time vision and audio streaming applications with tool use.\n", + "- Speed and performance: Gemini 2.0 Flash is the fastest model in the industry, with a 3x improvement in time to first token (TTFT) over 1.5 Flash.\n", + "- Quality: The model maintains quality comparable to larger models like Gemini 1.5 Pro and GPT-4o.\n", + "- Improved agentic experiences: Gemini 2.0 delivers improvements to multimodal understanding, coding, complex instruction following, and function calling.\n", + "- New Modalities: Gemini 2.0 introduces native image generation and controllable text-to-speech capabilities, enabling image editing, localized artwork creation, and expressive storytelling.\n", + "- To support the new model, we're also shipping an all new SDK that supports simple migration between the Gemini Developer API and the Gemini API in Vertex AI.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "61RBz8LLbxCR" + }, + "source": [ + "## Get started" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "No17Cw5hgx12" + }, + "source": [ + "### Install Dependencies\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ue_G9ZU80ON0" + }, + "source": [ + "- `google-genai`: Google Gen AI python library\n", + "- `PyPDF2`: To read PDFs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tFy3H3aPgx12" + }, + "outputs": [], + "source": [ + "%%capture\n", + "\n", + "%pip install --upgrade --quiet google-genai PyPDF2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R5Xep4W9lq-Z" + }, + "source": [ + "### Restart runtime\n", + "\n", + "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.\n", + "\n", + "The restart might take a minute or longer. 
After it's restarted, continue to the next step." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XRvKdaPDTznN" + }, + "outputs": [], + "source": [ + "import IPython\n", + "\n", + "app = IPython.Application.instance()\n", + "app.kernel.do_shutdown(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SbmM4z7FOBpM" + }, + "source": [ + "
\n", + "⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dmWOrTJ3gx13" + }, + "source": [ + "### Authenticate your notebook environment (Colab only)\n", + "\n", + "If you're running this notebook on Google Colab, run the cell below to authenticate your environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NyKGtVQjgx13" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "if \"google.colab\" in sys.modules:\n", + " from google.colab import auth\n", + "\n", + " auth.authenticate_user()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DF4l8DTdWgPY" + }, + "source": [ + "### Set Google Cloud project information\n", + "\n", + "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n", + "\n", + "Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Nqwi-5ufWp_B" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "PROJECT_ID = \"[your-project-id]\" # @param {type: \"string\", placeholder: \"[your-project-id]\", isTemplate: true}\n", + "if not PROJECT_ID or PROJECT_ID == \"[your-project-id]\":\n", + " PROJECT_ID = str(os.environ.get(\"GOOGLE_CLOUD_PROJECT\"))\n", + "\n", + "LOCATION = os.environ.get(\"GOOGLE_CLOUD_REGION\", \"us-central1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5303c05f7aa6" + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6fc324893334" + }, + "outputs": [], + "source": [ + "# For asynchronous operations\n", + "import asyncio\n", + "\n", + "# For data processing\n", + "import glob\n", + "from typing import Any\n", + "\n", + "from IPython.display import Audio, Markdown, display\n", + "import PyPDF2\n", + "\n", + "# For GenerativeAI\n", + "from google import genai\n", + "from google.genai import types\n", + "from google.genai.types import LiveConnectConfig\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "# For similarity score\n", + "from sklearn.metrics.pairwise import cosine_similarity\n", + "\n", + "# For retry mechanism\n", + "from tenacity import retry, stop_after_attempt, wait_random_exponential" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OV5bFDTVE3oX" + }, + "source": [ + "#### Initialize Gen AI client" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3pjBP_V7JqhD" + }, + "source": [ + "- Client for calling the Gemini API in Vertex AI\n", + "- `vertexai=True`, indicates the client should communicate with the Vertex AI API endpoints." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bEhq_4GBEW2a" + }, + "outputs": [], + "source": [ + "# Vertex AI API\n", + "client = genai.Client(\n", + " vertexai=True,\n", + " project=PROJECT_ID,\n", + " location=LOCATION,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e43229f3ad4f" + }, + "source": [ + "### Initialize model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cf93d5f0ce00" + }, + "outputs": [], + "source": [ + "MODEL_ID = \"gemini-2.0-flash-exp\" # @param {type:\"string\", isTemplate: true}\n", + "MODEL = (\n", + " f\"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{MODEL_ID}\"\n", + ")\n", + "\n", + "text_embedding_model = \"text-embedding-004\" # @param {type:\"string\", isTemplate: true}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H4TDOc3aqwuz" + }, + "source": [ + "## Sample Use Case - Retail Customer Support Assistance" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cH6zJeecq6SU" + }, + "source": [ + "Let's imagine a bicycle shop called `Cymbal Bikes` that offers various services like brake repair, chain replacement, and more. Our goal is to create a straightforward support system that can answer customer questions based on the shop's policies and service offerings." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uA3X24j86uE7" + }, + "source": [ + "Having a customer support assistance offers numerous advantages for businesses, ultimately leading to improved customer satisfaction and loyalty, as well as increased profitability. 
Here are some key benefits:\n", + "\n", + "- Faster Resolution of Issues: Users can quickly find answers to their questions without having to search through store's website.\n", + "- Improved Efficiency: The assistant can handle simple, repetitive questions, freeing up human agents to focus on more complex or strategic tasks.\n", + "- 24/7 Availability: Unlike human colleagues, the assistant is available around the clock, providing immediate support regardless of time zones or working hours.\n", + "- Consistent Information: The assistant provides standardized answers, ensuring consistency and accuracy." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mZZLuCecsp0e" + }, + "source": [ + "#### Context Documents" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nWrK7HHjssqB" + }, + "source": [ + "- Download the documents from Google Cloud Storage bucket\n", + "- These documents are specific to `Cymbal Bikes` store\n", + " - [`Cymbal Bikes Return Policy`](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/documents/CymbalBikesReturnPolicy.pdf): Contains information about return policy\n", + " - [`Cymbal Bikes Services`](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/documents/CymbalBikesServices.pdf): Contains information about services provided by Cymbal Bikes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iLhNfYfYspnC" + }, + "outputs": [], + "source": [ + "!gsutil cp \"gs://github-repo/generative-ai/gemini2/use-cases/retail_rag/documents/CymbalBikesReturnPolicy.pdf\" \"documents/CymbalBikesReturnPolicy.pdf\"\n", + "!gsutil cp \"gs://github-repo/generative-ai/gemini2/use-cases/retail_rag/documents/CymbalBikesServices.pdf\" \"documents/CymbalBikesServices.pdf\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GOFNGNGjjEzD" + }, + "source": [ + "### Text" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "QlcEVrUtP9TI" + }, + "source": [ + "- Let's check a specific query to our retail use-case" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eLqbaZjoCzng" + }, + "outputs": [], + "source": [ + "query = \"What is the price of a basic tune-up at Cymbal Bikes?\"\n", + "\n", + "response = client.models.generate_content(\n", + " model=MODEL_ID,\n", + " contents=query,\n", + ")\n", + "\n", + "display(Markdown(response.text))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-D6q7KUDuH-E" + }, + "source": [ + "> The correct answer to the query is `A basic tune-up costs $100.`\n", + "\n", + "![BasicTuneUp](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/images/BasicTuneUp.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uoigEKWkQjwi" + }, + "source": [ + "- You can see, the model is unable to answer it correctly, as it's very specific to our hypothetical use-case. However, it does provide some details to get the answer from the internet.\n", + "\n", + "- Without the necessary context, the model's response is essentially a guess and may not align with the desired information.\n", + "\n", + "- LLM is trained on vast amount of data, which leads to hallucinations. To overcome this challenge, in coming sections we'll look into how to ground the answers using Retrieval Augmented Generation (RAG)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nhzKqZdunwYJ" + }, + "source": [ + "## Grounding" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kzNcDkRevJi3" + }, + "source": [ + "Grounding is crucial in this scenario because the model needs to access and process relevant information from external sources (the \"Cymbal Bikes Return Policy\" and \"Cymbal Bikes Services\" documents) to answer specific queries accurately. 
Without grounding, the model relies solely on its pre-trained knowledge, which may not contain the specific details about the bike store's policies.\n", + "\n", + "In the example, the question about the return policy for bike helmets at Cymbal Bikes cannot be answered correctly without accessing the provided documents. The model's general knowledge of return policies is insufficient. Grounding allows the model to:\n", + "\n", + "1. **Retrieve relevant information:** The system must first locate the pertinent sections within the provided documents that address the user's question about bike helmet returns.\n", + "\n", + "2. **Process and synthesize information:** After retrieving relevant passages, the model must then understand and synthesize the information to construct an accurate answer.\n", + "\n", + "3. **Generate a grounded response:** Finally, the response needs to be directly derived from the factual content of the documents. This ensures accuracy and avoids hallucinations – generating incorrect or nonsensical information not present in the source documents.\n", + "\n", + "Without grounding, the model is forced to guess or extrapolate from its general knowledge, which can lead to inaccurate or misleading responses. The grounding process makes the model's responses more reliable and trustworthy, especially for domain-specific knowledge like store policies or procedures.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-SyokS1pUR9O" + }, + "source": [ + "## Multimodal Live API" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pwZeOc5-UXKD" + }, + "source": [ + "The multimodal live API enables you to build low-latency, multi-modal applications. 
It currently supports text as input and text & audio as output.\n", + "\n", + "- Low latency: when audio output is required, a separate Text-to-Speech step can be skipped.\n", + "- Provides a more interactive user experience.\n", + "- Suitable for applications requiring immediate audio feedback" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ad9d532aab36" + }, + "source": [ + "See the [Multimodal Live API](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live) page for more details." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aS1zTjSMcij2" + }, + "source": [ + "#### Asynchronous (async) operation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iH9CBOpncnK8" + }, + "source": [ + "When to use async calls:\n", + "1. **I/O-bound operations**: When your code spends a significant amount of time waiting for external resources\n", + " (e.g., network requests, file operations, database queries). Async allows other tasks to run while waiting. \n", + " This is especially beneficial for real-time applications or when dealing with multiple concurrent requests.\n", + " \n", + " Example:\n", + " - Fetching data from a remote server.\n", + "\n", + "2. **Parallel tasks**: When you have independent tasks that can run concurrently without blocking each other. Async\n", + " lets a single thread keep many network connections in flight at once; it does not by itself use multiple CPU cores.\n", + " \n", + " Example:\n", + " - Processing a large number of prompts and generating audio for each.\n", + "\n", + "\n", + "3. **User interfaces**: In applications with graphical user interfaces (GUIs), async operations prevent the UI from\n", + " freezing while performing long-running tasks. 
Users can interact with the interface even when background\n", + " operations are active.\n", + " \n", + " Example: \n", + " - A chatbot interacting in real time, where an audio response is generated in the background.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aB4U6s1-UlFw" + }, + "source": [ + "### Text" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YvUJzbgPM26m" + }, + "source": [ + "For text generation, you need to set the `response_modalities` to `TEXT`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YQOurRs5UU9p" + }, + "outputs": [], + "source": [ + "async def generate_content(query: str) -> str:\n", + " \"\"\"Function to generate text content using Gemini live API.\n", + "\n", + " Args:\n", + " query: The query to generate content for.\n", + "\n", + " Returns:\n", + " The generated content.\n", + " \"\"\"\n", + " config = LiveConnectConfig(response_modalities=[\"TEXT\"])\n", + "\n", + " async with client.aio.live.connect(model=MODEL, config=config) as session:\n", + "\n", + " await session.send(input=query, end_of_turn=True)\n", + "\n", + " response = []\n", + " async for message in session.receive():\n", + " try:\n", + " if message.text:\n", + " response.append(message.text)\n", + " except AttributeError:\n", + " pass\n", + "\n", + " if message.server_content.turn_complete:\n", + " response = \"\".join(str(x) for x in response)\n", + " return response" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ye1TwWVaVSxF" + }, + "source": [ + "- Try a specific query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gGqsp6nFDNsG" + }, + "outputs": [], + "source": [ + "query = \"What is the price of a basic tune-up at Cymbal Bikes?\"\n", + "\n", + "response = await generate_content(query)\n", + "display(Markdown(response))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "roXuCp_cXE9q" + }, + "source": [ + "### 
Audio" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lBnz34QaakVM" + }, + "source": [ + "- For audio generation, you need to set the `response_modalities` to `AUDIO`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BmLuvxnFbC4Z" + }, + "outputs": [], + "source": [ + "async def generate_audio_content(query: str):\n", + " \"\"\"Function to generate audio response for provided query using Gemini Multimodal Live API.\n", + "\n", + " Args:\n", + " query: The query to generate audio response for.\n", + "\n", + " Returns:\n", + " The audio response.\n", + " \"\"\"\n", + " config = LiveConnectConfig(response_modalities=[\"AUDIO\"])\n", + " async with client.aio.live.connect(model=MODEL, config=config) as session:\n", + "\n", + " await session.send(input=query, end_of_turn=True)\n", + "\n", + " audio_parts = []\n", + " async for message in session.receive():\n", + " if message.server_content.model_turn:\n", + " for part in message.server_content.model_turn.parts:\n", + " if part.inline_data:\n", + " audio_parts.append(\n", + " np.frombuffer(part.inline_data.data, dtype=np.int16)\n", + " )\n", + "\n", + " if message.server_content.turn_complete:\n", + " if audio_parts:\n", + " audio_data = np.concatenate(audio_parts, axis=0)\n", + " await asyncio.sleep(0.4)\n", + " display(Audio(audio_data, rate=24000, autoplay=True))\n", + " break" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xKQ_l6wiLH_w" + }, + "source": [ + "In this example, you send a text prompt and request the model response in audio." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rXJRoxUAcFVB" + }, + "source": [ + "- Let's check the same query as before" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CfZy_XZeDUtS" + }, + "outputs": [], + "source": [ + "query = \"What is the price of a basic tune-up at Cymbal Bikes?\"\n", + "\n", + "await generate_audio_content(query)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "clfXp2PZmxDZ" + }, + "source": [ + "- The model is unable to answer the query, but with the Multimodal Live API, it doesn't hallucinate, which is pretty good!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wT2oB1BOqDYP" + }, + "source": [ + "### Continuous Audio Interaction (Not multiturn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T4iAJCstqR5s" + }, + "source": [ + " - The function below generates audio output based on the provided text prompt.\n", + " - The generated audio is displayed using `IPython.display.Audio`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bZntNTPiYLA8" + }, + "source": [ + "- Input your prompts (type `q` or `quit` or `exit` to exit).\n", + "- Example prompts:\n", + " - Hello\n", + " - Who are you?\n", + " - What's the largest planet in our solar system?\n", + " - Tell me 3 fun facts about the universe."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7M0zkHNrOBQf" + }, + "outputs": [], + "source": [ + "async def continuous_audio_generation():\n", + " \"\"\"Continuously generates audio responses for the asked queries.\"\"\"\n", + " while True:\n", + " query = input(\"Your query > \")\n", + " if any(query.lower() in s for s in [\"q\", \"quit\", \"exit\"]):\n", + " break\n", + " await generate_audio_content(query)\n", + "\n", + "\n", + "await continuous_audio_generation()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QX9k92TlJ864" + }, + "source": [ + "## Enhancing LLM Accuracy with RAG" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oOJ-Wx18hpju" + }, + "source": [ + "We'll be showcasing the design pattern for how to implement Real-time Retrieval Augmented Generation (RAG) using Gemini 2.0 multimodal live API.\n", + "\n", + "- Multimodal live API uses websockets to communicate over the internet\n", + "- It maintains a continuous connection\n", + "- Ideal for real-time applications which require persistent communication\n", + "\n", + "\n", + "> Note: Replicating real-life scenarios with Python can be challenging within the constraints of a Colab environment.\n", + "\n", + "\n", + "However, the flow shown in this section can be modified for streaming audio input and output.\n", + "\n", + "
\n", + "\n", + "We'll build the RAG pipeline from scratch to help you understand each and every component of the pipeline.\n", + "\n", + "There are other ways to build the RAG pipeline using open-source tools such as [LangChain](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/retrieval-augmented-generation/multimodal_rag_langchain.ipynb), [LlamaIndex](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/retrieval-augmented-generation/llamaindex_rag.ipynb), etc." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u5CXTtsPEyJ0" + }, + "source": [ + "### Context Documents" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vvdcw1AOg4se" + }, + "source": [ + "- Documents are the building blocks of any RAG pipeline, as they provide the relevant context needed to ground the LLM responses\n", + "- We'll be using the documents already downloaded at the start of the notebook\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M22BSDb2Xxpb" + }, + "outputs": [], + "source": [ + "documents = glob.glob(\"documents/*\")\n", + "documents" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zNpUL7t0e054" + }, + "source": [ + "### Retrieval Augmented Generation Architecture" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vV5Et4YHbqqE" + }, + "source": [ + "In general, RAG architecture consists of the following components:\n", + "\n", + "**Data Preparation**\n", + "1. Chunking: Dividing the document into smaller, manageable pieces for processing.\n", + "2. Embedding: Transforming text chunks into numerical vectors representing semantic meaning.\n", + "3. Indexing: Organizing embeddings for efficient similarity search."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "563756fa3b7f" + }, + "source": [ + "![RAGArchitecture](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/images/RAGArchitecture.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pf4sXzYUby57" + }, + "source": [ + "**Inference**\n", + "1. Retrieval: Finding the most relevant chunks based on the query embedding.\n", + "2. Query Augmentation: Enhancing the query with retrieved context for improved generation.\n", + "3. Generation: Synthesizing a coherent and informative answer based on the augmented query." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1a30b41b63f1" + }, + "source": [ + "![LiveAPI](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/images/LiveAPI.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M-0zlJ3_FRfa" + }, + "source": [ + "#### Document Embedding and Indexing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0fY3xLaFKBIS" + }, + "source": [ + "The following blocks of code show how to process unstructured data (PDFs), extract text, and divide it into smaller chunks for efficient embedding and retrieval."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JTTOQ35Ia-V2" + }, + "source": [ + "- Embeddings:\n", + " - Numerical representations of text\n", + " - They capture the semantic meaning and context of the text\n", + " - We'll use Vertex AI's text embedding model to generate embeddings\n", + " - Error handling (like the retry mechanism) is applied during embedding generation because of potential API quota limits.\n", + "\n", + "- Indexing:\n", + " - Build a searchable index from embeddings, enabling efficient similarity search.\n", + " - For example, the index is like a detailed table of contents for a massive reference book.\n", + "\n", + "\n", + "Check out the Google Cloud Platform [documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings) for a detailed understanding and example use cases." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Vun69x23FWiw" + }, + "outputs": [], + "source": [ + "@retry(wait=wait_random_exponential(multiplier=1, max=120), stop=stop_after_attempt(4))\n", + "def get_embeddings(\n", + " embedding_client: Any, embedding_model: str, text: str, output_dim: int = 768\n", + ") -> list[float]:\n", + " \"\"\"\n", + " Generate embeddings for text with retry logic for API quota management.\n", + "\n", + " Args:\n", + " embedding_client: The client object used to generate embeddings.\n", + " embedding_model: The name of the embedding model to use.\n", + " text: The text for which to generate embeddings.\n", + " output_dim: The desired dimensionality of the output embeddings (default is 768).\n", + "\n", + " Returns:\n", + " A list of floats representing the generated embeddings.
Returns None if a \"RESOURCE_EXHAUSTED\" error occurs.\n", + "\n", + " Raises:\n", + " Exception: Any exception encountered during embedding generation, excluding \"RESOURCE_EXHAUSTED\" errors.\n", + " \"\"\"\n", + " try:\n", + " response = embedding_client.models.embed_content(\n", + " model=embedding_model,\n", + " contents=[text],\n", + " config=types.EmbedContentConfig(output_dimensionality=output_dim),\n", + " )\n", + " return [response.embeddings[0].values]\n", + " except Exception as e:\n", + " if \"RESOURCE_EXHAUSTED\" in str(e):\n", + " return None\n", + " print(f\"Error generating embeddings: {str(e)}\")\n", + " raise" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2csDY5NsswwJ" + }, + "source": [ + "- The code block executes the following steps:\n", + "\n", + " - Extracts text from PDF documents and segments it into smaller chunks for processing.\n", + " - Employs a Vertex AI model to transform each text chunk into a numerical embedding vector, facilitating semantic representation and search.\n", + " - Constructs a Pandas DataFrame to store the embeddings, enriched with metadata such as document name and page number, effectively creating a searchable index for efficient retrieval.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9TJlvdIsRfmX" + }, + "outputs": [], + "source": [ + "def build_index(\n", + " document_paths: list[str],\n", + " embedding_client: Any,\n", + " embedding_model: str,\n", + " chunk_size: int = 512,\n", + ") -> pd.DataFrame:\n", + " \"\"\"\n", + " Build searchable index from a list of PDF documents with page-wise processing.\n", + "\n", + " Args:\n", + " document_paths: A list of file paths to PDF documents.\n", + " embedding_client: The client object used to generate embeddings.\n", + " embedding_model: The name of the embedding model to use.\n", + " chunk_size: The maximum size (in characters) of each text chunk. 
Defaults to 512.\n", + "\n", + " Returns:\n", + " A Pandas DataFrame where each row represents a text chunk. The DataFrame includes columns for:\n", + " - 'document_name': The path to the source PDF document.\n", + " - 'page_number': The page number within the document.\n", + " - 'page_text': The full text of the page.\n", + " - 'chunk_number': The chunk number within the page.\n", + " - 'chunk_text': The text content of the chunk.\n", + " - 'embeddings': The embedding vector for the chunk.\n", + "\n", + " Raises:\n", + " ValueError: If no chunks are created from the input documents.\n", + " Exception: Any exceptions encountered during file processing are printed to the console and the function continues to the next document.\n", + " \"\"\"\n", + " all_chunks = []\n", + "\n", + " for doc_path in document_paths:\n", + " try:\n", + " with open(doc_path, \"rb\") as file:\n", + " pdf_reader = PyPDF2.PdfReader(file)\n", + "\n", + " for page_num in range(len(pdf_reader.pages)):\n", + " page = pdf_reader.pages[page_num]\n", + " page_text = page.extract_text()\n", + "\n", + " chunks = [\n", + " page_text[i : i + chunk_size]\n", + " for i in range(0, len(page_text), chunk_size)\n", + " ]\n", + "\n", + " for chunk_num, chunk_text in enumerate(chunks):\n", + " embeddings = get_embeddings(\n", + " embedding_client, embedding_model, chunk_text\n", + " )\n", + "\n", + " if embeddings is None:\n", + " print(\n", + " f\"Warning: Could not generate embeddings for chunk {chunk_num} on page {page_num + 1}\"\n", + " )\n", + " continue\n", + "\n", + " chunk_info = {\n", + " \"document_name\": doc_path,\n", + " \"page_number\": page_num + 1,\n", + " \"page_text\": page_text,\n", + " \"chunk_number\": chunk_num,\n", + " \"chunk_text\": chunk_text,\n", + " \"embeddings\": embeddings,\n", + " }\n", + " all_chunks.append(chunk_info)\n", + "\n", + " except Exception as e:\n", + " print(f\"Error processing document {doc_path}: {str(e)}\")\n", + " continue\n", + "\n", + " if not 
all_chunks:\n", + " raise ValueError(\"No chunks were created from the documents\")\n", + "\n", + " return pd.DataFrame(all_chunks)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yFGsl-Zvlej6" + }, + "source": [ + "Let's create embeddings and an index using the provided documents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hjl5FDQckDcO" + }, + "outputs": [], + "source": [ + "vector_db_mini_vertex = build_index(\n", + " documents, embedding_client=client, embedding_model=text_embedding_model\n", + ")\n", + "vector_db_mini_vertex" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pZLX5ozMlxTX" + }, + "outputs": [], + "source": [ + "# Index size\n", + "vector_db_mini_vertex.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cvNVn3kT9FiB" + }, + "outputs": [], + "source": [ + "# Example of how a chunk looks like\n", + "vector_db_mini_vertex.loc[0, \"chunk_text\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hul4bjAkBkg0" + }, + "source": [ + "To enhance the performance of retrieval systems, consider the following:\n", + "\n", + "- Optimize chunk size selection to balance granularity and context.\n", + "- Evaluate various chunking strategies to identify the most effective approach for your data.\n", + "- Explore managed services and scalable indexing solutions, such as [Vertex AI Search](https://cloud.google.com/generative-ai-app-builder/docs/create-datastore-ingest), to enhance performance and efficiency." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "43txjyVlHT6v" + }, + "source": [ + "#### Retrieval" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y92jM-v8KBfV" + }, + "source": [ + "The below code demonstrates how to query the index and uses a cosine similarity measure for comparing query vectors against the index. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bI1YsFoKtyxY" + }, + "source": [ + "* **Input:** Accepts a query string and parameters like the number of relevant chunks to return.\n", + "* **Embedding Generation:** Generates an embedding for the input query using the same model used to embed the document chunks.\n", + "* **Similarity Search:** Compares the query embedding to the embeddings of all indexed document chunks, using cosine similarity. Could use other distance metrics as well.\n", + "* **Ranking:** Ranks the chunks based on their similarity scores to the query.\n", + "* **Top-k Retrieval:** Returns the top *k* most similar chunks, where *k* is specified by the input parameters. This could be configurable.\n", + "* **Output:** Returns a list of relevant chunks, potentially including the original chunk text, similarity score, document source (filename, page number), and chunk metadata.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "88ndL_2wJ5ZD" + }, + "outputs": [], + "source": [ + "def get_relevant_chunks(\n", + " query: str,\n", + " vector_db: pd.DataFrame,\n", + " embedding_client: Any,\n", + " embedding_model: str,\n", + " top_k: int = 3,\n", + ") -> str:\n", + " \"\"\"\n", + " Retrieve the most relevant document chunks for a query using similarity search.\n", + "\n", + " Args:\n", + " query: The search query string.\n", + " vector_db: A pandas DataFrame containing the vectorized document chunks.\n", + " It must contain columns named 'embeddings', 'document_name',\n", + " 'page_number', and 'chunk_text'.\n", + " The 'embeddings' column should contain lists or numpy arrays\n", + " representing the embeddings.\n", + " embedding_client: The client object used to generate embeddings.\n", + " embedding_model: The name of the embedding model to use.\n", + " top_k: The number of most similar chunks to retrieve. 
Defaults to 3.\n", + "\n", + " Returns:\n", + " A formatted string containing the top_k most relevant chunks. Each chunk is\n", + " presented with its page number and chunk number. Returns an error message if\n", + " the query processing fails or if an error occurs during chunk retrieval.\n", + "\n", + " Raises:\n", + " Exception: If any error occurs during the process (e.g., issues with the embedding client,\n", + " data format problems in the vector database).\n", + " The specific error is printed to the console.\n", + " \"\"\"\n", + " try:\n", + " query_embedding = get_embeddings(embedding_client, embedding_model, query)\n", + "\n", + " if query_embedding is None:\n", + " return \"Could not process query due to quota issues\"\n", + "\n", + " similarities = [\n", + " cosine_similarity(query_embedding, chunk_emb)[0][0]\n", + " for chunk_emb in vector_db[\"embeddings\"]\n", + " ]\n", + "\n", + " top_indices = np.argsort(similarities)[-top_k:]\n", + " relevant_chunks = vector_db.iloc[top_indices]\n", + "\n", + " context = []\n", + " for _, row in relevant_chunks.iterrows():\n", + " context.append(\n", + " {\n", + " \"document_name\": row[\"document_name\"],\n", + " \"page_number\": row[\"page_number\"],\n", + " \"chunk_number\": row[\"chunk_number\"],\n", + " \"chunk_text\": row[\"chunk_text\"],\n", + " }\n", + " )\n", + "\n", + " return \"\\n\\n\".join(\n", + " [\n", + " f\"[Page {chunk['page_number']}, Chunk {chunk['chunk_number']}]: {chunk['chunk_text']}\"\n", + " for chunk in context\n", + " ]\n", + " )\n", + "\n", + " except Exception as e:\n", + " print(f\"Error getting relevant chunks: {str(e)}\")\n", + " return \"Error retrieving relevant chunks\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3hxyLlTjsstI" + }, + "source": [ + "Let's test out our retrieval component" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ek4aF0Esck2H" + }, + "source": [ + "- Let's try the same query for which the model was not able to answer 
earlier, due to lack of context" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lSd8ZeH6D7m4" + }, + "outputs": [], + "source": [ + "query = \"What is the price of a basic tune-up at Cymbal Bikes?\"\n", + "relevant_context = get_relevant_chunks(\n", + " query, vector_db_mini_vertex, client, text_embedding_model, top_k=3\n", + ")\n", + "relevant_context" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YBxnXReUn8Iy" + }, + "source": [ + "- You can see, with the help of the relevant context we can derive the answer as it contains the chunks specific to the asked query.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "25eb6422c9cf" + }, + "source": [ + "![Context](https://storage.googleapis.com/github-repo/generative-ai/gemini2/use-cases/retail_rag/images/Context.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kHzw7_UwzutC" + }, + "source": [ + "For optimal performance, consider these points:\n", + "\n", + "* **Context Window:** Considers a context window around the retrieved chunks to provide more comprehensive context. This could involve returning neighboring chunks or a specified window size.\n", + "* **Filtering:** Option to filter retrieved chunks based on criteria like minimum similarity score or source document.\n", + "* **Efficiency:** Designed for efficient retrieval, especially for large indexes, potentially using optimized search algorithms or data structures." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZEfJkwSqJ5KR" + }, + "source": [ + "### Generation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b7OZpv33KBx_" + }, + "source": [ + "* **Contextual Answer Synthesis:** The core function of the generation component is to synthesize a coherent and informative answer based on the retrieved context. 
It takes the user's query and the relevant document chunks as input.\n", + "* **Large Language Model (LLM) Integration:** It leverages a large language model (LLM) to generate the final answer. The LLM processes both the query and the retrieved context to produce a response. The quality of the answer heavily relies on the capabilities of the chosen LLM.\n", + "* **Coherence and Relevance:** A good generation function ensures the generated answer is coherent, factually accurate, and directly addresses the user's query, using only the provided context. It avoids hallucinations (generating information not present in the context).\n", + "* **Prompt Engineering:** The effectiveness of the LLM is heavily influenced by the prompt. The generation function likely incorporates prompt engineering techniques to guide the LLM towards generating the desired output. This may involve carefully crafting instructions for the LLM or providing examples.\n", + "\n", + "For more details on prompt engineering, check out the [documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-design-strategies)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0xs-AQmqm03l" + }, + "source": [ + "Let's see two use-cases, `Text-In-Text-Out` and `Text-In-Audio-Out`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xp7doymTJ7Iu" + }, + "outputs": [], + "source": [ + "@retry(wait=wait_random_exponential(multiplier=1, max=120), stop=stop_after_attempt(4))\n", + "async def generate_answer(\n", + " query: str, context: str, llm_client: Any, modality: str = \"text\"\n", + ") -> str:\n", + " \"\"\"\n", + " Generate answer using LLM with retry logic for API quota management.\n", + "\n", + " Args:\n", + " query: User query.\n", + " context: Relevant text providing context for the query.\n", + " llm_client: Client for accessing LLM API.\n", + " modality: Output modality (text or audio).\n", + "\n", + " Returns:\n", + " Generated answer.\n", + "\n", + " Raises:\n", + " Exception: If an unexpected error occurs during the LLM call (after retry attempts are exhausted).\n", + " \"\"\"\n", + " try:\n", + " # If context indicates earlier quota issues, return early\n", + " if context in [\n", + " \"Could not process query due to quota issues\",\n", + " \"Error retrieving relevant chunks\",\n", + " ]:\n", + " return \"Can't Process, Quota Issues\"\n", + "\n", + " prompt = f\"\"\"Based on the following context, please answer the question.\n", + "\n", + " Context:\n", + " {context}\n", + "\n", + " Question: {query}\n", + "\n", + " Answer:\"\"\"\n", + "\n", + " if modality == \"text\":\n", + " # Generate text answer using LLM\n", + " response = await generate_content(prompt)\n", + " return response\n", + "\n", + " elif modality == \"audio\":\n", + " # Generate audio answer using LLM\n", + " await generate_audio_content(prompt)\n", + "\n", + " except Exception as e:\n", + " if \"RESOURCE_EXHAUSTED\" in str(e):\n", + " return \"Can't Process, Quota Issues\"\n", + " print(f\"Error generating answer: {str(e)}\")\n", + " return \"Error generating 
answer\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "11q0Sf0oJ7wL" + }, + "source": [ + "Let's test our `Generation` component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S-iesR2BEHnI" + }, + "outputs": [], + "source": [ + "query = \"What is the price of a basic tune-up at Cymbal Bikes?\"\n", + "\n", + "generated_answer = await generate_answer(\n", + " query, relevant_context, client, modality=\"text\"\n", + ")\n", + "display(Markdown(generated_answer))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W7EHYeP-EMpN" + }, + "outputs": [], + "source": [ + "await generate_answer(query, relevant_context, client, modality=\"audio\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CbQB5PbMrrsB" + }, + "source": [ + "> And the answer is... CORRECT !! 🎉" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1gnr-j-ocxlx" + }, + "source": [ + "- The accuracy of the generated answer is attributed to the provision of relevant context to the Large Language Model (LLM), enabling it to effectively comprehend the query and produce an appropriate response." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2MNlAoAHR0Do" + }, + "source": [ + "### Pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8LemsW6WrOfm" + }, + "source": [ + "Let's put `Retrieval` and `Generation` components together in a pipeline." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yoOeqxETR2G_" + }, + "outputs": [], + "source": [ + "async def rag(\n", + " question: str,\n", + " vector_db: pd.DataFrame,\n", + " embedding_client: Any,\n", + " embedding_model: str,\n", + " llm_client: Any,\n", + " top_k: int,\n", + " llm_model: str,\n", + " modality: str = \"text\",\n", + ") -> str | None:\n", + " \"\"\"\n", + " RAG Pipeline.\n", + "\n", + " Args:\n", + " question: User query.\n", + " vector_db: DataFrame containing document chunks and embeddings.\n", + " embedding_client: Client for accessing embedding API.\n", + " embedding_model: Name of the embedding model.\n", + " llm_client: Client for accessing LLM API.\n", + " top_k: The number of top relevant chunks to retrieve from the vector database.\n", + " llm_model: Name of the LLM model.\n", + " modality: Output modality (text or audio).\n", + "\n", + " Returns:\n", + " For text modality, generated answer.\n", + " For audio modality, audio playback widget.\n", + "\n", + " Raises:\n", + " Exception: Catches and prints any exceptions during processing. 
Returns an error message.\n", + " \"\"\"\n", + "\n", + " try:\n", + " # Get relevant context for question\n", + " relevant_context = get_relevant_chunks(\n", + " question, vector_db, embedding_client, embedding_model, top_k=top_k\n", + " )\n", + "\n", + " if modality == \"text\":\n", + " # Generate text answer using LLM\n", + " generated_answer = await generate_answer(\n", + " question,\n", + " relevant_context,\n", + " llm_client,\n", + " )\n", + " return generated_answer\n", + "\n", + " elif modality == \"audio\":\n", + " # Generate audio answer using LLM\n", + " await generate_answer(\n", + " question, relevant_context, llm_client, modality=modality\n", + " )\n", + " return\n", + "\n", + " except Exception as e:\n", + " print(f\"Error processing question '{question}': {str(e)}\")\n", + " return {\"question\": question, \"generated_answer\": \"Error processing question\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q8bNzUvbVJcx" + }, + "source": [ + "Our Retrieval Augmented Generation (RAG) architecture allows for flexible output modality(text and audio) selection. By modifying only the generation component, we can produce both text and audio output while maintaining the same retrieval mechanism. This highlights the adaptability of RAG in catering to diverse content presentation needs." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pkn75-1cFW1J" + }, + "source": [ + "### Inference" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QMGtlPWcVXT0" + }, + "source": [ + "Let's test our simple RAG pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0vwfQbodn89Y" + }, + "source": [ + "#### Sample Queries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Zx_GwXESk9aP" + }, + "outputs": [], + "source": [ + "question_set = [\n", + " {\n", + " \"question\": \"What is the price of a basic tune-up at Cymbal Bikes?\",\n", + " \"answer\": \"A basic tune-up costs $100.\",\n", + " },\n", + " {\n", + " \"question\": \"How much does it cost to replace a tire at Cymbal Bikes?\",\n", + " \"answer\": \"Replacing a tire at Cymbal Bikes costs $50 per tire.\",\n", + " },\n", + " {\n", + " \"question\": \"What does gear repair at Cymbal Bikes include?\",\n", + " \"answer\": \"Gear repair includes inspection and repair of the gears, including replacement of chainrings, cogs, and cables as needed.\",\n", + " },\n", + " {\n", + " \"question\": \"What is the cost of replacing a tube at Cymbal Bikes?\",\n", + " \"answer\": \"Replacing a tube at Cymbal Bikes costs $20.\",\n", + " },\n", + " {\n", + " \"question\": \"Can I return clothing items to Cymbal Bikes?\",\n", + " \"answer\": \"Clothing can only be returned if it is unworn and in the original packaging.\",\n", + " },\n", + " {\n", + " \"question\": \"What is the time frame for returning items to Cymbal Bikes?\",\n", + " \"answer\": \"Cymbal Bikes offers a 30-day return policy on all items.\",\n", + " },\n", + " {\n", + " \"question\": \"Can I return edible items like energy gels?\",\n", + " \"answer\": \"No, edible items are not returnable.\",\n", + " },\n", + " {\n", + " \"question\": \"How can I return an item purchased online from Cymbal Bikes?\",\n", + " \"answer\": \"Items purchased online can be returned to any Cymbal 
Bikes store or mailed back.\",\n", + " },\n", + " {\n", + " \"question\": \"What should I include when returning an item to Cymbal Bikes?\",\n", + " \"answer\": \"Please include the original receipt and a copy of your shipping confirmation when returning an item.\",\n", + " },\n", + " {\n", + " \"question\": \"Does Cymbal Bikes offer refunds for shipping charges?\",\n", + " \"answer\": \"Cymbal Bikes does not offer refunds for shipping charges, except for defective items.\",\n", + " },\n", + " {\n", + " \"question\": \"How do I process a return for a defective item at Cymbal Bikes?\",\n", + " \"answer\": \"To process a return for a defective item, please contact Cymbal Bikes first.\",\n", + " },\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZUo_fcNzoAp3" + }, + "source": [ + "#### Text" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y1RC5-djV0-r" + }, + "source": [ + "First we will try, `modality='text'`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dmyN-h18EZdT" + }, + "outputs": [], + "source": [ + "question_set[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-f3hsHqBEbwc" + }, + "outputs": [], + "source": [ + "response = await rag(\n", + " question=question_set[0][\"question\"],\n", + " vector_db=vector_db_mini_vertex,\n", + " embedding_client=client, # For embedding generation\n", + " embedding_model=text_embedding_model, # For embedding model\n", + " llm_client=client, # For answer generation,\n", + " top_k=3,\n", + " llm_model=MODEL,\n", + " modality=\"text\",\n", + ")\n", + "display(Markdown(response))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Nb3VytmIyo-1" + }, + "source": [ + "#### Audio" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kEl80N8VV_6E" + }, + "source": [ + "Now, let's try `modality='audio'` to get audio response." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "STdO_EtxEhFA" + }, + "outputs": [], + "source": [ + "await rag(\n", + " question=question_set[0][\"question\"],\n", + " vector_db=vector_db_mini_vertex,\n", + " embedding_client=client, # For embedding generation\n", + " embedding_model=text_embedding_model, # For embedding model\n", + " llm_client=client, # For answer generation,\n", + " top_k=3,\n", + " llm_model=MODEL,\n", + " modality=\"audio\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l9NMyJm-_0lM" + }, + "source": [ + "Evaluating Retrieval Augmented Generation (RAG) applications before production is crucial for identifying areas for improvement and ensuring optimal performance.\n", + "Check out the Vertex AI [Gen AI evaluation service](https://cloud.google.com/vertex-ai/generative-ai/docs/models/evaluation-overview)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Erp1ImX9Lu1Y" + }, + "source": [ + "## Conclusion" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W2A4xXWP1EB4" + }, + "source": [ + "Congratulations on making it through this notebook!" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uyc3uq1uYHEN" + }, + "source": [ + "- We have seen how to use the Gemini API in Vertex AI to generate text and Multimodal Live API to generate text and audio output.\n", + "- Developed a fully functional Retrieval Augmented Generation (RAG) pipeline capable of answering questions based on provided documents.\n", + "- Demonstrated the versatility of the RAG architecture by enabling both text and audio output modalities.\n", + "- Ensured the adaptability of the RAG pipeline to various use cases by enabling seamless integration of different context documents.\n", + "- Established a foundation for building more advanced RAG systems leveraging larger document sets and sophisticated indexing/retrieval services like Vertex AI Datastore/Agent Builder and Vertex AI Multimodal Live API." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "54e180a34fd0" + }, + "source": [ + "## What's next\n", + "\n", + "- Learn how to [build a web application that enables you to use your voice and camera to talk to Gemini 2.0 through the Multimodal Live API.](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/gemini/multimodal-live-api/websocket-demo-app)\n", + "- See the [Multimodal Live API reference docs](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live).\n", + "- See the [Google Gen AI SDK reference docs](https://googleapis.github.io/python-genai/).\n", + "- Explore other notebooks in the [Google Cloud Generative AI GitHub repository](https://github.com/GoogleCloudPlatform/generative-ai)." 
+ ] + } + ], + "metadata": { + "colab": { + "name": "real_time_rag_retail_gemini_2_0.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/gemini/reasoning-engine/tutorial_langgraph_rag_agent.ipynb b/gemini/reasoning-engine/tutorial_langgraph_rag_agent.ipynb index 3398a789dc5..b53f620d1f0 100644 --- a/gemini/reasoning-engine/tutorial_langgraph_rag_agent.ipynb +++ b/gemini/reasoning-engine/tutorial_langgraph_rag_agent.ipynb @@ -1,1140 +1,1140 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3YcBnq20nC6r" - }, - "outputs": [], - "source": [ - "# Copyright 2024 Google LLC\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xU0F5ObiGgF4" - }, - "source": [ - "# Building a Multi-Agent RAG Application with LangGraph and Reasoning Engine\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Google
Run in Colab\n", - "
\n", - "
\n", - " \n", - " \"Google
Run in Colab Enterprise\n", - "
\n", - "
\n", - " \n", - " \"GitHub
View on GitHub\n", - "
\n", - "
\n", - " \n", - " \"Vertex
Open in Vertex AI Workbench\n", - "
\n", - "
\n", - "\n", - "
\n", - "\n", - "Share to:\n", - "\n", - "\n", - " \"LinkedIn\n", - "\n", - "\n", - "\n", - " \"Bluesky\n", - "\n", - "\n", - "\n", - " \"X\n", - "\n", - "\n", - "\n", - " \"Reddit\n", - "\n", - "\n", - "\n", - " \"Facebook\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4sA9r45YMz_O" - }, - "source": [ - "| | |\n", - "|-|-|\n", - "|Author(s) | [Xiaolong Yang](https://github.com/shawn-yang-google) |" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GZft-jYpHmYv" - }, - "source": [ - "## Overview\n", - "\n", - "[Reasoning Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/reasoning-engine/overview) (LangChain on Vertex AI) is a managed service in Vertex AI that helps you to build and deploy an agent reasoning framework. It gives you the flexibility to choose how much reasoning you want to delegate to the LLM and how much you want to handle with customized code.\n", - "\n", - "RAG (Retrieval-Augmented Generation) is an AI framework that combines the strengths of traditional information retrieval systems (such as databases) with the capabilities of generative large language models (LLMs). 
\n", - "\n", - "[LangGraph](https://langchain-ai.github.io/langgraph/) is a library for building stateful, multi-actor applications with LLMs, used to create agent and multi-agent workflows.\n", - "\n", - "This notebook demonstrates how to build, deploy, and test a LangGraph + RAG application using [Reasoning Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/reasoning-engine/overview) in Vertex AI.\n", - "\n", - "\n", - "## Context\n", - "\n", - "In previous tutorials:\n", - "* [LangGraph application with Reasoning Engine](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_langgraph.ipynb?)\n", - "You have learned how to combine LangGraph's workflow orchestration with the scalability of Vertex AI, which enables you to build custom generative AI applications.\n", - "* [RAG application with Reasoning Engine and Cloud SQL for PostgreSQL](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_cloud_sql_pg_rag_agent.ipynb)\n", - "By combining this extra knowledge with its own language skills, the AI can write text that is more accurate, up-to-date, and relevant to your specific needs.\n", - "Your [LangChain](https://python.langchain.com/docs/get_started/introduction) agent uses an [Postgres Vector Store](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/tree/main) to perform a similary search and retrieve related data to ground the LLM response.\n", - "\n", - "## Objectives\n", - "\n", - "In this tutorial, you will learn how to build and deploy an agent (model, tools, and reasoning) using the Vertex AI SDK for Python and Cloud SQL for PostgreSQL LangGraph integration.\n", - "\n", - "We're using the `Multi Agent Collaboration` [approach](https://blog.langchain.dev/langgraph-multi-agent-workflows/). 
\n", - "This sample notebook could be adapted to use other multi-agent implementations described in the [link](https://blog.langchain.dev/langgraph-multi-agent-workflows/), such as the `Agent Supervisor` or other approaches. \n", - "\n", - "You will develop a LangGraph Application like: \"Image \n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QL58mPu9Hw7g" - }, - "source": [ - "## Before you begin\n", - "\n", - "1. In the Google Cloud console, on the project selector page, select or [create a Google Cloud project](https://cloud.google.com/resource-manager/docs/creating-managing-projects).\n", - "2. [Make sure that billing is enabled for your Google Cloud project](https://cloud.google.com/billing/docs/how-to/verify-billing-enabled#console).\n", - "3. Follow the instruction in [RAG application with Reasoning Engine and Cloud SQL for PostgreSQL](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_cloud_sql_pg_rag_agent.ipynb), set up Cloud SQL for PostgreSQL.\n", - "\n", - "### Required roles\n", - "\n", - "To get the permissions that you need to complete the tutorial, ask your administrator to grant you the [Owner](https://cloud.google.com/iam/docs/understanding-roles#owner) (`roles/owner`) IAM role on your project. 
For more information about granting roles, see [Manage access](https://cloud.google.com/iam/docs/granting-changing-revoking-access).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-RYpMytsZ882" - }, - "source": [ - "### Install and import dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "w_94DKOCX5pG" - }, - "outputs": [], - "source": [ - "%pip install --upgrade --user --quiet \\\n", - " \"google-cloud-aiplatform[reasoningengine,langchain]\"==1.60.0 \\\n", - " langchain-google-cloud-sql-pg==0.6.1 \\\n", - " cloud-sql-python-connector==1.9.0 \\\n", - " langchain-google-vertexai==1.0.4 \\\n", - " cloudpickle==3.0.0 \\\n", - " pydantic==2.7.4 \\\n", - " langgraph==0.0.51 \\\n", - " httpx==0.27.2" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "R5Xep4W9lq-Z" - }, - "source": [ - "### Restart runtime\n", - "\n", - "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.\n", - "\n", - "The restart might take a minute or longer. After it's restarted, continue to the next step." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XRvKdaPDTznN" - }, - "outputs": [], - "source": [ - "import IPython\n", - "\n", - "app = IPython.Application.instance()\n", - "app.kernel.do_shutdown(True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bThFamq9351N" - }, - "source": [ - "### Import libraries\n", - "\n", - "Import the necessary Python libraries. These libraries provide the tools we need to interact with LangGraph, Vertex AI, and other components of our application." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "KNt0YeKaMz_Q" - }, - "outputs": [], - "source": [ - "import json\n", - "from typing import Literal\n", - "import uuid\n", - "\n", - "from google.cloud import storage\n", - "from langchain_core.documents import Document\n", - "from langchain_core.messages import BaseMessage, HumanMessage\n", - "from langchain_core.tools import tool\n", - "from langchain_google_cloud_sql_pg import PostgresEngine, PostgresVectorStore\n", - "from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings\n", - "from langgraph.graph import END, MessageGraph\n", - "from langgraph.prebuilt import ToolNode\n", - "import vertexai\n", - "from vertexai.preview import reasoning_engines" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yPKXjZrFZuUZ" - }, - "source": [ - "### Authenticate to Google Cloud\n", - "\n", - "Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "NyKGtVQjgx13" - }, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "if \"google.colab\" in sys.modules:\n", - " from google.colab import auth\n", - "\n", - " auth.authenticate_user()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9aGBuLA7aQ6O" - }, - "source": [ - "### Define project information\n", - "\n", - "Initialize `gcloud` with your Project ID and resource location. At this time, only `us-central1` is supported." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DF4l8DTdWgPY" - }, - "source": [ - "### Set Google Cloud project information and initialize Vertex AI SDK\n", - "\n", - "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n", - "\n", - "Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Nqwi-5ufWp_B" - }, - "outputs": [], - "source": [ - "PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}\n", - "LOCATION = \"us-central1\" # @param {type:\"string\"}\n", - "STAGING_BUCKET = \"gs://[your-staging-bucket]\" # @param {type:\"string\"}\n", - "\n", - "vertexai.init(project=PROJECT_ID, location=LOCATION, staging_bucket=STAGING_BUCKET)\n", - "!gcloud config set project {PROJECT_ID}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "S_yG0kddIvr7" - }, - "source": [ - "## Set up Cloud SQL\n", - "\n", - "You should have already set up Cloud SQL in [RAG application with Reasoning Engine and Cloud SQL for PostgreSQL](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_cloud_sql_pg_rag_agent.ipynb):\n", - "* Enable APIs.\n", - "* Create a Cloud SQL instance.\n", - "* Create a database.\n", - "* Initialize multiple vector store tables.\n", - "* Create a user.\n", - "\n", - "\n", - "In this Colab, we will create two new vector store tables: Book and Movie.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XtiB5-LVVkv0" - }, - "outputs": [], - "source": [ - "REGION = \"us-central1\" # @param {type:\"string\"}\n", - "INSTANCE = \"langgraph-rag-instance\" # @param {type:\"string\"}\n", - "DATABASE = \"harry_potter_data\" # @param {type:\"string\"}\n", 
- "MOVIE_TABLE_NAME = \"my-movie\" # @param {type:\"string\"}\n", - "BOOK_TABLE_NAME = \"my-book\" # @param {type:\"string\"}\n", - "PASSWORD = input(\"Please provide a password to be used for 'postgres' database user: \")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dqDjyLpS5zCm" - }, - "source": [ - "### Grant access to vector store table to IAM users" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "24NjnjF95ySA" - }, - "outputs": [], - "source": [ - "engine = await PostgresEngine.afrom_instance(\n", - " PROJECT_ID, REGION, INSTANCE, DATABASE, user=\"postgres\", password=PASSWORD\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OaP1LRhPi0y7" - }, - "source": [ - "### Initialize multiple vector store tables\n", - "\n", - "The `PostgresEngine` has a helper method `init_vectorstore_table()` that can be used to create a table with the proper schema to store vector embeddings." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "GGd89YWIi2qg" - }, - "outputs": [], - "source": [ - "for table_name in [MOVIE_TABLE_NAME, BOOK_TABLE_NAME]:\n", - " engine = await PostgresEngine.afrom_instance(\n", - " PROJECT_ID, REGION, INSTANCE, DATABASE, user=\"postgres\", password=PASSWORD\n", - " )\n", - "\n", - " await engine.ainit_vectorstore_table(\n", - " table_name=table_name,\n", - " vector_size=768, # Vector size for VertexAI model(textembedding-gecko@latest)\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sQ1MI8ARi5Rr" - }, - "source": [ - "### Add embeddings to the vector store\n", - "\n", - "Load data from a CSV file to generate and insert embeddings to the vector store.\n", - "\n", - "We will use two datasets:\n", - "\n", - "* Harry Potter Movie\n", - " - Intro: https://www.kaggle.com/datasets/rounakbanik/the-movies-dataset\n", - " - Data: 
gs://github-repo/generative-ai/gemini/reasoning-engine/sample_data/harry_potter_movies.json\n", - "* Harry Potter Book\n", - " - Intro: https://www.kaggle.com/datasets/shubhammaindola/harry-potter-books\n", - " - Data: gs://github-repo/generative-ai/gemini/reasoning-engine/sample_data/harry_potter_books.json\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "Kcawj2ILdNmN" - }, - "outputs": [], - "source": [ - "def get_docs_from_gcs(bucket_name, gcs_dir, blob_name):\n", - " \"\"\"Fetches a JSON file from GCS, deserializes it, and returns the data.\n", - "\n", - " Args:\n", - " bucket_name: Name of the GCS bucket (e.g., 'my-bucket').\n", - " gcs_dir: Directory within the bucket where the JSON file is located.\n", - " blob_name: Path and filename within the bucket\n", - " (e.g., 'my_data.json').\n", - "\n", - " Returns:\n", - " A Python object representing the Document, or None if the file\n", - " is not found or an error occurs.\n", - " \"\"\"\n", - "\n", - " storage_client = storage.Client()\n", - " bucket = storage_client.bucket(bucket_name)\n", - " blob = bucket.blob(f\"{gcs_dir}/{blob_name}\")\n", - "\n", - " if not blob.exists():\n", - " print(f\"File not found: gs://{bucket_name}/{gcs_dir}/{blob_name}\")\n", - " return None\n", - "\n", - " try:\n", - " with blob.open(\"r\") as f:\n", - " json_docs = json.loads(f.read())\n", - " except json.JSONDecodeError:\n", - " print(f\"Error: Invalid JSON format in gs://{bucket_name}/{gcs_dir}/{blob_name}\")\n", - " return None\n", - "\n", - " docs = []\n", - " for json_doc in json_docs:\n", - " docs.append(Document(**(json_doc[\"kwargs\"])))\n", - "\n", - " return docs" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Pg5k6FyykfzW" - }, - "source": [ - "#### Movies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dkMjEXEmi4ro" - }, - "outputs": [], - "source": [ - "# Initialize the vector store for movies\n", - "vector_store = 
await PostgresVectorStore.create(\n", - " engine,\n", - " table_name=MOVIE_TABLE_NAME,\n", - " embedding_service=VertexAIEmbeddings(\n", - " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n", - " ),\n", - ")\n", - "docs = get_docs_from_gcs(\n", - " \"github-repo\",\n", - " \"generative-ai/gemini/reasoning-engine/sample_data\",\n", - " \"harry_potter_movies.json\",\n", - ")\n", - "# Add data to the vector store\n", - "ids = [str(uuid.uuid4()) for i in range(len(docs))]\n", - "await vector_store.aadd_documents(docs, ids=ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GBdIVxqVkjT-" - }, - "source": [ - "#### Books" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PWAElYbEkyzB" - }, - "outputs": [], - "source": [ - "# Initialize the vector store for books\n", - "vector_store = await PostgresVectorStore.create(\n", - " engine,\n", - " table_name=BOOK_TABLE_NAME,\n", - " embedding_service=VertexAIEmbeddings(\n", - " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n", - " ),\n", - ")\n", - "docs = get_docs_from_gcs(\n", - " \"github-repo\",\n", - " \"generative-ai/gemini/reasoning-engine/sample_data\",\n", - " \"harry_potter_books.json\",\n", - ")\n", - "# Add data to the vector store\n", - "ids = [str(uuid.uuid4()) for i in range(len(docs))]\n", - "await vector_store.aadd_documents(docs, ids=ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XCra5kJVKyg5" - }, - "source": [ - "## Define the retriever tool\n", - "\n", - "Tools are interfaces that an agent, chain, or LLM can use to enable the Gemini model to interact with external systems, databases, document stores, and other APIs so that the model can get the most up-to-date information or take action with those systems.\n", - "\n", - "In this example, you'll define a function that will retrieve similar documents from the vector store using semantic search." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "id": "vLx7O_UdqDyr" - }, - "outputs": [], - "source": [ - "@tool\n", - "def movie_similarity_search(query: str) -> str:\n", - " \"\"\"\n", - " Perform a similarity search for movies based on the user's last message.\n", - "\n", - " Args:\n", - " query str: The current conversation state, where the last message contains the query.\n", - "\n", - " Returns:\n", - " str: A list of BaseMessage containing the search results.\n", - " \"\"\"\n", - " engine = PostgresEngine.from_instance(\n", - " PROJECT_ID,\n", - " REGION,\n", - " INSTANCE,\n", - " DATABASE,\n", - " quota_project=PROJECT_ID,\n", - " user=\"postgres\",\n", - " password=PASSWORD,\n", - " )\n", - "\n", - " vector_store = PostgresVectorStore.create_sync(\n", - " engine,\n", - " table_name=MOVIE_TABLE_NAME,\n", - " embedding_service=VertexAIEmbeddings(\n", - " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n", - " ),\n", - " )\n", - " retriever = vector_store.as_retriever()\n", - " return str([doc for doc in retriever.invoke(query)])" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "id": "XVjf2fT_bIGa" - }, - "outputs": [], - "source": [ - "@tool\n", - "def book_similarity_search(query: str) -> str:\n", - " \"\"\"\n", - " Perform a similarity search for books based on the user's last message.\n", - "\n", - " Args:\n", - " state (List[BaseMessage]): The current conversation state, where the last message contains the query.\n", - "\n", - " Returns:\n", - " List[BaseMessage]: A list of BaseMessage containing the search results.\n", - " \"\"\"\n", - " engine = PostgresEngine.from_instance(\n", - " PROJECT_ID,\n", - " REGION,\n", - " INSTANCE,\n", - " DATABASE,\n", - " quota_project=PROJECT_ID,\n", - " # Uncomment to use built-in authentication instead of IAM authentication\n", - " user=\"postgres\",\n", - " password=PASSWORD,\n", - " )\n", - "\n", - " vector_store = 
PostgresVectorStore.create_sync(\n", - " engine,\n", - " table_name=BOOK_TABLE_NAME,\n", - " embedding_service=VertexAIEmbeddings(\n", - " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n", - " ),\n", - " )\n", - " retriever = vector_store.as_retriever()\n", - " return str([doc for doc in retriever.invoke(query)])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BF8xqd84351O" - }, - "source": [ - "### Define router\n", - "\n", - "We're using the `Multi Agent Collaboration` [approach](https://blog.langchain.dev/langgraph-multi-agent-workflows/). \n", - "This sample notebook could be adapted to use other multi-agent implementations described in the [link](https://blog.langchain.dev/langgraph-multi-agent-workflows/), such as the `Agent Supervisor` or other approaches. \n", - "\n", - "Then, you'll define a router to control the flow of the conversation, determining which tool to use based on user input or the state of the interaction. Here we'll use a simple router setup, and you can customize the behavior of your router to handle multiple tools, custom logic, or multi-agent workflows.\n", - "\n", - "In this example, the router will invoke different nodes in the graph based on whether the user prompt contains the word 'book' or 'movie'." 
- ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "id": "7m9XRSn3351O" - }, - "outputs": [], - "source": [ - "def router(\n", - " state: list[BaseMessage],\n", - ") -> Literal[\"book_similarity_search\", \"movie_similarity_search\", \"__end__\"]:\n", - " if not state[0].content or len(state[1].tool_calls) == 0:\n", - " return \"__end__\"\n", - " if \"book\" in state[0].content:\n", - " return \"book_similarity_search\"\n", - " if \"movie\" in state[0].content:\n", - " return \"movie_similarity_search\"\n", - " return \"__end__\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CFeOIbed351O" - }, - "source": [ - "## Define LangGraph application\n", - "\n", - "Now you'll bring everything together to define your LangGraph application as a custom template in Reasoning Engine.\n", - "\n", - "This application will use the tool and router that you just defined. LangGraph provides a powerful way to structure these interactions and leverage the capabilities of LLMs." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tZYtR6-zqudb" - }, - "source": [ - "#### Multi stage" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": { - "id": "WWjFaLeW351O" - }, - "outputs": [], - "source": [ - "class MultiStageLangGraphApp:\n", - " def __init__(self, project: str, location: str) -> None:\n", - " self.project_id = project\n", - " self.location = location\n", - "\n", - " # The set_up method is used to define application initialization logic\n", - " def set_up(self) -> None:\n", - " model = ChatVertexAI(model=\"gemini-1.5-pro\")\n", - " builder = MessageGraph()\n", - "\n", - " # Checker node\n", - " def checker(state: list[BaseMessage]):\n", - " if not state[0].content:\n", - " return \"__end__\"\n", - " user_question = state[0].content\n", - " response = model.invoke(\n", - " [\n", - " HumanMessage(\n", - " content=(\n", - " f\"What is the type of the question? 
{user_question}\"\n", - " \"Think step by step, then answer one of the following:\"\n", - " \"* movie\"\n", - " \"* book\"\n", - " \"* no\"\n", - " )\n", - " )\n", - " ]\n", - " )\n", - " table_name = response.content.split(\"\")[1].split(\"\")[0]\n", - " # Multiturn requests alternate between user and model.\n", - " state[0].content = f\"query:{state[0].content},table_name:{table_name}\"\n", - "\n", - " builder.add_node(\"checker\", checker)\n", - " # Set entry point to checker node so it is reachable\n", - " builder.set_entry_point(\"checker\")\n", - "\n", - " # Tool node.\n", - " model_with_tools = model.bind_tools(\n", - " [book_similarity_search, movie_similarity_search]\n", - " )\n", - " builder.add_node(\"tools\", model_with_tools)\n", - " # Add edge from tools to checker so the flow is checker->tools->router...\n", - " builder.add_edge(\"checker\", \"tools\")\n", - "\n", - " # Summerize node.\n", - " # node\n", - " def summerizar(state: list[BaseMessage]):\n", - " question = state[0].content\n", - " related_docs = state[-1].content\n", - " response = model.invoke(\n", - " [\n", - " HumanMessage(\n", - " content=(\n", - " f\"\"\"\n", - " Use the docs: {related_docs} to answer question:{question}.\n", - " The answer format should be json dict.\n", - " \"\"\"\n", - " )\n", - " )\n", - " ]\n", - " )\n", - " # Multiturn requests alternate between user and model.\n", - " state.append(response)\n", - "\n", - " builder.add_node(\"summerizar_node\", summerizar)\n", - " builder.add_edge(\"summerizar_node\", END)\n", - " # Book retrieval node\n", - " book_node = ToolNode([book_similarity_search])\n", - " builder.add_node(\"book_similarity_search\", book_node)\n", - " builder.add_edge(\"book_similarity_search\", \"summerizar_node\")\n", - "\n", - " # Movie retrieval node\n", - " movie_node = ToolNode([movie_similarity_search])\n", - " builder.add_node(\"movie_similarity_search\", movie_node)\n", - " builder.add_edge(\"movie_similarity_search\", \"summerizar_node\")\n", 
- "\n", - " # Router to check condition.\n", - " builder.add_conditional_edges(\"tools\", router)\n", - "\n", - " self.runnable = builder.compile()\n", - "\n", - " # The query method will be used to send inputs to the agent\n", - " def query(self, message: str):\n", - " \"\"\"Query the application.\n", - "\n", - " Args:\n", - " message: The user message.\n", - "\n", - " Returns:\n", - " str: The LLM response.\n", - " \"\"\"\n", - " chat_history = self.runnable.invoke(HumanMessage(message))\n", - "\n", - " return chat_history[-1].content" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sEfQYtgSm9ol" - }, - "source": [ - "### Local test" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": { - "id": "IcWux9IskE-c" - }, - "outputs": [], - "source": [ - "agent = MultiStageLangGraphApp(project=PROJECT_ID, location=LOCATION)\n", - "agent.set_up()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0aaf11c1677a" - }, - "source": [ - "Expect a JSON format answer like \n", - "```json\n", - "{\"company\": [\"Warner Bros.\", \"Heyday Films\"]}\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "J9yUujSokJpQ" - }, - "outputs": [ + "cells": [ { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3YcBnq20nC6r" }, - "text/plain": [ - "```json\n", - "{\n", - " 'answer': 'Warner Bros and Heyday Films produce Harry Potter and the Deathly Hallows: Part 2.'\n", - "}\n", - "```" + "outputs": [], + "source": [ + "# Copyright 2024 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in 
writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.query(message=\"Which company produces and distributes Harry Potter films\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "077f1396f641" - }, - "source": [ - "Expect a JSON format answer like \n", - "```json\n", - "{\n", - " \"answer\": [\"Daniel Radcliffe\", \"Darren Criss\"]\n", - "}\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Vn1wBUEyLGSG" - }, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "```json\n", - "[\n", - " {\n", - " \"actor\": \"Darren Criss\",\n", - " \"movie\": \"A Very Potter Musical\"\n", - " },\n", - " {\n", - " \"actor\": \"Daniel Radcliffe\",\n", - " \"movie\": \"Harry Potter and the Deathly Hallows: Part 2\"\n", - " }\n", - "]\n", - "```\n" - ] - } - ], - "source": [ - "agent.query(message=\"Who acts as Harry Potter\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "df468170bc6d" - }, - "source": [ - "Expect a JSON format answer like \n", - "```json\n", - "{\n", - " \"answer\": \"Harry Potter and the Chamber of Secrets.\"\n", - "}\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EPGRJjdEb228" - }, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "xU0F5ObiGgF4" + }, + "source": [ + "# Building a Multi-Agent RAG Application with LangGraph and Reasoning Engine\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Google
Run in Colab\n", + "
\n", + "
\n", + " \n", + " \"Google
Run in Colab Enterprise\n", + "
\n", + "
\n", + " \n", + " \"GitHub
View on GitHub\n", + "
\n", + "
\n", + " \n", + " \"Vertex
Open in Vertex AI Workbench\n", + "
\n", + "
\n", + "\n", + "
\n", + "\n", + "Share to:\n", + "\n", + "\n", + " \"LinkedIn\n", + "\n", + "\n", + "\n", + " \"Bluesky\n", + "\n", + "\n", + "\n", + " \"X\n", + "\n", + "\n", + "\n", + " \"Reddit\n", + "\n", + "\n", + "\n", + " \"Facebook\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4sA9r45YMz_O" + }, + "source": [ + "| | |\n", + "|-|-|\n", + "|Author(s) | [Xiaolong Yang](https://github.com/shawn-yang-google) |" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GZft-jYpHmYv" + }, + "source": [ + "## Overview\n", + "\n", + "[Reasoning Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/reasoning-engine/overview) (LangChain on Vertex AI) is a managed service in Vertex AI that helps you to build and deploy an agent reasoning framework. It gives you the flexibility to choose how much reasoning you want to delegate to the LLM and how much you want to handle with customized code.\n", + "\n", + "RAG (Retrieval-Augmented Generation) is an AI framework that combines the strengths of traditional information retrieval systems (such as databases) with the capabilities of generative large language models (LLMs). 
\n", + "\n", + "[LangGraph](https://langchain-ai.github.io/langgraph/) is a library for building stateful, multi-actor applications with LLMs, used to create agent and multi-agent workflows.\n", + "\n", + "This notebook demonstrates how to build, deploy, and test a LangGraph + RAG application using [Reasoning Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/reasoning-engine/overview) in Vertex AI.\n", + "\n", + "\n", + "## Context\n", + "\n", + "In previous tutorials:\n", + "* [LangGraph application with Reasoning Engine](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_langgraph.ipynb?)\n", + "You have learned how to combine LangGraph's workflow orchestration with the scalability of Vertex AI, which enables you to build custom generative AI applications.\n", + "* [RAG application with Reasoning Engine and Cloud SQL for PostgreSQL](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_cloud_sql_pg_rag_agent.ipynb)\n", + "By combining this extra knowledge with its own language skills, the AI can write text that is more accurate, up-to-date, and relevant to your specific needs.\n", + "Your [LangChain](https://python.langchain.com/docs/get_started/introduction) agent uses an [Postgres Vector Store](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/tree/main) to perform a similary search and retrieve related data to ground the LLM response.\n", + "\n", + "## Objectives\n", + "\n", + "In this tutorial, you will learn how to build and deploy an agent (model, tools, and reasoning) using the Vertex AI SDK for Python and Cloud SQL for PostgreSQL LangGraph integration.\n", + "\n", + "We're using the `Multi Agent Collaboration` [approach](https://blog.langchain.dev/langgraph-multi-agent-workflows/). 
\n", + "This sample notebook could be adapted to use other multi-agent implementations described in the [link](https://blog.langchain.dev/langgraph-multi-agent-workflows/), such as the `Agent Supervisor` or other approaches. \n", + "\n", + "You will develop a LangGraph Application like: \"Image \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QL58mPu9Hw7g" + }, + "source": [ + "## Before you begin\n", + "\n", + "1. In the Google Cloud console, on the project selector page, select or [create a Google Cloud project](https://cloud.google.com/resource-manager/docs/creating-managing-projects).\n", + "2. [Make sure that billing is enabled for your Google Cloud project](https://cloud.google.com/billing/docs/how-to/verify-billing-enabled#console).\n", + "3. Follow the instructions in [RAG application with Reasoning Engine and Cloud SQL for PostgreSQL](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_cloud_sql_pg_rag_agent.ipynb) to set up Cloud SQL for PostgreSQL.\n", + "\n", + "### Required roles\n", + "\n", + "To get the permissions that you need to complete the tutorial, ask your administrator to grant you the [Owner](https://cloud.google.com/iam/docs/understanding-roles#owner) (`roles/owner`) IAM role on your project. 
For more information about granting roles, see [Manage access](https://cloud.google.com/iam/docs/granting-changing-revoking-access).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-RYpMytsZ882" + }, + "source": [ + "### Install and import dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w_94DKOCX5pG" + }, + "outputs": [], + "source": [ + "%pip install --upgrade --user --quiet \\\n", + " \"google-cloud-aiplatform[reasoningengine,langchain]\"==1.60.0 \\\n", + " langchain-google-cloud-sql-pg==0.6.1 \\\n", + " cloud-sql-python-connector==1.9.0 \\\n", + " langchain-google-vertexai==1.0.4 \\\n", + " cloudpickle==3.0.0 \\\n", + " pydantic==2.7.4 \\\n", + " langgraph==0.0.51 \\\n", + " httpx==0.27.2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R5Xep4W9lq-Z" + }, + "source": [ + "### Restart runtime\n", + "\n", + "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.\n", + "\n", + "The restart might take a minute or longer. After it's restarted, continue to the next step." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XRvKdaPDTznN" + }, + "outputs": [], + "source": [ + "import IPython\n", + "\n", + "app = IPython.Application.instance()\n", + "app.kernel.do_shutdown(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bThFamq9351N" + }, + "source": [ + "### Import libraries\n", + "\n", + "Import the necessary Python libraries. These libraries provide the tools we need to interact with LangGraph, Vertex AI, and other components of our application." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "KNt0YeKaMz_Q" + }, + "outputs": [], + "source": [ + "import json\n", + "from typing import Literal\n", + "import uuid\n", + "\n", + "from google.cloud import storage\n", + "from langchain_core.documents import Document\n", + "from langchain_core.messages import BaseMessage, HumanMessage\n", + "from langchain_core.tools import tool\n", + "from langchain_google_cloud_sql_pg import PostgresEngine, PostgresVectorStore\n", + "from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings\n", + "from langgraph.graph import END, MessageGraph\n", + "from langgraph.prebuilt import ToolNode\n", + "import vertexai\n", + "from vertexai.preview import reasoning_engines" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yPKXjZrFZuUZ" + }, + "source": [ + "### Authenticate to Google Cloud\n", + "\n", + "Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "NyKGtVQjgx13" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "if \"google.colab\" in sys.modules:\n", + " from google.colab import auth\n", + "\n", + " auth.authenticate_user()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9aGBuLA7aQ6O" + }, + "source": [ + "### Define project information\n", + "\n", + "Initialize `gcloud` with your Project ID and resource location. At this time, only `us-central1` is supported." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DF4l8DTdWgPY" + }, + "source": [ + "### Set Google Cloud project information and initialize Vertex AI SDK\n", + "\n", + "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n", + "\n", + "Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Nqwi-5ufWp_B" + }, + "outputs": [], + "source": [ + "PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}\n", + "LOCATION = \"us-central1\" # @param {type:\"string\"}\n", + "STAGING_BUCKET = \"gs://[your-staging-bucket]\" # @param {type:\"string\"}\n", + "\n", + "vertexai.init(project=PROJECT_ID, location=LOCATION, staging_bucket=STAGING_BUCKET)\n", + "!gcloud config set project {PROJECT_ID}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S_yG0kddIvr7" + }, + "source": [ + "## Set up Cloud SQL\n", + "\n", + "You should have already set up Cloud SQL in [RAG application with Reasoning Engine and Cloud SQL for PostgreSQL](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_cloud_sql_pg_rag_agent.ipynb):\n", + "* Enable APIs.\n", + "* Create a Cloud SQL instance.\n", + "* Create a database.\n", + "* Initialize multiple vector store tables.\n", + "* Create a user.\n", + "\n", + "\n", + "In this Colab, we will create two new vector store tables: Book and Movie.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XtiB5-LVVkv0" + }, + "outputs": [], + "source": [ + "REGION = \"us-central1\" # @param {type:\"string\"}\n", + "INSTANCE = \"langgraph-rag-instance\" # @param {type:\"string\"}\n", + "DATABASE = \"harry_potter_data\" # @param {type:\"string\"}\n", 
+ "MOVIE_TABLE_NAME = \"my-movie\" # @param {type:\"string\"}\n", + "BOOK_TABLE_NAME = \"my-book\" # @param {type:\"string\"}\n", + "PASSWORD = input(\"Please provide a password to be used for 'postgres' database user: \")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dqDjyLpS5zCm" + }, + "source": [ + "### Grant access to vector store table to IAM users" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "24NjnjF95ySA" + }, + "outputs": [], + "source": [ + "engine = await PostgresEngine.afrom_instance(\n", + " PROJECT_ID, REGION, INSTANCE, DATABASE, user=\"postgres\", password=PASSWORD\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OaP1LRhPi0y7" + }, + "source": [ + "### Initialize multiple vector store tables\n", + "\n", + "The `PostgresEngine` has a helper method `init_vectorstore_table()` that can be used to create a table with the proper schema to store vector embeddings." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "GGd89YWIi2qg" + }, + "outputs": [], + "source": [ + "for table_name in [MOVIE_TABLE_NAME, BOOK_TABLE_NAME]:\n", + " engine = await PostgresEngine.afrom_instance(\n", + " PROJECT_ID, REGION, INSTANCE, DATABASE, user=\"postgres\", password=PASSWORD\n", + " )\n", + "\n", + " await engine.ainit_vectorstore_table(\n", + " table_name=table_name,\n", + " vector_size=768, # Vector size for VertexAI model(textembedding-gecko@latest)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sQ1MI8ARi5Rr" + }, + "source": [ + "### Add embeddings to the vector store\n", + "\n", + "Load data from a CSV file to generate and insert embeddings to the vector store.\n", + "\n", + "We will use two datasets:\n", + "\n", + "* Harry Potter Movie\n", + " - Intro: https://www.kaggle.com/datasets/rounakbanik/the-movies-dataset\n", + " - Data: 
gs://github-repo/generative-ai/gemini/reasoning-engine/sample_data/harry_potter_movies.json\n", + "* Harry Potter Book\n", + " - Intro: https://www.kaggle.com/datasets/shubhammaindola/harry-potter-books\n", + " - Data: gs://github-repo/generative-ai/gemini/reasoning-engine/sample_data/harry_potter_books.json\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "Kcawj2ILdNmN" + }, + "outputs": [], + "source": [ + "def get_docs_from_gcs(bucket_name, gcs_dir, blob_name):\n", + " \"\"\"Fetches a JSON file from GCS, deserializes it, and returns the data.\n", + "\n", + " Args:\n", + " bucket_name: Name of the GCS bucket (e.g., 'my-bucket').\n", + " gcs_dir: Directory within the bucket where the JSON file is located.\n", + " blob_name: Path and filename within the bucket\n", + " (e.g., 'my_data.json').\n", + "\n", + " Returns:\n", + " A Python object representing the Document, or None if the file\n", + " is not found or an error occurs.\n", + " \"\"\"\n", + "\n", + " storage_client = storage.Client()\n", + " bucket = storage_client.bucket(bucket_name)\n", + " blob = bucket.blob(f\"{gcs_dir}/{blob_name}\")\n", + "\n", + " if not blob.exists():\n", + " print(f\"File not found: gs://{bucket_name}/{gcs_dir}/{blob_name}\")\n", + " return None\n", + "\n", + " try:\n", + " with blob.open(\"r\") as f:\n", + " json_docs = json.loads(f.read())\n", + " except json.JSONDecodeError:\n", + " print(f\"Error: Invalid JSON format in gs://{bucket_name}/{gcs_dir}/{blob_name}\")\n", + " return None\n", + "\n", + " docs = []\n", + " for json_doc in json_docs:\n", + " docs.append(Document(**(json_doc[\"kwargs\"])))\n", + "\n", + " return docs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pg5k6FyykfzW" + }, + "source": [ + "#### Movies" + ] + }, { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dkMjEXEmi4ro" }, - "text/plain": 
[ - "'```json\n", - "{\n", - " \"book\": \"Harry Potter and the Chamber of Secrets\"\n", - "}\n", - "```'" + "outputs": [], + "source": [ + "# Initialize the vector store for movies\n", + "vector_store = await PostgresVectorStore.create(\n", + " engine,\n", + " table_name=MOVIE_TABLE_NAME,\n", + " embedding_service=VertexAIEmbeddings(\n", + " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n", + " ),\n", + ")\n", + "docs = get_docs_from_gcs(\n", + " \"github-repo\",\n", + " \"generative-ai/gemini/reasoning-engine/sample_data\",\n", + " \"harry_potter_movies.json\",\n", + ")\n", + "# Add data to the vector store\n", + "ids = [str(uuid.uuid4()) for i in range(len(docs))]\n", + "await vector_store.aadd_documents(docs, ids=ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GBdIVxqVkjT-" + }, + "source": [ + "#### Books" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PWAElYbEkyzB" + }, + "outputs": [], + "source": [ + "# Initialize the vector store for books\n", + "vector_store = await PostgresVectorStore.create(\n", + " engine,\n", + " table_name=BOOK_TABLE_NAME,\n", + " embedding_service=VertexAIEmbeddings(\n", + " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n", + " ),\n", + ")\n", + "docs = get_docs_from_gcs(\n", + " \"github-repo\",\n", + " \"generative-ai/gemini/reasoning-engine/sample_data\",\n", + " \"harry_potter_books.json\",\n", + ")\n", + "# Add data to the vector store\n", + "ids = [str(uuid.uuid4()) for i in range(len(docs))]\n", + "await vector_store.aadd_documents(docs, ids=ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XCra5kJVKyg5" + }, + "source": [ + "## Define the retriever tool\n", + "\n", + "Tools are interfaces that an agent, chain, or LLM can use to enable the Gemini model to interact with external systems, databases, document stores, and other APIs so that the model can get the most up-to-date information or take action with those 
systems.\n", + "\n", + "In this example, you'll define a function that will retrieve similar documents from the vector store using semantic search." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "vLx7O_UdqDyr" + }, + "outputs": [], + "source": [ + "@tool\n", + "def movie_similarity_search(query: str) -> str:\n", + " \"\"\"\n", + " Perform a similarity search for movies based on the user's last message.\n", + "\n", + " Args:\n", + " query str: The current conversation state, where the last message contains the query.\n", + "\n", + " Returns:\n", + " str: A list of BaseMessage containing the search results.\n", + " \"\"\"\n", + " engine = PostgresEngine.from_instance(\n", + " PROJECT_ID,\n", + " REGION,\n", + " INSTANCE,\n", + " DATABASE,\n", + " quota_project=PROJECT_ID,\n", + " user=\"postgres\",\n", + " password=PASSWORD,\n", + " )\n", + "\n", + " vector_store = PostgresVectorStore.create_sync(\n", + " engine,\n", + " table_name=MOVIE_TABLE_NAME,\n", + " embedding_service=VertexAIEmbeddings(\n", + " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n", + " ),\n", + " )\n", + " retriever = vector_store.as_retriever()\n", + " return str([doc for doc in retriever.invoke(query)])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "XVjf2fT_bIGa" + }, + "outputs": [], + "source": [ + "@tool\n", + "def book_similarity_search(query: str) -> str:\n", + " \"\"\"\n", + " Perform a similarity search for books based on the user's last message.\n", + "\n", + " Args:\n", + " state (List[BaseMessage]): The current conversation state, where the last message contains the query.\n", + "\n", + " Returns:\n", + " List[BaseMessage]: A list of BaseMessage containing the search results.\n", + " \"\"\"\n", + " engine = PostgresEngine.from_instance(\n", + " PROJECT_ID,\n", + " REGION,\n", + " INSTANCE,\n", + " DATABASE,\n", + " quota_project=PROJECT_ID,\n", + " # Uncomment to use built-in authentication 
instead of IAM authentication\n", + " user=\"postgres\",\n", + " password=PASSWORD,\n", + " )\n", + "\n", + " vector_store = PostgresVectorStore.create_sync(\n", + " engine,\n", + " table_name=BOOK_TABLE_NAME,\n", + " embedding_service=VertexAIEmbeddings(\n", + " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n", + " ),\n", + " )\n", + " retriever = vector_store.as_retriever()\n", + " return str([doc for doc in retriever.invoke(query)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BF8xqd84351O" + }, + "source": [ + "### Define router\n", + "\n", + "We're using the `Multi Agent Collaboration` [approach](https://blog.langchain.dev/langgraph-multi-agent-workflows/). \n", + "This sample notebook could be adapted to use other multi-agent implementations described in the [link](https://blog.langchain.dev/langgraph-multi-agent-workflows/), such as the `Agent Supervisor` or other approaches. \n", + "\n", + "Then, you'll define a router to control the flow of the conversation, determining which tool to use based on user input or the state of the interaction. Here we'll use a simple router setup, and you can customize the behavior of your router to handle multiple tools, custom logic, or multi-agent workflows.\n", + "\n", + "In this example, the router will invoke different nodes in the graph based on whether the user prompt contains the word 'book' or 'movie'." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "id": "7m9XRSn3351O" + }, + "outputs": [], + "source": [ + "def router(\n", + " state: list[BaseMessage],\n", + ") -> Literal[\"book_similarity_search\", \"movie_similarity_search\", \"__end__\"]:\n", + " if not state[0].content or len(state[1].tool_calls) == 0:\n", + " return \"__end__\"\n", + " if \"book\" in state[0].content:\n", + " return \"book_similarity_search\"\n", + " if \"movie\" in state[0].content:\n", + " return \"movie_similarity_search\"\n", + " return \"__end__\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CFeOIbed351O" + }, + "source": [ + "## Define LangGraph application\n", + "\n", + "Now you'll bring everything together to define your LangGraph application as a custom template in Reasoning Engine.\n", + "\n", + "This application will use the tool and router that you just defined. LangGraph provides a powerful way to structure these interactions and leverage the capabilities of LLMs." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tZYtR6-zqudb" + }, + "source": [ + "#### Multi stage" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "id": "WWjFaLeW351O" + }, + "outputs": [], + "source": [ + "class MultiStageLangGraphApp:\n", + " def __init__(self, project: str, location: str) -> None:\n", + " self.project_id = project\n", + " self.location = location\n", + "\n", + " # The set_up method is used to define application initialization logic\n", + " def set_up(self) -> None:\n", + " model = ChatVertexAI(model=\"gemini-1.5-pro\")\n", + " builder = MessageGraph()\n", + "\n", + " # Checker node\n", + " def checker(state: list[BaseMessage]):\n", + " if not state[0].content:\n", + " return \"__end__\"\n", + " user_question = state[0].content\n", + " response = model.invoke(\n", + " [\n", + " HumanMessage(\n", + " content=(\n", + " f\"What is the type of the question? 
{user_question}\"\n", + " \"Think step by step, then answer one of the following:\"\n", + " \"* movie\"\n", + " \"* book\"\n", + " \"* no\"\n", + " )\n", + " )\n", + " ]\n", + " )\n", + " table_name = response.content.split(\"\")[1].split(\"\")[0]\n", + " # Multiturn requests alternate between user and model.\n", + " state[0].content = f\"query:{state[0].content},table_name:{table_name}\"\n", + "\n", + " builder.add_node(\"checker\", checker)\n", + " # Set entry point to checker node so it is reachable\n", + " builder.set_entry_point(\"checker\")\n", + "\n", + " # Tool node.\n", + " model_with_tools = model.bind_tools(\n", + " [book_similarity_search, movie_similarity_search]\n", + " )\n", + " builder.add_node(\"tools\", model_with_tools)\n", + " # Add edge from tools to checker so the flow is checker->tools->router...\n", + " builder.add_edge(\"checker\", \"tools\")\n", + "\n", + " # Summerize node.\n", + " # node\n", + " def summerizar(state: list[BaseMessage]):\n", + " question = state[0].content\n", + " related_docs = state[-1].content\n", + " response = model.invoke(\n", + " [\n", + " HumanMessage(\n", + " content=(\n", + " f\"\"\"\n", + " Use the docs: {related_docs} to answer question:{question}.\n", + " The answer format should be json dict.\n", + " \"\"\"\n", + " )\n", + " )\n", + " ]\n", + " )\n", + " # Multiturn requests alternate between user and model.\n", + " state.append(response)\n", + "\n", + " builder.add_node(\"summerizar_node\", summerizar)\n", + " builder.add_edge(\"summerizar_node\", END)\n", + " # Book retrieval node\n", + " book_node = ToolNode([book_similarity_search])\n", + " builder.add_node(\"book_similarity_search\", book_node)\n", + " builder.add_edge(\"book_similarity_search\", \"summerizar_node\")\n", + "\n", + " # Movie retrieval node\n", + " movie_node = ToolNode([movie_similarity_search])\n", + " builder.add_node(\"movie_similarity_search\", movie_node)\n", + " builder.add_edge(\"movie_similarity_search\", \"summerizar_node\")\n", 
+ "\n", + " # Router to check condition.\n", + " builder.add_conditional_edges(\"tools\", router)\n", + "\n", + " self.runnable = builder.compile()\n", + "\n", + " # The query method will be used to send inputs to the agent\n", + " def query(self, message: str):\n", + " \"\"\"Query the application.\n", + "\n", + " Args:\n", + " message: The user message.\n", + "\n", + " Returns:\n", + " str: The LLM response.\n", + " \"\"\"\n", + " chat_history = self.runnable.invoke(HumanMessage(message))\n", + "\n", + " return chat_history[-1].content" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sEfQYtgSm9ol" + }, + "source": [ + "### Local test" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "id": "IcWux9IskE-c" + }, + "outputs": [], + "source": [ + "agent = MultiStageLangGraphApp(project=PROJECT_ID, location=LOCATION)\n", + "agent.set_up()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0aaf11c1677a" + }, + "source": [ + "Expect a JSON format answer like \n", + "```json\n", + "{\"company\": [\"Warner Bros.\", \"Heyday Films\"]}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "J9yUujSokJpQ" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "```json\n", + "{\n", + " 'answer': 'Warner Bros and Heyday Films produce Harry Potter and the Deathly Hallows: Part 2.'\n", + "}\n", + "```" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.query(message=\"Which company produces and distributes Harry Potter films\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "077f1396f641" + }, + "source": [ + "Expect a JSON format answer like \n", + "```json\n", + "{\n", + " \"answer\": [\"Daniel Radcliffe\", \"Darren Criss\"]\n", + "}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "id": "Vn1wBUEyLGSG" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "```json\n", + "[\n", + " {\n", + " \"actor\": \"Darren Criss\",\n", + " \"movie\": \"A Very Potter Musical\"\n", + " },\n", + " {\n", + " \"actor\": \"Daniel Radcliffe\",\n", + " \"movie\": \"Harry Potter and the Deathly Hallows: Part 2\"\n", + " }\n", + "]\n", + "```\n" + ] + } + ], + "source": [ + "agent.query(message=\"Who acts as Harry Potter\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "df468170bc6d" + }, + "source": [ + "Expect a JSON format answer like \n", + "```json\n", + "{\n", + " \"answer\": \"Harry Potter and the Chamber of Secrets.\"\n", + "}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EPGRJjdEb228" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'```json\n", + "{\n", + " \"book\": \"Harry Potter and the Chamber of Secrets\"\n", + "}\n", + "```'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.query(message=\"In which book Harry Potter drives car\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EdvJRUWRNGHE" + }, + "source": [ + "## Building and deploying a LangGraph app on Reasoning Engine\n", + "\n", + "In the following sections, we'll walk through the process of building and deploying a LangGraph application using Reasoning Engine in Vertex AI." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ERxxgFTcI3DC" + }, + "source": [ + "## Deploy the service\n", + "\n", + "Now that you've specified a model, tools, and reasoning for your agent and tested it out, you're ready to deploy your agent as a remote service in Vertex AI!\n", + "\n", + "Here, you'll use the LangChain agent template provided in the Vertex AI SDK for Reasoning Engine, which brings together the model, tools, and reasoning that you've built up so far." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k2nGSr2_JWcc" + }, + "outputs": [], + "source": [ + "remote_app = reasoning_engines.ReasoningEngine.create(\n", + " MultiStageLangGraphApp(project=PROJECT_ID, location=LOCATION),\n", + " requirements=[\n", + " \"google-cloud-aiplatform[reasoningengine,langchain]==1.60.0\",\n", + " \"langchain-google-cloud-sql-pg==0.6.1\",\n", + " \"cloud-sql-python-connector==1.9.0\",\n", + " \"langchain-google-vertexai==1.0.4\",\n", + " \"cloudpickle==3.0.0\",\n", + " \"pydantic==2.7.4\",\n", + " \"langgraph==0.0.51\",\n", + " \"httpx==0.27.2\",\n", + " ],\n", + " display_name=\"Reasoning Engine with LangGraph Rag Agent\",\n", + " description=\"This is a sample custom application in Reasoning Engine that uses LangGraph and sql pg rag\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TYqMpB16I4iH" + }, + "source": [ + "## Try it out\n", + "\n", + "Query the remote app directly or retrieve the application endpoint via the resource ID or display name. The endpoint can be used from any Python environment." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5A-5oNnQjePC" + }, + "source": [ + "### Ask question that can only be answered by the movie." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P9-7ZcQugWkJ" + }, + "outputs": [], + "source": [ + "response = remote_app.query(message=\"Who acts as Harry Potter\")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UuACdm6zgaLm" + }, + "outputs": [], + "source": [ + "response = remote_app.query(\n", + " message=\"Which company produces and distributes Harry Potter film\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oXVavFQfi9Hz" + }, + "source": [ + "### Ask question that can only be answered by the book." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NxiCmdMnalY-" + }, + "outputs": [], + "source": [ + "response = remote_app.query(message=\"In which book Harry Potter drives car\")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MrZ9IjnAI5v9" + }, + "source": [ + "## Clean up\n", + "\n", + "If you created a new project for this tutorial, delete the project. If you used an existing project and wish to keep it without the changes added in this tutorial, delete resources created for the tutorial." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tBc48ZHOJS6J" + }, + "source": [ + "### Deleting the project\n", + "\n", + "The easiest way to eliminate billing is to delete the project that you created for the tutorial.\n", + "\n", + "1. In the Google Cloud console, go to the [Manage resources](https://console.cloud.google.com/iam-admin/projects?_ga=2.235586881.1783688455.1719351858-1945987529.1719351858) page.\n", + "1. In the project list, select the project that you want to delete, and then click Delete.\n", + "1. 
In the dialog, type the project ID, and then click Shut down to delete the project.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ed-BFtW-JPbI" + }, + "source": [ + "### Deleting tutorial resources\n", + "\n", + "Delete the reasoning engine instance(s) and Cloud SQL instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LgNlHrxkb6c-" + }, + "outputs": [], + "source": [ + "# Delete the ReasoningEngine instance\n", + "remote_app.delete()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "goyrqS2_I8Hs" + }, + "outputs": [], + "source": [ + "# Or delete all Reasoning Engine apps\n", + "apps = reasoning_engines.ReasoningEngine.list()\n", + "for app in apps:\n", + " app.delete()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "odvj8aKpb3Wi" + }, + "outputs": [], + "source": [ + "# Delete the Cloud SQL instance\n", + "!gcloud sql instances delete {INSTANCE} \\\n", + " --project={PROJECT_ID}" ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" } - ], - "source": [ - "agent.query(message=\"In which book Harry Potter drives car\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EdvJRUWRNGHE" - }, - "source": [ - "## Building and deploying a LangGraph app on Reasoning Engine\n", - "\n", - "In the following sections, we'll walk through the process of building and deploying a LangGraph application using Reasoning Engine in Vertex AI." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ERxxgFTcI3DC" - }, - "source": [ - "## Deploy the service\n", - "\n", - "Now that you've specified a model, tools, and reasoning for your agent and tested it out, you're ready to deploy your agent as a remote service in Vertex AI!\n", - "\n", - "Here, you'll use the LangChain agent template provided in the Vertex AI SDK for Reasoning Engine, which brings together the model, tools, and reasoning that you've built up so far." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "k2nGSr2_JWcc" - }, - "outputs": [], - "source": [ - "remote_app = reasoning_engines.ReasoningEngine.create(\n", - " MultiStageLangGraphApp(project=PROJECT_ID, location=LOCATION),\n", - " requirements=[\n", - " \"google-cloud-aiplatform[reasoningengine,langchain]==1.60.0\",\n", - " \"langchain-google-cloud-sql-pg==0.6.1\",\n", - " \"cloud-sql-python-connector==1.9.0\",\n", - " \"langchain-google-vertexai==1.0.4\",\n", - " \"cloudpickle==3.0.0\",\n", - " \"pydantic==2.7.4\",\n", - " \"langgraph==0.0.51\",\n", - " \"httpx==0.27.2\",\n", - " ],\n", - " display_name=\"Reasoning Engine with LangGraph Rag Agent\",\n", - " description=\"This is a sample custom application in Reasoning Engine that uses LangGraph and sql pg rag\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TYqMpB16I4iH" - }, - "source": [ - "## Try it out\n", - "\n", - "Query the remote app directly or retrieve the application endpoint via the resource ID or display name. The endpoint can be used from any Python environment." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5A-5oNnQjePC" - }, - "source": [ - "### Ask question that can only be answered by the movie." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "P9-7ZcQugWkJ" - }, - "outputs": [], - "source": [ - "response = remote_app.query(message=\"Who acts as Harry Potter\")\n", - "print(response)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UuACdm6zgaLm" - }, - "outputs": [], - "source": [ - "response = remote_app.query(\n", - " message=\"Which company produces and distributes Harry Potter film\"\n", - ")\n", - "print(response)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oXVavFQfi9Hz" - }, - "source": [ - "### Ask question that can only be answered by the book." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NxiCmdMnalY-" - }, - "outputs": [], - "source": [ - "response = remote_app.query(message=\"In which book Harry Potter drives car\")\n", - "print(response)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MrZ9IjnAI5v9" - }, - "source": [ - "## Clean up\n", - "\n", - "If you created a new project for this tutorial, delete the project. If you used an existing project and wish to keep it without the changes added in this tutorial, delete resources created for the tutorial." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tBc48ZHOJS6J" - }, - "source": [ - "### Deleting the project\n", - "\n", - "The easiest way to eliminate billing is to delete the project that you created for the tutorial.\n", - "\n", - "1. In the Google Cloud console, go to the [Manage resources](https://console.cloud.google.com/iam-admin/projects?_ga=2.235586881.1783688455.1719351858-1945987529.1719351858) page.\n", - "1. In the project list, select the project that you want to delete, and then click Delete.\n", - "1. 
In the dialog, type the project ID, and then click Shut down to delete the project.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ed-BFtW-JPbI" - }, - "source": [ - "### Deleting tutorial resources\n", - "\n", - "Delete the reasoning engine instance(s) and Cloud SQL instance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "LgNlHrxkb6c-" - }, - "outputs": [], - "source": [ - "# Delete the ReasoningEngine instance\n", - "remote_app.delete()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "goyrqS2_I8Hs" - }, - "outputs": [], - "source": [ - "# Or delete all Reasoning Engine apps\n", - "apps = reasoning_engines.ReasoningEngine.list()\n", - "for app in apps:\n", - " app.delete()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "odvj8aKpb3Wi" - }, - "outputs": [], - "source": [ - "# Delete the Cloud SQL instance\n", - "!gcloud sql instances delete {INSTANCE} \\\n", - " --project={PROJECT_ID}" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "-RYpMytsZ882", - "R5Xep4W9lq-Z", - "OaP1LRhPi0y7", - "GBdIVxqVkjT-" - ], - "name": "tutorial_langgraph_rag_agent.ipynb", - "toc_visible": true + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "-RYpMytsZ882", + "R5Xep4W9lq-Z", + "OaP1LRhPi0y7", + "GBdIVxqVkjT-" + ], + "name": "tutorial_langgraph_rag_agent.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/gemini/tuning/gen_ai_sdk_supervised_finetuning_using_gemini_on_multiple_images.ipynb b/gemini/tuning/gen_ai_sdk_supervised_finetuning_using_gemini_on_multiple_images.ipynb index 2d9bbbcc546..2d8823393b0 100644 --- 
a/gemini/tuning/gen_ai_sdk_supervised_finetuning_using_gemini_on_multiple_images.ipynb +++ b/gemini/tuning/gen_ai_sdk_supervised_finetuning_using_gemini_on_multiple_images.ipynb @@ -33,22 +33,22 @@ "\n", "\n", " \n", " \n", " \n", " \n", @@ -58,23 +58,23 @@ "\n", "Share to:\n", "\n", - "\n", + "\n", " \"LinkedIn\n", "\n", "\n", - "\n", + "\n", " \"Bluesky\n", "\n", "\n", - "\n", + "\n", " \"X\n", "\n", "\n", - "\n", + "\n", " \"Reddit\n", "\n", "\n", - "\n", + "\n", " \"Facebook\n", "" ] @@ -236,17 +236,17 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "Nqwi-5ufWp_B", - "cellView": "code" + "cellView": "code", + "id": "Nqwi-5ufWp_B" }, "outputs": [], "source": [ "# Use the environment variable if the user doesn't provide Project ID.\n", "import os\n", "\n", - "import vertexai\n", "from google import genai\n", "from google.genai import types\n", + "import vertexai\n", "\n", "PROJECT_ID = \"[your-project-id]\" # @param {type: \"string\", placeholder: \"[your-project-id]\", isTemplate: true}\n", "if not PROJECT_ID or PROJECT_ID == \"[your-project-id]\":\n", @@ -259,9 +259,7 @@ "\n", "vertexai.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)\n", "\n", - "client = genai.Client(\n", - " vertexai=True, project=PROJECT_ID, location=REGION\n", - ")" + "client = genai.Client(vertexai=True, project=PROJECT_ID, location=REGION)" ] }, { @@ -318,8 +316,7 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", - "# For model fine tuning.\n", - "from vertexai.preview.tuning import sft" + "# For model fine tuning." 
] }, { @@ -658,24 +655,29 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Sr8r90AAiyoB" + }, + "outputs": [], "source": [ - "train_dataset=str(Path(INPUT_DATA_URI) / \"prepared_train.jsonl\")\n", - "validation_dataset=str(Path(INPUT_DATA_URI) / \"prepared_val.jsonl\")\n", + "train_dataset = str(Path(INPUT_DATA_URI) / \"prepared_train.jsonl\")\n", + "validation_dataset = str(Path(INPUT_DATA_URI) / \"prepared_val.jsonl\")\n", "\n", - "training_dataset= {\n", - " 'gcs_uri': train_dataset,\n", + "training_dataset = {\n", + " \"gcs_uri\": train_dataset,\n", "}\n", "\n", "validation_dataset = types.TuningValidationDataset(gcs_uri=validation_dataset)" - ], - "metadata": { - "id": "Sr8r90AAiyoB" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GToiYxswipZA" + }, + "outputs": [], "source": [ "tuned_model_display_name = \"spot-the-difference-tuning-job\" # @param {type:\"string\"}\n", "\n", @@ -683,39 +685,34 @@ " base_model=MODEL_ID,\n", " training_dataset=training_dataset,\n", " config=types.CreateTuningJobConfig(\n", - " adapter_size = 'ADAPTER_SIZE_EIGHT',\n", - " epoch_count = 1, # set to one to keep time and cost low\n", + " adapter_size=\"ADAPTER_SIZE_EIGHT\",\n", + " epoch_count=1, # set to one to keep time and cost low\n", " tuned_model_display_name=tuned_model_display_name,\n", - ")\n", + " ),\n", ")\n", "sft_tuning_job" - ], - "metadata": { - "id": "GToiYxswipZA" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "⚠️ It will take ~30 mins for the model tuning job to complete on the provided dataset and set configurations/hyperparameters. ⚠️" - ], "metadata": { "id": "HF3HVA8GTh8N" - } + }, + "source": [ + "⚠️ It will take ~30 mins for the model tuning job to complete on the provided dataset and set configurations/hyperparameters. 
⚠️" + ] }, { "cell_type": "code", - "source": [ - "tuning_job = client.tunings.get(name=sft_tuning_job.name)\n", - "tuning_job" - ], + "execution_count": null, "metadata": { "id": "0bGX6OjmitaR" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "tuning_job = client.tunings.get(name=sft_tuning_job.name)\n", + "tuning_job" + ] }, { "cell_type": "markdown", @@ -730,24 +727,24 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sqaAHUmufq8-" + }, + "outputs": [], "source": [ "import time\n", "\n", - "running_states = set([\n", + "running_states = {\n", " \"JOB_STATE_PENDING\",\n", " \"JOB_STATE_RUNNING\",\n", - "])\n", + "}\n", "\n", "while sft_tuning_job.state in running_states:\n", " print(sft_tuning_job.state)\n", " tuning_job = client.tunings.get(name=sft_tuning_job.name)\n", " time.sleep(10)" - ], - "metadata": { - "id": "sqaAHUmufq8-" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -764,16 +761,12 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "Up7_r1lWLo6i", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "45b2f2c3-86c0-4d4f-e57b-86f0b5714a70" + "id": "Up7_r1lWLo6i" }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Tuned model experiment None\n", "Tuned model endpoint resource name: projects/801452371447/locations/us-central1/endpoints/2471008347403321344\n" @@ -860,23 +853,18 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "1UiaDg_3ZKuN", - "outputId": "75216560-945a-4b37-f570-2e746e26691f", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 626 - } + "id": "1UiaDg_3ZKuN" }, "outputs": [ { - "output_type": "display_data", "data": { + "image/png": "\n", "text/plain": [ "
" - ], - "image/png": "\n" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -904,24 +892,20 @@ "source": [ "tuned_model = tuning_job.tuned_model.endpoint\n", "\n", - "contents=[\n", - " 'Image 1:',\n", - " types.Part.from_uri(\n", - " file_uri=str(input_image_one_uri),\n", - " mime_type=\"image/jpeg\"),\n", - " 'Image 2:',\n", - " types.Part.from_uri(\n", - " file_uri=str(input_image_two_uri),\n", - " mime_type=\"image/jpeg\"),\n", + "contents = [\n", + " \"Image 1:\",\n", + " types.Part.from_uri(file_uri=str(input_image_one_uri), mime_type=\"image/jpeg\"),\n", + " \"Image 2:\",\n", + " types.Part.from_uri(file_uri=str(input_image_two_uri), mime_type=\"image/jpeg\"),\n", "]\n", "\n", "response = client.models.generate_content(\n", - " model = tuned_model,\n", - " contents = contents,\n", - " config={\n", - " 'temperature': 0,\n", + " model=tuned_model,\n", + " contents=contents,\n", + " config={\n", + " \"temperature\": 0,\n", " },\n", - " )\n", + ")\n", "\n", "response" ] @@ -952,7 +936,8 @@ ], "metadata": { "colab": { - "provenance": [] + "name": "gen_ai_sdk_supervised_finetuning_using_gemini_on_multiple_images.ipynb", + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", @@ -961,4 +946,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/gemini/tuning/gen_ai_sdk_supervised_finetuning_using_gemini_qa.ipynb b/gemini/tuning/gen_ai_sdk_supervised_finetuning_using_gemini_qa.ipynb index dc750f0cb5b..f4121513139 100644 --- a/gemini/tuning/gen_ai_sdk_supervised_finetuning_using_gemini_qa.ipynb +++ b/gemini/tuning/gen_ai_sdk_supervised_finetuning_using_gemini_qa.ipynb @@ -33,22 +33,22 @@ "\n", "
\n", - " \n", + " \n", " \"Google
Open in Colab\n", "
\n", "
\n", - " \n", + " \n", " \"Google
Open in Colab Enterprise\n", "
\n", "
\n", - " \n", + " \n", " \"Vertex
Open in Vertex AI Workbench\n", "
\n", "
\n", - " \n", + " \n", " \"GitHub
View on GitHub\n", "
\n", "
\n", " \n", " \n", " \n", " \n", @@ -58,23 +58,23 @@ "\n", "Share to:\n", "\n", - "\n", + "\n", " \"LinkedIn\n", "\n", "\n", - "\n", + "\n", " \"Bluesky\n", "\n", "\n", - "\n", + "\n", " \"X\n", "\n", "\n", - "\n", + "\n", " \"Reddit\n", "\n", "\n", - "\n", + "\n", " \"Facebook\n", " " ] @@ -160,8 +160,7 @@ "source": [ "### Install the Google GenAI SDK and other required packages\n", "\n", - "The new Google Gen AI SDK provides a unified interface to Gemini through both the Gemini Developer API and the Gemini API on Vertex AI. With a few exceptions, code that runs on one platform will run on both. This means that you can prototype an application using the Developer API and then migrate the application to Vertex AI without rewriting your code.\n", - "\n" + "The new Google Gen AI SDK provides a unified interface to Gemini through both the Gemini Developer API and the Gemini API on Vertex AI. With a few exceptions, code that runs on one platform will run on both. This means that you can prototype an application using the Developer API and then migrate the application to Vertex AI without rewriting your code.\n" ] }, { @@ -257,8 +256,8 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "Nqwi-5ufWp_B", - "cellView": "code" + "cellView": "code", + "id": "Nqwi-5ufWp_B" }, "outputs": [], "source": [ @@ -274,9 +273,7 @@ "\n", "LOCATION = os.environ.get(\"GOOGLE_CLOUD_REGION\", \"us-central1\")\n", "\n", - "client = genai.Client(\n", - " vertexai=True, project=PROJECT_ID, location=LOCATION\n", - ")" + "client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)" ] }, { @@ -298,7 +295,6 @@ "source": [ "from collections import Counter\n", "import json\n", - "import time\n", "import random\n", "\n", "# Vertex AI SDK\n", @@ -309,9 +305,8 @@ "import pandas as pd\n", "import plotly.graph_objects as go\n", "from plotly.subplots import make_subplots\n", - "from IPython.display import Markdown, display\n", - "\n", "import vertexai\n", + "\n", 
"vertexai.init(project=PROJECT_ID, location=LOCATION)\n", "\n", "from google.cloud import aiplatform\n", @@ -612,43 +607,43 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "udTxzY8mpGYf" + }, + "outputs": [], "source": [ "def get_predictions(question: str, model_version: str) -> str:\n", "\n", - " prompt = question\n", - " base_model = model_version\n", + " prompt = question\n", + " base_model = model_version\n", "\n", - " response = client.models.generate_content(\n", - " model = base_model,\n", - " contents = prompt,\n", - " config={\n", - " 'system_instruction': systemInstruct,\n", - " 'temperature': 0.3,\n", - " },\n", - " )\n", + " response = client.models.generate_content(\n", + " model=base_model,\n", + " contents=prompt,\n", + " config={\n", + " \"system_instruction\": systemInstruct,\n", + " \"temperature\": 0.3,\n", + " },\n", + " )\n", "\n", - " return response.text" - ], - "metadata": { - "id": "udTxzY8mpGYf" - }, - "execution_count": null, - "outputs": [] + " return response.text" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PFvwmGll3MIv" + }, + "outputs": [], "source": [ "test_answer = test_df[\"answers\"].iloc[row_dataset]\n", "response = get_predictions(test_question, base_model)\n", "\n", "print(f\"Gemini response: {response}\")\n", "print(f\"Actual answer: {test_answer}\")" - ], - "metadata": { - "id": "PFvwmGll3MIv" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -938,41 +933,41 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gdcy4umfpGZE" + }, + "outputs": [], "source": [ "train_dataset = f\"\"\"{BUCKET_URI}/squad_train.jsonl\"\"\"\n", "validation_dataset = f\"\"\"{BUCKET_URI}/squad_train.jsonl\"\"\"\n", "\n", - "training_dataset= {\n", - " 'gcs_uri': train_dataset,\n", + "training_dataset = {\n", + " \"gcs_uri\": train_dataset,\n", "}\n", "\n", "validation_dataset = 
types.TuningValidationDataset(gcs_uri=validation_dataset)" - ], - "metadata": { - "id": "gdcy4umfpGZE" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NkboVUkoqWSp" + }, + "outputs": [], "source": [ "sft_tuning_job = client.tunings.tune(\n", " base_model=base_model,\n", " training_dataset=training_dataset,\n", " config=types.CreateTuningJobConfig(\n", - " adapter_size = 'ADAPTER_SIZE_EIGHT',\n", - " epoch_count = 1, # set to one to keep time and cost low\n", - " tuned_model_display_name=\"gemini-flash-1.5-qa\"\n", - ")\n", + " adapter_size=\"ADAPTER_SIZE_EIGHT\",\n", + " epoch_count=1, # set to one to keep time and cost low\n", + " tuned_model_display_name=\"gemini-flash-1.5-qa\",\n", + " ),\n", ")\n", "sft_tuning_job" - ], - "metadata": { - "id": "NkboVUkoqWSp" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -996,26 +991,26 @@ }, { "cell_type": "code", - "source": [ - "sft_tuning_job.state" - ], + "execution_count": null, "metadata": { "id": "WECSLyPRth6M" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "sft_tuning_job.state" + ] }, { "cell_type": "code", - "source": [ - "tuning_job = client.tunings.get(name=sft_tuning_job.name)\n", - "tuning_job" - ], + "execution_count": null, "metadata": { "id": "_iwz4lhUDC_f" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "tuning_job = client.tunings.get(name=sft_tuning_job.name)\n", + "tuning_job" + ] }, { "cell_type": "markdown", @@ -1040,15 +1035,15 @@ }, { "cell_type": "code", - "source": [ - "experiment_name = tuning_job.experiment\n", - "experiment_name" - ], + "execution_count": null, "metadata": { "id": "_IoiiRH5Lhpf" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "experiment_name = tuning_job.experiment\n", + "experiment_name" + ] }, { "cell_type": "code", @@ -1125,16 +1120,10 @@ "cell_type": "code", 
"execution_count": null, "metadata": { - "id": "DL07j7u__iZx", - "outputId": "c31ad64a-cf9e-45d7-b625-e4a7dbf49cc9", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 542 - } + "id": "DL07j7u__iZx" }, "outputs": [ { - "output_type": "display_data", "data": { "text/html": [ "\n", @@ -1170,7 +1159,8 @@ "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -1248,14 +1238,14 @@ }, { "cell_type": "code", - "source": [ - "get_predictions(prompt, tuned_model)" - ], + "execution_count": null, "metadata": { "id": "ifhRboiCOBje" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "get_predictions(prompt, tuned_model)" + ] }, { "cell_type": "code", @@ -1283,12 +1273,12 @@ }, { "cell_type": "markdown", - "source": [ - "After running the evaluation you can see that the model generally performs better on our use case after fine-tuning. Of course, depending on things like use case or data quality performance will differ." - ], "metadata": { "id": "kBawjkvKQ_Q-" - } + }, + "source": [ + "After running the evaluation you can see that the model generally performs better on our use case after fine-tuning. Of course, depending on things like use case or data quality performance will differ." + ] }, { "cell_type": "code", @@ -1306,7 +1296,8 @@ ], "metadata": { "colab": { - "provenance": [] + "name": "gen_ai_sdk_supervised_finetuning_using_gemini_qa.ipynb", + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", @@ -1315,4 +1306,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From 96e567face9c0c0599d8ee98821c8d8df236ba26 Mon Sep 17 00:00:00 2001 From: Thomas Bottrill Date: Fri, 13 Dec 2024 16:08:59 +0000 Subject: [PATCH 3/3] fix: pins financial-advisor-spanner itables version (#1519) # Description Fixes itables not loading properly by pinning the version. 
--------- Co-authored-by: Holt Skinner <13262395+holtskinner@users.noreply.github.com> Co-authored-by: code-review-assist[bot] <182814678+code-review-assist[bot]@users.noreply.github.com> --- gemini/sample-apps/finance-advisor-spanner/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gemini/sample-apps/finance-advisor-spanner/requirements.txt b/gemini/sample-apps/finance-advisor-spanner/requirements.txt index 4e0ee04f3ee..718d0d1ccf6 100644 --- a/gemini/sample-apps/finance-advisor-spanner/requirements.txt +++ b/gemini/sample-apps/finance-advisor-spanner/requirements.txt @@ -1,6 +1,6 @@ streamlit google-cloud-spanner -itables +itables==2.1.5 streamlit-navigation-bar streamlit-extras streamlit-agraph
\n", - " \n", + " \n", " \"Google
Open in Colab\n", "
\n", "
\n", - " \n", + " \n", " \"Google
Open in Colab Enterprise\n", "
\n", "
\n", - " \n", + " \n", " \"Vertex
Open in Workbench\n", "
\n", "
\n", - " \n", + " \n", " \"GitHub
View on GitHub\n", "
\n", "