-
Notifications
You must be signed in to change notification settings - Fork 3.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Python: Introducing Google Search as a Text Search implementation (#9691
) ### Motivation and Context <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> This adds a new version of the Google Search implementation based on the new Text Search interfaces. Easily create a plugin from the different search functions to perform RAG with online docs! Closes #6834 ### Description <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> Creates a new folder under connectors.search for Google. Adds GoogleSearch Adds classes to capture the response. Adds constants for the query parameters Adds a sample showing how to use it with a filter. ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone 😄
- Loading branch information
1 parent
691f561
commit 8e784a1
Showing
8 changed files
with
499 additions
and
3 deletions.
There are no files selected for viewing
149 changes: 149 additions & 0 deletions
149
python/samples/concepts/search/google_text_search_as_plugin.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
# Copyright (c) Microsoft. All rights reserved. | ||
|
||
|
||
from collections.abc import Awaitable, Callable, Coroutine
from typing import Any
|
||
from semantic_kernel import Kernel | ||
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior | ||
from semantic_kernel.connectors.ai.open_ai import ( | ||
OpenAIChatCompletion, | ||
OpenAIChatPromptExecutionSettings, | ||
) | ||
from semantic_kernel.connectors.search.google import GoogleSearch | ||
from semantic_kernel.contents import ChatHistory | ||
from semantic_kernel.filters.filter_types import FilterTypes | ||
from semantic_kernel.filters.functions.function_invocation_context import FunctionInvocationContext | ||
from semantic_kernel.functions import KernelArguments, KernelParameterMetadata, KernelPlugin | ||
|
||
# This sample shows how to set up Google Search as a plugin in the Semantic Kernel. | ||
# With that plugin you can do function calling to augment your chat bot capabilities. | ||
# The plugin uses the search function of the GoogleSearch instance, | ||
# which returns only the snippet of the search results. | ||
# It also shows how the Parameters of the function can be used to pass arguments to the plugin, | ||
# this is shown with the siteSearch parameter. | ||
# The LLM can choose to override that but it will take the default value otherwise. | ||
# You can also set this up with the 'get_search_results', this returns an object with the full results of the search | ||
# and then you can add a `string_mapper` to the function to return the desired string of information | ||
# that you want to pass to the LLM. | ||
|
||
# Build the kernel with one chat completion service, registered as "chat".
kernel = Kernel()
kernel.add_service(OpenAIChatCompletion(service_id="chat"))

# Expose the `search` function of GoogleSearch as a plugin named "google".
# The parameter metadata below is what the model sees when it decides how to
# call the function; parameters with a default_value are optional.
kernel.add_plugin(
    KernelPlugin.from_text_search_with_search(
        GoogleSearch(),
        plugin_name="google",
        description="Get details about Semantic Kernel concepts.",
        parameters=[
            KernelParameterMetadata(
                name="query",
                description="The search query.",
                type="str",
                is_required=True,
                type_object=str,
            ),
            KernelParameterMetadata(
                name="top",
                description="The number of results to return.",
                type="int",
                is_required=False,
                default_value=2,
                type_object=int,
            ),
            KernelParameterMetadata(
                name="skip",
                description="The number of results to skip.",
                type="int",
                is_required=False,
                default_value=0,
                type_object=int,
            ),
            # Extra query parameter passed through to the search; the model may
            # override the default site.
            KernelParameterMetadata(
                name="siteSearch",
                description="The site to search.",
                default_value="https://github.com/",
                type="str",
                is_required=False,
                type_object=str,
            ),
        ],
    )
)

# Prompt function that renders the running history followed by the new user input.
chat_function = kernel.add_function(
    prompt="{{$chat_history}}{{$user_input}}",
    plugin_name="ChatBot",
    function_name="Chat",
)

# Settings for the "chat" service; function calling is automatic and auto-invoked.
execution_settings = OpenAIChatPromptExecutionSettings(
    service_id="chat",
    max_tokens=2000,
    temperature=0.7,
    top_p=0.8,
    function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True),
)

# Seed the conversation. The system message must name the plugin that is
# actually registered above (Google, not Bing) so the model is steered towards
# the available search function.
history = ChatHistory()
system_message = """
You are a chat bot, specialized in Semantic Kernel, Microsoft LLM orchestration SDK.
Assume questions are related to that, and use the Google search plugin to find answers.
"""
history.add_system_message(system_message)
history.add_user_message("Hi there, who are you?")
history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.")

# Arguments reused for every invocation of the chat function.
arguments = KernelArguments(settings=execution_settings)
|
||
|
||
@kernel.filter(filter_type=FilterTypes.FUNCTION_INVOCATION)
async def log_google_filter(
    context: FunctionInvocationContext,
    next: Callable[[FunctionInvocationContext], Awaitable[None]],
) -> None:
    """Log calls made to the "google" plugin and pass every call on.

    Args:
        context: The invocation context; its function's plugin name and its
            arguments are inspected for logging.
        next: Callable that is awaited with the context to continue the
            invocation. It is always called exactly once.
    """
    # Calls to any other plugin are forwarded without logging.
    if context.function.plugin_name != "google":
        await next(context)
        return

    print("Calling Google search with arguments:")
    if "query" in context.arguments:
        print(f' Query: "{context.arguments["query"]}"')
    if "top" in context.arguments:
        print(f' Top: "{context.arguments["top"]}"')
    if "skip" in context.arguments:
        print(f' Skip: "{context.arguments["skip"]}"')
    await next(context)
    print("Google search completed.")
|
||
|
||
async def chat() -> bool:
    """Run a single turn of the console chat.

    Reads one line from the user, invokes the chat function with the current
    history, prints the reply and records both messages in the history.

    Returns:
        False when the user typed "exit" or interrupted/closed the input,
        True when a turn was completed and the chat should continue.
    """
    try:
        user_input = input("User:> ")
    except (KeyboardInterrupt, EOFError):
        # Ctrl+C and end-of-input are handled exactly like an explicit "exit".
        user_input = "exit"

    if user_input == "exit":
        print("\n\nExiting chat...")
        return False

    arguments["user_input"] = user_input
    arguments["chat_history"] = history
    answer = await kernel.invoke(chat_function, arguments=arguments)
    print(f"Mosscap:> {answer}")

    # The history is only extended after the invocation, so the prompt never
    # contains the current user input twice.
    history.add_user_message(user_input)
    history.add_assistant_message(str(answer))
    return True
|
||
|
||
async def main():
    """Print the welcome banner and keep chatting until `chat` returns False."""
    print(
        "Welcome to the chat bot!"
        "\n Type 'exit' to exit."
        "\n Try to find out more about the inner workings of Semantic Kernel."
    )
    # Each call handles one turn; a falsy result ends the session.
    while await chat():
        pass
|
||
|
||
# Script entry point: start the interactive chat loop when run directly.
if __name__ == "__main__":
    from asyncio import run

    run(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11 changes: 11 additions & 0 deletions
11
python/semantic_kernel/connectors/search/google/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Copyright (c) Microsoft. All rights reserved. | ||
|
||
from semantic_kernel.connectors.search.google.google_search import GoogleSearch | ||
from semantic_kernel.connectors.search.google.google_search_response import GoogleSearchResponse | ||
from semantic_kernel.connectors.search.google.google_search_result import GoogleSearchResult | ||
|
||
__all__ = [ | ||
"GoogleSearch", | ||
"GoogleSearchResponse", | ||
"GoogleSearchResult", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Copyright (c) Microsoft. All rights reserved. | ||
|
||
|
||
from typing import Final | ||
|
||
# Base URL used for Google custom search requests.
CUSTOM_SEARCH_URL: Final[str] = "https://www.googleapis.com/customsearch/v1"

# Names of the supported query parameters.
# Reference: https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list
QUERY_PARAMETERS: Final[list[str]] = [
    # cr (country): restricts results to documents originating in a particular country.
    # Boolean operators may be used in the value.
    "cr",
    # dateRestrict: restricts results to URLs based on date. Supported values:
    #   d[number] - results from the specified number of past days
    #   w[number] - results from the specified number of past weeks
    #   m[number] - results from the specified number of past months
    #   y[number] - results from the specified number of past years
    "dateRestrict",
    # exactTerms: a phrase that all documents in the search results must contain.
    "exactTerms",
    # excludeTerms: a word or phrase that should not appear in any document in the search results.
    "excludeTerms",
    # fileType: restricts results to files of a specified extension. The file types indexable by
    # Google are listed in the Search Console Help Center: https://support.google.com/webmasters/answer/35287
    "fileType",
    # filter: turns the duplicate content filter on or off.
    "filter",
    # gl: geolocation of the end user, as a two-letter country code. Boosts results whose
    # country of origin matches the value. See the Country Codes page for valid values.
    "gl",
    # highRange: the ending value for a search range.
    "highRange",
    # hl: sets the user interface language.
    "hl",
    # linkSite: all search results should contain a link to a particular URL.
    "linkSite",
    # lr: language of the result; restricts the search to documents written in a
    # particular language (e.g., lr=lang_ja).
    "lr",
    # orTerms: additional search terms to check for in a document; each document in the
    # search results must contain at least one of them.
    "orTerms",
    # rights: filters based on licensing. Supported values include:
    # cc_publicdomain, cc_attribute, cc_sharealike, cc_noncommercial, cc_nonderived
    "rights",
    # siteSearch: all search results should be pages from a given site.
    "siteSearch",
    # siteSearchFilter: whether to include or exclude results from the site named in siteSearch.
    "siteSearchFilter",
]
Oops, something went wrong.