Skip to content

Commit

Permalink
Merge branch 'main' into processes-dapr-runtime
Browse files Browse the repository at this point in the history
  • Loading branch information
moonbox3 authored Nov 15, 2024
2 parents 0f9e16e + a83abb9 commit abbe1b4
Show file tree
Hide file tree
Showing 21 changed files with 1,392 additions and 494 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/python-integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ jobs:
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
- name: Run Integration Tests - Completions
id: run_tests_completions
timeout-minutes: 10
timeout-minutes: 15
shell: bash
run: |
uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/completions -v --junitxml=pytest-completions.xml
Expand All @@ -185,7 +185,7 @@ jobs:
uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/embeddings -v --junitxml=pytest-embeddings.xml
- name: Run Integration Tests - Memory
id: run_tests_memory
timeout-minutes: 5
timeout-minutes: 10
shell: bash
run: |
uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/memory -v --junitxml=pytest-memory.xml
Expand Down
112 changes: 87 additions & 25 deletions python/samples/concepts/memory/new_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@
vectorstoremodel,
)
from semantic_kernel.data.const import DistanceFunction, IndexKind
from semantic_kernel.data.vector_search.vector_search_filter import VectorSearchFilter
from semantic_kernel.data.vector_search.vector_search_options import VectorSearchOptions
from semantic_kernel.data.vector_search.vector_search_result import VectorSearchResult
from semantic_kernel.data.vector_search.vector_text_search import VectorTextSearchMixin
from semantic_kernel.data.vector_search.vectorizable_text_search import VectorizableTextSearchMixin
from semantic_kernel.data.vector_search.vectorized_search import VectorizedSearchMixin


Expand All @@ -50,11 +54,18 @@ class DataModelArray:
deserialize_function=np.array,
),
] = None
other: str | None = None
id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
content: Annotated[
str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str")
str,
VectorStoreRecordDataField(
has_embedding=True,
embedding_property_name="vector",
property_type="str",
is_full_text_searchable=True,
),
] = "content1"
title: Annotated[str, VectorStoreRecordDataField(property_type="str", is_full_text_searchable=True)] = "title1"
tag: Annotated[str, VectorStoreRecordDataField(property_type="str", is_filterable=True)] = "tag1"

return DataModelArray

Expand All @@ -73,19 +84,26 @@ class DataModelList:
property_type="float",
),
] = None
other: str | None = None
id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
content: Annotated[
str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str")
str,
VectorStoreRecordDataField(
has_embedding=True,
embedding_property_name="vector",
property_type="str",
is_full_text_searchable=True,
),
] = "content1"
title: Annotated[str, VectorStoreRecordDataField(property_type="str", is_full_text_searchable=True)] = "title1"
tag: Annotated[str, VectorStoreRecordDataField(property_type="str", is_filterable=True)] = "tag1"

return DataModelList


collection_name = "test"
# Depending on the vector database, the index kind and distance function may need to be adjusted,
# since not all combinations are supported by all databases.
DataModel = get_data_model_array(IndexKind.HNSW, DistanceFunction.COSINE_SIMILARITY)
DataModel = get_data_model_array(IndexKind.HNSW, DistanceFunction.COSINE_DISTANCE)

# A list of VectorStoreRecordCollection that can be used.
# Available collections are:
Expand Down Expand Up @@ -133,7 +151,7 @@ class DataModelList:
data_model_type=DataModel,
collection_name=collection_name,
),
"weaviate": lambda: WeaviateCollection[DataModel](
"weaviate": lambda: WeaviateCollection[str, DataModel](
data_model_type=DataModel,
collection_name=collection_name,
),
Expand All @@ -146,6 +164,18 @@ class DataModelList:
}


def print_record(result: VectorSearchResult | None = None, record: DataModel | None = None):
    """Print a record (or the record wrapped in a search result) to stdout.

    Args:
        result: A search result; its ``record`` is printed, plus its ``score`` when set.
        record: A plain record to print; ignored when ``result`` is given.
    """
    if result:
        record = result.record
    if record is None:
        # Called with neither a result nor a record: nothing to print.
        # (Previously this raised AttributeError on ``None.id``.)
        print(" No record to print.")
        return
    print(f" Found id: {record.id}")
    print(f" Content: {record.content}")
    if record.vector is not None:
        print(f" Vector (first five): {record.vector[:5]}")
    # Some stores return results without a score; skip the line instead of
    # failing on ``format(None, '.4f')``.
    if result and result.score is not None:
        print(f" Score: {result.score:.4f}")
    print()


async def main(collection: str, use_azure_openai: bool, embedding_model: str):
print("-" * 30)
kernel = Kernel()
Expand All @@ -157,12 +187,20 @@ async def main(collection: str, use_azure_openai: bool, embedding_model: str):
kernel.add_service(embedder)
async with collections[collection]() as record_collection:
print(f"Creating {collection} collection!")
await record_collection.delete_collection()
await record_collection.create_collection_if_not_exists()

record1 = DataModel(content="Semantic Kernel is awesome", id="e6103c03-487f-4d7d-9c23-4723651c17f4")
record1 = DataModel(
content="Semantic Kernel is awesome",
id="e6103c03-487f-4d7d-9c23-4723651c17f4",
title="Overview",
tag="general",
)
record2 = DataModel(
content="Semantic Kernel is available in dotnet, python and Java.",
id="09caec77-f7e1-466a-bcec-f1d51c5b15be",
title="Semantic Kernel Languages",
tag="general",
)

print("Adding records!")
Expand All @@ -174,29 +212,53 @@ async def main(collection: str, use_azure_openai: bool, embedding_model: str):
print("Getting records!")
results = await record_collection.get_batch([record1.id, record2.id])
if results:
for result in results:
print(f" Found id: {result.id}")
print(f" Content: {result.content}")
if result.vector is not None:
print(f" Vector (first five): {result.vector[:5]}")
[print_record(record=result) for result in results]
else:
print("Nothing found...")
options = VectorSearchOptions(
vector_field_name="vector",
include_vectors=True,
filter=VectorSearchFilter.equal_to("tag", "general"),
)
if isinstance(record_collection, VectorTextSearchMixin):
print("-" * 30)
print("Using text search")
try:
search_results = await record_collection.text_search("python", options)
if search_results.total_count == 0:
print("\nNothing found...\n")
else:
[print_record(result) async for result in search_results.results]
except Exception:
print("Text search could not execute.")
if isinstance(record_collection, VectorizedSearchMixin):
print("-" * 30)
print("Using vectorized search, the distance function is set to cosine_similarity.")
print("This means that the higher the score the more similar.")
search_results = await record_collection.vectorized_search(
vector=(await embedder.generate_raw_embeddings(["python"]))[0],
options=VectorSearchOptions(vector_field_name="vector", include_vectors=True),
print(
"Using vectorized search, depending on the distance function, "
"the better score might be higher or lower."
)
results = [record async for record in search_results.results]
for result in results:
print(f" Found id: {result.record.id}")
print(f" Content: {result.record.content}")
if result.record.vector is not None:
print(f" Vector (first five): {result.record.vector[:5]}")
print(f" Score: {result.score:.4f}")
print("")
try:
search_results = await record_collection.vectorized_search(
vector=(await embedder.generate_raw_embeddings(["python"]))[0],
options=VectorSearchOptions(vector_field_name="vector", include_vectors=True),
)
if search_results.total_count == 0:
print("\nNothing found...\n")
else:
[print_record(result) async for result in search_results.results]
except Exception:
print("Vectorized search could not execute.")
if isinstance(record_collection, VectorizableTextSearchMixin):
print("-" * 30)
print("Using vectorizable text search")
try:
search_results = await record_collection.vectorizable_text_search("python", options)
if search_results.total_count == 0:
print("\nNothing found...\n")
else:
[print_record(result) async for result in search_results.results]
except Exception:
print("Vectorizable text search could not execute.")
print("-" * 30)
print("Deleting collection!")
await record_collection.delete_collection()
Expand Down
149 changes: 149 additions & 0 deletions python/samples/concepts/search/google_text_search_as_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# Copyright (c) Microsoft. All rights reserved.


from collections.abc import Coroutine
from typing import Any

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import (
OpenAIChatCompletion,
OpenAIChatPromptExecutionSettings,
)
from semantic_kernel.connectors.search.google import GoogleSearch
from semantic_kernel.contents import ChatHistory
from semantic_kernel.filters.filter_types import FilterTypes
from semantic_kernel.filters.functions.function_invocation_context import FunctionInvocationContext
from semantic_kernel.functions import KernelArguments, KernelParameterMetadata, KernelPlugin

# This sample shows how to set up Google Search as a plugin in the Semantic Kernel.
# With that plugin you can do function calling to augment your chat bot's capabilities.
# The plugin uses the search function of the GoogleSearch instance,
# which returns only the snippet of each search result.
# It also shows how the parameters of the function can be used to pass arguments to the plugin;
# this is demonstrated with the siteSearch parameter.
# The LLM can choose to override that value, but otherwise the default is used.
# You can also set this up with 'get_search_results', which returns an object with the full
# results of the search; you can then add a `string_mapper` to the function to return the
# desired string of information that you want to pass to the LLM.

# Build the kernel: one OpenAI chat service plus Google text search exposed as a plugin.
kernel = Kernel()
kernel.add_service(OpenAIChatCompletion(service_id="chat"))
kernel.add_plugin(
    KernelPlugin.from_text_search_with_search(
        GoogleSearch(),
        plugin_name="google",
        description="Get details about Semantic Kernel concepts.",
        parameters=[
            KernelParameterMetadata(
                name="query",
                description="The search query.",
                type="str",
                is_required=True,
                type_object=str,
            ),
            KernelParameterMetadata(
                name="top",
                description="The number of results to return.",
                type="int",
                is_required=False,
                default_value=2,
                type_object=int,
            ),
            KernelParameterMetadata(
                name="skip",
                description="The number of results to skip.",
                type="int",
                is_required=False,
                default_value=0,
                type_object=int,
            ),
            KernelParameterMetadata(
                name="siteSearch",
                description="The site to search.",
                # Default scope for the search; the LLM may override this argument.
                default_value="https://github.com/",
                type="str",
                is_required=False,
                type_object=str,
            ),
        ],
    )
)
chat_function = kernel.add_function(
    prompt="{{$chat_history}}{{$user_input}}",
    plugin_name="ChatBot",
    function_name="Chat",
)
execution_settings = OpenAIChatPromptExecutionSettings(
    service_id="chat",
    max_tokens=2000,
    temperature=0.7,
    top_p=0.8,
    # Auto function choice lets the model call the google plugin on its own.
    function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True),
)

history = ChatHistory()
# Fixed: the prompt previously said "Bing search plugin" although this sample
# registers the Google search plugin.
system_message = """
You are a chat bot, specialized in Semantic Kernel, Microsoft LLM orchestration SDK.
Assume questions are related to that, and use the Google search plugin to find answers.
"""
history.add_system_message(system_message)
history.add_user_message("Hi there, who are you?")
history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.")

arguments = KernelArguments(settings=execution_settings)


@kernel.filter(filter_type=FilterTypes.FUNCTION_INVOCATION)
async def log_google_filter(context: FunctionInvocationContext, next: Coroutine[FunctionInvocationContext, Any, None]):
    """Function-invocation filter that logs calls into the ``google`` plugin.

    Any other plugin's functions are passed through untouched.
    """
    if context.function.plugin_name != "google":
        await next(context)
        return
    print("Calling Google search with arguments:")
    # Only these arguments are worth surfacing; siteSearch usually keeps its default.
    for arg_name, label in (("query", "Query"), ("top", "Top"), ("skip", "Skip")):
        if arg_name in context.arguments:
            print(f' {label}: "{context.arguments[arg_name]}"')
    await next(context)
    print("Google search completed.")


async def chat() -> bool:
    """Run one round of the chat loop.

    Returns:
        False when the user wants to stop (typed 'exit', Ctrl-C, or Ctrl-D);
        True to keep chatting.
    """
    try:
        user_input = input("User:> ")
    except (KeyboardInterrupt, EOFError):
        # Both interrupts end the session the same way; merged the two
        # previously-duplicated except branches.
        print("\n\nExiting chat...")
        return False

    if user_input == "exit":
        print("\n\nExiting chat...")
        return False
    arguments["user_input"] = user_input
    arguments["chat_history"] = history
    result = await kernel.invoke(chat_function, arguments=arguments)
    print(f"Mosscap:> {result}")
    # Record the turn only after a successful model response.
    history.add_user_message(user_input)
    history.add_assistant_message(str(result))
    return True


async def main():
    """Print a greeting, then keep running chat rounds until one returns False."""
    print(
        "Welcome to the chat bot!"
        "\n  Type 'exit' to exit."
        "\n  Try to find out more about the inner workings of Semantic Kernel."
    )
    while await chat():
        pass


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
2 changes: 1 addition & 1 deletion python/semantic_kernel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

from semantic_kernel.kernel import Kernel

__version__ = "1.14.0"
__version__ = "1.15.0"
__all__ = ["Kernel", "__version__"]
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
from collections.abc import Sequence
from typing import Any, ClassVar, Generic, TypeVar

from semantic_kernel.data.vector_search.vector_search_result import VectorSearchResult

if sys.version_info >= (3, 12):
from typing import override # pragma: no cover
else:
Expand All @@ -33,6 +31,7 @@
VectorSearchOptions,
)
from semantic_kernel.data.vector_search.vector_search import VectorSearchBase
from semantic_kernel.data.vector_search.vector_search_result import VectorSearchResult
from semantic_kernel.data.vector_search.vector_text_search import VectorTextSearchMixin
from semantic_kernel.data.vector_search.vectorized_search import VectorizedSearchMixin
from semantic_kernel.exceptions import MemoryConnectorException, MemoryConnectorInitializationError
Expand Down
Loading

0 comments on commit abbe1b4

Please sign in to comment.