Skip to content

Commit

Permalink
Merge branch 'main' into processes-dapr-runtime
Browse files Browse the repository at this point in the history
  • Loading branch information
moonbox3 authored Nov 15, 2024
2 parents 0f9e16e + a83abb9 commit abbe1b4
Show file tree
Hide file tree
Showing 21 changed files with 1,392 additions and 494 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/python-integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ jobs:
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
- name: Run Integration Tests - Completions
id: run_tests_completions
timeout-minutes: 10
timeout-minutes: 15
shell: bash
run: |
uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/completions -v --junitxml=pytest-completions.xml
Expand All @@ -185,7 +185,7 @@ jobs:
uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/embeddings -v --junitxml=pytest-embeddings.xml
- name: Run Integration Tests - Memory
id: run_tests_memory
timeout-minutes: 5
timeout-minutes: 10
shell: bash
run: |
uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/memory -v --junitxml=pytest-memory.xml
Expand Down
112 changes: 87 additions & 25 deletions python/samples/concepts/memory/new_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@
vectorstoremodel,
)
from semantic_kernel.data.const import DistanceFunction, IndexKind
from semantic_kernel.data.vector_search.vector_search_filter import VectorSearchFilter
from semantic_kernel.data.vector_search.vector_search_options import VectorSearchOptions
from semantic_kernel.data.vector_search.vector_search_result import VectorSearchResult
from semantic_kernel.data.vector_search.vector_text_search import VectorTextSearchMixin
from semantic_kernel.data.vector_search.vectorizable_text_search import VectorizableTextSearchMixin
from semantic_kernel.data.vector_search.vectorized_search import VectorizedSearchMixin


Expand All @@ -50,11 +54,18 @@ class DataModelArray:
deserialize_function=np.array,
),
] = None
other: str | None = None
id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
content: Annotated[
str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str")
str,
VectorStoreRecordDataField(
has_embedding=True,
embedding_property_name="vector",
property_type="str",
is_full_text_searchable=True,
),
] = "content1"
title: Annotated[str, VectorStoreRecordDataField(property_type="str", is_full_text_searchable=True)] = "title1"
tag: Annotated[str, VectorStoreRecordDataField(property_type="str", is_filterable=True)] = "tag1"

return DataModelArray

Expand All @@ -73,19 +84,26 @@ class DataModelList:
property_type="float",
),
] = None
other: str | None = None
id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
content: Annotated[
str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str")
str,
VectorStoreRecordDataField(
has_embedding=True,
embedding_property_name="vector",
property_type="str",
is_full_text_searchable=True,
),
] = "content1"
title: Annotated[str, VectorStoreRecordDataField(property_type="str", is_full_text_searchable=True)] = "title1"
tag: Annotated[str, VectorStoreRecordDataField(property_type="str", is_filterable=True)] = "tag1"

return DataModelList


collection_name = "test"
# Depending on the vector database, the index kind and distance function may need to be adjusted,
# since not all combinations are supported by all databases.
DataModel = get_data_model_array(IndexKind.HNSW, DistanceFunction.COSINE_SIMILARITY)
DataModel = get_data_model_array(IndexKind.HNSW, DistanceFunction.COSINE_DISTANCE)

# A list of VectorStoreRecordCollection that can be used.
# Available collections are:
Expand Down Expand Up @@ -133,7 +151,7 @@ class DataModelList:
data_model_type=DataModel,
collection_name=collection_name,
),
"weaviate": lambda: WeaviateCollection[DataModel](
"weaviate": lambda: WeaviateCollection[str, DataModel](
data_model_type=DataModel,
collection_name=collection_name,
),
Expand All @@ -146,6 +164,18 @@ class DataModelList:
}


def print_record(result: VectorSearchResult | None = None, record: DataModel | None = None):
    """Print a record (or the record wrapped in a search result) to stdout.

    Args:
        result: A search result; its ``record`` is printed, plus its ``score`` when set.
        record: A plain record to print; ignored when ``result`` is given.
    """
    if result:
        record = result.record
    if record is None:
        # Called with neither a result nor a record: nothing to print.
        # (Previously this raised AttributeError on ``None.id``.)
        print(" No record to print.")
        return
    print(f" Found id: {record.id}")
    print(f" Content: {record.content}")
    if record.vector is not None:
        print(f" Vector (first five): {record.vector[:5]}")
    # Some stores return results without a score; skip the line instead of
    # failing on ``format(None, '.4f')``.
    if result and result.score is not None:
        print(f" Score: {result.score:.4f}")
    print()


async def main(collection: str, use_azure_openai: bool, embedding_model: str):
print("-" * 30)
kernel = Kernel()
Expand All @@ -157,12 +187,20 @@ async def main(collection: str, use_azure_openai: bool, embedding_model: str):
kernel.add_service(embedder)
async with collections[collection]() as record_collection:
print(f"Creating {collection} collection!")
await record_collection.delete_collection()
await record_collection.create_collection_if_not_exists()

record1 = DataModel(content="Semantic Kernel is awesome", id="e6103c03-487f-4d7d-9c23-4723651c17f4")
record1 = DataModel(
content="Semantic Kernel is awesome",
id="e6103c03-487f-4d7d-9c23-4723651c17f4",
title="Overview",
tag="general",
)
record2 = DataModel(
content="Semantic Kernel is available in dotnet, python and Java.",
id="09caec77-f7e1-466a-bcec-f1d51c5b15be",
title="Semantic Kernel Languages",
tag="general",
)

print("Adding records!")
Expand All @@ -174,29 +212,53 @@ async def main(collection: str, use_azure_openai: bool, embedding_model: str):
print("Getting records!")
results = await record_collection.get_batch([record1.id, record2.id])
if results:
for result in results:
print(f" Found id: {result.id}")
print(f" Content: {result.content}")
if result.vector is not None:
print(f" Vector (first five): {result.vector[:5]}")
[print_record(record=result) for result in results]
else:
print("Nothing found...")
options = VectorSearchOptions(
vector_field_name="vector",
include_vectors=True,
filter=VectorSearchFilter.equal_to("tag", "general"),
)
if isinstance(record_collection, VectorTextSearchMixin):
print("-" * 30)
print("Using text search")
try:
search_results = await record_collection.text_search("python", options)
if search_results.total_count == 0:
print("\nNothing found...\n")
else:
[print_record(result) async for result in search_results.results]
except Exception:
print("Text search could not execute.")
if isinstance(record_collection, VectorizedSearchMixin):
print("-" * 30)
print("Using vectorized search, the distance function is set to cosine_similarity.")
print("This means that the higher the score the more similar.")
search_results = await record_collection.vectorized_search(
vector=(await embedder.generate_raw_embeddings(["python"]))[0],
options=VectorSearchOptions(vector_field_name="vector", include_vectors=True),
print(
"Using vectorized search, depending on the distance function, "
"the better score might be higher or lower."
)
results = [record async for record in search_results.results]
for result in results:
print(f" Found id: {result.record.id}")
print(f" Content: {result.record.content}")
if result.record.vector is not None:
print(f" Vector (first five): {result.record.vector[:5]}")
print(f" Score: {result.score:.4f}")
print("")
try:
search_results = await record_collection.vectorized_search(
vector=(await embedder.generate_raw_embeddings(["python"]))[0],
options=VectorSearchOptions(vector_field_name="vector", include_vectors=True),
)
if search_results.total_count == 0:
print("\nNothing found...\n")
else:
[print_record(result) async for result in search_results.results]
except Exception:
print("Vectorized search could not execute.")
if isinstance(record_collection, VectorizableTextSearchMixin):
print("-" * 30)
print("Using vectorizable text search")
try:
search_results = await record_collection.vectorizable_text_search("python", options)
if search_results.total_count == 0:
print("\nNothing found...\n")
else:
[print_record(result) async for result in search_results.results]
except Exception:
print("Vectorizable text search could not execute.")
print("-" * 30)
print("Deleting collection!")
await record_collection.delete_collection()
Expand Down
149 changes: 149 additions & 0 deletions python/samples/concepts/search/google_text_search_as_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# Copyright (c) Microsoft. All rights reserved.


from collections.abc import Coroutine
from typing import Any

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import (
OpenAIChatCompletion,
OpenAIChatPromptExecutionSettings,
)
from semantic_kernel.connectors.search.google import GoogleSearch
from semantic_kernel.contents import ChatHistory
from semantic_kernel.filters.filter_types import FilterTypes
from semantic_kernel.filters.functions.function_invocation_context import FunctionInvocationContext
from semantic_kernel.functions import KernelArguments, KernelParameterMetadata, KernelPlugin

# This sample shows how to set up Google Search as a plugin in the Semantic Kernel.
# With that plugin you can do function calling to augment your chat bot's capabilities.
# The plugin uses the search function of the GoogleSearch instance,
# which returns only the snippet of each search result.
# It also shows how the parameters of the function can be used to pass arguments to the plugin;
# this is demonstrated with the siteSearch parameter.
# The LLM can choose to override that value, but otherwise the default is used.
# You can also set this up with 'get_search_results', which returns an object with the full
# results of the search; you can then add a `string_mapper` to the function to return the
# desired string of information that you want to pass to the LLM.

# Build the kernel: one OpenAI chat service plus Google text search exposed as a plugin.
kernel = Kernel()
kernel.add_service(OpenAIChatCompletion(service_id="chat"))
kernel.add_plugin(
    KernelPlugin.from_text_search_with_search(
        GoogleSearch(),
        plugin_name="google",
        description="Get details about Semantic Kernel concepts.",
        parameters=[
            KernelParameterMetadata(
                name="query",
                description="The search query.",
                type="str",
                is_required=True,
                type_object=str,
            ),
            KernelParameterMetadata(
                name="top",
                description="The number of results to return.",
                type="int",
                is_required=False,
                default_value=2,
                type_object=int,
            ),
            KernelParameterMetadata(
                name="skip",
                description="The number of results to skip.",
                type="int",
                is_required=False,
                default_value=0,
                type_object=int,
            ),
            KernelParameterMetadata(
                name="siteSearch",
                description="The site to search.",
                # Default scope for the search; the LLM may override this argument.
                default_value="https://github.com/",
                type="str",
                is_required=False,
                type_object=str,
            ),
        ],
    )
)
chat_function = kernel.add_function(
    prompt="{{$chat_history}}{{$user_input}}",
    plugin_name="ChatBot",
    function_name="Chat",
)
execution_settings = OpenAIChatPromptExecutionSettings(
    service_id="chat",
    max_tokens=2000,
    temperature=0.7,
    top_p=0.8,
    # Auto function choice lets the model call the google plugin on its own.
    function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True),
)

history = ChatHistory()
# Fixed: the prompt previously said "Bing search plugin" although this sample
# registers the Google search plugin.
system_message = """
You are a chat bot, specialized in Semantic Kernel, Microsoft LLM orchestration SDK.
Assume questions are related to that, and use the Google search plugin to find answers.
"""
history.add_system_message(system_message)
history.add_user_message("Hi there, who are you?")
history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.")

arguments = KernelArguments(settings=execution_settings)


@kernel.filter(filter_type=FilterTypes.FUNCTION_INVOCATION)
async def log_google_filter(context: FunctionInvocationContext, next: Coroutine[FunctionInvocationContext, Any, None]):
    """Function-invocation filter that logs calls into the ``google`` plugin.

    Any other plugin's functions are passed through untouched.
    """
    if context.function.plugin_name != "google":
        await next(context)
        return
    print("Calling Google search with arguments:")
    # Only these arguments are worth surfacing; siteSearch usually keeps its default.
    for arg_name, label in (("query", "Query"), ("top", "Top"), ("skip", "Skip")):
        if arg_name in context.arguments:
            print(f' {label}: "{context.arguments[arg_name]}"')
    await next(context)
    print("Google search completed.")


async def chat() -> bool:
    """Run one round of the chat loop.

    Returns:
        False when the user wants to stop (typed 'exit', Ctrl-C, or Ctrl-D);
        True to keep chatting.
    """
    try:
        user_input = input("User:> ")
    except (KeyboardInterrupt, EOFError):
        # Both interrupts end the session the same way; merged the two
        # previously-duplicated except branches.
        print("\n\nExiting chat...")
        return False

    if user_input == "exit":
        print("\n\nExiting chat...")
        return False
    arguments["user_input"] = user_input
    arguments["chat_history"] = history
    result = await kernel.invoke(chat_function, arguments=arguments)
    print(f"Mosscap:> {result}")
    # Record the turn only after a successful model response.
    history.add_user_message(user_input)
    history.add_assistant_message(str(result))
    return True


async def main():
    """Print a greeting, then keep running chat rounds until one returns False."""
    print(
        "Welcome to the chat bot!"
        "\n  Type 'exit' to exit."
        "\n  Try to find out more about the inner workings of Semantic Kernel."
    )
    while await chat():
        pass


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
2 changes: 1 addition & 1 deletion python/semantic_kernel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

from semantic_kernel.kernel import Kernel

__version__ = "1.14.0"
__version__ = "1.15.0"
__all__ = ["Kernel", "__version__"]
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
from collections.abc import Sequence
from typing import Any, ClassVar, Generic, TypeVar

from semantic_kernel.data.vector_search.vector_search_result import VectorSearchResult

if sys.version_info >= (3, 12):
from typing import override # pragma: no cover
else:
Expand All @@ -33,6 +31,7 @@
VectorSearchOptions,
)
from semantic_kernel.data.vector_search.vector_search import VectorSearchBase
from semantic_kernel.data.vector_search.vector_search_result import VectorSearchResult
from semantic_kernel.data.vector_search.vector_text_search import VectorTextSearchMixin
from semantic_kernel.data.vector_search.vectorized_search import VectorizedSearchMixin
from semantic_kernel.exceptions import MemoryConnectorException, MemoryConnectorInitializationError
Expand Down
Loading

0 comments on commit abbe1b4

Please sign in to comment.