Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python SK: Simplify memory_store_base #684

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

42 changes: 36 additions & 6 deletions python/semantic_kernel/memory/memory_query_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from typing import Optional

from numpy import ndarray

from semantic_kernel.memory.memory_record import MemoryRecord


Expand All @@ -12,6 +14,7 @@ class MemoryQueryResult:
description: Optional[str]
text: Optional[str]
relevance: float
embedding: Optional[ndarray]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

numpy.typing has an NDArray typehint that lets you typehint the dtype too: https://numpy.org/doc/stable/reference/typing.html

it's relatively recent so you may need to verify it works with your numpy version ranges in your dependencies


def __init__(
self,
Expand All @@ -20,24 +23,51 @@ def __init__(
id: str,
description: Optional[str],
text: Optional[str],
embedding: Optional[ndarray],
relevance: float,
) -> None:
"""Initialize a new instance of MemoryQueryResult.

Arguments:
is_reference {bool} -- Whether the record is a reference record.
external_source_name {Optional[str]} -- The name of the external source.
id {str} -- A unique identifier for the record.
description {Optional[str]} -- The description of the record.
text {Optional[str]} -- The text of the record.
embedding {Optional[ndarray]} -- The embedding of the record, or None.
relevance {float} -- The relevance of the record to a known query.

Returns:
None -- None.
"""
self.is_reference = is_reference
self.external_source_name = external_source_name
self.id = id
self.description = description
self.text = text
self.relevance = relevance
self.embedding = embedding

@staticmethod
def from_memory_record(
    record: MemoryRecord,
    relevance: float,
) -> "MemoryQueryResult":
    """Create a new instance of MemoryQueryResult from a MemoryRecord.

    Arguments:
        record {MemoryRecord} -- The MemoryRecord to create the MemoryQueryResult from.
        relevance {float} -- The relevance of the record to a known query.

    Returns:
        MemoryQueryResult -- The created MemoryQueryResult.
    """
    # Each keyword is passed exactly once (a repeated keyword argument is a
    # SyntaxError). Values are read from the underscore-prefixed fields that
    # MemoryRecord.__init__ assigns.
    return MemoryQueryResult(
        is_reference=record._is_reference,
        external_source_name=record._external_source_name,
        id=record._id,
        description=record._description,
        text=record._text,
        embedding=record._embedding,
        relevance=relevance,
    )
63 changes: 52 additions & 11 deletions python/semantic_kernel/memory/memory_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@


class MemoryRecord:
is_reference: bool
external_source_name: Optional[str]
id: str
description: Optional[str]
text: Optional[str]
_key: str
_timestamp: str
_is_reference: bool
_external_source_name: Optional[str]
_id: str
_description: Optional[str]
_text: Optional[str]
_embedding: ndarray

def __init__(
Expand All @@ -21,17 +23,34 @@ def __init__(
description: Optional[str],
text: Optional[str],
embedding: ndarray,
key: Optional[str] = None,
timestamp: Optional[str] = None,
) -> None:
self.is_reference = is_reference
self.external_source_name = external_source_name
self.id = id
self.description = description
self.text = text
"""Initialize a new instance of MemoryRecord.

Arguments:
is_reference {bool} -- Whether the record is a reference record.
external_source_name {Optional[str]} -- The name of the external source.
id {str} -- A unique identifier for the record.
description {Optional[str]} -- The description of the record.
text {Optional[str]} -- The text of the record.
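embedding {ndarray} -- The embedding of the record.
key {Optional[str]} -- An optional key for the record (defaults to None).
timestamp {Optional[str]} -- An optional timestamp for the record (defaults to None).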

Returns:
None -- None.
"""
self._key = key
self._timestamp = timestamp
self._is_reference = is_reference
self._external_source_name = external_source_name
self._id = id
self._description = description
self._text = text
self._embedding = embedding

@property
def embedding(self) -> ndarray:
    """Return the embedding stored on this record.

    Returns:
        ndarray -- The value held in the ``_embedding`` attribute.
    """
    # Read the backing field, not the property itself: `self.embedding`
    # here would re-enter this getter and recurse without end.
    return self._embedding

@staticmethod
def reference_record(
Expand All @@ -40,6 +59,17 @@ def reference_record(
description: Optional[str],
embedding: ndarray,
) -> "MemoryRecord":
"""Create a reference record.

Arguments:
external_id {str} -- The external id of the record.
source_name {str} -- The name of the external source.
description {Optional[str]} -- The description of the record.
embedding {ndarray} -- The embedding of the record.

Returns:
MemoryRecord -- The reference record.
"""
return MemoryRecord(
is_reference=True,
external_source_name=source_name,
Expand All @@ -53,6 +83,17 @@ def reference_record(
def local_record(
id: str, text: str, description: Optional[str], embedding: ndarray
) -> "MemoryRecord":
"""Create a local record.

Arguments:
id {str} -- A unique identifier for the record.
text {str} -- The text of the record.
description {Optional[str]} -- The description of the record.
embedding {ndarray} -- The embedding of the record.

Returns:
MemoryRecord -- The local record.
"""
return MemoryRecord(
is_reference=False,
external_source_name=None,
Expand Down
81 changes: 74 additions & 7 deletions python/semantic_kernel/memory/memory_store_base.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,79 @@
# Copyright (c) Microsoft. All rights reserved.

from abc import ABC
from abc import abstractmethod
from typing import List, Tuple

from semantic_kernel.connectors.ai.embeddings.embedding_index_base import (
EmbeddingIndexBase,
)
from semantic_kernel.memory.storage.data_store_base import DataStoreBase
from numpy import ndarray

from semantic_kernel.memory.memory_record import MemoryRecord

class MemoryStoreBase(DataStoreBase, EmbeddingIndexBase, ABC):
pass

class MemoryStoreBase:
    """Interface for memory stores that hold MemoryRecord objects in named collections.

    Every method is declared with ``@abstractmethod`` and has no implementation
    here; concrete stores are expected to override all of them.

    NOTE(review): this class does not derive from ``abc.ABC``, so
    ``@abstractmethod`` is not enforced at instantiation time. Confirm whether
    enforcement is wanted before adding the base class, since that would
    reject subclasses that leave a method unimplemented.
    """

    @abstractmethod
    async def create_collection_async(self, collection_name: str) -> None:
        """Create a collection.

        Arguments:
            collection_name {str} -- The name of the collection to create.

        Returns:
            None -- None.
        """

    @abstractmethod
    async def get_collections_async(
        self,
    ) -> List[str]:
        """Get the collections in the store.

        Returns:
            List[str] -- Presumably the collection names; confirm with implementations.
        """

    @abstractmethod
    async def delete_collection_async(self, collection_name: str) -> None:
        """Delete a collection.

        Arguments:
            collection_name {str} -- The name of the collection to delete.

        Returns:
            None -- None.
        """

    @abstractmethod
    async def does_collection_exist_async(self, collection_name: str) -> bool:
        """Check whether a collection exists.

        Arguments:
            collection_name {str} -- The name of the collection to check.

        Returns:
            bool -- Presumably True when the collection exists; confirm with implementations.
        """

    @abstractmethod
    async def upsert_async(self, collection_name: str, record: MemoryRecord) -> str:
        """Upsert a single record into a collection.

        Arguments:
            collection_name {str} -- The name of the target collection.
            record {MemoryRecord} -- The record to upsert.

        Returns:
            str -- Presumably the key of the stored record; confirm with implementations.
        """

    @abstractmethod
    async def upsert_batch_async(
        self, collection_name: str, records: List[MemoryRecord]
    ) -> List[str]:
        """Upsert several records into a collection.

        Arguments:
            collection_name {str} -- The name of the target collection.
            records {List[MemoryRecord]} -- The records to upsert.

        Returns:
            List[str] -- Presumably the keys of the stored records; confirm with implementations.
        """

    @abstractmethod
    async def get_async(
        self, collection_name: str, key: str, with_embedding: bool
    ) -> MemoryRecord:
        """Get a single record from a collection.

        Arguments:
            collection_name {str} -- The name of the collection to read from.
            key {str} -- The key of the record to get.
            with_embedding {bool} -- Presumably whether the returned record
                carries its embedding; confirm with implementations.

        Returns:
            MemoryRecord -- The requested record.
        """

    @abstractmethod
    async def get_batch_async(
        self, collection_name: str, keys: List[str], with_embeddings: bool
    ) -> List[MemoryRecord]:
        """Get several records from a collection.

        Arguments:
            collection_name {str} -- The name of the collection to read from.
            keys {List[str]} -- The keys of the records to get.
            with_embeddings {bool} -- Presumably whether the returned records
                carry their embeddings; confirm with implementations.

        Returns:
            List[MemoryRecord] -- The requested records.
        """

    @abstractmethod
    async def remove_async(self, collection_name: str, key: str) -> None:
        """Remove a single record from a collection.

        Arguments:
            collection_name {str} -- The name of the collection to modify.
            key {str} -- The key of the record to remove.

        Returns:
            None -- None.
        """

    @abstractmethod
    async def remove_batch_async(self, collection_name: str, keys: List[str]) -> None:
        """Remove several records from a collection.

        Arguments:
            collection_name {str} -- The name of the collection to modify.
            keys {List[str]} -- The keys of the records to remove.

        Returns:
            None -- None.
        """

    @abstractmethod
    async def get_nearest_matches_async(
        self,
        collection_name: str,
        embedding: ndarray,
        limit: int,
        min_relevance_score: float,
        with_embeddings: bool,
    ) -> List[Tuple[MemoryRecord, float]]:
        """Get the nearest matches to an embedding within a collection.

        Arguments:
            collection_name {str} -- The name of the collection to search.
            embedding {ndarray} -- The embedding to compare against.
            limit {int} -- Presumably the maximum number of matches to return;
                confirm with implementations.
            min_relevance_score {float} -- Presumably the lowest score a match
                may have; confirm with implementations.
            with_embeddings {bool} -- Presumably whether the returned records
                carry their embeddings; confirm with implementations.

        Returns:
            List[Tuple[MemoryRecord, float]] -- Records paired with a float,
                presumably the relevance score; confirm with implementations.
        """

    @abstractmethod
    async def get_nearest_match_async(
        self,
        collection_name: str,
        embedding: ndarray,
        min_relevance_score: float,
        with_embedding: bool,
    ) -> Tuple[MemoryRecord, float]:
        """Get the single nearest match to an embedding within a collection.

        Arguments:
            collection_name {str} -- The name of the collection to search.
            embedding {ndarray} -- The embedding to compare against.
            min_relevance_score {float} -- Presumably the lowest score the
                match may have; confirm with implementations.
            with_embedding {bool} -- Presumably whether the returned record
                carries its embedding; confirm with implementations.

        Returns:
            Tuple[MemoryRecord, float] -- A record paired with a float,
                presumably the relevance score; confirm with implementations.
        """
Loading