From 153961feaa5f7f9ef0435477b36f150e3d7ffd83 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Fri, 29 Mar 2024 17:10:45 +0200 Subject: [PATCH] feat: Metadata persisted to protobuf binary file - Also added copy the existing metadata file on write to handle data corruption during writes --- chromadb/proto/chroma_pb2.py | 35 ++-- chromadb/proto/chroma_pb2.pyi | 66 ++++--- chromadb/proto/coordinator_pb2.py | 101 +++++------ chromadb/proto/coordinator_pb2.pyi | 86 ++++----- chromadb/proto/logservice_pb2.py | 9 +- chromadb/proto/logservice_pb2.pyi | 16 +- .../impl/vector/local_persistent_hnsw.py | 169 ++++++++---------- chromadb/test/segment/test_vector.py | 136 +++++++------- idl/chromadb/proto/chroma.proto | 14 ++ 9 files changed, 332 insertions(+), 300 deletions(-) diff --git a/chromadb/proto/chroma_pb2.py b/chromadb/proto/chroma_pb2.py index df92b355affb..995484f48ec1 100644 --- a/chromadb/proto/chroma_pb2.py +++ b/chromadb/proto/chroma_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: chromadb/proto/chroma.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,24 +14,24 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1b\x63hromadb/proto/chroma.proto\x12\x06\x63hroma\"&\n\x06Status\x12\x0e\n\x06reason\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\x05\"U\n\x06Vector\x12\x11\n\tdimension\x18\x01 \x01(\x05\x12\x0e\n\x06vector\x18\x02 \x01(\x0c\x12(\n\x08\x65ncoding\x18\x03 \x01(\x0e\x32\x16.chroma.ScalarEncoding\"\x1a\n\tFilePaths\x12\r\n\x05paths\x18\x01 \x03(\t\"\xa5\x02\n\x07Segment\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12#\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScope\x12\x17\n\ncollection\x18\x05 \x01(\tH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12\x32\n\nfile_paths\x18\x07 \x03(\x0b\x32\x1e.chroma.Segment.FilePathsEntry\x1a\x43\n\x0e\x46ilePathsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12 \n\x05value\x18\x02 \x01(\x0b\x32\x11.chroma.FilePaths:\x02\x38\x01\x42\r\n\x0b_collectionB\x0b\n\t_metadata\"\xd0\x01\n\nCollection\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\t\x12\x13\n\x0blogPosition\x18\x08 \x01(\x03\x12\x0f\n\x07version\x18\t \x01(\x05\x42\x0b\n\t_metadataB\x0c\n\n_dimension\"4\n\x08\x44\x61tabase\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"\x16\n\x06Tenant\x12\x0c\n\x04name\x18\x01 \x01(\t\"b\n\x13UpdateMetadataValue\x12\x16\n\x0cstring_value\x18\x01 \x01(\tH\x00\x12\x13\n\tint_value\x18\x02 \x01(\x03H\x00\x12\x15\n\x0b\x66loat_value\x18\x03 \x01(\x01H\x00\x42\x07\n\x05value\"\x96\x01\n\x0eUpdateMetadata\x12\x36\n\x08metadata\x18\x01 \x03(\x0b\x32$.chroma.UpdateMetadata.MetadataEntry\x1aL\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.chroma.UpdateMetadataValue:\x02\x38\x01\"\xaf\x01\n\x0fOperationRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12#\n\x06vector\x18\x02 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12$\n\toperation\x18\x04 \x01(\x0e\x32\x11.chroma.OperationB\t\n\x07_vectorB\x0b\n\t_metadata\"S\n\x15VectorEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x1e\n\x06vector\x18\x03 \x01(\x0b\x32\x0e.chroma.Vector\"q\n\x11VectorQueryResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02\x12#\n\x06vector\x18\x04 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x42\t\n\x07_vector\"@\n\x12VectorQueryResults\x12*\n\x07results\x18\x01 \x03(\x0b\x32\x19.chroma.VectorQueryResult\"4\n\x11GetVectorsRequest\x12\x0b\n\x03ids\x18\x01 \x03(\t\x12\x12\n\nsegment_id\x18\x02 \x01(\t\"D\n\x12GetVectorsResponse\x12.\n\x07records\x18\x01 \x03(\x0b\x32\x1d.chroma.VectorEmbeddingRecord\"\x86\x01\n\x13QueryVectorsRequest\x12\x1f\n\x07vectors\x18\x01 \x03(\x0b\x32\x0e.chroma.Vector\x12\t\n\x01k\x18\x02 \x01(\x05\x12\x13\n\x0b\x61llowed_ids\x18\x03 \x03(\t\x12\x1a\n\x12include_embeddings\x18\x04 \x01(\x08\x12\x12\n\nsegment_id\x18\x05 \x01(\t\"C\n\x14QueryVectorsResponse\x12+\n\x07results\x18\x01 \x03(\x0b\x32\x1a.chroma.VectorQueryResults*8\n\tOperation\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\n\n\x06UPDATE\x10\x01\x12\n\n\x06UPSERT\x10\x02\x12\n\n\x06\x44\x45LETE\x10\x03*(\n\x0eScalarEncoding\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\t\n\x05INT32\x10\x01*(\n\x0cSegmentScope\x12\n\n\x06VECTOR\x10\x00\x12\x0c\n\x08METADATA\x10\x01\x32\xa2\x01\n\x0cVectorReader\x12\x45\n\nGetVectors\x12\x19.chroma.GetVectorsRequest\x1a\x1a.chroma.GetVectorsResponse\"\x00\x12K\n\x0cQueryVectors\x12\x1b.chroma.QueryVectorsRequest\x1a\x1c.chroma.QueryVectorsResponse\"\x00\x42:Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpbb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1b\x63hromadb/proto/chroma.proto\x12\x06\x63hroma\"&\n\x06Status\x12\x0e\n\x06reason\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\x05\"U\n\x06Vector\x12\x11\n\tdimension\x18\x01 \x01(\x05\x12\x0e\n\x06vector\x18\x02 \x01(\x0c\x12(\n\x08\x65ncoding\x18\x03 \x01(\x0e\x32\x16.chroma.ScalarEncoding\"\x1a\n\tFilePaths\x12\r\n\x05paths\x18\x01 \x03(\t\"\xa5\x02\n\x07Segment\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12#\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScope\x12\x17\n\ncollection\x18\x05 \x01(\tH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12\x32\n\nfile_paths\x18\x07 \x03(\x0b\x32\x1e.chroma.Segment.FilePathsEntry\x1a\x43\n\x0e\x46ilePathsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12 \n\x05value\x18\x02 \x01(\x0b\x32\x11.chroma.FilePaths:\x02\x38\x01\x42\r\n\x0b_collectionB\x0b\n\t_metadata\"\xd0\x01\n\nCollection\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\t\x12\x13\n\x0blogPosition\x18\x08 \x01(\x03\x12\x0f\n\x07version\x18\t \x01(\x05\x42\x0b\n\t_metadataB\x0c\n\n_dimension\"4\n\x08\x44\x61tabase\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"\x16\n\x06Tenant\x12\x0c\n\x04name\x18\x01 \x01(\t\"b\n\x13UpdateMetadataValue\x12\x16\n\x0cstring_value\x18\x01 \x01(\tH\x00\x12\x13\n\tint_value\x18\x02 \x01(\x03H\x00\x12\x15\n\x0b\x66loat_value\x18\x03 \x01(\x01H\x00\x42\x07\n\x05value\"\x96\x01\n\x0eUpdateMetadata\x12\x36\n\x08metadata\x18\x01 \x03(\x0b\x32$.chroma.UpdateMetadata.MetadataEntry\x1aL\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.chroma.UpdateMetadataValue:\x02\x38\x01\"\xaf\x01\n\x0fOperationRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12#\n\x06vector\x18\x02 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12$\n\toperation\x18\x04 \x01(\x0e\x32\x11.chroma.OperationB\t\n\x07_vectorB\x0b\n\t_metadata\"S\n\x15VectorEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x1e\n\x06vector\x18\x03 \x01(\x0b\x32\x0e.chroma.Vector\"q\n\x11VectorQueryResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02\x12#\n\x06vector\x18\x04 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x42\t\n\x07_vector\"@\n\x12VectorQueryResults\x12*\n\x07results\x18\x01 \x03(\x0b\x32\x19.chroma.VectorQueryResult\"4\n\x11GetVectorsRequest\x12\x0b\n\x03ids\x18\x01 \x03(\t\x12\x12\n\nsegment_id\x18\x02 \x01(\t\"D\n\x12GetVectorsResponse\x12.\n\x07records\x18\x01 \x03(\x0b\x32\x1d.chroma.VectorEmbeddingRecord\"\x86\x01\n\x13QueryVectorsRequest\x12\x1f\n\x07vectors\x18\x01 \x03(\x0b\x32\x0e.chroma.Vector\x12\t\n\x01k\x18\x02 \x01(\x05\x12\x13\n\x0b\x61llowed_ids\x18\x03 \x03(\t\x12\x1a\n\x12include_embeddings\x18\x04 \x01(\x08\x12\x12\n\nsegment_id\x18\x05 \x01(\t\"C\n\x14QueryVectorsResponse\x12+\n\x07results\x18\x01 \x03(\x0b\x32\x1a.chroma.VectorQueryResults\"U\n\x19LocalSegmentMetadataTuple\x12\x14\n\x0c\x65mbedding_id\x18\x01 \x01(\t\x12\x12\n\nhnsw_label\x18\x02 \x01(\x05\x12\x0e\n\x06seq_id\x18\x03 \x01(\x05\"\x93\x01\n\x14LocalSegmentMetadata\x12\x31\n\x06tuples\x18\x01 \x03(\x0b\x32!.chroma.LocalSegmentMetadataTuple\x12\x16\n\x0e\x64imensionality\x18\x02 \x01(\x05\x12\x1c\n\x14total_elements_added\x18\x03 \x01(\x03\x12\x12\n\nmax_seq_id\x18\x04 \x01(\x03*8\n\tOperation\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\n\n\x06UPDATE\x10\x01\x12\n\n\x06UPSERT\x10\x02\x12\n\n\x06\x44\x45LETE\x10\x03*(\n\x0eScalarEncoding\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\t\n\x05INT32\x10\x01*(\n\x0cSegmentScope\x12\n\n\x06VECTOR\x10\x00\x12\x0c\n\x08METADATA\x10\x01\x32\xa2\x01\n\x0cVectorReader\x12\x45\n\nGetVectors\x12\x19.chroma.GetVectorsRequest\x1a\x1a.chroma.GetVectorsResponse\"\x00\x12K\n\x0cQueryVectors\x12\x1b.chroma.QueryVectorsRequest\x1a\x1c.chroma.QueryVectorsResponse\"\x00\x42:Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpbb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'chromadb.proto.chroma_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpb' - _SEGMENT_FILEPATHSENTRY._options = None - _SEGMENT_FILEPATHSENTRY._serialized_options = b'8\001' - _UPDATEMETADATA_METADATAENTRY._options = None - _UPDATEMETADATA_METADATAENTRY._serialized_options = b'8\001' - _globals['_OPERATION']._serialized_start=1806 - _globals['_OPERATION']._serialized_end=1862 - _globals['_SCALARENCODING']._serialized_start=1864 - _globals['_SCALARENCODING']._serialized_end=1904 - _globals['_SEGMENTSCOPE']._serialized_start=1906 - _globals['_SEGMENTSCOPE']._serialized_end=1946 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpb' + _globals['_SEGMENT_FILEPATHSENTRY']._options = None + _globals['_SEGMENT_FILEPATHSENTRY']._serialized_options = b'8\001' + _globals['_UPDATEMETADATA_METADATAENTRY']._options = None + _globals['_UPDATEMETADATA_METADATAENTRY']._serialized_options = b'8\001' + _globals['_OPERATION']._serialized_start=2043 + _globals['_OPERATION']._serialized_end=2099 + _globals['_SCALARENCODING']._serialized_start=2101 + _globals['_SCALARENCODING']._serialized_end=2141 + _globals['_SEGMENTSCOPE']._serialized_start=2143 + _globals['_SEGMENTSCOPE']._serialized_end=2183 _globals['_STATUS']._serialized_start=39 _globals['_STATUS']._serialized_end=77 _globals['_VECTOR']._serialized_start=79 @@ -69,6 +70,10 @@ _globals['_QUERYVECTORSREQUEST']._serialized_end=1735 _globals['_QUERYVECTORSRESPONSE']._serialized_start=1737 _globals['_QUERYVECTORSRESPONSE']._serialized_end=1804 - _globals['_VECTORREADER']._serialized_start=1949 - _globals['_VECTORREADER']._serialized_end=2111 + _globals['_LOCALSEGMENTMETADATATUPLE']._serialized_start=1806 + _globals['_LOCALSEGMENTMETADATATUPLE']._serialized_end=1891 + _globals['_LOCALSEGMENTMETADATA']._serialized_start=1894 + _globals['_LOCALSEGMENTMETADATA']._serialized_end=2041 + _globals['_VECTORREADER']._serialized_start=2186 + _globals['_VECTORREADER']._serialized_end=2348 # @@protoc_insertion_point(module_scope) diff --git a/chromadb/proto/chroma_pb2.pyi b/chromadb/proto/chroma_pb2.pyi index 1e4ac4979a26..a671b417990d 100644 --- a/chromadb/proto/chroma_pb2.pyi +++ b/chromadb/proto/chroma_pb2.pyi @@ -7,19 +7,19 @@ from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Map DESCRIPTOR: _descriptor.FileDescriptor class Operation(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = [] + __slots__ = () ADD: _ClassVar[Operation] UPDATE: _ClassVar[Operation] UPSERT: _ClassVar[Operation] DELETE: _ClassVar[Operation] class ScalarEncoding(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = [] + __slots__ = () FLOAT32: _ClassVar[ScalarEncoding] INT32: _ClassVar[ScalarEncoding] class SegmentScope(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = [] + __slots__ = () VECTOR: _ClassVar[SegmentScope] METADATA: _ClassVar[SegmentScope] ADD: Operation @@ -32,7 +32,7 @@ VECTOR: SegmentScope METADATA: SegmentScope class Status(_message.Message): - __slots__ = ["reason", "code"] + __slots__ = ("reason", "code") REASON_FIELD_NUMBER: _ClassVar[int] CODE_FIELD_NUMBER: _ClassVar[int] reason: str @@ -40,7 +40,7 @@ class Status(_message.Message): def __init__(self, reason: _Optional[str] = ..., code: _Optional[int] = ...) -> None: ... class Vector(_message.Message): - __slots__ = ["dimension", "vector", "encoding"] + __slots__ = ("dimension", "vector", "encoding") DIMENSION_FIELD_NUMBER: _ClassVar[int] VECTOR_FIELD_NUMBER: _ClassVar[int] ENCODING_FIELD_NUMBER: _ClassVar[int] @@ -50,15 +50,15 @@ class Vector(_message.Message): def __init__(self, dimension: _Optional[int] = ..., vector: _Optional[bytes] = ..., encoding: _Optional[_Union[ScalarEncoding, str]] = ...) -> None: ... class FilePaths(_message.Message): - __slots__ = ["paths"] + __slots__ = ("paths",) PATHS_FIELD_NUMBER: _ClassVar[int] paths: _containers.RepeatedScalarFieldContainer[str] def __init__(self, paths: _Optional[_Iterable[str]] = ...) -> None: ... class Segment(_message.Message): - __slots__ = ["id", "type", "scope", "collection", "metadata", "file_paths"] + __slots__ = ("id", "type", "scope", "collection", "metadata", "file_paths") class FilePathsEntry(_message.Message): - __slots__ = ["key", "value"] + __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] key: str @@ -79,7 +79,7 @@ class Segment(_message.Message): def __init__(self, id: _Optional[str] = ..., type: _Optional[str] = ..., scope: _Optional[_Union[SegmentScope, str]] = ..., collection: _Optional[str] = ..., metadata: _Optional[_Union[UpdateMetadata, _Mapping]] = ..., file_paths: _Optional[_Mapping[str, FilePaths]] = ...) -> None: ... class Collection(_message.Message): - __slots__ = ["id", "name", "metadata", "dimension", "tenant", "database", "logPosition", "version"] + __slots__ = ("id", "name", "metadata", "dimension", "tenant", "database", "logPosition", "version") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] METADATA_FIELD_NUMBER: _ClassVar[int] @@ -99,7 +99,7 @@ class Collection(_message.Message): def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., metadata: _Optional[_Union[UpdateMetadata, _Mapping]] = ..., dimension: _Optional[int] = ..., tenant: _Optional[str] = ..., database: _Optional[str] = ..., logPosition: _Optional[int] = ..., version: _Optional[int] = ...) -> None: ... class Database(_message.Message): - __slots__ = ["id", "name", "tenant"] + __slots__ = ("id", "name", "tenant") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] TENANT_FIELD_NUMBER: _ClassVar[int] @@ -109,13 +109,13 @@ class Database(_message.Message): def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., tenant: _Optional[str] = ...) -> None: ... class Tenant(_message.Message): - __slots__ = ["name"] + __slots__ = ("name",) NAME_FIELD_NUMBER: _ClassVar[int] name: str def __init__(self, name: _Optional[str] = ...) -> None: ... class UpdateMetadataValue(_message.Message): - __slots__ = ["string_value", "int_value", "float_value"] + __slots__ = ("string_value", "int_value", "float_value") STRING_VALUE_FIELD_NUMBER: _ClassVar[int] INT_VALUE_FIELD_NUMBER: _ClassVar[int] FLOAT_VALUE_FIELD_NUMBER: _ClassVar[int] @@ -125,9 +125,9 @@ class UpdateMetadataValue(_message.Message): def __init__(self, string_value: _Optional[str] = ..., int_value: _Optional[int] = ..., float_value: _Optional[float] = ...) -> None: ... class UpdateMetadata(_message.Message): - __slots__ = ["metadata"] + __slots__ = ("metadata",) class MetadataEntry(_message.Message): - __slots__ = ["key", "value"] + __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] key: str @@ -138,7 +138,7 @@ class UpdateMetadata(_message.Message): def __init__(self, metadata: _Optional[_Mapping[str, UpdateMetadataValue]] = ...) -> None: ... class OperationRecord(_message.Message): - __slots__ = ["id", "vector", "metadata", "operation"] + __slots__ = ("id", "vector", "metadata", "operation") ID_FIELD_NUMBER: _ClassVar[int] VECTOR_FIELD_NUMBER: _ClassVar[int] METADATA_FIELD_NUMBER: _ClassVar[int] @@ -150,7 +150,7 @@ class OperationRecord(_message.Message): def __init__(self, id: _Optional[str] = ..., vector: _Optional[_Union[Vector, _Mapping]] = ..., metadata: _Optional[_Union[UpdateMetadata, _Mapping]] = ..., operation: _Optional[_Union[Operation, str]] = ...) -> None: ... class VectorEmbeddingRecord(_message.Message): - __slots__ = ["id", "seq_id", "vector"] + __slots__ = ("id", "seq_id", "vector") ID_FIELD_NUMBER: _ClassVar[int] SEQ_ID_FIELD_NUMBER: _ClassVar[int] VECTOR_FIELD_NUMBER: _ClassVar[int] @@ -160,7 +160,7 @@ class VectorEmbeddingRecord(_message.Message): def __init__(self, id: _Optional[str] = ..., seq_id: _Optional[bytes] = ..., vector: _Optional[_Union[Vector, _Mapping]] = ...) -> None: ... class VectorQueryResult(_message.Message): - __slots__ = ["id", "seq_id", "distance", "vector"] + __slots__ = ("id", "seq_id", "distance", "vector") ID_FIELD_NUMBER: _ClassVar[int] SEQ_ID_FIELD_NUMBER: _ClassVar[int] DISTANCE_FIELD_NUMBER: _ClassVar[int] @@ -172,13 +172,13 @@ class VectorQueryResult(_message.Message): def __init__(self, id: _Optional[str] = ..., seq_id: _Optional[bytes] = ..., distance: _Optional[float] = ..., vector: _Optional[_Union[Vector, _Mapping]] = ...) -> None: ... class VectorQueryResults(_message.Message): - __slots__ = ["results"] + __slots__ = ("results",) RESULTS_FIELD_NUMBER: _ClassVar[int] results: _containers.RepeatedCompositeFieldContainer[VectorQueryResult] def __init__(self, results: _Optional[_Iterable[_Union[VectorQueryResult, _Mapping]]] = ...) -> None: ... class GetVectorsRequest(_message.Message): - __slots__ = ["ids", "segment_id"] + __slots__ = ("ids", "segment_id") IDS_FIELD_NUMBER: _ClassVar[int] SEGMENT_ID_FIELD_NUMBER: _ClassVar[int] ids: _containers.RepeatedScalarFieldContainer[str] @@ -186,13 +186,13 @@ class GetVectorsRequest(_message.Message): def __init__(self, ids: _Optional[_Iterable[str]] = ..., segment_id: _Optional[str] = ...) -> None: ... class GetVectorsResponse(_message.Message): - __slots__ = ["records"] + __slots__ = ("records",) RECORDS_FIELD_NUMBER: _ClassVar[int] records: _containers.RepeatedCompositeFieldContainer[VectorEmbeddingRecord] def __init__(self, records: _Optional[_Iterable[_Union[VectorEmbeddingRecord, _Mapping]]] = ...) -> None: ... class QueryVectorsRequest(_message.Message): - __slots__ = ["vectors", "k", "allowed_ids", "include_embeddings", "segment_id"] + __slots__ = ("vectors", "k", "allowed_ids", "include_embeddings", "segment_id") VECTORS_FIELD_NUMBER: _ClassVar[int] K_FIELD_NUMBER: _ClassVar[int] ALLOWED_IDS_FIELD_NUMBER: _ClassVar[int] @@ -206,7 +206,29 @@ class QueryVectorsRequest(_message.Message): def __init__(self, vectors: _Optional[_Iterable[_Union[Vector, _Mapping]]] = ..., k: _Optional[int] = ..., allowed_ids: _Optional[_Iterable[str]] = ..., include_embeddings: bool = ..., segment_id: _Optional[str] = ...) -> None: ... class QueryVectorsResponse(_message.Message): - __slots__ = ["results"] + __slots__ = ("results",) RESULTS_FIELD_NUMBER: _ClassVar[int] results: _containers.RepeatedCompositeFieldContainer[VectorQueryResults] def __init__(self, results: _Optional[_Iterable[_Union[VectorQueryResults, _Mapping]]] = ...) -> None: ... + +class LocalSegmentMetadataTuple(_message.Message): + __slots__ = ("embedding_id", "hnsw_label", "seq_id") + EMBEDDING_ID_FIELD_NUMBER: _ClassVar[int] + HNSW_LABEL_FIELD_NUMBER: _ClassVar[int] + SEQ_ID_FIELD_NUMBER: _ClassVar[int] + embedding_id: str + hnsw_label: int + seq_id: int + def __init__(self, embedding_id: _Optional[str] = ..., hnsw_label: _Optional[int] = ..., seq_id: _Optional[int] = ...) -> None: ... + +class LocalSegmentMetadata(_message.Message): + __slots__ = ("tuples", "dimensionality", "total_elements_added", "max_seq_id") + TUPLES_FIELD_NUMBER: _ClassVar[int] + DIMENSIONALITY_FIELD_NUMBER: _ClassVar[int] + TOTAL_ELEMENTS_ADDED_FIELD_NUMBER: _ClassVar[int] + MAX_SEQ_ID_FIELD_NUMBER: _ClassVar[int] + tuples: _containers.RepeatedCompositeFieldContainer[LocalSegmentMetadataTuple] + dimensionality: int + total_elements_added: int + max_seq_id: int + def __init__(self, tuples: _Optional[_Iterable[_Union[LocalSegmentMetadataTuple, _Mapping]]] = ..., dimensionality: _Optional[int] = ..., total_elements_added: _Optional[int] = ..., max_seq_id: _Optional[int] = ...) -> None: ... diff --git a/chromadb/proto/coordinator_pb2.py b/chromadb/proto/coordinator_pb2.py index 7264a86f0380..d54b0db179b3 100644 --- a/chromadb/proto/coordinator_pb2.py +++ b/chromadb/proto/coordinator_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: chromadb/proto/coordinator.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -15,16 +16,16 @@ from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n chromadb/proto/coordinator.proto\x12\x06\x63hroma\x1a\x1b\x63hromadb/proto/chroma.proto\x1a\x1bgoogle/protobuf/empty.proto\"A\n\x15\x43reateDatabaseRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"8\n\x16\x43reateDatabaseResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"2\n\x12GetDatabaseRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\"Y\n\x13GetDatabaseResponse\x12\"\n\x08\x64\x61tabase\x18\x01 \x01(\x0b\x32\x10.chroma.Database\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"#\n\x13\x43reateTenantRequest\x12\x0c\n\x04name\x18\x02 \x01(\t\"6\n\x14\x43reateTenantResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\" \n\x10GetTenantRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"S\n\x11GetTenantResponse\x12\x1e\n\x06tenant\x18\x01 \x01(\x0b\x32\x0e.chroma.Tenant\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"8\n\x14\x43reateSegmentRequest\x12 \n\x07segment\x18\x01 \x01(\x0b\x32\x0f.chroma.Segment\"7\n\x15\x43reateSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\"\n\x14\x44\x65leteSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\"7\n\x15\x44\x65leteSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\xc2\x01\n\x12GetSegmentsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12(\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScopeH\x02\x88\x01\x01\x12\x12\n\x05topic\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x17\n\ncollection\x18\x05 \x01(\tH\x04\x88\x01\x01\x42\x05\n\x03_idB\x07\n\x05_typeB\x08\n\x06_scopeB\x08\n\x06_topicB\r\n\x0b_collection\"X\n\x13GetSegmentsResponse\x12!\n\x08segments\x18\x01 \x03(\x0b\x32\x0f.chroma.Segment\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xfa\x01\n\x14UpdateSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x05topic\x18\x02 \x01(\tH\x00\x12\x15\n\x0breset_topic\x18\x03 \x01(\x08H\x00\x12\x14\n\ncollection\x18\x04 \x01(\tH\x01\x12\x1a\n\x10reset_collection\x18\x05 \x01(\x08H\x01\x12*\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x02\x12\x18\n\x0ereset_metadata\x18\x07 \x01(\x08H\x02\x42\x0e\n\x0ctopic_updateB\x13\n\x11\x63ollection_updateB\x11\n\x0fmetadata_update\"7\n\x15UpdateSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\xe5\x01\n\x17\x43reateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x01\x88\x01\x01\x12\x1a\n\rget_or_create\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimensionB\x10\n\x0e_get_or_create\"s\n\x18\x43reateCollectionResponse\x12&\n\ncollection\x18\x01 \x01(\x0b\x32\x12.chroma.Collection\x12\x0f\n\x07\x63reated\x18\x02 \x01(\x08\x12\x1e\n\x06status\x18\x03 \x01(\x0b\x32\x0e.chroma.Status\"G\n\x17\x44\x65leteCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x03 \x01(\t\":\n\x18\x44\x65leteCollectionResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\x8b\x01\n\x15GetCollectionsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x12\n\x05topic\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x04 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x05 \x01(\tB\x05\n\x03_idB\x07\n\x05_nameB\x08\n\x06_topic\"a\n\x16GetCollectionsResponse\x12\'\n\x0b\x63ollections\x18\x01 \x03(\x0b\x32\x12.chroma.Collection\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xde\x01\n\x17UpdateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x05topic\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12*\n\x08metadata\x18\x05 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x12\x18\n\x0ereset_metadata\x18\x06 \x01(\x08H\x00\x42\x11\n\x0fmetadata_updateB\x08\n\x06_topicB\x07\n\x05_nameB\x0c\n\n_dimension\":\n\x18UpdateCollectionResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"O\n\x0cNotification\x12\n\n\x02id\x18\x01 \x01(\x03\x12\x15\n\rcollection_id\x18\x02 \x01(\t\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x0e\n\x06status\x18\x04 \x01(\t\"4\n\x12ResetStateResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\":\n%GetLastCompactionTimeForTenantRequest\x12\x11\n\ttenant_id\x18\x01 \x03(\t\"K\n\x18TenantLastCompactionTime\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x1c\n\x14last_compaction_time\x18\x02 \x01(\x03\"o\n&GetLastCompactionTimeForTenantResponse\x12\x45\n\x1btenant_last_compaction_time\x18\x01 \x03(\x0b\x32 .chroma.TenantLastCompactionTime\"n\n%SetLastCompactionTimeForTenantRequest\x12\x45\n\x1btenant_last_compaction_time\x18\x01 \x01(\x0b\x32 .chroma.TenantLastCompactionTime\"\xbc\x01\n\x1a\x46lushSegmentCompactionInfo\x12\x12\n\nsegment_id\x18\x01 \x01(\t\x12\x45\n\nfile_paths\x18\x02 \x03(\x0b\x32\x31.chroma.FlushSegmentCompactionInfo.FilePathsEntry\x1a\x43\n\x0e\x46ilePathsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12 \n\x05value\x18\x02 \x01(\x0b\x32\x11.chroma.FilePaths:\x02\x38\x01\"\xc3\x01\n FlushCollectionCompactionRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x15\n\rcollection_id\x18\x02 \x01(\t\x12\x14\n\x0clog_position\x18\x03 \x01(\x03\x12\x1a\n\x12\x63ollection_version\x18\x04 \x01(\x05\x12\x43\n\x17segment_compaction_info\x18\x05 \x03(\x0b\x32\".chroma.FlushSegmentCompactionInfo\"t\n!FlushCollectionCompactionResponse\x12\x15\n\rcollection_id\x18\x01 \x01(\t\x12\x1a\n\x12\x63ollection_version\x18\x02 \x01(\x05\x12\x1c\n\x14last_compaction_time\x18\x03 \x01(\x03\x32\xf4\n\n\x05SysDB\x12Q\n\x0e\x43reateDatabase\x12\x1d.chroma.CreateDatabaseRequest\x1a\x1e.chroma.CreateDatabaseResponse\"\x00\x12H\n\x0bGetDatabase\x12\x1a.chroma.GetDatabaseRequest\x1a\x1b.chroma.GetDatabaseResponse\"\x00\x12K\n\x0c\x43reateTenant\x12\x1b.chroma.CreateTenantRequest\x1a\x1c.chroma.CreateTenantResponse\"\x00\x12\x42\n\tGetTenant\x12\x18.chroma.GetTenantRequest\x1a\x19.chroma.GetTenantResponse\"\x00\x12N\n\rCreateSegment\x12\x1c.chroma.CreateSegmentRequest\x1a\x1d.chroma.CreateSegmentResponse\"\x00\x12N\n\rDeleteSegment\x12\x1c.chroma.DeleteSegmentRequest\x1a\x1d.chroma.DeleteSegmentResponse\"\x00\x12H\n\x0bGetSegments\x12\x1a.chroma.GetSegmentsRequest\x1a\x1b.chroma.GetSegmentsResponse\"\x00\x12N\n\rUpdateSegment\x12\x1c.chroma.UpdateSegmentRequest\x1a\x1d.chroma.UpdateSegmentResponse\"\x00\x12W\n\x10\x43reateCollection\x12\x1f.chroma.CreateCollectionRequest\x1a .chroma.CreateCollectionResponse\"\x00\x12W\n\x10\x44\x65leteCollection\x12\x1f.chroma.DeleteCollectionRequest\x1a .chroma.DeleteCollectionResponse\"\x00\x12Q\n\x0eGetCollections\x12\x1d.chroma.GetCollectionsRequest\x1a\x1e.chroma.GetCollectionsResponse\"\x00\x12W\n\x10UpdateCollection\x12\x1f.chroma.UpdateCollectionRequest\x1a .chroma.UpdateCollectionResponse\"\x00\x12\x42\n\nResetState\x12\x16.google.protobuf.Empty\x1a\x1a.chroma.ResetStateResponse\"\x00\x12\x81\x01\n\x1eGetLastCompactionTimeForTenant\x12-.chroma.GetLastCompactionTimeForTenantRequest\x1a..chroma.GetLastCompactionTimeForTenantResponse\"\x00\x12i\n\x1eSetLastCompactionTimeForTenant\x12-.chroma.SetLastCompactionTimeForTenantRequest\x1a\x16.google.protobuf.Empty\"\x00\x12r\n\x19\x46lushCollectionCompaction\x12(.chroma.FlushCollectionCompactionRequest\x1a).chroma.FlushCollectionCompactionResponse\"\x00\x42:Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpbb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n chromadb/proto/coordinator.proto\x12\x06\x63hroma\x1a\x1b\x63hromadb/proto/chroma.proto\x1a\x1bgoogle/protobuf/empty.proto\"A\n\x15\x43reateDatabaseRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"8\n\x16\x43reateDatabaseResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"2\n\x12GetDatabaseRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\"Y\n\x13GetDatabaseResponse\x12\"\n\x08\x64\x61tabase\x18\x01 \x01(\x0b\x32\x10.chroma.Database\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"#\n\x13\x43reateTenantRequest\x12\x0c\n\x04name\x18\x02 \x01(\t\"6\n\x14\x43reateTenantResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\" \n\x10GetTenantRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"S\n\x11GetTenantResponse\x12\x1e\n\x06tenant\x18\x01 \x01(\x0b\x32\x0e.chroma.Tenant\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"8\n\x14\x43reateSegmentRequest\x12 \n\x07segment\x18\x01 \x01(\x0b\x32\x0f.chroma.Segment\"7\n\x15\x43reateSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\"\n\x14\x44\x65leteSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\"7\n\x15\x44\x65leteSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\xa4\x01\n\x12GetSegmentsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12(\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScopeH\x02\x88\x01\x01\x12\x17\n\ncollection\x18\x05 \x01(\tH\x03\x88\x01\x01\x42\x05\n\x03_idB\x07\n\x05_typeB\x08\n\x06_scopeB\r\n\x0b_collection\"X\n\x13GetSegmentsResponse\x12!\n\x08segments\x18\x01 \x03(\x0b\x32\x0f.chroma.Segment\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xc2\x01\n\x14UpdateSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x14\n\ncollection\x18\x04 \x01(\tH\x00\x12\x1a\n\x10reset_collection\x18\x05 \x01(\x08H\x00\x12*\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x12\x18\n\x0ereset_metadata\x18\x07 \x01(\x08H\x01\x42\x13\n\x11\x63ollection_updateB\x11\n\x0fmetadata_update\"7\n\x15UpdateSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\xe5\x01\n\x17\x43reateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x01\x88\x01\x01\x12\x1a\n\rget_or_create\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimensionB\x10\n\x0e_get_or_create\"s\n\x18\x43reateCollectionResponse\x12&\n\ncollection\x18\x01 \x01(\x0b\x32\x12.chroma.Collection\x12\x0f\n\x07\x63reated\x18\x02 \x01(\x08\x12\x1e\n\x06status\x18\x03 \x01(\x0b\x32\x0e.chroma.Status\"G\n\x17\x44\x65leteCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x03 \x01(\t\":\n\x18\x44\x65leteCollectionResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"m\n\x15GetCollectionsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x0e\n\x06tenant\x18\x04 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x05 \x01(\tB\x05\n\x03_idB\x07\n\x05_name\"a\n\x16GetCollectionsResponse\x12\'\n\x0b\x63ollections\x18\x01 \x03(\x0b\x32\x12.chroma.Collection\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xc0\x01\n\x17UpdateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x11\n\x04name\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x02\x88\x01\x01\x12*\n\x08metadata\x18\x05 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x12\x18\n\x0ereset_metadata\x18\x06 \x01(\x08H\x00\x42\x11\n\x0fmetadata_updateB\x07\n\x05_nameB\x0c\n\n_dimension\":\n\x18UpdateCollectionResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"O\n\x0cNotification\x12\n\n\x02id\x18\x01 \x01(\x03\x12\x15\n\rcollection_id\x18\x02 \x01(\t\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x0e\n\x06status\x18\x04 \x01(\t\"4\n\x12ResetStateResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\":\n%GetLastCompactionTimeForTenantRequest\x12\x11\n\ttenant_id\x18\x01 \x03(\t\"K\n\x18TenantLastCompactionTime\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x1c\n\x14last_compaction_time\x18\x02 \x01(\x03\"o\n&GetLastCompactionTimeForTenantResponse\x12\x45\n\x1btenant_last_compaction_time\x18\x01 \x03(\x0b\x32 .chroma.TenantLastCompactionTime\"n\n%SetLastCompactionTimeForTenantRequest\x12\x45\n\x1btenant_last_compaction_time\x18\x01 \x01(\x0b\x32 .chroma.TenantLastCompactionTime\"\xbc\x01\n\x1a\x46lushSegmentCompactionInfo\x12\x12\n\nsegment_id\x18\x01 \x01(\t\x12\x45\n\nfile_paths\x18\x02 \x03(\x0b\x32\x31.chroma.FlushSegmentCompactionInfo.FilePathsEntry\x1a\x43\n\x0e\x46ilePathsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12 \n\x05value\x18\x02 \x01(\x0b\x32\x11.chroma.FilePaths:\x02\x38\x01\"\xc3\x01\n FlushCollectionCompactionRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x15\n\rcollection_id\x18\x02 \x01(\t\x12\x14\n\x0clog_position\x18\x03 \x01(\x03\x12\x1a\n\x12\x63ollection_version\x18\x04 \x01(\x05\x12\x43\n\x17segment_compaction_info\x18\x05 \x03(\x0b\x32\".chroma.FlushSegmentCompactionInfo\"t\n!FlushCollectionCompactionResponse\x12\x15\n\rcollection_id\x18\x01 \x01(\t\x12\x1a\n\x12\x63ollection_version\x18\x02 \x01(\x05\x12\x1c\n\x14last_compaction_time\x18\x03 \x01(\x03\x32\xf4\n\n\x05SysDB\x12Q\n\x0e\x43reateDatabase\x12\x1d.chroma.CreateDatabaseRequest\x1a\x1e.chroma.CreateDatabaseResponse\"\x00\x12H\n\x0bGetDatabase\x12\x1a.chroma.GetDatabaseRequest\x1a\x1b.chroma.GetDatabaseResponse\"\x00\x12K\n\x0c\x43reateTenant\x12\x1b.chroma.CreateTenantRequest\x1a\x1c.chroma.CreateTenantResponse\"\x00\x12\x42\n\tGetTenant\x12\x18.chroma.GetTenantRequest\x1a\x19.chroma.GetTenantResponse\"\x00\x12N\n\rCreateSegment\x12\x1c.chroma.CreateSegmentRequest\x1a\x1d.chroma.CreateSegmentResponse\"\x00\x12N\n\rDeleteSegment\x12\x1c.chroma.DeleteSegmentRequest\x1a\x1d.chroma.DeleteSegmentResponse\"\x00\x12H\n\x0bGetSegments\x12\x1a.chroma.GetSegmentsRequest\x1a\x1b.chroma.GetSegmentsResponse\"\x00\x12N\n\rUpdateSegment\x12\x1c.chroma.UpdateSegmentRequest\x1a\x1d.chroma.UpdateSegmentResponse\"\x00\x12W\n\x10\x43reateCollection\x12\x1f.chroma.CreateCollectionRequest\x1a .chroma.CreateCollectionResponse\"\x00\x12W\n\x10\x44\x65leteCollection\x12\x1f.chroma.DeleteCollectionRequest\x1a .chroma.DeleteCollectionResponse\"\x00\x12Q\n\x0eGetCollections\x12\x1d.chroma.GetCollectionsRequest\x1a\x1e.chroma.GetCollectionsResponse\"\x00\x12W\n\x10UpdateCollection\x12\x1f.chroma.UpdateCollectionRequest\x1a .chroma.UpdateCollectionResponse\"\x00\x12\x42\n\nResetState\x12\x16.google.protobuf.Empty\x1a\x1a.chroma.ResetStateResponse\"\x00\x12\x81\x01\n\x1eGetLastCompactionTimeForTenant\x12-.chroma.GetLastCompactionTimeForTenantRequest\x1a..chroma.GetLastCompactionTimeForTenantResponse\"\x00\x12i\n\x1eSetLastCompactionTimeForTenant\x12-.chroma.SetLastCompactionTimeForTenantRequest\x1a\x16.google.protobuf.Empty\"\x00\x12r\n\x19\x46lushCollectionCompaction\x12(.chroma.FlushCollectionCompactionRequest\x1a).chroma.FlushCollectionCompactionResponse\"\x00\x42:Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpbb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'chromadb.proto.coordinator_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpb' - _FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY._options = None - _FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpb' + _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._options = None + _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._serialized_options = b'8\001' _globals['_CREATEDATABASEREQUEST']._serialized_start=102 _globals['_CREATEDATABASEREQUEST']._serialized_end=167 _globals['_CREATEDATABASERESPONSE']._serialized_start=169 @@ -50,49 +51,49 @@ _globals['_DELETESEGMENTRESPONSE']._serialized_start=733 _globals['_DELETESEGMENTRESPONSE']._serialized_end=788 _globals['_GETSEGMENTSREQUEST']._serialized_start=791 - _globals['_GETSEGMENTSREQUEST']._serialized_end=985 - _globals['_GETSEGMENTSRESPONSE']._serialized_start=987 - _globals['_GETSEGMENTSRESPONSE']._serialized_end=1075 - _globals['_UPDATESEGMENTREQUEST']._serialized_start=1078 - _globals['_UPDATESEGMENTREQUEST']._serialized_end=1328 - _globals['_UPDATESEGMENTRESPONSE']._serialized_start=1330 - _globals['_UPDATESEGMENTRESPONSE']._serialized_end=1385 - _globals['_CREATECOLLECTIONREQUEST']._serialized_start=1388 - _globals['_CREATECOLLECTIONREQUEST']._serialized_end=1617 - _globals['_CREATECOLLECTIONRESPONSE']._serialized_start=1619 - _globals['_CREATECOLLECTIONRESPONSE']._serialized_end=1734 - _globals['_DELETECOLLECTIONREQUEST']._serialized_start=1736 - _globals['_DELETECOLLECTIONREQUEST']._serialized_end=1807 - _globals['_DELETECOLLECTIONRESPONSE']._serialized_start=1809 - _globals['_DELETECOLLECTIONRESPONSE']._serialized_end=1867 - _globals['_GETCOLLECTIONSREQUEST']._serialized_start=1870 - _globals['_GETCOLLECTIONSREQUEST']._serialized_end=2009 - _globals['_GETCOLLECTIONSRESPONSE']._serialized_start=2011 - _globals['_GETCOLLECTIONSRESPONSE']._serialized_end=2108 - _globals['_UPDATECOLLECTIONREQUEST']._serialized_start=2111 - _globals['_UPDATECOLLECTIONREQUEST']._serialized_end=2333 - _globals['_UPDATECOLLECTIONRESPONSE']._serialized_start=2335 - _globals['_UPDATECOLLECTIONRESPONSE']._serialized_end=2393 - _globals['_NOTIFICATION']._serialized_start=2395 - _globals['_NOTIFICATION']._serialized_end=2474 - _globals['_RESETSTATERESPONSE']._serialized_start=2476 - _globals['_RESETSTATERESPONSE']._serialized_end=2528 - _globals['_GETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_start=2530 - _globals['_GETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_end=2588 - _globals['_TENANTLASTCOMPACTIONTIME']._serialized_start=2590 - _globals['_TENANTLASTCOMPACTIONTIME']._serialized_end=2665 - _globals['_GETLASTCOMPACTIONTIMEFORTENANTRESPONSE']._serialized_start=2667 - _globals['_GETLASTCOMPACTIONTIMEFORTENANTRESPONSE']._serialized_end=2778 - _globals['_SETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_start=2780 - _globals['_SETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_end=2890 - _globals['_FLUSHSEGMENTCOMPACTIONINFO']._serialized_start=2893 - _globals['_FLUSHSEGMENTCOMPACTIONINFO']._serialized_end=3081 - _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._serialized_start=3014 - _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._serialized_end=3081 - _globals['_FLUSHCOLLECTIONCOMPACTIONREQUEST']._serialized_start=3084 - _globals['_FLUSHCOLLECTIONCOMPACTIONREQUEST']._serialized_end=3279 - _globals['_FLUSHCOLLECTIONCOMPACTIONRESPONSE']._serialized_start=3281 - _globals['_FLUSHCOLLECTIONCOMPACTIONRESPONSE']._serialized_end=3397 - _globals['_SYSDB']._serialized_start=3400 - _globals['_SYSDB']._serialized_end=4796 + _globals['_GETSEGMENTSREQUEST']._serialized_end=955 + _globals['_GETSEGMENTSRESPONSE']._serialized_start=957 + _globals['_GETSEGMENTSRESPONSE']._serialized_end=1045 + _globals['_UPDATESEGMENTREQUEST']._serialized_start=1048 + _globals['_UPDATESEGMENTREQUEST']._serialized_end=1242 + _globals['_UPDATESEGMENTRESPONSE']._serialized_start=1244 + _globals['_UPDATESEGMENTRESPONSE']._serialized_end=1299 + _globals['_CREATECOLLECTIONREQUEST']._serialized_start=1302 + _globals['_CREATECOLLECTIONREQUEST']._serialized_end=1531 + _globals['_CREATECOLLECTIONRESPONSE']._serialized_start=1533 + _globals['_CREATECOLLECTIONRESPONSE']._serialized_end=1648 + _globals['_DELETECOLLECTIONREQUEST']._serialized_start=1650 + _globals['_DELETECOLLECTIONREQUEST']._serialized_end=1721 + _globals['_DELETECOLLECTIONRESPONSE']._serialized_start=1723 + _globals['_DELETECOLLECTIONRESPONSE']._serialized_end=1781 + _globals['_GETCOLLECTIONSREQUEST']._serialized_start=1783 + _globals['_GETCOLLECTIONSREQUEST']._serialized_end=1892 + _globals['_GETCOLLECTIONSRESPONSE']._serialized_start=1894 + _globals['_GETCOLLECTIONSRESPONSE']._serialized_end=1991 + _globals['_UPDATECOLLECTIONREQUEST']._serialized_start=1994 + _globals['_UPDATECOLLECTIONREQUEST']._serialized_end=2186 + _globals['_UPDATECOLLECTIONRESPONSE']._serialized_start=2188 + _globals['_UPDATECOLLECTIONRESPONSE']._serialized_end=2246 + _globals['_NOTIFICATION']._serialized_start=2248 + _globals['_NOTIFICATION']._serialized_end=2327 + _globals['_RESETSTATERESPONSE']._serialized_start=2329 + _globals['_RESETSTATERESPONSE']._serialized_end=2381 + _globals['_GETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_start=2383 + _globals['_GETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_end=2441 + _globals['_TENANTLASTCOMPACTIONTIME']._serialized_start=2443 + _globals['_TENANTLASTCOMPACTIONTIME']._serialized_end=2518 + _globals['_GETLASTCOMPACTIONTIMEFORTENANTRESPONSE']._serialized_start=2520 + _globals['_GETLASTCOMPACTIONTIMEFORTENANTRESPONSE']._serialized_end=2631 + _globals['_SETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_start=2633 + _globals['_SETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_end=2743 + _globals['_FLUSHSEGMENTCOMPACTIONINFO']._serialized_start=2746 + _globals['_FLUSHSEGMENTCOMPACTIONINFO']._serialized_end=2934 + _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._serialized_start=2867 + _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._serialized_end=2934 + _globals['_FLUSHCOLLECTIONCOMPACTIONREQUEST']._serialized_start=2937 + _globals['_FLUSHCOLLECTIONCOMPACTIONREQUEST']._serialized_end=3132 + _globals['_FLUSHCOLLECTIONCOMPACTIONRESPONSE']._serialized_start=3134 + _globals['_FLUSHCOLLECTIONCOMPACTIONRESPONSE']._serialized_end=3250 + _globals['_SYSDB']._serialized_start=3253 + _globals['_SYSDB']._serialized_end=4649 # @@protoc_insertion_point(module_scope) diff --git a/chromadb/proto/coordinator_pb2.pyi b/chromadb/proto/coordinator_pb2.pyi index 6175b63917e7..15f167516f74 100644 --- a/chromadb/proto/coordinator_pb2.pyi +++ b/chromadb/proto/coordinator_pb2.pyi @@ -8,7 +8,7 @@ from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Map DESCRIPTOR: _descriptor.FileDescriptor class CreateDatabaseRequest(_message.Message): - __slots__ = ["id", "name", "tenant"] + __slots__ = ("id", "name", "tenant") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] TENANT_FIELD_NUMBER: _ClassVar[int] @@ -18,13 +18,13 @@ class CreateDatabaseRequest(_message.Message): def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., tenant: _Optional[str] = ...) -> None: ... class CreateDatabaseResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class GetDatabaseRequest(_message.Message): - __slots__ = ["name", "tenant"] + __slots__ = ("name", "tenant") NAME_FIELD_NUMBER: _ClassVar[int] TENANT_FIELD_NUMBER: _ClassVar[int] name: str @@ -32,7 +32,7 @@ class GetDatabaseRequest(_message.Message): def __init__(self, name: _Optional[str] = ..., tenant: _Optional[str] = ...) -> None: ... class GetDatabaseResponse(_message.Message): - __slots__ = ["database", "status"] + __slots__ = ("database", "status") DATABASE_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] database: _chroma_pb2.Database @@ -40,25 +40,25 @@ class GetDatabaseResponse(_message.Message): def __init__(self, database: _Optional[_Union[_chroma_pb2.Database, _Mapping]] = ..., status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class CreateTenantRequest(_message.Message): - __slots__ = ["name"] + __slots__ = ("name",) NAME_FIELD_NUMBER: _ClassVar[int] name: str def __init__(self, name: _Optional[str] = ...) -> None: ... class CreateTenantResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class GetTenantRequest(_message.Message): - __slots__ = ["name"] + __slots__ = ("name",) NAME_FIELD_NUMBER: _ClassVar[int] name: str def __init__(self, name: _Optional[str] = ...) -> None: ... class GetTenantResponse(_message.Message): - __slots__ = ["tenant", "status"] + __slots__ = ("tenant", "status") TENANT_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] tenant: _chroma_pb2.Tenant @@ -66,45 +66,43 @@ class GetTenantResponse(_message.Message): def __init__(self, tenant: _Optional[_Union[_chroma_pb2.Tenant, _Mapping]] = ..., status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class CreateSegmentRequest(_message.Message): - __slots__ = ["segment"] + __slots__ = ("segment",) SEGMENT_FIELD_NUMBER: _ClassVar[int] segment: _chroma_pb2.Segment def __init__(self, segment: _Optional[_Union[_chroma_pb2.Segment, _Mapping]] = ...) -> None: ... class CreateSegmentResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class DeleteSegmentRequest(_message.Message): - __slots__ = ["id"] + __slots__ = ("id",) ID_FIELD_NUMBER: _ClassVar[int] id: str def __init__(self, id: _Optional[str] = ...) -> None: ... class DeleteSegmentResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class GetSegmentsRequest(_message.Message): - __slots__ = ["id", "type", "scope", "topic", "collection"] + __slots__ = ("id", "type", "scope", "collection") ID_FIELD_NUMBER: _ClassVar[int] TYPE_FIELD_NUMBER: _ClassVar[int] SCOPE_FIELD_NUMBER: _ClassVar[int] - TOPIC_FIELD_NUMBER: _ClassVar[int] COLLECTION_FIELD_NUMBER: _ClassVar[int] id: str type: str scope: _chroma_pb2.SegmentScope - topic: str collection: str - def __init__(self, id: _Optional[str] = ..., type: _Optional[str] = ..., scope: _Optional[_Union[_chroma_pb2.SegmentScope, str]] = ..., topic: _Optional[str] = ..., collection: _Optional[str] = ...) -> None: ... + def __init__(self, id: _Optional[str] = ..., type: _Optional[str] = ..., scope: _Optional[_Union[_chroma_pb2.SegmentScope, str]] = ..., collection: _Optional[str] = ...) -> None: ... class GetSegmentsResponse(_message.Message): - __slots__ = ["segments", "status"] + __slots__ = ("segments", "status") SEGMENTS_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] segments: _containers.RepeatedCompositeFieldContainer[_chroma_pb2.Segment] @@ -112,31 +110,27 @@ class GetSegmentsResponse(_message.Message): def __init__(self, segments: _Optional[_Iterable[_Union[_chroma_pb2.Segment, _Mapping]]] = ..., status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class UpdateSegmentRequest(_message.Message): - __slots__ = ["id", "topic", "reset_topic", "collection", "reset_collection", "metadata", "reset_metadata"] + __slots__ = ("id", "collection", "reset_collection", "metadata", "reset_metadata") ID_FIELD_NUMBER: _ClassVar[int] - TOPIC_FIELD_NUMBER: _ClassVar[int] - RESET_TOPIC_FIELD_NUMBER: _ClassVar[int] COLLECTION_FIELD_NUMBER: _ClassVar[int] RESET_COLLECTION_FIELD_NUMBER: _ClassVar[int] METADATA_FIELD_NUMBER: _ClassVar[int] RESET_METADATA_FIELD_NUMBER: _ClassVar[int] id: str - topic: str - reset_topic: bool collection: str reset_collection: bool metadata: _chroma_pb2.UpdateMetadata reset_metadata: bool - def __init__(self, id: _Optional[str] = ..., topic: _Optional[str] = ..., reset_topic: bool = ..., collection: _Optional[str] = ..., reset_collection: bool = ..., metadata: _Optional[_Union[_chroma_pb2.UpdateMetadata, _Mapping]] = ..., reset_metadata: bool = ...) -> None: ... + def __init__(self, id: _Optional[str] = ..., collection: _Optional[str] = ..., reset_collection: bool = ..., metadata: _Optional[_Union[_chroma_pb2.UpdateMetadata, _Mapping]] = ..., reset_metadata: bool = ...) -> None: ... class UpdateSegmentResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class CreateCollectionRequest(_message.Message): - __slots__ = ["id", "name", "metadata", "dimension", "get_or_create", "tenant", "database"] + __slots__ = ("id", "name", "metadata", "dimension", "get_or_create", "tenant", "database") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] METADATA_FIELD_NUMBER: _ClassVar[int] @@ -154,7 +148,7 @@ class CreateCollectionRequest(_message.Message): def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., metadata: _Optional[_Union[_chroma_pb2.UpdateMetadata, _Mapping]] = ..., dimension: _Optional[int] = ..., get_or_create: bool = ..., tenant: _Optional[str] = ..., database: _Optional[str] = ...) -> None: ... class CreateCollectionResponse(_message.Message): - __slots__ = ["collection", "created", "status"] + __slots__ = ("collection", "created", "status") COLLECTION_FIELD_NUMBER: _ClassVar[int] CREATED_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] @@ -164,7 +158,7 @@ class CreateCollectionResponse(_message.Message): def __init__(self, collection: _Optional[_Union[_chroma_pb2.Collection, _Mapping]] = ..., created: bool = ..., status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class DeleteCollectionRequest(_message.Message): - __slots__ = ["id", "tenant", "database"] + __slots__ = ("id", "tenant", "database") ID_FIELD_NUMBER: _ClassVar[int] TENANT_FIELD_NUMBER: _ClassVar[int] DATABASE_FIELD_NUMBER: _ClassVar[int] @@ -174,27 +168,25 @@ class DeleteCollectionRequest(_message.Message): def __init__(self, id: _Optional[str] = ..., tenant: _Optional[str] = ..., database: _Optional[str] = ...) -> None: ... class DeleteCollectionResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class GetCollectionsRequest(_message.Message): - __slots__ = ["id", "name", "topic", "tenant", "database"] + __slots__ = ("id", "name", "tenant", "database") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] - TOPIC_FIELD_NUMBER: _ClassVar[int] TENANT_FIELD_NUMBER: _ClassVar[int] DATABASE_FIELD_NUMBER: _ClassVar[int] id: str name: str - topic: str tenant: str database: str - def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., topic: _Optional[str] = ..., tenant: _Optional[str] = ..., database: _Optional[str] = ...) -> None: ... + def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., tenant: _Optional[str] = ..., database: _Optional[str] = ...) -> None: ... class GetCollectionsResponse(_message.Message): - __slots__ = ["collections", "status"] + __slots__ = ("collections", "status") COLLECTIONS_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] collections: _containers.RepeatedCompositeFieldContainer[_chroma_pb2.Collection] @@ -202,29 +194,27 @@ class GetCollectionsResponse(_message.Message): def __init__(self, collections: _Optional[_Iterable[_Union[_chroma_pb2.Collection, _Mapping]]] = ..., status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class UpdateCollectionRequest(_message.Message): - __slots__ = ["id", "topic", "name", "dimension", "metadata", "reset_metadata"] + __slots__ = ("id", "name", "dimension", "metadata", "reset_metadata") ID_FIELD_NUMBER: _ClassVar[int] - TOPIC_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] DIMENSION_FIELD_NUMBER: _ClassVar[int] METADATA_FIELD_NUMBER: _ClassVar[int] RESET_METADATA_FIELD_NUMBER: _ClassVar[int] id: str - topic: str name: str dimension: int metadata: _chroma_pb2.UpdateMetadata reset_metadata: bool - def __init__(self, id: _Optional[str] = ..., topic: _Optional[str] = ..., name: _Optional[str] = ..., dimension: _Optional[int] = ..., metadata: _Optional[_Union[_chroma_pb2.UpdateMetadata, _Mapping]] = ..., reset_metadata: bool = ...) -> None: ... + def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., dimension: _Optional[int] = ..., metadata: _Optional[_Union[_chroma_pb2.UpdateMetadata, _Mapping]] = ..., reset_metadata: bool = ...) -> None: ... class UpdateCollectionResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class Notification(_message.Message): - __slots__ = ["id", "collection_id", "type", "status"] + __slots__ = ("id", "collection_id", "type", "status") ID_FIELD_NUMBER: _ClassVar[int] COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] TYPE_FIELD_NUMBER: _ClassVar[int] @@ -236,19 +226,19 @@ class Notification(_message.Message): def __init__(self, id: _Optional[int] = ..., collection_id: _Optional[str] = ..., type: _Optional[str] = ..., status: _Optional[str] = ...) -> None: ... class ResetStateResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class GetLastCompactionTimeForTenantRequest(_message.Message): - __slots__ = ["tenant_id"] + __slots__ = ("tenant_id",) TENANT_ID_FIELD_NUMBER: _ClassVar[int] tenant_id: _containers.RepeatedScalarFieldContainer[str] def __init__(self, tenant_id: _Optional[_Iterable[str]] = ...) -> None: ... class TenantLastCompactionTime(_message.Message): - __slots__ = ["tenant_id", "last_compaction_time"] + __slots__ = ("tenant_id", "last_compaction_time") TENANT_ID_FIELD_NUMBER: _ClassVar[int] LAST_COMPACTION_TIME_FIELD_NUMBER: _ClassVar[int] tenant_id: str @@ -256,21 +246,21 @@ class TenantLastCompactionTime(_message.Message): def __init__(self, tenant_id: _Optional[str] = ..., last_compaction_time: _Optional[int] = ...) -> None: ... class GetLastCompactionTimeForTenantResponse(_message.Message): - __slots__ = ["tenant_last_compaction_time"] + __slots__ = ("tenant_last_compaction_time",) TENANT_LAST_COMPACTION_TIME_FIELD_NUMBER: _ClassVar[int] tenant_last_compaction_time: _containers.RepeatedCompositeFieldContainer[TenantLastCompactionTime] def __init__(self, tenant_last_compaction_time: _Optional[_Iterable[_Union[TenantLastCompactionTime, _Mapping]]] = ...) -> None: ... class SetLastCompactionTimeForTenantRequest(_message.Message): - __slots__ = ["tenant_last_compaction_time"] + __slots__ = ("tenant_last_compaction_time",) TENANT_LAST_COMPACTION_TIME_FIELD_NUMBER: _ClassVar[int] tenant_last_compaction_time: TenantLastCompactionTime def __init__(self, tenant_last_compaction_time: _Optional[_Union[TenantLastCompactionTime, _Mapping]] = ...) -> None: ... class FlushSegmentCompactionInfo(_message.Message): - __slots__ = ["segment_id", "file_paths"] + __slots__ = ("segment_id", "file_paths") class FilePathsEntry(_message.Message): - __slots__ = ["key", "value"] + __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] key: str @@ -283,7 +273,7 @@ class FlushSegmentCompactionInfo(_message.Message): def __init__(self, segment_id: _Optional[str] = ..., file_paths: _Optional[_Mapping[str, _chroma_pb2.FilePaths]] = ...) -> None: ... class FlushCollectionCompactionRequest(_message.Message): - __slots__ = ["tenant_id", "collection_id", "log_position", "collection_version", "segment_compaction_info"] + __slots__ = ("tenant_id", "collection_id", "log_position", "collection_version", "segment_compaction_info") TENANT_ID_FIELD_NUMBER: _ClassVar[int] COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] LOG_POSITION_FIELD_NUMBER: _ClassVar[int] @@ -297,7 +287,7 @@ class FlushCollectionCompactionRequest(_message.Message): def __init__(self, tenant_id: _Optional[str] = ..., collection_id: _Optional[str] = ..., log_position: _Optional[int] = ..., collection_version: _Optional[int] = ..., segment_compaction_info: _Optional[_Iterable[_Union[FlushSegmentCompactionInfo, _Mapping]]] = ...) -> None: ... class FlushCollectionCompactionResponse(_message.Message): - __slots__ = ["collection_id", "collection_version", "last_compaction_time"] + __slots__ = ("collection_id", "collection_version", "last_compaction_time") COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] COLLECTION_VERSION_FIELD_NUMBER: _ClassVar[int] LAST_COMPACTION_TIME_FIELD_NUMBER: _ClassVar[int] diff --git a/chromadb/proto/logservice_pb2.py b/chromadb/proto/logservice_pb2.py index 0c7ca972ebe9..3d309f615920 100644 --- a/chromadb/proto/logservice_pb2.py +++ b/chromadb/proto/logservice_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: chromadb/proto/logservice.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -25,10 +26,10 @@ DESCRIPTOR, "chromadb.proto.logservice_pb2", _globals ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = ( - b"Z7github.com/chroma-core/chroma/go/pkg/proto/logservicepb" - ) + _globals["DESCRIPTOR"]._options = None + _globals[ + "DESCRIPTOR" + ]._serialized_options = b"Z7github.com/chroma-core/chroma/go/pkg/proto/logservicepb" _globals["_PUSHLOGSREQUEST"]._serialized_start = 72 _globals["_PUSHLOGSREQUEST"]._serialized_end = 154 _globals["_PUSHLOGSRESPONSE"]._serialized_start = 156 diff --git a/chromadb/proto/logservice_pb2.pyi b/chromadb/proto/logservice_pb2.pyi index 78680253a6d2..25be532cb828 100644 --- a/chromadb/proto/logservice_pb2.pyi +++ b/chromadb/proto/logservice_pb2.pyi @@ -13,7 +13,7 @@ from typing import ( DESCRIPTOR: _descriptor.FileDescriptor class PushLogsRequest(_message.Message): - __slots__ = ["collection_id", "records"] + __slots__ = ("collection_id", "records") COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] RECORDS_FIELD_NUMBER: _ClassVar[int] collection_id: str @@ -27,13 +27,13 @@ class PushLogsRequest(_message.Message): ) -> None: ... class PushLogsResponse(_message.Message): - __slots__ = ["record_count"] + __slots__ = ("record_count",) RECORD_COUNT_FIELD_NUMBER: _ClassVar[int] record_count: int def __init__(self, record_count: _Optional[int] = ...) -> None: ... class PullLogsRequest(_message.Message): - __slots__ = ["collection_id", "start_from_id", "batch_size", "end_timestamp"] + __slots__ = ("collection_id", "start_from_id", "batch_size", "end_timestamp") COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] START_FROM_ID_FIELD_NUMBER: _ClassVar[int] BATCH_SIZE_FIELD_NUMBER: _ClassVar[int] @@ -51,7 +51,7 @@ class PullLogsRequest(_message.Message): ) -> None: ... class RecordLog(_message.Message): - __slots__ = ["log_id", "record"] + __slots__ = ("log_id", "record") LOG_ID_FIELD_NUMBER: _ClassVar[int] RECORD_FIELD_NUMBER: _ClassVar[int] log_id: int @@ -63,7 +63,7 @@ class RecordLog(_message.Message): ) -> None: ... class PullLogsResponse(_message.Message): - __slots__ = ["records"] + __slots__ = ("records",) RECORDS_FIELD_NUMBER: _ClassVar[int] records: _containers.RepeatedCompositeFieldContainer[RecordLog] def __init__( @@ -71,7 +71,7 @@ class PullLogsResponse(_message.Message): ) -> None: ... class CollectionInfo(_message.Message): - __slots__ = ["collection_id", "first_log_id", "first_log_id_ts"] + __slots__ = ("collection_id", "first_log_id", "first_log_id_ts") COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] FIRST_LOG_ID_FIELD_NUMBER: _ClassVar[int] FIRST_LOG_ID_TS_FIELD_NUMBER: _ClassVar[int] @@ -86,11 +86,11 @@ class CollectionInfo(_message.Message): ) -> None: ... class GetAllCollectionInfoToCompactRequest(_message.Message): - __slots__ = [] + __slots__ = () def __init__(self) -> None: ... class GetAllCollectionInfoToCompactResponse(_message.Message): - __slots__ = ["all_collection_info"] + __slots__ = ("all_collection_info",) ALL_COLLECTION_INFO_FIELD_NUMBER: _ClassVar[int] all_collection_info: _containers.RepeatedCompositeFieldContainer[CollectionInfo] def __init__( diff --git a/chromadb/segment/impl/vector/local_persistent_hnsw.py b/chromadb/segment/impl/vector/local_persistent_hnsw.py index d9a3f90faa46..9bd0725e21d3 100644 --- a/chromadb/segment/impl/vector/local_persistent_hnsw.py +++ b/chromadb/segment/impl/vector/local_persistent_hnsw.py @@ -1,8 +1,7 @@ -import orjson as json import os import shutil -from uuid import UUID +from google.protobuf import message from overrides import override import pickle from typing import Dict, List, Optional, Sequence, Set, cast @@ -11,7 +10,7 @@ from chromadb.config import System from chromadb.db.base import ParameterValue, get_sql -from chromadb.db.impl.sqlite import SqliteDB +from chromadb.proto.chroma_pb2 import LocalSegmentMetadataTuple, LocalSegmentMetadata from chromadb.segment.impl.metadata.sqlite import _encode_seq_id, _decode_seq_id from chromadb.segment.impl.vector.batch import Batch from chromadb.segment.impl.vector.hnsw_params import PersistentHnswParams @@ -79,93 +78,69 @@ def load_from_file(filename: str) -> "PersistentData": ret = cast(PersistentData, pickle.load(f)) return ret - @staticmethod - def load_from_sysdb(db: SqliteDB, segment_id: UUID) -> "PersistentData": - t2 = Table("segment_metadata") - q2 = ( - db.querybuilder() - .from_(t2) - .select(t2.key, t2.int_value, t2.str_value) - .where(t2.segment_id == ParameterValue(db.uuid_to_db(segment_id))) + def store_to_proto(self, metadata_file: str) -> None: + result = LocalSegmentMetadata( + tuples=[ + LocalSegmentMetadataTuple( + embedding_id=_id, + hnsw_label=self.id_to_label[_id], + seq_id=self.id_to_seq_id[_id], + ) + for _id in self.id_to_label + ], + max_seq_id=self.max_seq_id, + total_elements_added=self.total_elements_added, + dimensionality=self.dimensionality, ) - sql2, params2 = get_sql(q2) - with db.tx() as cur: - result = cur.execute(sql2, params2).fetchall() - kdict = {r[0]: r[1] if r[1] is not None else r[2] for r in result} - _dimensionality = kdict.get("dimensionality") - _total_elements_added = kdict.get("total_elements_added") - _max_seq_id = kdict.get("max_seq_id") - id_label_seq_id_tuple_list = kdict.get("id_label_seq_id_tuple_list") - if ( - _dimensionality is None - or _total_elements_added is None - or _max_seq_id is None - ): - raise ValueError("Missing required metadata in segment_metadata") - if id_label_seq_id_tuple_list is not None: - tuple_list = json.loads(id_label_seq_id_tuple_list) - _id_to_label = {r[0]: r[1] for r in tuple_list} - _id_to_seq_id = {r[0]: r[2] for r in tuple_list} - _label_to_id = {r[1]: r[0] for r in tuple_list} - else: - raise ValueError("Missing required metadata in segment_metadata") - - return PersistentData( - dimensionality=_dimensionality, - total_elements_added=_total_elements_added, - max_seq_id=_max_seq_id, - id_to_label=_id_to_label, - label_to_id=_label_to_id, - id_to_seq_id=_id_to_seq_id, - ) + with open(metadata_file + ".new", "wb") as f: + f.write(result.SerializeToString()) + # we copy only when the new file is written successfully + shutil.copy(metadata_file + ".new", metadata_file) + os.unlink(metadata_file + ".new") - def store_to_db(self, db: SqliteDB, segment_id: UUID) -> None: - with db.tx() as cur: - q1 = ( - db.querybuilder() - .into(Table("segment_metadata")) - .columns("segment_id", "key", "int_value") - .insert( - ParameterValue(db.uuid_to_db(segment_id)), - "total_elements_added", - self.total_elements_added, - ) - .insert( - ParameterValue(db.uuid_to_db(segment_id)), - "dimensionality", - self.dimensionality, - ) - .insert( - ParameterValue(db.uuid_to_db(segment_id)), - "max_seq_id", - self.max_seq_id, - ) + @staticmethod + def load_from_proto(metadata_file: str) -> "PersistentData": + """Load persistent data from a protobuf file""" + + def _load_from_file(metadata_file_to_load: str) -> LocalSegmentMetadata: + _result = LocalSegmentMetadata() + with open(metadata_file_to_load, "rb") as f: + _result.ParseFromString(f.read()) + return _result + + _new_metadata_file = metadata_file + ".new" + if os.path.exists(_new_metadata_file): + logger.warning( + f"Found new metadata file {metadata_file}.new, using it instead of {metadata_file}" ) - sql, params = get_sql(q1) - sql = sql.replace("INSERT", "INSERT OR REPLACE") - cur.execute(sql, params) - result = [ - (_id, self.id_to_label[_id], self.id_to_seq_id[_id]) - for _id in self.id_to_label - ] - dumped_result = json.dumps(result) - q2 = ( - db.querybuilder() - .into(Table("segment_metadata")) - .columns("segment_id", "key", "str_value") - .insert( - ParameterValue(db.uuid_to_db(segment_id)), - "id_label_seq_id_tuple_list", - ParameterValue(dumped_result), + try: + result = _load_from_file(_new_metadata_file) + except message.DecodeError: + logger.warning( + f"Failed to load metadata file {_new_metadata_file}, " + f"falling back to original file {metadata_file}" ) - ) - sql, params = get_sql(q2) - sql = sql.replace("INSERT", "INSERT OR REPLACE") - cur.execute(sql, params) + result = _load_from_file(metadata_file) + else: + result = _load_from_file(metadata_file) + + id_to_label = {r.embedding_id: r.hnsw_label for r in result.tuples} + id_to_seq_id = {r.embedding_id: r.seq_id for r in result.tuples} + label_to_id = {r.hnsw_label: r.embedding_id for r in result.tuples} + + return PersistentData( + dimensionality=result.dimensionality, + total_elements_added=result.total_elements_added, + max_seq_id=result.max_seq_id, + id_to_label=id_to_label, + label_to_id=label_to_id, + id_to_seq_id=id_to_seq_id, + ) class PersistentLocalHnswSegment(LocalHnswSegment): - METADATA_FILE: str = "index_metadata.pickle" + LEGACY_METADATA_FILE: str = "index_metadata.pickle" # TODO remove in 0.5+ + METADATA_FILE: str = "index_metadata.bin" # How many records to add to index at once, we do this because crossing the python/c++ boundary is expensive (for add()) # When records are not added to the c++ index, they are buffered in memory and served # via brute force search. @@ -197,17 +172,19 @@ def __init__(self, system: System, segment: Segment): os.makedirs(self._get_storage_folder(), exist_ok=True) # Load persist data if it exists already, otherwise create it if self._index_exists(): - # migration from pickle file to sqlite - _migrated = False - if os.path.exists(self._get_metadata_file()): + # migration from pickle file to protobufs + _migrated = False # TODO remove in 0.5+ + if os.path.exists(self._get_legacy_metadata_file()): tmp_persist_data = PersistentData.load_from_file( - self._get_metadata_file() + self._get_legacy_metadata_file() ) - tmp_persist_data.store_to_db(self._db, self._id) + tmp_persist_data.store_to_proto(self._get_metadata_file()) _migrated = True - self._persist_data = PersistentData.load_from_sysdb(self._db, self._id) - if _migrated: - os.remove(self._get_metadata_file()) + self._persist_data = PersistentData.load_from_proto( + self._get_metadata_file() + ) + if _migrated: # TODO remove in 0.5+ + os.remove(self._get_legacy_metadata_file()) self._dimensionality = self._persist_data.dimensionality self._total_elements_added = self._persist_data.total_elements_added self._max_seq_id = self._persist_data.max_seq_id @@ -237,12 +214,18 @@ def propagate_collection_metadata(metadata: Metadata) -> Optional[Metadata]: def _index_exists(self) -> bool: """Check if the index exists via the metadata file""" - return os.path.exists(self._get_metadata_file()) + return os.path.exists(self._get_metadata_file()) or os.path.exists( + self._get_legacy_metadata_file() + ) def _get_metadata_file(self) -> str: """Get the metadata file path""" return os.path.join(self._get_storage_folder(), self.METADATA_FILE) + def _get_legacy_metadata_file(self) -> str: + """Get the metadata file path""" + return os.path.join(self._get_storage_folder(), self.LEGACY_METADATA_FILE) + def _get_storage_folder(self) -> str: """Get the storage folder path""" folder = os.path.join(self._persist_directory, str(self._id)) @@ -317,7 +300,7 @@ def _persist(self) -> None: sql, params = get_sql(q) sql = sql.replace("INSERT", "INSERT OR REPLACE") cur.execute(sql, params) - self._persist_data.store_to_db(self._db, self._id) + self._persist_data.store_to_proto(self._get_metadata_file()) @override def max_seqid(self) -> SeqId: diff --git a/chromadb/test/segment/test_vector.py b/chromadb/test/segment/test_vector.py index 8fa39a90fd70..37dada796042 100644 --- a/chromadb/test/segment/test_vector.py +++ b/chromadb/test/segment/test_vector.py @@ -3,11 +3,8 @@ import pytest from typing import Generator, List, Callable, Iterator, Type, cast, Any, Dict, Optional -from pypika import Table from chromadb.config import System, Settings -from chromadb.db.base import ParameterValue, get_sql -from chromadb.segment.impl.metadata.sqlite import _decode_seq_id from chromadb.test.conftest import ProducerFn from chromadb.types import ( OperationRecord, @@ -30,6 +27,7 @@ from chromadb.segment.impl.vector.local_persistent_hnsw import ( PersistentLocalHnswSegment, + PersistentData, ) from chromadb.test.property.strategies import test_hnsw_config @@ -177,7 +175,7 @@ def test_insert_and_count( assert segment.count() == 6 -def test_insert_with_db_persist( +def test_insert_with_protobuf_persist( system: System, sample_embeddings: Iterator[OperationRecord], vector_reader: Type[VectorReader], @@ -206,34 +204,11 @@ def test_insert_with_db_persist( assert segment.count() == 5 if isinstance(segment, PersistentLocalHnswSegment): - t = Table("max_seq_id") - q = ( - segment._db.querybuilder() - .from_(t) - .select(t.seq_id) - .where(t.segment_id == ParameterValue(segment._db.uuid_to_db(segment._id))) - ) - sql, params = get_sql(q) - with segment._db.tx() as cur: - result = cur.execute(sql, params).fetchone() - assert _decode_seq_id(result[0]) == 5 - t2 = Table("segment_metadata") - q2 = ( - segment._db.querybuilder() - .from_(t2) - .select(t2.key, t2.int_value) - .where(t2.segment_id == ParameterValue(segment._db.uuid_to_db(segment._id))) - ) - sql2, params2 = get_sql(q2) - with segment._db.tx() as cur: - metadata = cur.execute(sql2, params2).fetchall() - assert len(metadata) >= 3 - kdict = {r[0]: r[1] for r in metadata} - assert "max_seq_id" in kdict.keys() - assert "total_elements_added" in kdict.keys() - assert "dimensionality" in kdict.keys() - assert "id_label_seq_id_tuple_list" in kdict.keys() - assert kdict["max_seq_id"] == 5 + _pd = PersistentData.load_from_proto(segment._get_metadata_file()) + assert _pd.max_seq_id == 5 + assert _pd.total_elements_added == 5 + assert _pd.dimensionality == 2 + assert len(_pd.id_to_label) == 5 def test_migrate_metadatafile( @@ -265,40 +240,81 @@ def test_migrate_metadatafile( assert segment.count() == 5 if isinstance(segment, PersistentLocalHnswSegment): - with open(segment._get_metadata_file(), "wb") as metadata_file: + _pd = PersistentData.load_from_proto(segment._get_metadata_file()) + with open(segment._get_legacy_metadata_file(), "wb") as metadata_file: pickle.dump(segment._persist_data, metadata_file, pickle.HIGHEST_PROTOCOL) - t2 = Table("segment_metadata") - q2 = ( - segment._db.querybuilder() - .from_(t2) - .delete() - .where(t2.segment_id == ParameterValue(segment._db.uuid_to_db(segment._id))) - ) - q3 = ( - segment._db.querybuilder() - .from_(t2) - .select(t2.key, t2.int_value) - .where(t2.segment_id == ParameterValue(segment._db.uuid_to_db(segment._id))) - ) - with segment._db.tx() as cur: - cur.execute(*get_sql(q2)) - with segment._db.tx() as cur: - metadata = cur.execute(*get_sql(q3)).fetchall() - assert len(metadata) == 0 + assert os.path.exists(segment._get_legacy_metadata_file()) + assert _pd.max_seq_id == 5 + assert _pd.total_elements_added == 5 + assert _pd.dimensionality == 2 + assert len(_pd.id_to_label) == 5 + os.unlink(segment._get_metadata_file()) + assert not os.path.exists(segment._get_metadata_file()) segment.stop() + segment.start() segment = cast( PersistentLocalHnswSegment, vector_reader(system, segment_definition) ) + assert os.path.exists(segment._get_metadata_file()) + assert not os.path.exists(segment._get_legacy_metadata_file()) + _migrated_pd = PersistentData.load_from_proto(segment._get_metadata_file()) + assert _migrated_pd.max_seq_id == 5 + assert _migrated_pd.total_elements_added == 5 + assert _migrated_pd.dimensionality == 2 + assert len(_migrated_pd.id_to_label) == 5 + + +def test_metadata_corruption_with_backup( + system: System, + sample_embeddings: Iterator[OperationRecord], + vector_reader: Type[VectorReader], + produce_fns: ProducerFn, +) -> None: + producer = system.instance(Producer) + + system.reset_state() + segment_definition = create_random_segment_definition( + extra_hnsw_config={"hnsw:batch_size": 1, "hnsw:sync_threshold": 5} + ) + collection_id = segment_definition["collection"] + # We know that the segment definition has a collection_id + collection_id = cast(uuid.UUID, collection_id) + max_id = produce_fns( + collection_id=collection_id, + producer=producer, + n=5, + embeddings=sample_embeddings, + )[1][-1] + + segment = vector_reader(system, segment_definition) + segment.start() + + sync(segment, max_id) + + assert segment.count() == 5 + if isinstance(segment, PersistentLocalHnswSegment): + _pd = PersistentData.load_from_proto(segment._get_metadata_file()) + _pd.store_to_proto(segment._get_metadata_file() + ".new") + assert os.path.exists(segment._get_metadata_file() + ".new") + assert _pd.max_seq_id == 5 + assert _pd.total_elements_added == 5 + assert _pd.dimensionality == 2 + assert len(_pd.id_to_label) == 5 + os.unlink(segment._get_metadata_file()) + assert not os.path.exists(segment._get_metadata_file()) + with open(segment._get_metadata_file(), "wb") as metadata_file: + metadata_file.write(b"corrupted") + segment.stop() segment.start() - with segment._db.tx() as cur: - metadata = cur.execute(*get_sql(q3)).fetchall() - assert len(metadata) >= 3 - kdict = {r[0]: r[1] for r in metadata} - assert "max_seq_id" in kdict.keys() - assert "total_elements_added" in kdict.keys() - assert "dimensionality" in kdict.keys() - assert "id_label_seq_id_tuple_list" in kdict.keys() - assert kdict["max_seq_id"] == 5 + segment = cast( + PersistentLocalHnswSegment, vector_reader(system, segment_definition) + ) + assert os.path.exists(segment._get_metadata_file()) + _migrated_pd = PersistentData.load_from_proto(segment._get_metadata_file()) + assert _migrated_pd.max_seq_id == 5 + assert _migrated_pd.total_elements_added == 5 + assert _migrated_pd.dimensionality == 2 + assert len(_migrated_pd.id_to_label) == 5 def approx_equal(a: float, b: float, epsilon: float = 0.0001) -> bool: diff --git a/idl/chromadb/proto/chroma.proto b/idl/chromadb/proto/chroma.proto index 70a684bb7f81..b9528babef55 100644 --- a/idl/chromadb/proto/chroma.proto +++ b/idl/chromadb/proto/chroma.proto @@ -132,3 +132,17 @@ message QueryVectorsRequest { message QueryVectorsResponse { repeated VectorQueryResults results = 1; } + + +message LocalSegmentMetadataTuple { + string embedding_id = 1; + int32 hnsw_label = 2; + int32 seq_id = 3; +} + +message LocalSegmentMetadata { + repeated LocalSegmentMetadataTuple tuples = 1; + int32 dimensionality = 2; + int64 total_elements_added = 3; + int64 max_seq_id = 4; +}