From e6ceeee1d6e5fb1adaa7d3187fd65bfc8787c37d Mon Sep 17 00:00:00 2001 From: nicolasgere Date: Fri, 16 Feb 2024 09:58:59 -0800 Subject: [PATCH 1/9] [ENH] Add quota component and test for static (#1720) ## Description of changes *Summarize the changes made by this PR.* - New functionality - Add quota check, it will be use to be able to rate limit, apply static check to payload etc. ## Test plan *How are these changes tested?* - [ ] Tests pass locally with `pytest`, added unit test --------- Co-authored-by: nicolas --- chromadb/api/segment.py | 6 +- chromadb/config.py | 3 + chromadb/quota/__init__.py | 90 +++++++++++++++++++ chromadb/quota/test_provider.py | 14 +++ chromadb/server/fastapi/__init__.py | 8 ++ chromadb/test/conftest.py | 1 - .../test/quota/test_static_quota_enforcer.py | 78 ++++++++++++++++ 7 files changed, 198 insertions(+), 2 deletions(-) create mode 100644 chromadb/quota/__init__.py create mode 100644 chromadb/quota/test_provider.py create mode 100644 chromadb/test/quota/test_static_quota_enforcer.py diff --git a/chromadb/api/segment.py b/chromadb/api/segment.py index 72df138d9bec..33bd00054a71 100644 --- a/chromadb/api/segment.py +++ b/chromadb/api/segment.py @@ -1,6 +1,7 @@ from chromadb.api import ServerAPI from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT, Settings, System from chromadb.db.system import SysDB +from chromadb.quota import QuotaEnforcer from chromadb.segment import SegmentManager, MetadataReader, VectorReader from chromadb.telemetry.opentelemetry import ( add_attributes_to_current_span, @@ -58,7 +59,6 @@ import logging import re - logger = logging.getLogger(__name__) @@ -101,6 +101,7 @@ def __init__(self, system: System): self._settings = system.settings self._sysdb = self.require(SysDB) self._manager = self.require(SegmentManager) + self._quota = self.require(QuotaEnforcer) self._product_telemetry_client = self.require(ProductTelemetryClient) self._opentelemetry_client = self.require(OpenTelemetryClient) self._producer = 
self.require(Producer) @@ -356,6 +357,7 @@ def _add( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: + self._quota.static_check(metadatas, documents, embeddings, collection_id) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.ADD) validate_batch( @@ -398,6 +400,7 @@ def _update( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: + self._quota.static_check(metadatas, documents, embeddings, collection_id) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.UPDATE) validate_batch( @@ -442,6 +445,7 @@ def _upsert( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: + self._quota.static_check(metadatas, documents, embeddings, collection_id) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.UPSERT) validate_batch( diff --git a/chromadb/config.py b/chromadb/config.py index e9ceffc5dd02..98f4549e9f43 100644 --- a/chromadb/config.py +++ b/chromadb/config.py @@ -70,11 +70,13 @@ "chromadb.telemetry.product.ProductTelemetryClient": "chroma_product_telemetry_impl", "chromadb.ingest.Producer": "chroma_producer_impl", "chromadb.ingest.Consumer": "chroma_consumer_impl", + "chromadb.quota.QuotaProvider": "chroma_quota_provider_impl", "chromadb.ingest.CollectionAssignmentPolicy": "chroma_collection_assignment_policy_impl", # noqa "chromadb.db.system.SysDB": "chroma_sysdb_impl", "chromadb.segment.SegmentManager": "chroma_segment_manager_impl", "chromadb.segment.distributed.SegmentDirectory": "chroma_segment_directory_impl", "chromadb.segment.distributed.MemberlistProvider": "chroma_memberlist_provider_impl", + } DEFAULT_TENANT = "default_tenant" @@ -99,6 +101,7 @@ class Settings(BaseSettings): # type: ignore chroma_segment_manager_impl: str = ( "chromadb.segment.impl.manager.local.LocalSegmentManager" ) + 
chroma_quota_provider_impl:Optional[str] = None # Distributed architecture specific components chroma_segment_directory_impl: str = "chromadb.segment.impl.distributed.segment_directory.RendezvousHashSegmentDirectory" diff --git a/chromadb/quota/__init__.py b/chromadb/quota/__init__.py new file mode 100644 index 000000000000..82365ff1bd18 --- /dev/null +++ b/chromadb/quota/__init__.py @@ -0,0 +1,90 @@ +from abc import abstractmethod +from enum import Enum +from typing import Optional, Literal + +from chromadb import Documents, Embeddings +from chromadb.api import Metadatas +from chromadb.config import ( + Component, + System, +) + + +class Resource(Enum): + METADATA_KEY_LENGTH = "METADATA_KEY_LENGTH" + METADATA_VALUE_LENGTH = "METADATA_VALUE_LENGTH" + DOCUMENT_SIZE = "DOCUMENT_SIZE" + EMBEDDINGS_DIMENSION = "EMBEDDINGS_DIMENSION" + + +class QuotaError(Exception): + def __init__(self, resource: Resource, quota: int, actual: int): + super().__init__(f"quota error. resource: {resource} quota: {quota} actual: {actual}") + self.quota = quota + self.actual = actual + self.resource = resource + +class QuotaProvider(Component): + """ + Retrieves quotas for resources within a system. + + Methods: + get_for_subject(resource, subject=None, tier=None): + Returns the quota for a given resource, optionally considering the tier and subject. + """ + def __init__(self, system: System) -> None: + super().__init__(system) + self.system = system + + @abstractmethod + def get_for_subject(self, resource: Resource, subject: Optional[str] = None, tier: Optional[str] = None) -> \ + Optional[int]: + pass + + +class QuotaEnforcer(Component): + """ + Enforces quota restrictions on various resources using quota provider. + + Methods: + static_check(metadatas=None, documents=None, embeddings=None, collection_id=None): + Performs static checks against quotas for metadatas, documents, and embeddings. Raises QuotaError if limits are exceeded. 
+ """ + def __init__(self, system: System) -> None: + super().__init__(system) + self.should_enforce = False + if system.settings.chroma_quota_provider_impl: + self._quota_provider = system.require(QuotaProvider) + self.should_enforce = True + self.system = system + + def static_check(self, metadatas: Optional[Metadatas] = None, documents: Optional[Documents] = None, + embeddings: Optional[Embeddings] = None, collection_id: Optional[str] = None): + if not self.should_enforce: + return + metadata_key_length_quota = self._quota_provider.get_for_subject(resource=Resource.METADATA_KEY_LENGTH, + subject=collection_id) + metadata_value_length_quota = self._quota_provider.get_for_subject(resource=Resource.METADATA_VALUE_LENGTH, + subject=collection_id) + if metadatas and (metadata_key_length_quota or metadata_key_length_quota): + for metadata in metadatas: + for key in metadata: + if metadata_key_length_quota and len(key) > metadata_key_length_quota: + raise QuotaError(resource=Resource.METADATA_KEY_LENGTH, actual=len(key), + quota=metadata_key_length_quota) + if metadata_value_length_quota and isinstance(metadata[key], str) and len( + metadata[key]) > metadata_value_length_quota: + raise QuotaError(resource=Resource.METADATA_VALUE_LENGTH, actual=len(metadata[key]), + quota=metadata_value_length_quota) + document_size_quota = self._quota_provider.get_for_subject(resource=Resource.DOCUMENT_SIZE, subject=collection_id) + if document_size_quota and documents: + for document in documents: + if len(document) > document_size_quota: + raise QuotaError(resource=Resource.DOCUMENT_SIZE, actual=len(document), quota=document_size_quota) + embedding_dimension_quota = self._quota_provider.get_for_subject(resource=Resource.EMBEDDINGS_DIMENSION, + subject=collection_id) + if embedding_dimension_quota and embeddings: + for embedding in embeddings: + if len(embedding) > embedding_dimension_quota: + raise QuotaError(resource=Resource.EMBEDDINGS_DIMENSION, actual=len(embedding), + 
quota=embedding_dimension_quota) diff --git a/chromadb/quota/test_provider.py b/chromadb/quota/test_provider.py new file mode 100644 index 000000000000..484282fb7d01 --- /dev/null +++ b/chromadb/quota/test_provider.py @@ -0,0 +1,14 @@ +from typing import Optional + +from overrides import overrides + +from chromadb.quota import QuotaProvider, Resource + + +class QuotaProviderForTest(QuotaProvider): + def __init__(self, system) -> None: + super().__init__(system) + + @overrides + def get_for_subject(self, resource: Resource, subject: Optional[str] = "", tier: Optional[str] = "") -> Optional[int]: + pass diff --git a/chromadb/server/fastapi/__init__.py b/chromadb/server/fastapi/__init__.py index 529606a6c368..a38225de7f33 100644 --- a/chromadb/server/fastapi/__init__.py +++ b/chromadb/server/fastapi/__init__.py @@ -35,6 +35,7 @@ InvalidDimensionException, InvalidHTTPVersion, ) +from chromadb.quota import QuotaError from chromadb.server.fastapi.types import ( AddEmbedding, CreateDatabase, @@ -140,6 +141,7 @@ def __init__(self, settings: Settings): allow_origins=settings.chroma_server_cors_allow_origins, allow_methods=["*"], ) + self._app.add_exception_handler(QuotaError, self.quota_exception_handler) self._app.on_event("shutdown")(self.shutdown) @@ -291,6 +293,12 @@ def app(self) -> fastapi.FastAPI: def root(self) -> Dict[str, int]: return {"nanosecond heartbeat": self._api.heartbeat()} + async def quota_exception_handler(request: Request, exc: QuotaError): + return JSONResponse( + status_code=429, + content={"message": f"quota error. 
resource: {exc.resource} quota: {exc.quota} actual: {exc.actual}"}, + ) + def heartbeat(self) -> Dict[str, int]: return self.root() diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index 34a1b040dd19..3e041cfe9a71 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -468,7 +468,6 @@ def system_wrong_auth( def system(request: pytest.FixtureRequest) -> Generator[ServerAPI, None, None]: yield next(request.param()) - @pytest.fixture(scope="module", params=system_fixtures_ssl()) def system_ssl(request: pytest.FixtureRequest) -> Generator[ServerAPI, None, None]: yield next(request.param()) diff --git a/chromadb/test/quota/test_static_quota_enforcer.py b/chromadb/test/quota/test_static_quota_enforcer.py new file mode 100644 index 000000000000..245e9ba2e804 --- /dev/null +++ b/chromadb/test/quota/test_static_quota_enforcer.py @@ -0,0 +1,78 @@ +import random +import string +from typing import Optional, List, Tuple, Any +from unittest.mock import patch + +from chromadb.config import System, Settings +from chromadb.quota import QuotaEnforcer, Resource +import pytest + + +def generate_random_string(size: int) -> str: + return ''.join(random.choices(string.ascii_letters + string.digits, k=size)) + +def mock_get_for_subject(self, resource: Resource, subject: Optional[str] = "", tier: Optional[str] = "") -> Optional[ + int]: + """Mock function to simulate quota retrieval.""" + return 10 + + +def run_static_checks(enforcer: QuotaEnforcer, test_cases: List[Tuple[Any, Optional[str]]], data_key: str): + """Generalized function to run static checks on different types of data.""" + for test_case in test_cases: + data, expected_error = test_case if len(test_case) == 2 else (test_case[0], None) + args = {data_key: [data]} + if expected_error: + with pytest.raises(Exception) as exc_info: + enforcer.static_check(**args) + assert expected_error in str(exc_info.value.resource) + else: + enforcer.static_check(**args) + + + 
+@pytest.fixture(scope="module") +def enforcer() -> QuotaEnforcer: + settings = Settings( + chroma_quota_provider_impl = "chromadb.quota.test_provider.QuotaProviderForTest" + ) + system = System(settings) + return system.require(QuotaEnforcer) + +@patch('chromadb.quota.test_provider.QuotaProviderForTest.get_for_subject', mock_get_for_subject) +def test_static_enforcer_metadata(enforcer): + test_cases = [ + ({generate_random_string(20): generate_random_string(5)}, "METADATA_KEY_LENGTH"), + ({generate_random_string(5): generate_random_string(5)}, None), + ({generate_random_string(5): generate_random_string(20)}, "METADATA_VALUE_LENGTH"), + ({generate_random_string(5): generate_random_string(5)}, None) + ] + run_static_checks(enforcer, test_cases, 'metadatas') + + +@patch('chromadb.quota.test_provider.QuotaProviderForTest.get_for_subject', mock_get_for_subject) +def test_static_enforcer_documents(enforcer): + test_cases = [ + (generate_random_string(20), "DOCUMENT_SIZE"), + (generate_random_string(5), None) + ] + run_static_checks(enforcer, test_cases, 'documents') + +@patch('chromadb.quota.test_provider.QuotaProviderForTest.get_for_subject', mock_get_for_subject) +def test_static_enforcer_embeddings(enforcer): + test_cases = [ + (random.sample(range(1, 101), 100), "EMBEDDINGS_DIMENSION"), + (random.sample(range(1, 101), 5), None) + ] + run_static_checks(enforcer, test_cases, 'embeddings') + +# Should not raise an error if no quota provider is present +def test_enforcer_without_quota_provider(): + test_cases = [ + (random.sample(range(1, 101), 1), None), + (random.sample(range(1, 101), 5), None) + ] + settings = Settings() + system = System(settings) + enforcer = system.require(QuotaEnforcer) + run_static_checks(enforcer, test_cases, 'embeddings') From 93194c8a6a2dde33031cb812af65acd4fada4662 Mon Sep 17 00:00:00 2001 From: Weili Gu <3451471+weiligu@users.noreply.github.com> Date: Fri, 16 Feb 2024 10:46:33 -0800 Subject: [PATCH 2/9] Log Service Setup (#1721) ## 
Description of changes https://linear.app/trychroma/issue/CHR-241/stand-up-log-service - Stand up Log Service in Dev - stand up postgres DB - stand up migration: atlas - depend on postgres - stand up logservice - depend on migration - stand up coordinator - depend on migration - database migration - change env name - change database name - add definition for reccord log (we can test perf for this later, not hard to change) - log service: go - entry point: main with Cmd - grpc service: with proto change - coordinator - connect to docker postgres - reorganize packages to accommodate with logservice - rename bin to coordinator instead of chroma - tests connect to local postgres instead of sqlite - fix a bug from segment delete - system_test fix will be in a separate PR --- .../workflows/chroma-coordinator-test.yaml | 17 +++ Tiltfile | 13 +- bin/cluster-test.sh | 3 + chromadb/proto/chroma_pb2.py | 38 +++--- chromadb/proto/coordinator_pb2.py | 8 +- chromadb/proto/coordinator_pb2.pyi | 12 ++ chromadb/proto/logservice_pb2.py | 31 +++++ chromadb/proto/logservice_pb2.pyi | 4 + chromadb/proto/logservice_pb2_grpc.py | 31 +++++ go/coordinator/Dockerfile | 5 +- go/coordinator/Dockerfile.migration | 4 + go/coordinator/Makefile | 3 +- go/coordinator/atlas.hcl | 4 +- .../{grpccoordinator => coordinator}/cmd.go | 27 ++-- go/coordinator/cmd/{ => coordinator}/main.go | 3 +- go/coordinator/cmd/logservice/cmd.go | 46 +++++++ go/coordinator/cmd/logservice/main.go | 36 ++++++ go/coordinator/go.sum | 3 + .../grpc}/collection_service.go | 2 +- .../grpc}/collection_service_test.go | 4 +- .../grpc}/proto_model_convert.go | 2 +- .../grpc}/proto_model_convert_test.go | 2 +- .../grpc}/segment_service.go | 2 +- .../grpc}/server.go | 26 +--- .../grpc}/tenant_database_service.go | 2 +- .../{grpccoordinator => }/grpcutils/config.go | 0 .../grpcutils/config_test.go | 0 .../grpcutils/service.go | 0 go/coordinator/internal/logservice/apis.go | 11 ++ .../internal/logservice/grpc/server.go | 104 
++++++++++++++++ .../internal/logservice/recordlog.go | 33 +++++ .../internal/metastore/db/dao/common.go | 4 + .../internal/metastore/db/dao/record_log.go | 9 ++ .../metastore/db/dao/segment_metadata.go | 2 +- .../internal/metastore/db/dbcore/core.go | 39 ++++-- .../internal/metastore/db/dbmodel/common.go | 1 + .../metastore/db/dbmodel/mocks/IMetaDomain.go | 15 +++ .../metastore/db/dbmodel/record_log.go | 16 +++ .../internal/proto/coordinatorpb/chroma.pb.go | 10 +- .../proto/coordinatorpb/chroma_grpc.pb.go | 17 +-- .../proto/coordinatorpb/coordinator.pb.go | 4 +- .../coordinatorpb/coordinator_grpc.pb.go | 72 +++++------ .../proto/logservicepb/logservice.pb.go | 67 ++++++++++ .../proto/logservicepb/logservice_grpc.pb.go | 65 ++++++++++ go/coordinator/migrations/20231129183041.sql | 8 -- ...{20231116210409.sql => 20240215010425.sql} | 16 +++ go/coordinator/migrations/atlas.sum | 5 +- idl/chromadb/proto/logservice.proto | 8 ++ idl/makefile | 1 + k8s/deployment/kubernetes.yaml | 116 +++++++++++++++++- k8s/dev/coordinator.yaml | 4 +- k8s/dev/logservice.yaml | 39 ++++++ k8s/dev/migration.yaml | 22 ++++ k8s/dev/postgres.yaml | 41 +++++++ 54 files changed, 894 insertions(+), 163 deletions(-) create mode 100644 chromadb/proto/logservice_pb2.py create mode 100644 chromadb/proto/logservice_pb2.pyi create mode 100644 chromadb/proto/logservice_pb2_grpc.py create mode 100644 go/coordinator/Dockerfile.migration rename go/coordinator/cmd/{grpccoordinator => coordinator}/cmd.go (64%) rename go/coordinator/cmd/{ => coordinator}/main.go (85%) create mode 100644 go/coordinator/cmd/logservice/cmd.go create mode 100644 go/coordinator/cmd/logservice/main.go rename go/coordinator/internal/{grpccoordinator => coordinator/grpc}/collection_service.go (99%) rename go/coordinator/internal/{grpccoordinator => coordinator/grpc}/collection_service_test.go (97%) rename go/coordinator/internal/{grpccoordinator => coordinator/grpc}/proto_model_convert.go (99%) rename 
go/coordinator/internal/{grpccoordinator => coordinator/grpc}/proto_model_convert_test.go (99%) rename go/coordinator/internal/{grpccoordinator => coordinator/grpc}/segment_service.go (99%) rename go/coordinator/internal/{grpccoordinator => coordinator/grpc}/server.go (90%) rename go/coordinator/internal/{grpccoordinator => coordinator/grpc}/tenant_database_service.go (99%) rename go/coordinator/internal/{grpccoordinator => }/grpcutils/config.go (100%) rename go/coordinator/internal/{grpccoordinator => }/grpcutils/config_test.go (100%) rename go/coordinator/internal/{grpccoordinator => }/grpcutils/service.go (100%) create mode 100644 go/coordinator/internal/logservice/apis.go create mode 100644 go/coordinator/internal/logservice/grpc/server.go create mode 100644 go/coordinator/internal/logservice/recordlog.go create mode 100644 go/coordinator/internal/metastore/db/dao/record_log.go create mode 100644 go/coordinator/internal/metastore/db/dbmodel/record_log.go create mode 100644 go/coordinator/internal/proto/logservicepb/logservice.pb.go create mode 100644 go/coordinator/internal/proto/logservicepb/logservice_grpc.pb.go delete mode 100644 go/coordinator/migrations/20231129183041.sql rename go/coordinator/migrations/{20231116210409.sql => 20240215010425.sql} (86%) create mode 100644 idl/chromadb/proto/logservice.proto create mode 100644 k8s/dev/logservice.yaml create mode 100644 k8s/dev/migration.yaml create mode 100644 k8s/dev/postgres.yaml diff --git a/.github/workflows/chroma-coordinator-test.yaml b/.github/workflows/chroma-coordinator-test.yaml index 629a9dfb1466..e62ab2a5d0d0 100644 --- a/.github/workflows/chroma-coordinator-test.yaml +++ b/.github/workflows/chroma-coordinator-test.yaml @@ -16,8 +16,25 @@ jobs: matrix: platform: [ubuntu-latest] runs-on: ${{ matrix.platform }} + services: + postgres: + image: postgres + env: + POSTGRES_USER: chroma + POSTGRES_PASSWORD: chroma + POSTGRES_DB: chroma + options: >- + --health-cmd pg_isready + --health-interval 10s + 
--health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 steps: - name: Checkout uses: actions/checkout@v3 - name: Build and test run: cd go/coordinator && make test + env: + POSTGRES_HOST: localhost + POSTGRES_PORT: 5432 diff --git a/Tiltfile b/Tiltfile index 7be3d4ca594f..f1fa96af2ecb 100644 --- a/Tiltfile +++ b/Tiltfile @@ -1,3 +1,8 @@ +docker_build('migration', + context='.', + dockerfile='./go/coordinator/Dockerfile.migration' +) + docker_build('coordinator', context='.', dockerfile='./go/coordinator/Dockerfile' @@ -22,9 +27,15 @@ k8s_resource( ) k8s_yaml(['k8s/dev/pulsar.yaml']) k8s_resource('pulsar', resource_deps=['k8s_setup'], labels=["infrastructure"]) +k8s_yaml(['k8s/dev/postgres.yaml']) +k8s_resource('postgres', resource_deps=['k8s_setup'], labels=["infrastructure"]) +k8s_yaml(['k8s/dev/migration.yaml']) +k8s_resource('migration', resource_deps=['postgres'], labels=["chroma"]) k8s_yaml(['k8s/dev/server.yaml']) k8s_resource('server', resource_deps=['k8s_setup'],labels=["chroma"], port_forwards=8000 ) k8s_yaml(['k8s/dev/coordinator.yaml']) -k8s_resource('coordinator', resource_deps=['pulsar', 'server'], labels=["chroma"]) +k8s_resource('coordinator', resource_deps=['pulsar', 'server', 'migration'], labels=["chroma"]) +k8s_yaml(['k8s/dev/logservice.yaml']) +k8s_resource('logservice', resource_deps=['migration'], labels=["chroma"]) k8s_yaml(['k8s/dev/worker.yaml']) k8s_resource('worker', resource_deps=['coordinator'],labels=["chroma"]) diff --git a/bin/cluster-test.sh b/bin/cluster-test.sh index 10c48781c072..d18185b8c02f 100755 --- a/bin/cluster-test.sh +++ b/bin/cluster-test.sh @@ -25,6 +25,7 @@ minikube addons enable ingress-dns -p chroma-test # Setup docker to build inside the minikube cluster and build the image eval $(minikube -p chroma-test docker-env) docker build -t server:latest -f Dockerfile . +docker build -t migration -f go/coordinator/Dockerfile.migration . docker build -t chroma-coordinator:latest -f go/coordinator/Dockerfile . 
docker build -t worker -f rust/worker/Dockerfile . --build-arg CHROMA_KUBERNETES_INTEGRATION=1 @@ -35,6 +36,8 @@ kubectl apply -f k8s/cr kubectl apply -f k8s/test # Wait for the pods in the chroma namespace to be ready +kubectl wait --for=condition=complete --timeout=100s job/migration -n chroma +kubectl delete job migration -n chroma kubectl wait --namespace chroma --for=condition=Ready pods --all --timeout=400s # Run mini kube tunnel in the background to expose the service diff --git a/chromadb/proto/chroma_pb2.py b/chromadb/proto/chroma_pb2.py index 84a3ba9b13dd..bc8d43e57ec8 100644 --- a/chromadb/proto/chroma_pb2.py +++ b/chromadb/proto/chroma_pb2.py @@ -13,7 +13,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1b\x63hromadb/proto/chroma.proto\x12\x06\x63hroma\"&\n\x06Status\x12\x0e\n\x06reason\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\x05\"0\n\x0e\x43hromaResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"U\n\x06Vector\x12\x11\n\tdimension\x18\x01 \x01(\x05\x12\x0e\n\x06vector\x18\x02 \x01(\x0c\x12(\n\x08\x65ncoding\x18\x03 \x01(\x0e\x32\x16.chroma.ScalarEncoding\"\xca\x01\n\x07Segment\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12#\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScope\x12\x12\n\x05topic\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ncollection\x18\x05 \x01(\tH\x01\x88\x01\x01\x12-\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x02\x88\x01\x01\x42\x08\n\x06_topicB\r\n\x0b_collectionB\x0b\n\t_metadata\"\xb9\x01\n\nCollection\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\r\n\x05topic\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimension\"4\n\x08\x44\x61tabase\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 
\x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"\x16\n\x06Tenant\x12\x0c\n\x04name\x18\x01 \x01(\t\"b\n\x13UpdateMetadataValue\x12\x16\n\x0cstring_value\x18\x01 \x01(\tH\x00\x12\x13\n\tint_value\x18\x02 \x01(\x03H\x00\x12\x15\n\x0b\x66loat_value\x18\x03 \x01(\x01H\x00\x42\x07\n\x05value\"\x96\x01\n\x0eUpdateMetadata\x12\x36\n\x08metadata\x18\x01 \x03(\x0b\x32$.chroma.UpdateMetadata.MetadataEntry\x1aL\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.chroma.UpdateMetadataValue:\x02\x38\x01\"\xcc\x01\n\x15SubmitEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12#\n\x06vector\x18\x02 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12$\n\toperation\x18\x04 \x01(\x0e\x32\x11.chroma.Operation\x12\x15\n\rcollection_id\x18\x05 \x01(\tB\t\n\x07_vectorB\x0b\n\t_metadata\"S\n\x15VectorEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x1e\n\x06vector\x18\x03 \x01(\x0b\x32\x0e.chroma.Vector\"q\n\x11VectorQueryResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x10\n\x08\x64istance\x18\x03 \x01(\x01\x12#\n\x06vector\x18\x04 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x42\t\n\x07_vector\"@\n\x12VectorQueryResults\x12*\n\x07results\x18\x01 \x03(\x0b\x32\x19.chroma.VectorQueryResult\"(\n\x15SegmentServerResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"4\n\x11GetVectorsRequest\x12\x0b\n\x03ids\x18\x01 \x03(\t\x12\x12\n\nsegment_id\x18\x02 \x01(\t\"D\n\x12GetVectorsResponse\x12.\n\x07records\x18\x01 \x03(\x0b\x32\x1d.chroma.VectorEmbeddingRecord\"\x86\x01\n\x13QueryVectorsRequest\x12\x1f\n\x07vectors\x18\x01 \x03(\x0b\x32\x0e.chroma.Vector\x12\t\n\x01k\x18\x02 \x01(\x05\x12\x13\n\x0b\x61llowed_ids\x18\x03 \x03(\t\x12\x1a\n\x12include_embeddings\x18\x04 \x01(\x08\x12\x12\n\nsegment_id\x18\x05 \x01(\t\"C\n\x14QueryVectorsResponse\x12+\n\x07results\x18\x01 
\x03(\x0b\x32\x1a.chroma.VectorQueryResults*8\n\tOperation\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\n\n\x06UPDATE\x10\x01\x12\n\n\x06UPSERT\x10\x02\x12\n\n\x06\x44\x45LETE\x10\x03*(\n\x0eScalarEncoding\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\t\n\x05INT32\x10\x01*(\n\x0cSegmentScope\x12\n\n\x06VECTOR\x10\x00\x12\x0c\n\x08METADATA\x10\x01\x32\x94\x01\n\rSegmentServer\x12?\n\x0bLoadSegment\x12\x0f.chroma.Segment\x1a\x1d.chroma.SegmentServerResponse\"\x00\x12\x42\n\x0eReleaseSegment\x12\x0f.chroma.Segment\x1a\x1d.chroma.SegmentServerResponse\"\x00\x32\xa2\x01\n\x0cVectorReader\x12\x45\n\nGetVectors\x12\x19.chroma.GetVectorsRequest\x1a\x1a.chroma.GetVectorsResponse\"\x00\x12K\n\x0cQueryVectors\x12\x1b.chroma.QueryVectorsRequest\x1a\x1c.chroma.QueryVectorsResponse\"\x00\x42\x43ZAgithub.com/chroma/chroma-coordinator/internal/proto/coordinatorpbb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1b\x63hromadb/proto/chroma.proto\x12\x06\x63hroma\"&\n\x06Status\x12\x0e\n\x06reason\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\x05\"0\n\x0e\x43hromaResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"U\n\x06Vector\x12\x11\n\tdimension\x18\x01 \x01(\x05\x12\x0e\n\x06vector\x18\x02 \x01(\x0c\x12(\n\x08\x65ncoding\x18\x03 \x01(\x0e\x32\x16.chroma.ScalarEncoding\"\xca\x01\n\x07Segment\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12#\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScope\x12\x12\n\x05topic\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ncollection\x18\x05 \x01(\tH\x01\x88\x01\x01\x12-\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x02\x88\x01\x01\x42\x08\n\x06_topicB\r\n\x0b_collectionB\x0b\n\t_metadata\"\xb9\x01\n\nCollection\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\r\n\x05topic\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 
\x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimension\"4\n\x08\x44\x61tabase\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"\x16\n\x06Tenant\x12\x0c\n\x04name\x18\x01 \x01(\t\"b\n\x13UpdateMetadataValue\x12\x16\n\x0cstring_value\x18\x01 \x01(\tH\x00\x12\x13\n\tint_value\x18\x02 \x01(\x03H\x00\x12\x15\n\x0b\x66loat_value\x18\x03 \x01(\x01H\x00\x42\x07\n\x05value\"\x96\x01\n\x0eUpdateMetadata\x12\x36\n\x08metadata\x18\x01 \x03(\x0b\x32$.chroma.UpdateMetadata.MetadataEntry\x1aL\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.chroma.UpdateMetadataValue:\x02\x38\x01\"\xcc\x01\n\x15SubmitEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12#\n\x06vector\x18\x02 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12$\n\toperation\x18\x04 \x01(\x0e\x32\x11.chroma.Operation\x12\x15\n\rcollection_id\x18\x05 \x01(\tB\t\n\x07_vectorB\x0b\n\t_metadata\"S\n\x15VectorEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x1e\n\x06vector\x18\x03 \x01(\x0b\x32\x0e.chroma.Vector\"q\n\x11VectorQueryResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02\x12#\n\x06vector\x18\x04 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x42\t\n\x07_vector\"@\n\x12VectorQueryResults\x12*\n\x07results\x18\x01 \x03(\x0b\x32\x19.chroma.VectorQueryResult\"4\n\x11GetVectorsRequest\x12\x0b\n\x03ids\x18\x01 \x03(\t\x12\x12\n\nsegment_id\x18\x02 \x01(\t\"D\n\x12GetVectorsResponse\x12.\n\x07records\x18\x01 \x03(\x0b\x32\x1d.chroma.VectorEmbeddingRecord\"\x86\x01\n\x13QueryVectorsRequest\x12\x1f\n\x07vectors\x18\x01 \x03(\x0b\x32\x0e.chroma.Vector\x12\t\n\x01k\x18\x02 \x01(\x05\x12\x13\n\x0b\x61llowed_ids\x18\x03 \x03(\t\x12\x1a\n\x12include_embeddings\x18\x04 \x01(\x08\x12\x12\n\nsegment_id\x18\x05 
\x01(\t\"C\n\x14QueryVectorsResponse\x12+\n\x07results\x18\x01 \x03(\x0b\x32\x1a.chroma.VectorQueryResults*8\n\tOperation\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\n\n\x06UPDATE\x10\x01\x12\n\n\x06UPSERT\x10\x02\x12\n\n\x06\x44\x45LETE\x10\x03*(\n\x0eScalarEncoding\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\t\n\x05INT32\x10\x01*(\n\x0cSegmentScope\x12\n\n\x06VECTOR\x10\x00\x12\x0c\n\x08METADATA\x10\x01\x32\xa2\x01\n\x0cVectorReader\x12\x45\n\nGetVectors\x12\x19.chroma.GetVectorsRequest\x1a\x1a.chroma.GetVectorsResponse\"\x00\x12K\n\x0cQueryVectors\x12\x1b.chroma.QueryVectorsRequest\x1a\x1c.chroma.QueryVectorsResponse\"\x00\x42\x43ZAgithub.com/chroma/chroma-coordinator/internal/proto/coordinatorpbb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -23,12 +23,12 @@ DESCRIPTOR._serialized_options = b'ZAgithub.com/chroma/chroma-coordinator/internal/proto/coordinatorpb' _UPDATEMETADATA_METADATAENTRY._options = None _UPDATEMETADATA_METADATAENTRY._serialized_options = b'8\001' - _globals['_OPERATION']._serialized_start=1785 - _globals['_OPERATION']._serialized_end=1841 - _globals['_SCALARENCODING']._serialized_start=1843 - _globals['_SCALARENCODING']._serialized_end=1883 - _globals['_SEGMENTSCOPE']._serialized_start=1885 - _globals['_SEGMENTSCOPE']._serialized_end=1925 + _globals['_OPERATION']._serialized_start=1743 + _globals['_OPERATION']._serialized_end=1799 + _globals['_SCALARENCODING']._serialized_start=1801 + _globals['_SCALARENCODING']._serialized_end=1841 + _globals['_SEGMENTSCOPE']._serialized_start=1843 + _globals['_SEGMENTSCOPE']._serialized_end=1883 _globals['_STATUS']._serialized_start=39 _globals['_STATUS']._serialized_end=77 _globals['_CHROMARESPONSE']._serialized_start=79 @@ -57,18 +57,14 @@ _globals['_VECTORQUERYRESULT']._serialized_end=1345 _globals['_VECTORQUERYRESULTS']._serialized_start=1347 _globals['_VECTORQUERYRESULTS']._serialized_end=1411 - _globals['_SEGMENTSERVERRESPONSE']._serialized_start=1413 - 
_globals['_SEGMENTSERVERRESPONSE']._serialized_end=1453 - _globals['_GETVECTORSREQUEST']._serialized_start=1455 - _globals['_GETVECTORSREQUEST']._serialized_end=1507 - _globals['_GETVECTORSRESPONSE']._serialized_start=1509 - _globals['_GETVECTORSRESPONSE']._serialized_end=1577 - _globals['_QUERYVECTORSREQUEST']._serialized_start=1580 - _globals['_QUERYVECTORSREQUEST']._serialized_end=1714 - _globals['_QUERYVECTORSRESPONSE']._serialized_start=1716 - _globals['_QUERYVECTORSRESPONSE']._serialized_end=1783 - _globals['_SEGMENTSERVER']._serialized_start=1928 - _globals['_SEGMENTSERVER']._serialized_end=2076 - _globals['_VECTORREADER']._serialized_start=2079 - _globals['_VECTORREADER']._serialized_end=2241 + _globals['_GETVECTORSREQUEST']._serialized_start=1413 + _globals['_GETVECTORSREQUEST']._serialized_end=1465 + _globals['_GETVECTORSRESPONSE']._serialized_start=1467 + _globals['_GETVECTORSRESPONSE']._serialized_end=1535 + _globals['_QUERYVECTORSREQUEST']._serialized_start=1538 + _globals['_QUERYVECTORSREQUEST']._serialized_end=1672 + _globals['_QUERYVECTORSRESPONSE']._serialized_start=1674 + _globals['_QUERYVECTORSRESPONSE']._serialized_end=1741 + _globals['_VECTORREADER']._serialized_start=1886 + _globals['_VECTORREADER']._serialized_end=2048 # @@protoc_insertion_point(module_scope) diff --git a/chromadb/proto/coordinator_pb2.py b/chromadb/proto/coordinator_pb2.py index fda6a0998670..888aece92853 100644 --- a/chromadb/proto/coordinator_pb2.py +++ b/chromadb/proto/coordinator_pb2.py @@ -15,7 +15,7 @@ from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n chromadb/proto/coordinator.proto\x12\x06\x63hroma\x1a\x1b\x63hromadb/proto/chroma.proto\x1a\x1bgoogle/protobuf/empty.proto\"A\n\x15\x43reateDatabaseRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"2\n\x12GetDatabaseRequest\x12\x0c\n\x04name\x18\x01 
\x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\"Y\n\x13GetDatabaseResponse\x12\"\n\x08\x64\x61tabase\x18\x01 \x01(\x0b\x32\x10.chroma.Database\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"#\n\x13\x43reateTenantRequest\x12\x0c\n\x04name\x18\x02 \x01(\t\" \n\x10GetTenantRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"S\n\x11GetTenantResponse\x12\x1e\n\x06tenant\x18\x01 \x01(\x0b\x32\x0e.chroma.Tenant\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"8\n\x14\x43reateSegmentRequest\x12 \n\x07segment\x18\x01 \x01(\x0b\x32\x0f.chroma.Segment\"\"\n\x14\x44\x65leteSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\"\xc2\x01\n\x12GetSegmentsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12(\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScopeH\x02\x88\x01\x01\x12\x12\n\x05topic\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x17\n\ncollection\x18\x05 \x01(\tH\x04\x88\x01\x01\x42\x05\n\x03_idB\x07\n\x05_typeB\x08\n\x06_scopeB\x08\n\x06_topicB\r\n\x0b_collection\"X\n\x13GetSegmentsResponse\x12!\n\x08segments\x18\x01 \x03(\x0b\x32\x0f.chroma.Segment\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xfa\x01\n\x14UpdateSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x05topic\x18\x02 \x01(\tH\x00\x12\x15\n\x0breset_topic\x18\x03 \x01(\x08H\x00\x12\x14\n\ncollection\x18\x04 \x01(\tH\x01\x12\x1a\n\x10reset_collection\x18\x05 \x01(\x08H\x01\x12*\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x02\x12\x18\n\x0ereset_metadata\x18\x07 \x01(\x08H\x02\x42\x0e\n\x0ctopic_updateB\x13\n\x11\x63ollection_updateB\x11\n\x0fmetadata_update\"\xe5\x01\n\x17\x43reateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x01\x88\x01\x01\x12\x1a\n\rget_or_create\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 
\x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimensionB\x10\n\x0e_get_or_create\"s\n\x18\x43reateCollectionResponse\x12&\n\ncollection\x18\x01 \x01(\x0b\x32\x12.chroma.Collection\x12\x0f\n\x07\x63reated\x18\x02 \x01(\x08\x12\x1e\n\x06status\x18\x03 \x01(\x0b\x32\x0e.chroma.Status\"G\n\x17\x44\x65leteCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x03 \x01(\t\"\x8b\x01\n\x15GetCollectionsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x12\n\x05topic\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x04 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x05 \x01(\tB\x05\n\x03_idB\x07\n\x05_nameB\x08\n\x06_topic\"a\n\x16GetCollectionsResponse\x12\'\n\x0b\x63ollections\x18\x01 \x03(\x0b\x32\x12.chroma.Collection\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xde\x01\n\x17UpdateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x05topic\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12*\n\x08metadata\x18\x05 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x12\x18\n\x0ereset_metadata\x18\x06 
\x01(\x08H\x00\x42\x11\n\x0fmetadata_updateB\x08\n\x06_topicB\x07\n\x05_nameB\x0c\n\n_dimension2\xd6\x07\n\x05SysDB\x12I\n\x0e\x43reateDatabase\x12\x1d.chroma.CreateDatabaseRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12H\n\x0bGetDatabase\x12\x1a.chroma.GetDatabaseRequest\x1a\x1b.chroma.GetDatabaseResponse\"\x00\x12\x45\n\x0c\x43reateTenant\x12\x1b.chroma.CreateTenantRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12\x42\n\tGetTenant\x12\x18.chroma.GetTenantRequest\x1a\x19.chroma.GetTenantResponse\"\x00\x12G\n\rCreateSegment\x12\x1c.chroma.CreateSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12G\n\rDeleteSegment\x12\x1c.chroma.DeleteSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12H\n\x0bGetSegments\x12\x1a.chroma.GetSegmentsRequest\x1a\x1b.chroma.GetSegmentsResponse\"\x00\x12G\n\rUpdateSegment\x12\x1c.chroma.UpdateSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12W\n\x10\x43reateCollection\x12\x1f.chroma.CreateCollectionRequest\x1a .chroma.CreateCollectionResponse\"\x00\x12M\n\x10\x44\x65leteCollection\x12\x1f.chroma.DeleteCollectionRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12Q\n\x0eGetCollections\x12\x1d.chroma.GetCollectionsRequest\x1a\x1e.chroma.GetCollectionsResponse\"\x00\x12M\n\x10UpdateCollection\x12\x1f.chroma.UpdateCollectionRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12>\n\nResetState\x12\x16.google.protobuf.Empty\x1a\x16.chroma.ChromaResponse\"\x00\x42\x43ZAgithub.com/chroma/chroma-coordinator/internal/proto/coordinatorpbb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n chromadb/proto/coordinator.proto\x12\x06\x63hroma\x1a\x1b\x63hromadb/proto/chroma.proto\x1a\x1bgoogle/protobuf/empty.proto\"A\n\x15\x43reateDatabaseRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"2\n\x12GetDatabaseRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\"Y\n\x13GetDatabaseResponse\x12\"\n\x08\x64\x61tabase\x18\x01 
\x01(\x0b\x32\x10.chroma.Database\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"#\n\x13\x43reateTenantRequest\x12\x0c\n\x04name\x18\x02 \x01(\t\" \n\x10GetTenantRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"S\n\x11GetTenantResponse\x12\x1e\n\x06tenant\x18\x01 \x01(\x0b\x32\x0e.chroma.Tenant\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"8\n\x14\x43reateSegmentRequest\x12 \n\x07segment\x18\x01 \x01(\x0b\x32\x0f.chroma.Segment\"\"\n\x14\x44\x65leteSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\"\xc2\x01\n\x12GetSegmentsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12(\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScopeH\x02\x88\x01\x01\x12\x12\n\x05topic\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x17\n\ncollection\x18\x05 \x01(\tH\x04\x88\x01\x01\x42\x05\n\x03_idB\x07\n\x05_typeB\x08\n\x06_scopeB\x08\n\x06_topicB\r\n\x0b_collection\"X\n\x13GetSegmentsResponse\x12!\n\x08segments\x18\x01 \x03(\x0b\x32\x0f.chroma.Segment\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xfa\x01\n\x14UpdateSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x05topic\x18\x02 \x01(\tH\x00\x12\x15\n\x0breset_topic\x18\x03 \x01(\x08H\x00\x12\x14\n\ncollection\x18\x04 \x01(\tH\x01\x12\x1a\n\x10reset_collection\x18\x05 \x01(\x08H\x01\x12*\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x02\x12\x18\n\x0ereset_metadata\x18\x07 \x01(\x08H\x02\x42\x0e\n\x0ctopic_updateB\x13\n\x11\x63ollection_updateB\x11\n\x0fmetadata_update\"\xe5\x01\n\x17\x43reateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x01\x88\x01\x01\x12\x1a\n\rget_or_create\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 
\x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimensionB\x10\n\x0e_get_or_create\"s\n\x18\x43reateCollectionResponse\x12&\n\ncollection\x18\x01 \x01(\x0b\x32\x12.chroma.Collection\x12\x0f\n\x07\x63reated\x18\x02 \x01(\x08\x12\x1e\n\x06status\x18\x03 \x01(\x0b\x32\x0e.chroma.Status\"G\n\x17\x44\x65leteCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x03 \x01(\t\"\x8b\x01\n\x15GetCollectionsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x12\n\x05topic\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x04 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x05 \x01(\tB\x05\n\x03_idB\x07\n\x05_nameB\x08\n\x06_topic\"a\n\x16GetCollectionsResponse\x12\'\n\x0b\x63ollections\x18\x01 \x03(\x0b\x32\x12.chroma.Collection\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xde\x01\n\x17UpdateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x05topic\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12*\n\x08metadata\x18\x05 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x12\x18\n\x0ereset_metadata\x18\x06 \x01(\x08H\x00\x42\x11\n\x0fmetadata_updateB\x08\n\x06_topicB\x07\n\x05_nameB\x0c\n\n_dimension\"O\n\x0cNotification\x12\n\n\x02id\x18\x01 \x01(\x03\x12\x15\n\rcollection_id\x18\x02 \x01(\t\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x0e\n\x06status\x18\x04 
\x01(\t2\xd6\x07\n\x05SysDB\x12I\n\x0e\x43reateDatabase\x12\x1d.chroma.CreateDatabaseRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12H\n\x0bGetDatabase\x12\x1a.chroma.GetDatabaseRequest\x1a\x1b.chroma.GetDatabaseResponse\"\x00\x12\x45\n\x0c\x43reateTenant\x12\x1b.chroma.CreateTenantRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12\x42\n\tGetTenant\x12\x18.chroma.GetTenantRequest\x1a\x19.chroma.GetTenantResponse\"\x00\x12G\n\rCreateSegment\x12\x1c.chroma.CreateSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12G\n\rDeleteSegment\x12\x1c.chroma.DeleteSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12H\n\x0bGetSegments\x12\x1a.chroma.GetSegmentsRequest\x1a\x1b.chroma.GetSegmentsResponse\"\x00\x12G\n\rUpdateSegment\x12\x1c.chroma.UpdateSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12W\n\x10\x43reateCollection\x12\x1f.chroma.CreateCollectionRequest\x1a .chroma.CreateCollectionResponse\"\x00\x12M\n\x10\x44\x65leteCollection\x12\x1f.chroma.DeleteCollectionRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12Q\n\x0eGetCollections\x12\x1d.chroma.GetCollectionsRequest\x1a\x1e.chroma.GetCollectionsResponse\"\x00\x12M\n\x10UpdateCollection\x12\x1f.chroma.UpdateCollectionRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12>\n\nResetState\x12\x16.google.protobuf.Empty\x1a\x16.chroma.ChromaResponse\"\x00\x42\x43ZAgithub.com/chroma/chroma-coordinator/internal/proto/coordinatorpbb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -57,6 +57,8 @@ _globals['_GETCOLLECTIONSRESPONSE']._serialized_end=1763 _globals['_UPDATECOLLECTIONREQUEST']._serialized_start=1766 _globals['_UPDATECOLLECTIONREQUEST']._serialized_end=1988 - _globals['_SYSDB']._serialized_start=1991 - _globals['_SYSDB']._serialized_end=2973 + _globals['_NOTIFICATION']._serialized_start=1990 + _globals['_NOTIFICATION']._serialized_end=2069 + _globals['_SYSDB']._serialized_start=2072 + _globals['_SYSDB']._serialized_end=3054 # @@protoc_insertion_point(module_scope) diff --git 
a/chromadb/proto/coordinator_pb2.pyi b/chromadb/proto/coordinator_pb2.pyi index 81545e4e2832..ec926340cdfa 100644 --- a/chromadb/proto/coordinator_pb2.pyi +++ b/chromadb/proto/coordinator_pb2.pyi @@ -180,3 +180,15 @@ class UpdateCollectionRequest(_message.Message): metadata: _chroma_pb2.UpdateMetadata reset_metadata: bool def __init__(self, id: _Optional[str] = ..., topic: _Optional[str] = ..., name: _Optional[str] = ..., dimension: _Optional[int] = ..., metadata: _Optional[_Union[_chroma_pb2.UpdateMetadata, _Mapping]] = ..., reset_metadata: bool = ...) -> None: ... + +class Notification(_message.Message): + __slots__ = ["id", "collection_id", "type", "status"] + ID_FIELD_NUMBER: _ClassVar[int] + COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + STATUS_FIELD_NUMBER: _ClassVar[int] + id: int + collection_id: str + type: str + status: str + def __init__(self, id: _Optional[int] = ..., collection_id: _Optional[str] = ..., type: _Optional[str] = ..., status: _Optional[str] = ...) -> None: ... diff --git a/chromadb/proto/logservice_pb2.py b/chromadb/proto/logservice_pb2.py new file mode 100644 index 000000000000..f7dd81efc1bd --- /dev/null +++ b/chromadb/proto/logservice_pb2.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: chromadb/proto/logservice.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b"\n\x1f\x63hromadb/proto/logservice.proto\x12\x06\x63hroma2\x0c\n\nLogServiceBBZ@github.com/chroma/chroma-coordinator/internal/proto/logservicepbb\x06proto3" +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages( + DESCRIPTOR, "chromadb.proto.logservice_pb2", _globals +) +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = ( + b"Z@github.com/chroma/chroma-coordinator/internal/proto/logservicepb" + ) + _globals["_LOGSERVICE"]._serialized_start = 43 + _globals["_LOGSERVICE"]._serialized_end = 55 +# @@protoc_insertion_point(module_scope) diff --git a/chromadb/proto/logservice_pb2.pyi b/chromadb/proto/logservice_pb2.pyi new file mode 100644 index 000000000000..869ab9d2d1e0 --- /dev/null +++ b/chromadb/proto/logservice_pb2.pyi @@ -0,0 +1,4 @@ +from google.protobuf import descriptor as _descriptor +from typing import ClassVar as _ClassVar + +DESCRIPTOR: _descriptor.FileDescriptor diff --git a/chromadb/proto/logservice_pb2_grpc.py b/chromadb/proto/logservice_pb2_grpc.py new file mode 100644 index 000000000000..d98303113da8 --- /dev/null +++ b/chromadb/proto/logservice_pb2_grpc.py @@ -0,0 +1,31 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
+"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + + +class LogServiceStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + + +class LogServiceServicer(object): + """Missing associated documentation comment in .proto file.""" + + +def add_LogServiceServicer_to_server(servicer, server): + rpc_method_handlers = {} + generic_handler = grpc.method_handlers_generic_handler( + "chroma.LogService", rpc_method_handlers + ) + server.add_generic_rpc_handlers((generic_handler,)) + + +# This class is part of an EXPERIMENTAL API. +class LogService(object): + """Missing associated documentation comment in .proto file.""" diff --git a/go/coordinator/Dockerfile b/go/coordinator/Dockerfile index a86f5cc258f0..554da75f93ad 100644 --- a/go/coordinator/Dockerfile +++ b/go/coordinator/Dockerfile @@ -23,9 +23,8 @@ RUN apk add \ RUN mkdir /chroma-coordinator WORKDIR /chroma-coordinator -COPY --from=build /src/chroma-coordinator/bin/chroma /chroma-coordinator/bin/chroma +COPY --from=build /src/chroma-coordinator/bin/coordinator /chroma-coordinator/bin/coordinator +COPY --from=build /src/chroma-coordinator/bin/logservice /chroma-coordinator/bin/logservice ENV PATH=$PATH:/chroma-coordinator/bin -COPY --from=build /src/chroma-coordinator/migrations /chroma-coordinator/migrations - CMD /bin/bash diff --git a/go/coordinator/Dockerfile.migration b/go/coordinator/Dockerfile.migration new file mode 100644 index 000000000000..092f2629540f --- /dev/null +++ b/go/coordinator/Dockerfile.migration @@ -0,0 +1,4 @@ +FROM arigaio/atlas:latest +workdir /app +COPY ./go/coordinator/migrations migrations +COPY ./go/coordinator/atlas.hcl atlas.hcl diff --git a/go/coordinator/Makefile b/go/coordinator/Makefile index 8fb52e4bb748..f1a440e4744c 100644 --- a/go/coordinator/Makefile +++ b/go/coordinator/Makefile @@ -1,6 +1,7 @@ .PHONY: build build: - go 
build -v -o bin/chroma ./cmd + go build -v -o bin/coordinator ./cmd/coordinator/ + go build -v -o bin/logservice ./cmd/logservice/ test: build go test -cover -race ./... diff --git a/go/coordinator/atlas.hcl b/go/coordinator/atlas.hcl index 2883c58d65e8..f2c17f57c191 100644 --- a/go/coordinator/atlas.hcl +++ b/go/coordinator/atlas.hcl @@ -10,9 +10,9 @@ data "external_schema" "gorm" { ] } -env "gorm" { +env "dev" { src = data.external_schema.gorm.url - dev = "postgres://localhost:5432/dev?sslmode=disable" + dev = "postgres://localhost:5432/chroma?sslmode=disable" migration { dir = "file://migrations" } diff --git a/go/coordinator/cmd/grpccoordinator/cmd.go b/go/coordinator/cmd/coordinator/cmd.go similarity index 64% rename from go/coordinator/cmd/grpccoordinator/cmd.go rename to go/coordinator/cmd/coordinator/cmd.go index 8859790b56c8..a1dadfc5cdca 100644 --- a/go/coordinator/cmd/grpccoordinator/cmd.go +++ b/go/coordinator/cmd/coordinator/cmd.go @@ -1,18 +1,18 @@ -package grpccoordinator +package main import ( + "github.com/chroma/chroma-coordinator/internal/coordinator/grpc" + "github.com/chroma/chroma-coordinator/internal/grpcutils" "io" "time" "github.com/chroma/chroma-coordinator/cmd/flag" - "github.com/chroma/chroma-coordinator/internal/grpccoordinator" - "github.com/chroma/chroma-coordinator/internal/grpccoordinator/grpcutils" "github.com/chroma/chroma-coordinator/internal/utils" "github.com/spf13/cobra" ) var ( - conf = grpccoordinator.Config{ + conf = grpc.Config{ GrpcConfig: &grpcutils.GrpcConfig{}, } @@ -30,14 +30,15 @@ func init() { flag.GRPCAddr(Cmd, &conf.GrpcConfig.BindAddress) // System Catalog - Cmd.Flags().StringVar(&conf.SystemCatalogProvider, "system-catalog-provider", "memory", "System catalog provider") - Cmd.Flags().StringVar(&conf.Username, "username", "root", "MetaTable username") - Cmd.Flags().StringVar(&conf.Password, "password", "", "MetaTable password") - Cmd.Flags().StringVar(&conf.Address, "db-address", "127.0.0.1", "MetaTable db 
address") - Cmd.Flags().IntVar(&conf.Port, "db-port", 5432, "MetaTable db port") - Cmd.Flags().StringVar(&conf.DBName, "db-name", "", "MetaTable db name") - Cmd.Flags().IntVar(&conf.MaxIdleConns, "max-idle-conns", 10, "MetaTable max idle connections") - Cmd.Flags().IntVar(&conf.MaxOpenConns, "max-open-conns", 10, "MetaTable max open connections") + Cmd.Flags().StringVar(&conf.SystemCatalogProvider, "system-catalog-provider", "database", "System catalog provider") + Cmd.Flags().StringVar(&conf.DBConfig.Username, "username", "chroma", "MetaTable username") + Cmd.Flags().StringVar(&conf.DBConfig.Password, "password", "chroma", "MetaTable password") + Cmd.Flags().StringVar(&conf.DBConfig.Address, "db-address", "postgres", "MetaTable db address") + Cmd.Flags().IntVar(&conf.DBConfig.Port, "db-port", 5432, "MetaTable db port") + Cmd.Flags().StringVar(&conf.DBConfig.DBName, "db-name", "chroma", "MetaTable db name") + Cmd.Flags().IntVar(&conf.DBConfig.MaxIdleConns, "max-idle-conns", 10, "MetaTable max idle connections") + Cmd.Flags().IntVar(&conf.DBConfig.MaxOpenConns, "max-open-conns", 10, "MetaTable max open connections") + Cmd.Flags().StringVar(&conf.DBConfig.SslMode, "ssl-mode", "disable", "SSL mode for database connection") // Pulsar Cmd.Flags().StringVar(&conf.PulsarAdminURL, "pulsar-admin-url", "http://localhost:8080", "Pulsar admin url") @@ -59,6 +60,6 @@ func init() { func exec(*cobra.Command, []string) { utils.RunProcess(func() (io.Closer, error) { - return grpccoordinator.New(conf) + return grpc.New(conf) }) } diff --git a/go/coordinator/cmd/main.go b/go/coordinator/cmd/coordinator/main.go similarity index 85% rename from go/coordinator/cmd/main.go rename to go/coordinator/cmd/coordinator/main.go index 0b7cfa7b54d7..bfa31c8c9be9 100644 --- a/go/coordinator/cmd/main.go +++ b/go/coordinator/cmd/coordinator/main.go @@ -4,7 +4,6 @@ import ( "fmt" "os" - "github.com/chroma/chroma-coordinator/cmd/grpccoordinator" "github.com/chroma/chroma-coordinator/internal/utils" 
"github.com/rs/zerolog" "github.com/spf13/cobra" @@ -20,7 +19,7 @@ var ( ) func init() { - rootCmd.AddCommand(grpccoordinator.Cmd) + rootCmd.AddCommand(Cmd) } func main() { diff --git a/go/coordinator/cmd/logservice/cmd.go b/go/coordinator/cmd/logservice/cmd.go new file mode 100644 index 000000000000..721067bb3b2e --- /dev/null +++ b/go/coordinator/cmd/logservice/cmd.go @@ -0,0 +1,46 @@ +package main + +import ( + "github.com/chroma/chroma-coordinator/cmd/flag" + "github.com/chroma/chroma-coordinator/internal/grpcutils" + "github.com/chroma/chroma-coordinator/internal/logservice/grpc" + "github.com/chroma/chroma-coordinator/internal/utils" + "github.com/spf13/cobra" + "io" +) + +var ( + conf = grpc.Config{ + GrpcConfig: &grpcutils.GrpcConfig{}, + } + + Cmd = &cobra.Command{ + Use: "logservice", + Short: "Start a logservice service", + Long: `RecordLog root command`, + Run: exec, + } +) + +func init() { + // GRPC + flag.GRPCAddr(Cmd, &conf.GrpcConfig.BindAddress) + Cmd.Flags().BoolVar(&conf.StartGrpc, "start-grpc", true, "start grpc server or not") + + // DB provider + Cmd.Flags().StringVar(&conf.DBProvider, "db-provider", "postgres", "DB provider") + + // DB dev + Cmd.Flags().StringVar(&conf.DBConfig.Address, "db-host", "postgres", "DB host") + Cmd.Flags().IntVar(&conf.DBConfig.Port, "db-port", 5432, "DB port") + Cmd.Flags().StringVar(&conf.DBConfig.Username, "db-user", "chroma", "DB user") + Cmd.Flags().StringVar(&conf.DBConfig.Password, "db-password", "chroma", "DB password") + Cmd.Flags().StringVar(&conf.DBConfig.DBName, "db-name", "chroma", "DB name") + Cmd.Flags().StringVar(&conf.DBConfig.SslMode, "ssl-mode", "disable", "SSL mode for database connection") +} + +func exec(*cobra.Command, []string) { + utils.RunProcess(func() (io.Closer, error) { + return grpc.New(conf) + }) +} diff --git a/go/coordinator/cmd/logservice/main.go b/go/coordinator/cmd/logservice/main.go new file mode 100644 index 000000000000..d88c70ec61e9 --- /dev/null +++ 
b/go/coordinator/cmd/logservice/main.go @@ -0,0 +1,36 @@ +package main + +import ( + "fmt" + "os" + + "github.com/chroma/chroma-coordinator/internal/utils" + "github.com/rs/zerolog" + "github.com/spf13/cobra" + "go.uber.org/automaxprocs/maxprocs" +) + +var ( + rootCmd = &cobra.Command{ + Use: "logservice", + Short: "RecordLog root command", + Long: `RecordLog root command`, + } +) + +func init() { + rootCmd.AddCommand(Cmd) +} + +func main() { + utils.LogLevel = zerolog.DebugLevel + utils.ConfigureLogger() + if _, err := maxprocs.Set(); err != nil { + _, _ = fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + if err := rootCmd.Execute(); err != nil { + _, _ = fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} diff --git a/go/coordinator/go.sum b/go/coordinator/go.sum index 15390626451c..1977a3665238 100644 --- a/go/coordinator/go.sum +++ b/go/coordinator/go.sum @@ -12,6 +12,8 @@ github.com/AthenZ/athenz v1.10.39/go.mod h1:3Tg8HLsiQZp81BJY58JBeU2BR6B/H4/0MQGf github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DataDog/zstd v1.5.0 h1:+K/VEwIAaPcHiMtQvpLD4lqW7f0Gk3xdYZmI1hD+CXo= github.com/DataDog/zstd v1.5.0/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= +github.com/alecthomas/kong v0.7.1 h1:azoTh0IOfwlAX3qN9sHWTxACE2oV8Bg2gAwBsMwDQY4= +github.com/alecthomas/kong v0.7.1/go.mod h1:n1iCIO2xS46oE8ZfYCNDqdR0b0wZNrXAIAqro/2132U= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= @@ -344,6 +346,7 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools 
v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.10.0 h1:tvDr/iQoUqNdohiYm0LmmKcBk+q86lb9EprIUFhHHGg= +golang.org/x/tools v0.10.0/go.mod h1:UJwyiVBsOA2uwvK/e5OY3GTpDUJriEd+/YlqAwLPmyM= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/go/coordinator/internal/grpccoordinator/collection_service.go b/go/coordinator/internal/coordinator/grpc/collection_service.go similarity index 99% rename from go/coordinator/internal/grpccoordinator/collection_service.go rename to go/coordinator/internal/coordinator/grpc/collection_service.go index faaf6b4dbf9e..9276f1401072 100644 --- a/go/coordinator/internal/grpccoordinator/collection_service.go +++ b/go/coordinator/internal/coordinator/grpc/collection_service.go @@ -1,4 +1,4 @@ -package grpccoordinator +package grpc import ( "context" diff --git a/go/coordinator/internal/grpccoordinator/collection_service_test.go b/go/coordinator/internal/coordinator/grpc/collection_service_test.go similarity index 97% rename from go/coordinator/internal/grpccoordinator/collection_service_test.go rename to go/coordinator/internal/coordinator/grpc/collection_service_test.go index 390b08f76075..c4f02a0682c2 100644 --- a/go/coordinator/internal/grpccoordinator/collection_service_test.go +++ b/go/coordinator/internal/coordinator/grpc/collection_service_test.go @@ -1,11 +1,11 @@ -package grpccoordinator +package grpc import ( "context" + "github.com/chroma/chroma-coordinator/internal/grpcutils" "testing" "github.com/chroma/chroma-coordinator/internal/common" - "github.com/chroma/chroma-coordinator/internal/grpccoordinator/grpcutils" "github.com/chroma/chroma-coordinator/internal/metastore/db/dbcore" 
"github.com/chroma/chroma-coordinator/internal/proto/coordinatorpb" "pgregory.net/rapid" diff --git a/go/coordinator/internal/grpccoordinator/proto_model_convert.go b/go/coordinator/internal/coordinator/grpc/proto_model_convert.go similarity index 99% rename from go/coordinator/internal/grpccoordinator/proto_model_convert.go rename to go/coordinator/internal/coordinator/grpc/proto_model_convert.go index 18c4fd307ab2..9b47f1f33ce0 100644 --- a/go/coordinator/internal/grpccoordinator/proto_model_convert.go +++ b/go/coordinator/internal/coordinator/grpc/proto_model_convert.go @@ -1,4 +1,4 @@ -package grpccoordinator +package grpc import ( "github.com/chroma/chroma-coordinator/internal/common" diff --git a/go/coordinator/internal/grpccoordinator/proto_model_convert_test.go b/go/coordinator/internal/coordinator/grpc/proto_model_convert_test.go similarity index 99% rename from go/coordinator/internal/grpccoordinator/proto_model_convert_test.go rename to go/coordinator/internal/coordinator/grpc/proto_model_convert_test.go index 9cfa2f0632fe..2586151d3c71 100644 --- a/go/coordinator/internal/grpccoordinator/proto_model_convert_test.go +++ b/go/coordinator/internal/coordinator/grpc/proto_model_convert_test.go @@ -1,4 +1,4 @@ -package grpccoordinator +package grpc import ( "testing" diff --git a/go/coordinator/internal/grpccoordinator/segment_service.go b/go/coordinator/internal/coordinator/grpc/segment_service.go similarity index 99% rename from go/coordinator/internal/grpccoordinator/segment_service.go rename to go/coordinator/internal/coordinator/grpc/segment_service.go index b2d3be5e4ff2..6e63e384ef15 100644 --- a/go/coordinator/internal/grpccoordinator/segment_service.go +++ b/go/coordinator/internal/coordinator/grpc/segment_service.go @@ -1,4 +1,4 @@ -package grpccoordinator +package grpc import ( "context" diff --git a/go/coordinator/internal/grpccoordinator/server.go b/go/coordinator/internal/coordinator/grpc/server.go similarity index 90% rename from 
go/coordinator/internal/grpccoordinator/server.go rename to go/coordinator/internal/coordinator/grpc/server.go index 4205a47153b6..578298719a7c 100644 --- a/go/coordinator/internal/grpccoordinator/server.go +++ b/go/coordinator/internal/coordinator/grpc/server.go @@ -1,13 +1,13 @@ -package grpccoordinator +package grpc import ( "context" "errors" + "github.com/chroma/chroma-coordinator/internal/grpcutils" "time" "github.com/apache/pulsar-client-go/pulsar" "github.com/chroma/chroma-coordinator/internal/coordinator" - "github.com/chroma/chroma-coordinator/internal/grpccoordinator/grpcutils" "github.com/chroma/chroma-coordinator/internal/memberlist_manager" "github.com/chroma/chroma-coordinator/internal/metastore/db/dao" "github.com/chroma/chroma-coordinator/internal/metastore/db/dbcore" @@ -29,13 +29,7 @@ type Config struct { SystemCatalogProvider string // MetaTable config - Username string - Password string - Address string - Port int - DBName string - MaxIdleConns int - MaxOpenConns int + DBConfig dbcore.DBConfig // Notification config NotificationStoreProvider string @@ -77,16 +71,8 @@ func New(config Config) (*Server, error) { if config.SystemCatalogProvider == "memory" { return NewWithGrpcProvider(config, grpcutils.Default, nil) } else if config.SystemCatalogProvider == "database" { - dBConfig := dbcore.DBConfig{ - Username: config.Username, - Password: config.Password, - Address: config.Address, - Port: config.Port, - DBName: config.DBName, - MaxIdleConns: config.MaxIdleConns, - MaxOpenConns: config.MaxOpenConns, - } - db, err := dbcore.Connect(dBConfig) + dBConfig := config.DBConfig + db, err := dbcore.ConnectPostgres(dBConfig) if err != nil { return nil, err } @@ -175,7 +161,7 @@ func NewWithGrpcProvider(config Config, provider grpcutils.GrpcProvider, db *gor return nil, err } - s.grpcServer, err = provider.StartGrpcServer("coordinator", config.GrpcConfig, func(registrar grpc.ServiceRegistrar) { + s.grpcServer, err = provider.StartGrpcServer("coordinator", 
config.GrpcConfig, func(registrar grpc.ServiceRegistrar) { coordinatorpb.RegisterSysDBServer(registrar, s) }) if err != nil { diff --git a/go/coordinator/internal/grpccoordinator/tenant_database_service.go b/go/coordinator/internal/coordinator/grpc/tenant_database_service.go similarity index 99% rename from go/coordinator/internal/grpccoordinator/tenant_database_service.go rename to go/coordinator/internal/coordinator/grpc/tenant_database_service.go index eb36b3de949a..5ec1045c5ec7 100644 --- a/go/coordinator/internal/grpccoordinator/tenant_database_service.go +++ b/go/coordinator/internal/coordinator/grpc/tenant_database_service.go @@ -1,4 +1,4 @@ -package grpccoordinator +package grpc import ( "context" diff --git a/go/coordinator/internal/grpccoordinator/grpcutils/config.go b/go/coordinator/internal/grpcutils/config.go similarity index 100% rename from go/coordinator/internal/grpccoordinator/grpcutils/config.go rename to go/coordinator/internal/grpcutils/config.go diff --git a/go/coordinator/internal/grpccoordinator/grpcutils/config_test.go b/go/coordinator/internal/grpcutils/config_test.go similarity index 100% rename from go/coordinator/internal/grpccoordinator/grpcutils/config_test.go rename to go/coordinator/internal/grpcutils/config_test.go diff --git a/go/coordinator/internal/grpccoordinator/grpcutils/service.go b/go/coordinator/internal/grpcutils/service.go similarity index 100% rename from go/coordinator/internal/grpccoordinator/grpcutils/service.go rename to go/coordinator/internal/grpcutils/service.go diff --git a/go/coordinator/internal/logservice/apis.go b/go/coordinator/internal/logservice/apis.go new file mode 100644 index 000000000000..2eba78b20f68 --- /dev/null +++ b/go/coordinator/internal/logservice/apis.go @@ -0,0 +1,11 @@ +package logservice + +import ( + "github.com/chroma/chroma-coordinator/internal/common" +) + +type ( + IRecordLog interface { + common.Component + } +) diff --git a/go/coordinator/internal/logservice/grpc/server.go 
b/go/coordinator/internal/logservice/grpc/server.go new file mode 100644 index 000000000000..e3fb1980f78b --- /dev/null +++ b/go/coordinator/internal/logservice/grpc/server.go @@ -0,0 +1,104 @@ +package grpc + +import ( + "context" + "errors" + "github.com/chroma/chroma-coordinator/internal/grpcutils" + "github.com/chroma/chroma-coordinator/internal/logservice" + "github.com/chroma/chroma-coordinator/internal/metastore/db/dbcore" + "github.com/chroma/chroma-coordinator/internal/proto/logservicepb" + "github.com/pingcap/log" + "go.uber.org/zap" + "google.golang.org/grpc" + "google.golang.org/grpc/health" +) + +type Config struct { + // GrpcConfig config + GrpcConfig *grpcutils.GrpcConfig + + // System catalog provider + DBProvider string + + // Postgres config + DBConfig dbcore.DBConfig + + // whether to start grpc service + StartGrpc bool +} + +type Server struct { + logservicepb.UnimplementedLogServiceServer + logService logservice.IRecordLog + grpcServer grpcutils.GrpcServer + healthServer *health.Server +} + +func New(config Config) (*Server, error) { + log.Info("New Log Service...") + + if config.DBProvider == "postgres" { + dBConfig := config.DBConfig + _, err := dbcore.ConnectPostgres(dBConfig) + if err != nil { + log.Error("Error connecting to Postgres DB.", zap.Error(err)) + panic(err) + } + } else { + log.Error("invalid DB provider, only postgres is supported") + return nil, errors.New("invalid DB provider, only postgres is supported") + } + + s := startLogService() + if config.StartGrpc { + s.grpcServer = startGrpcService(s, config.GrpcConfig) + } + + log.Info("New Log Service Completed.") + return s, nil +} + +func startLogService() *Server { + log.Info("Staring Log Service...") + ctx := context.Background() + s := &Server{ + healthServer: health.NewServer(), + } + + logService, err := logservice.NewLogService(ctx) + if err != nil { + log.Error("Error creating Log Service.", zap.Error(err)) + panic(err) + } + s.logService = logService + err = 
s.logService.Start() + if err != nil { + log.Error("Error starting Log Service.", zap.Error(err)) + panic(err) + } + log.Info("Log Service Started.") + return s +} + +func startGrpcService(s *Server, grpcConfig *grpcutils.GrpcConfig) grpcutils.GrpcServer { + log.Info("Staring Grpc Service...") + server, err := grpcutils.Default.StartGrpcServer("logservice", grpcConfig, func(registrar grpc.ServiceRegistrar) { + logservicepb.RegisterLogServiceServer(registrar, s) + }) + if err != nil { + log.Error("Error starting grpc Service.", zap.Error(err)) + panic(err) + } + return server +} + +func (s *Server) Close() error { + s.healthServer.Shutdown() + err := s.logService.Stop() + if err != nil { + log.Error("Failed to stop log service", zap.Error(err)) + return err + } + log.Info("Server closed") + return nil +} diff --git a/go/coordinator/internal/logservice/recordlog.go b/go/coordinator/internal/logservice/recordlog.go new file mode 100644 index 000000000000..78729128de6b --- /dev/null +++ b/go/coordinator/internal/logservice/recordlog.go @@ -0,0 +1,33 @@ +package logservice + +import ( + "context" + "github.com/chroma/chroma-coordinator/internal/metastore/db/dao" + "github.com/chroma/chroma-coordinator/internal/metastore/db/dbmodel" + "github.com/pingcap/log" +) + +var _ IRecordLog = (*RecordLog)(nil) + +type RecordLog struct { + ctx context.Context + recordLogDb dbmodel.IRecordLogDb +} + +func NewLogService(ctx context.Context) (*RecordLog, error) { + s := &RecordLog{ + ctx: ctx, + recordLogDb: dao.NewMetaDomain().RecordLogDb(ctx), + } + return s, nil +} + +func (s *RecordLog) Start() error { + log.Info("RecordLog start") + return nil +} + +func (s *RecordLog) Stop() error { + log.Info("RecordLog stop") + return nil +} diff --git a/go/coordinator/internal/metastore/db/dao/common.go b/go/coordinator/internal/metastore/db/dao/common.go index c67cea6c7597..771def6f99fc 100644 --- a/go/coordinator/internal/metastore/db/dao/common.go +++ 
b/go/coordinator/internal/metastore/db/dao/common.go @@ -40,3 +40,7 @@ func (*metaDomain) SegmentMetadataDb(ctx context.Context) dbmodel.ISegmentMetada func (*metaDomain) NotificationDb(ctx context.Context) dbmodel.INotificationDb { return ¬ificationDb{dbcore.GetDB(ctx)} } + +func (*metaDomain) RecordLogDb(ctx context.Context) dbmodel.IRecordLogDb { + return &recordLogDb{dbcore.GetDB(ctx)} +} diff --git a/go/coordinator/internal/metastore/db/dao/record_log.go b/go/coordinator/internal/metastore/db/dao/record_log.go new file mode 100644 index 000000000000..d1601e503c86 --- /dev/null +++ b/go/coordinator/internal/metastore/db/dao/record_log.go @@ -0,0 +1,9 @@ +package dao + +import ( + "gorm.io/gorm" +) + +type recordLogDb struct { + db *gorm.DB +} diff --git a/go/coordinator/internal/metastore/db/dao/segment_metadata.go b/go/coordinator/internal/metastore/db/dao/segment_metadata.go index 14d4d2ec2d04..97800c78d8d3 100644 --- a/go/coordinator/internal/metastore/db/dao/segment_metadata.go +++ b/go/coordinator/internal/metastore/db/dao/segment_metadata.go @@ -21,7 +21,7 @@ func (s *segmentMetadataDb) DeleteBySegmentID(segmentID string) error { func (s *segmentMetadataDb) DeleteBySegmentIDAndKeys(segmentID string, keys []string) error { return s.db. Where("segment_id = ?", segmentID). - Where("`key` IN ?", keys). + Where("key IN ?", keys). 
Delete(&dbmodel.SegmentMetadata{}).Error } diff --git a/go/coordinator/internal/metastore/db/dbcore/core.go b/go/coordinator/internal/metastore/db/dbcore/core.go index 95d2885dfc40..ce05a1b4ca1c 100644 --- a/go/coordinator/internal/metastore/db/dbcore/core.go +++ b/go/coordinator/internal/metastore/db/dbcore/core.go @@ -3,7 +3,9 @@ package dbcore import ( "context" "fmt" + "os" "reflect" + "strconv" "github.com/chroma/chroma-coordinator/internal/common" "github.com/chroma/chroma-coordinator/internal/metastore/db/dbmodel" @@ -11,7 +13,6 @@ import ( "github.com/pingcap/log" "go.uber.org/zap" "gorm.io/driver/postgres" - "gorm.io/driver/sqlite" "gorm.io/gorm" "gorm.io/gorm/logger" ) @@ -28,11 +29,13 @@ type DBConfig struct { DBName string MaxIdleConns int MaxOpenConns int + SslMode string } -func Connect(cfg DBConfig) (*gorm.DB, error) { - dsn := fmt.Sprintf("host=%s user=%s password=%s dbname=%s port=%d sslmode=require", - cfg.Address, cfg.Username, cfg.Password, cfg.DBName, cfg.Port) +func ConnectPostgres(cfg DBConfig) (*gorm.DB, error) { + log.Info("ConnectPostgres", zap.String("host", cfg.Address), zap.String("database", cfg.DBName), zap.Int("port", cfg.Port)) + dsn := fmt.Sprintf("host=%s user=%s password=%s dbname=%s port=%d sslmode=%s", + cfg.Address, cfg.Username, cfg.Password, cfg.DBName, cfg.Port, cfg.SslMode) ormLogger := logger.Default ormLogger.LogMode(logger.Info) @@ -61,7 +64,7 @@ func Connect(cfg DBConfig) (*gorm.DB, error) { globalDB = db - log.Info("db connected success", + log.Info("Postgres connected success", zap.String("host", cfg.Address), zap.String("database", cfg.DBName), zap.Error(err)) @@ -114,14 +117,7 @@ func GetDB(ctx context.Context) *gorm.DB { return globalDB.WithContext(ctx) } -func ConfigDatabaseForTesting() *gorm.DB { - db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{ - Logger: logger.Default.LogMode(logger.Info), - }) - if err != nil { - panic("failed to connect database") - } - SetGlobalDB(db) +func CreateTestTables(db 
*gorm.DB) { // Setup tenant related tables db.Migrator().DropTable(&dbmodel.Tenant{}) db.Migrator().CreateTable(&dbmodel.Tenant{}) @@ -154,5 +150,22 @@ func ConfigDatabaseForTesting() *gorm.DB { // Setup notification related tables db.Migrator().DropTable(&dbmodel.Notification{}) db.Migrator().CreateTable(&dbmodel.Notification{}) +} + +func ConfigDatabaseForTesting() *gorm.DB { + dbAddress := os.Getenv("POSTGRES_HOST") + dbPort, err := strconv.Atoi(os.Getenv("POSTGRES_PORT")) + db, err := ConnectPostgres(DBConfig{ + Username: "chroma", + Password: "chroma", + Address: dbAddress, + Port: dbPort, + DBName: "chroma", + }) + if err != nil { + panic("failed to connect database") + } + SetGlobalDB(db) + CreateTestTables(db) return db } diff --git a/go/coordinator/internal/metastore/db/dbmodel/common.go b/go/coordinator/internal/metastore/db/dbmodel/common.go index d188193ae184..d90b7df55e61 100644 --- a/go/coordinator/internal/metastore/db/dbmodel/common.go +++ b/go/coordinator/internal/metastore/db/dbmodel/common.go @@ -15,6 +15,7 @@ type IMetaDomain interface { SegmentDb(ctx context.Context) ISegmentDb SegmentMetadataDb(ctx context.Context) ISegmentMetadataDb NotificationDb(ctx context.Context) INotificationDb + RecordLogDb(ctx context.Context) IRecordLogDb } //go:generate mockery --name=ITransaction diff --git a/go/coordinator/internal/metastore/db/dbmodel/mocks/IMetaDomain.go b/go/coordinator/internal/metastore/db/dbmodel/mocks/IMetaDomain.go index 0ee94c373e94..50c33f10e6f7 100644 --- a/go/coordinator/internal/metastore/db/dbmodel/mocks/IMetaDomain.go +++ b/go/coordinator/internal/metastore/db/dbmodel/mocks/IMetaDomain.go @@ -126,6 +126,21 @@ func (_m *IMetaDomain) TenantDb(ctx context.Context) dbmodel.ITenantDb { return r0 } +func (_m *IMetaDomain) RecordLogDb(ctx context.Context) dbmodel.IRecordLogDb { + ret := _m.Called(ctx) + + var r0 dbmodel.IRecordLogDb + if rf, ok := ret.Get(0).(func(context.Context) dbmodel.IRecordLogDb); ok { + r0 = rf(ctx) + } else { + if 
ret.Get(0) != nil { + r0 = ret.Get(0).(dbmodel.IRecordLogDb) + } + } + + return r0 +} + // NewIMetaDomain creates a new instance of IMetaDomain. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewIMetaDomain(t interface { diff --git a/go/coordinator/internal/metastore/db/dbmodel/record_log.go b/go/coordinator/internal/metastore/db/dbmodel/record_log.go new file mode 100644 index 000000000000..de8aeaa75b77 --- /dev/null +++ b/go/coordinator/internal/metastore/db/dbmodel/record_log.go @@ -0,0 +1,16 @@ +package dbmodel + +type RecordLog struct { + CollectionID *string `gorm:"collection_id;primaryKey;autoIncrement:false"` + ID int64 `gorm:"id;primaryKey;"` // auto_increment id + Timestamp int64 `gorm:"timestamp;"` + Record *[]byte `gorm:"record;type:bytea"` +} + +func (v RecordLog) TableName() string { + return "record_logs" +} + +//go:generate mockery --name=IRecordLogDb +type IRecordLogDb interface { +} diff --git a/go/coordinator/internal/proto/coordinatorpb/chroma.pb.go b/go/coordinator/internal/proto/coordinatorpb/chroma.pb.go index 3cec5eefe062..d130dd11af3c 100644 --- a/go/coordinator/internal/proto/coordinatorpb/chroma.pb.go +++ b/go/coordinator/internal/proto/coordinatorpb/chroma.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.31.0 -// protoc v4.23.4 +// protoc-gen-go v1.32.0 +// protoc v3.20.3 // source: chromadb/proto/chroma.proto package coordinatorpb @@ -914,7 +914,7 @@ type VectorQueryResult struct { Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` SeqId []byte `protobuf:"bytes,2,opt,name=seq_id,json=seqId,proto3" json:"seq_id,omitempty"` - Distance float64 `protobuf:"fixed64,3,opt,name=distance,proto3" json:"distance,omitempty"` + Distance float32 `protobuf:"fixed32,3,opt,name=distance,proto3" json:"distance,omitempty"` Vector *Vector `protobuf:"bytes,4,opt,name=vector,proto3,oneof" json:"vector,omitempty"` } @@ -964,7 +964,7 @@ func (x *VectorQueryResult) GetSeqId() []byte { return nil } -func (x *VectorQueryResult) GetDistance() float64 { +func (x *VectorQueryResult) GetDistance() float32 { if x != nil { return x.Distance } @@ -1356,7 +1356,7 @@ var file_chromadb_proto_chroma_proto_rawDesc = []byte{ 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x15, 0x0a, 0x06, 0x73, 0x65, 0x71, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x73, 0x65, 0x71, 0x49, 0x64, 0x12, 0x1a, 0x0a, 0x08, 0x64, 0x69, 0x73, 0x74, - 0x61, 0x6e, 0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x01, 0x52, 0x08, 0x64, 0x69, 0x73, 0x74, + 0x61, 0x6e, 0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x12, 0x2b, 0x0a, 0x06, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0e, 0x2e, 0x63, 0x68, 0x72, 0x6f, 0x6d, 0x61, 0x2e, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x48, 0x00, 0x52, 0x06, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x88, 0x01, diff --git a/go/coordinator/internal/proto/coordinatorpb/chroma_grpc.pb.go b/go/coordinator/internal/proto/coordinatorpb/chroma_grpc.pb.go index 09283123121b..b2d9a1781496 100644 --- a/go/coordinator/internal/proto/coordinatorpb/chroma_grpc.pb.go +++ 
b/go/coordinator/internal/proto/coordinatorpb/chroma_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: -// - protoc-gen-go-grpc v1.3.0 -// - protoc v4.23.4 +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.20.3 // source: chromadb/proto/chroma.proto package coordinatorpb @@ -18,11 +18,6 @@ import ( // Requires gRPC-Go v1.32.0 or later. const _ = grpc.SupportPackageIsVersion7 -const ( - VectorReader_GetVectors_FullMethodName = "/chroma.VectorReader/GetVectors" - VectorReader_QueryVectors_FullMethodName = "/chroma.VectorReader/QueryVectors" -) - // VectorReaderClient is the client API for VectorReader service. // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. @@ -41,7 +36,7 @@ func NewVectorReaderClient(cc grpc.ClientConnInterface) VectorReaderClient { func (c *vectorReaderClient) GetVectors(ctx context.Context, in *GetVectorsRequest, opts ...grpc.CallOption) (*GetVectorsResponse, error) { out := new(GetVectorsResponse) - err := c.cc.Invoke(ctx, VectorReader_GetVectors_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.VectorReader/GetVectors", in, out, opts...) if err != nil { return nil, err } @@ -50,7 +45,7 @@ func (c *vectorReaderClient) GetVectors(ctx context.Context, in *GetVectorsReque func (c *vectorReaderClient) QueryVectors(ctx context.Context, in *QueryVectorsRequest, opts ...grpc.CallOption) (*QueryVectorsResponse, error) { out := new(QueryVectorsResponse) - err := c.cc.Invoke(ctx, VectorReader_QueryVectors_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.VectorReader/QueryVectors", in, out, opts...) 
if err != nil { return nil, err } @@ -99,7 +94,7 @@ func _VectorReader_GetVectors_Handler(srv interface{}, ctx context.Context, dec } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: VectorReader_GetVectors_FullMethodName, + FullMethod: "/chroma.VectorReader/GetVectors", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(VectorReaderServer).GetVectors(ctx, req.(*GetVectorsRequest)) @@ -117,7 +112,7 @@ func _VectorReader_QueryVectors_Handler(srv interface{}, ctx context.Context, de } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: VectorReader_QueryVectors_FullMethodName, + FullMethod: "/chroma.VectorReader/QueryVectors", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(VectorReaderServer).QueryVectors(ctx, req.(*QueryVectorsRequest)) diff --git a/go/coordinator/internal/proto/coordinatorpb/coordinator.pb.go b/go/coordinator/internal/proto/coordinatorpb/coordinator.pb.go index be93392c3049..1b5347462e2f 100644 --- a/go/coordinator/internal/proto/coordinatorpb/coordinator.pb.go +++ b/go/coordinator/internal/proto/coordinatorpb/coordinator.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.31.0 -// protoc v4.23.4 +// protoc-gen-go v1.32.0 +// protoc v3.20.3 // source: chromadb/proto/coordinator.proto package coordinatorpb diff --git a/go/coordinator/internal/proto/coordinatorpb/coordinator_grpc.pb.go b/go/coordinator/internal/proto/coordinatorpb/coordinator_grpc.pb.go index ed123f9f3a6f..74f79e0711d8 100644 --- a/go/coordinator/internal/proto/coordinatorpb/coordinator_grpc.pb.go +++ b/go/coordinator/internal/proto/coordinatorpb/coordinator_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
// versions: -// - protoc-gen-go-grpc v1.3.0 -// - protoc v4.23.4 +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.20.3 // source: chromadb/proto/coordinator.proto package coordinatorpb @@ -19,22 +19,6 @@ import ( // Requires gRPC-Go v1.32.0 or later. const _ = grpc.SupportPackageIsVersion7 -const ( - SysDB_CreateDatabase_FullMethodName = "/chroma.SysDB/CreateDatabase" - SysDB_GetDatabase_FullMethodName = "/chroma.SysDB/GetDatabase" - SysDB_CreateTenant_FullMethodName = "/chroma.SysDB/CreateTenant" - SysDB_GetTenant_FullMethodName = "/chroma.SysDB/GetTenant" - SysDB_CreateSegment_FullMethodName = "/chroma.SysDB/CreateSegment" - SysDB_DeleteSegment_FullMethodName = "/chroma.SysDB/DeleteSegment" - SysDB_GetSegments_FullMethodName = "/chroma.SysDB/GetSegments" - SysDB_UpdateSegment_FullMethodName = "/chroma.SysDB/UpdateSegment" - SysDB_CreateCollection_FullMethodName = "/chroma.SysDB/CreateCollection" - SysDB_DeleteCollection_FullMethodName = "/chroma.SysDB/DeleteCollection" - SysDB_GetCollections_FullMethodName = "/chroma.SysDB/GetCollections" - SysDB_UpdateCollection_FullMethodName = "/chroma.SysDB/UpdateCollection" - SysDB_ResetState_FullMethodName = "/chroma.SysDB/ResetState" -) - // SysDBClient is the client API for SysDB service. // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. @@ -64,7 +48,7 @@ func NewSysDBClient(cc grpc.ClientConnInterface) SysDBClient { func (c *sysDBClient) CreateDatabase(ctx context.Context, in *CreateDatabaseRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_CreateDatabase_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/CreateDatabase", in, out, opts...) 
if err != nil { return nil, err } @@ -73,7 +57,7 @@ func (c *sysDBClient) CreateDatabase(ctx context.Context, in *CreateDatabaseRequ func (c *sysDBClient) GetDatabase(ctx context.Context, in *GetDatabaseRequest, opts ...grpc.CallOption) (*GetDatabaseResponse, error) { out := new(GetDatabaseResponse) - err := c.cc.Invoke(ctx, SysDB_GetDatabase_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/GetDatabase", in, out, opts...) if err != nil { return nil, err } @@ -82,7 +66,7 @@ func (c *sysDBClient) GetDatabase(ctx context.Context, in *GetDatabaseRequest, o func (c *sysDBClient) CreateTenant(ctx context.Context, in *CreateTenantRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_CreateTenant_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/CreateTenant", in, out, opts...) if err != nil { return nil, err } @@ -91,7 +75,7 @@ func (c *sysDBClient) CreateTenant(ctx context.Context, in *CreateTenantRequest, func (c *sysDBClient) GetTenant(ctx context.Context, in *GetTenantRequest, opts ...grpc.CallOption) (*GetTenantResponse, error) { out := new(GetTenantResponse) - err := c.cc.Invoke(ctx, SysDB_GetTenant_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/GetTenant", in, out, opts...) if err != nil { return nil, err } @@ -100,7 +84,7 @@ func (c *sysDBClient) GetTenant(ctx context.Context, in *GetTenantRequest, opts func (c *sysDBClient) CreateSegment(ctx context.Context, in *CreateSegmentRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_CreateSegment_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/CreateSegment", in, out, opts...) 
if err != nil { return nil, err } @@ -109,7 +93,7 @@ func (c *sysDBClient) CreateSegment(ctx context.Context, in *CreateSegmentReques func (c *sysDBClient) DeleteSegment(ctx context.Context, in *DeleteSegmentRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_DeleteSegment_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/DeleteSegment", in, out, opts...) if err != nil { return nil, err } @@ -118,7 +102,7 @@ func (c *sysDBClient) DeleteSegment(ctx context.Context, in *DeleteSegmentReques func (c *sysDBClient) GetSegments(ctx context.Context, in *GetSegmentsRequest, opts ...grpc.CallOption) (*GetSegmentsResponse, error) { out := new(GetSegmentsResponse) - err := c.cc.Invoke(ctx, SysDB_GetSegments_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/GetSegments", in, out, opts...) if err != nil { return nil, err } @@ -127,7 +111,7 @@ func (c *sysDBClient) GetSegments(ctx context.Context, in *GetSegmentsRequest, o func (c *sysDBClient) UpdateSegment(ctx context.Context, in *UpdateSegmentRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_UpdateSegment_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/UpdateSegment", in, out, opts...) if err != nil { return nil, err } @@ -136,7 +120,7 @@ func (c *sysDBClient) UpdateSegment(ctx context.Context, in *UpdateSegmentReques func (c *sysDBClient) CreateCollection(ctx context.Context, in *CreateCollectionRequest, opts ...grpc.CallOption) (*CreateCollectionResponse, error) { out := new(CreateCollectionResponse) - err := c.cc.Invoke(ctx, SysDB_CreateCollection_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/CreateCollection", in, out, opts...) 
if err != nil { return nil, err } @@ -145,7 +129,7 @@ func (c *sysDBClient) CreateCollection(ctx context.Context, in *CreateCollection func (c *sysDBClient) DeleteCollection(ctx context.Context, in *DeleteCollectionRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_DeleteCollection_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/DeleteCollection", in, out, opts...) if err != nil { return nil, err } @@ -154,7 +138,7 @@ func (c *sysDBClient) DeleteCollection(ctx context.Context, in *DeleteCollection func (c *sysDBClient) GetCollections(ctx context.Context, in *GetCollectionsRequest, opts ...grpc.CallOption) (*GetCollectionsResponse, error) { out := new(GetCollectionsResponse) - err := c.cc.Invoke(ctx, SysDB_GetCollections_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/GetCollections", in, out, opts...) if err != nil { return nil, err } @@ -163,7 +147,7 @@ func (c *sysDBClient) GetCollections(ctx context.Context, in *GetCollectionsRequ func (c *sysDBClient) UpdateCollection(ctx context.Context, in *UpdateCollectionRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_UpdateCollection_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/UpdateCollection", in, out, opts...) if err != nil { return nil, err } @@ -172,7 +156,7 @@ func (c *sysDBClient) UpdateCollection(ctx context.Context, in *UpdateCollection func (c *sysDBClient) ResetState(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_ResetState_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/ResetState", in, out, opts...) 
if err != nil { return nil, err } @@ -265,7 +249,7 @@ func _SysDB_CreateDatabase_Handler(srv interface{}, ctx context.Context, dec fun } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_CreateDatabase_FullMethodName, + FullMethod: "/chroma.SysDB/CreateDatabase", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).CreateDatabase(ctx, req.(*CreateDatabaseRequest)) @@ -283,7 +267,7 @@ func _SysDB_GetDatabase_Handler(srv interface{}, ctx context.Context, dec func(i } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_GetDatabase_FullMethodName, + FullMethod: "/chroma.SysDB/GetDatabase", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).GetDatabase(ctx, req.(*GetDatabaseRequest)) @@ -301,7 +285,7 @@ func _SysDB_CreateTenant_Handler(srv interface{}, ctx context.Context, dec func( } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_CreateTenant_FullMethodName, + FullMethod: "/chroma.SysDB/CreateTenant", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).CreateTenant(ctx, req.(*CreateTenantRequest)) @@ -319,7 +303,7 @@ func _SysDB_GetTenant_Handler(srv interface{}, ctx context.Context, dec func(int } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_GetTenant_FullMethodName, + FullMethod: "/chroma.SysDB/GetTenant", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).GetTenant(ctx, req.(*GetTenantRequest)) @@ -337,7 +321,7 @@ func _SysDB_CreateSegment_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_CreateSegment_FullMethodName, + FullMethod: "/chroma.SysDB/CreateSegment", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).CreateSegment(ctx, req.(*CreateSegmentRequest)) @@ -355,7 +339,7 @@ 
func _SysDB_DeleteSegment_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_DeleteSegment_FullMethodName, + FullMethod: "/chroma.SysDB/DeleteSegment", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).DeleteSegment(ctx, req.(*DeleteSegmentRequest)) @@ -373,7 +357,7 @@ func _SysDB_GetSegments_Handler(srv interface{}, ctx context.Context, dec func(i } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_GetSegments_FullMethodName, + FullMethod: "/chroma.SysDB/GetSegments", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).GetSegments(ctx, req.(*GetSegmentsRequest)) @@ -391,7 +375,7 @@ func _SysDB_UpdateSegment_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_UpdateSegment_FullMethodName, + FullMethod: "/chroma.SysDB/UpdateSegment", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).UpdateSegment(ctx, req.(*UpdateSegmentRequest)) @@ -409,7 +393,7 @@ func _SysDB_CreateCollection_Handler(srv interface{}, ctx context.Context, dec f } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_CreateCollection_FullMethodName, + FullMethod: "/chroma.SysDB/CreateCollection", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).CreateCollection(ctx, req.(*CreateCollectionRequest)) @@ -427,7 +411,7 @@ func _SysDB_DeleteCollection_Handler(srv interface{}, ctx context.Context, dec f } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_DeleteCollection_FullMethodName, + FullMethod: "/chroma.SysDB/DeleteCollection", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).DeleteCollection(ctx, req.(*DeleteCollectionRequest)) @@ -445,7 +429,7 @@ func 
_SysDB_GetCollections_Handler(srv interface{}, ctx context.Context, dec fun } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_GetCollections_FullMethodName, + FullMethod: "/chroma.SysDB/GetCollections", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).GetCollections(ctx, req.(*GetCollectionsRequest)) @@ -463,7 +447,7 @@ func _SysDB_UpdateCollection_Handler(srv interface{}, ctx context.Context, dec f } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_UpdateCollection_FullMethodName, + FullMethod: "/chroma.SysDB/UpdateCollection", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).UpdateCollection(ctx, req.(*UpdateCollectionRequest)) @@ -481,7 +465,7 @@ func _SysDB_ResetState_Handler(srv interface{}, ctx context.Context, dec func(in } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_ResetState_FullMethodName, + FullMethod: "/chroma.SysDB/ResetState", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).ResetState(ctx, req.(*emptypb.Empty)) diff --git a/go/coordinator/internal/proto/logservicepb/logservice.pb.go b/go/coordinator/internal/proto/logservicepb/logservice.pb.go new file mode 100644 index 000000000000..6eaa51a4349e --- /dev/null +++ b/go/coordinator/internal/proto/logservicepb/logservice.pb.go @@ -0,0 +1,67 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.32.0 +// protoc v3.20.3 +// source: chromadb/proto/logservice.proto + +package logservicepb + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +var File_chromadb_proto_logservice_proto protoreflect.FileDescriptor + +var file_chromadb_proto_logservice_proto_rawDesc = []byte{ + 0x0a, 0x1f, 0x63, 0x68, 0x72, 0x6f, 0x6d, 0x61, 0x64, 0x62, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2f, 0x6c, 0x6f, 0x67, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x12, 0x06, 0x63, 0x68, 0x72, 0x6f, 0x6d, 0x61, 0x32, 0x0c, 0x0a, 0x0a, 0x4c, 0x6f, 0x67, + 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x42, 0x42, 0x5a, 0x40, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x63, 0x68, 0x72, 0x6f, 0x6d, 0x61, 0x2f, 0x63, 0x68, 0x72, + 0x6f, 0x6d, 0x61, 0x2d, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2f, + 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6c, + 0x6f, 0x67, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x33, +} + +var file_chromadb_proto_logservice_proto_goTypes = []interface{}{} +var file_chromadb_proto_logservice_proto_depIdxs = []int32{ + 0, // [0:0] is the sub-list for method output_type + 0, // [0:0] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_chromadb_proto_logservice_proto_init() } +func file_chromadb_proto_logservice_proto_init() { + if File_chromadb_proto_logservice_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_chromadb_proto_logservice_proto_rawDesc, + NumEnums: 0, + NumMessages: 0, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_chromadb_proto_logservice_proto_goTypes, + DependencyIndexes: file_chromadb_proto_logservice_proto_depIdxs, + }.Build() + 
File_chromadb_proto_logservice_proto = out.File + file_chromadb_proto_logservice_proto_rawDesc = nil + file_chromadb_proto_logservice_proto_goTypes = nil + file_chromadb_proto_logservice_proto_depIdxs = nil +} diff --git a/go/coordinator/internal/proto/logservicepb/logservice_grpc.pb.go b/go/coordinator/internal/proto/logservicepb/logservice_grpc.pb.go new file mode 100644 index 000000000000..5a89141fa817 --- /dev/null +++ b/go/coordinator/internal/proto/logservicepb/logservice_grpc.pb.go @@ -0,0 +1,65 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.20.3 +// source: chromadb/proto/logservice.proto + +package logservicepb + +import ( + grpc "google.golang.org/grpc" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +// LogServiceClient is the client API for LogService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type LogServiceClient interface { +} + +type logServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewLogServiceClient(cc grpc.ClientConnInterface) LogServiceClient { + return &logServiceClient{cc} +} + +// LogServiceServer is the server API for LogService service. +// All implementations must embed UnimplementedLogServiceServer +// for forward compatibility +type LogServiceServer interface { + mustEmbedUnimplementedLogServiceServer() +} + +// UnimplementedLogServiceServer must be embedded to have forward compatible implementations. +type UnimplementedLogServiceServer struct { +} + +func (UnimplementedLogServiceServer) mustEmbedUnimplementedLogServiceServer() {} + +// UnsafeLogServiceServer may be embedded to opt out of forward compatibility for this service. 
+// Use of this interface is not recommended, as added methods to LogServiceServer will +// result in compilation errors. +type UnsafeLogServiceServer interface { + mustEmbedUnimplementedLogServiceServer() +} + +func RegisterLogServiceServer(s grpc.ServiceRegistrar, srv LogServiceServer) { + s.RegisterService(&LogService_ServiceDesc, srv) +} + +// LogService_ServiceDesc is the grpc.ServiceDesc for LogService service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var LogService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "chroma.LogService", + HandlerType: (*LogServiceServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{}, + Metadata: "chromadb/proto/logservice.proto", +} diff --git a/go/coordinator/migrations/20231129183041.sql b/go/coordinator/migrations/20231129183041.sql deleted file mode 100644 index 2a31ebb48778..000000000000 --- a/go/coordinator/migrations/20231129183041.sql +++ /dev/null @@ -1,8 +0,0 @@ --- Create "notifications" table -CREATE TABLE "public"."notifications" ( - "id" bigserial NOT NULL, - "collection_id" text NULL, - "type" text NULL, - "status" text NULL, - PRIMARY KEY ("id") -); diff --git a/go/coordinator/migrations/20231116210409.sql b/go/coordinator/migrations/20240215010425.sql similarity index 86% rename from go/coordinator/migrations/20231116210409.sql rename to go/coordinator/migrations/20240215010425.sql index bb9c8d8a00c4..378c5d630e5a 100644 --- a/go/coordinator/migrations/20231116210409.sql +++ b/go/coordinator/migrations/20240215010425.sql @@ -38,6 +38,22 @@ CREATE TABLE "public"."databases" ( ); -- Create index "idx_tenantid_name" to table: "databases" CREATE UNIQUE INDEX "idx_tenantid_name" ON "public"."databases" ("name", "tenant_id"); +-- Create "notifications" table +CREATE TABLE "public"."notifications" ( + "id" bigserial NOT NULL, + "collection_id" text NULL, + "type" text NULL, + "status" text NULL, + PRIMARY KEY 
("id") +); +-- Create "record_logs" table +CREATE TABLE "public"."record_logs" ( + "collection_id" text NOT NULL, + "id" bigserial NOT NULL, + "timestamp" bigint NULL, + "record" bytea NULL, + PRIMARY KEY ("collection_id", "id") +); -- Create "segment_metadata" table CREATE TABLE "public"."segment_metadata" ( "segment_id" text NOT NULL, diff --git a/go/coordinator/migrations/atlas.sum b/go/coordinator/migrations/atlas.sum index d4ee513fa904..624c7eabe3aa 100644 --- a/go/coordinator/migrations/atlas.sum +++ b/go/coordinator/migrations/atlas.sum @@ -1,3 +1,2 @@ -h1:j28ectYxexGfQz/LClD7yYVUHAfIcPHlboAJ1Qw0G7I= -20231116210409.sql h1:vwZRvrXrUMOuDykEaheyEzsnNCpmH73x0QEefzUtf8o= -20231129183041.sql h1:FglI5Hjf7kqvjCsSYWkK2IGS2aThQBaVhpg9WekhNEA= +h1:OoMkQddKcFi1jQ4pCp2i8IJAIEDHjQpI3mw+sHoQ1fI= +20240215010425.sql h1:U4h0i9epzZOrFesFlcMJ8250n3SoY5Uv0AejgcZCTTw= diff --git a/idl/chromadb/proto/logservice.proto b/idl/chromadb/proto/logservice.proto new file mode 100644 index 000000000000..18c32a6a0d46 --- /dev/null +++ b/idl/chromadb/proto/logservice.proto @@ -0,0 +1,8 @@ +syntax = "proto3"; + +package chroma; +option go_package = "github.com/chroma/chroma-coordinator/internal/proto/logservicepb"; + +service LogService { + +} diff --git a/idl/makefile b/idl/makefile index 18cbc1977ba4..183fd24a1985 100644 --- a/idl/makefile +++ b/idl/makefile @@ -17,6 +17,7 @@ proto_go: --go-grpc_opt paths=source_relative \ --plugin protoc-gen-go-grpc="${GOPATH}/bin/protoc-gen-go-grpc" \ chromadb/proto/*.proto + @mv ../go/coordinator/internal/proto/coordinatorpb/chromadb/proto/logservice*.go ../go/coordinator/internal/proto/logservicepb/ @mv ../go/coordinator/internal/proto/coordinatorpb/chromadb/proto/*.go ../go/coordinator/internal/proto/coordinatorpb/ @rm -rf ../go/coordinator/internal/proto/coordinatorpb/chromadb @echo "Done" diff --git a/k8s/deployment/kubernetes.yaml b/k8s/deployment/kubernetes.yaml index b1f9baabdd0b..5b5ec4a7a847 100644 --- a/k8s/deployment/kubernetes.yaml +++ 
b/k8s/deployment/kubernetes.yaml @@ -77,6 +77,76 @@ spec: --- +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: chroma +spec: + ports: + - name: postgres-port + port: 5432 + targetPort: 5432 + selector: + app: postgres + type: ClusterIP + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: postgres + namespace: chroma +spec: + replicas: 1 + selector: + matchLabels: + app: postgres + template: + metadata: + labels: + app: postgres + spec: + containers: + - name: postgres + image: postgres:14.1-alpine + env: + - name: POSTGRES_DB + value: chroma + - name: POSTGRES_USER + value: chroma + - name: POSTGRES_PASSWORD + value: chroma + ports: + - containerPort: 5432 + +--- + +apiVersion: batch/v1 +kind: Job +metadata: + name: migration + namespace: chroma +spec: + template: + metadata: + labels: + app: migration + spec: + restartPolicy: OnFailure + containers: + - args: + - 'migrate' + - 'apply' + - '--url' + - 'postgres://chroma:chroma@postgres:5432/chroma?sslmode=disable' + image: migration + imagePullPolicy: IfNotPresent + name: migration + +--- + apiVersion: v1 kind: Service metadata: @@ -188,7 +258,7 @@ spec: spec: containers: - command: - - "chroma" + - "coordinator" - "coordinator" - "--pulsar-admin-url=http://pulsar.chroma:8080" - "--pulsar-url=pulsar://pulsar.chroma:6650" @@ -219,3 +289,47 @@ spec: selector: app: coordinator type: ClusterIP + +--- + +apiVersion: v1 +kind: Service +metadata: + name: logservice + namespace: chroma +spec: + ports: + - name: grpc + port: 50051 + targetPort: grpc + selector: + app: logservice + type: ClusterIP + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: logservice + namespace: chroma +spec: + replicas: 1 + selector: + matchLabels: + app: logservice + template: + metadata: + labels: + app: logservice + spec: + containers: + - command: + - "logservice" + - "logservice" + image: chroma-coordinator + imagePullPolicy: IfNotPresent + name: logservice + ports: + - containerPort: 50051 
+ name: grpc diff --git a/k8s/dev/coordinator.yaml b/k8s/dev/coordinator.yaml index ce897d44c82b..f7f8c122bd45 100644 --- a/k8s/dev/coordinator.yaml +++ b/k8s/dev/coordinator.yaml @@ -15,7 +15,7 @@ spec: spec: containers: - command: - - "chroma" + - "coordinator" - "coordinator" - "--pulsar-admin-url=http://pulsar.chroma:8080" - "--pulsar-url=pulsar://pulsar.chroma:6650" @@ -39,4 +39,4 @@ spec: targetPort: grpc selector: app: coordinator - type: ClusterIP \ No newline at end of file + type: ClusterIP diff --git a/k8s/dev/logservice.yaml b/k8s/dev/logservice.yaml new file mode 100644 index 000000000000..a4b491116ee9 --- /dev/null +++ b/k8s/dev/logservice.yaml @@ -0,0 +1,39 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: logservice + namespace: chroma +spec: + replicas: 1 + selector: + matchLabels: + app: logservice + template: + metadata: + labels: + app: logservice + spec: + containers: + - command: + - "logservice" + - "logservice" + image: coordinator + imagePullPolicy: IfNotPresent + name: logservice + ports: + - containerPort: 50051 + name: grpc +--- +apiVersion: v1 +kind: Service +metadata: + name: logservice + namespace: chroma +spec: + ports: + - name: grpc + port: 50051 + targetPort: grpc + selector: + app: logservice + type: ClusterIP diff --git a/k8s/dev/migration.yaml b/k8s/dev/migration.yaml new file mode 100644 index 000000000000..df4ac881740e --- /dev/null +++ b/k8s/dev/migration.yaml @@ -0,0 +1,22 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: migration + namespace: chroma +spec: + template: + metadata: + labels: + app: migration + spec: + restartPolicy: OnFailure + containers: + - args: + - 'migrate' + - 'apply' + - '--url' + - 'postgres://chroma:chroma@postgres:5432/chroma?sslmode=disable' + image: migration + imagePullPolicy: IfNotPresent + name: migration +--- diff --git a/k8s/dev/postgres.yaml b/k8s/dev/postgres.yaml new file mode 100644 index 000000000000..e2b8fad31593 --- /dev/null +++ b/k8s/dev/postgres.yaml @@ -0,0 +1,41 
@@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: postgres + namespace: chroma +spec: + replicas: 1 + selector: + matchLabels: + app: postgres + template: + metadata: + labels: + app: postgres + spec: + containers: + - name: postgres + image: postgres:14.1-alpine + env: + - name: POSTGRES_DB + value: chroma + - name: POSTGRES_USER + value: chroma + - name: POSTGRES_PASSWORD + value: chroma + ports: + - containerPort: 5432 +--- +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: chroma +spec: + ports: + - name: postgres-port + port: 5432 + targetPort: 5432 + selector: + app: postgres + type: ClusterIP From cf476d70f0cebb7c87cb30c7172ba74d6ea175cd Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Fri, 16 Feb 2024 23:12:37 +0200 Subject: [PATCH 3/9] [BUG]: Fixed test_collections.py property test (#1716) Needed to fix the failing property tests in #1715 ## Description of changes *Summarize the changes made by this PR.* - Improvements & Bug fixes - Moved the model update after conditional checks for new_name and metadata. - New functionality - ... 
## Test plan *How are these changes tested?* - [ ] Tests pass locally with `pytest` for python, `yarn test` for js ## Documentation Changes Failure logs + Error analysis: ``` > assert c.metadata == self.model[coll.name] E AssertionError: assert {'g': 1.1, 'n...': 31734, ...} == {'3': 'd71IL'...235e-208, ...} E E Left contains 5 more items: E {'g': 1.1, E 'n1dUTalF-MY': -1000000.0, E 'ugXZ_hK': 5494, E 'xVW09xUpDZA': 31734, E 'y': 'G3EtXTZ'} E Right contains 9 more items: E {'3': 'd71IL', E '45227B': '65', E '7DjCkbusc-K': 'vc94', E '8-tD9nJd': 4.8728578364902235e-208, E 'Bpyj': -675165.8688164671, E 'Uy6KZu6abCD9Z': -72, E 'giC': -6.103515625e-05, E 'pO4': -0.0, E 'r3': -41479} E E Full diff: E { E + 'g': 1.1, E + 'n1dUTalF-MY': -1000000.0, E + 'ugXZ_hK': 5494, E + 'xVW09xUpDZA': 31734, E + 'y': 'G3EtXTZ', E - '3': 'd71IL', E - '45227B': '65', E - '7DjCkbusc-K': 'vc94', E - '8-tD9nJd': 4.8728578364902235e-208, E - 'Bpyj': -675165.8688164671, E - 'Uy6KZu6abCD9Z': -72, E - 'giC': -6.103515625e-05, E - 'pO4': -0.0, E - 'r3': -41479, E } E Falsifying example: E state = CollectionStateMachine() E state.initialize() E state.list_collections_with_limit_offset(limit=5, offset=0) E state.list_collections_with_limit_offset(limit=4, offset=5) E (v1,) = state.get_or_create_coll(coll=Collection(name='E60V1ekr9eDcL\n', id=UUID('4435abf2-9fc6-4d5a-bb7b-33177a956d44'), metadata={'_m5jalwo': -228}, dimension=1356, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=), new_metadata={'k5o6Q': 'Op', E 'LP': -5.960464477539063e-08, E 'pzHdzczVCn': '81', E '7': False, E 'e4Lz': 999999.0, E '206': False}) E (v2,) = state.get_or_create_coll(coll=v1, new_metadata=None) E (v3,) = state.get_or_create_coll(coll=v1, new_metadata={'4OQN': -2097032423, E 'cW': -0.99999, E 'o6wq3': -147, E 'M8j3KBU': -2.2250738585072014e-308, E 'D8nZrA0': 252, E 'up4P_': 34761, E 'L_win': -6.103515625e-05, E '5kt': '_q', E 
'UybO2dJF4': -0.3333333333333333, E 'NfQ83VsmI': 'Qpy', E 'fk': -1.192092896e-07, E 'J1ck': 'ozL'}) E (v4,) = state.get_or_create_coll(coll=Collection(name='nOeHg-OXVl', id=UUID('9c28b027-9f22-409c-b3fd-c5de03b60018'), metadata=None, dimension=1009, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=True, embedding_function=), new_metadata={'p4isW': 'k8l', E 'k2tFn3v1E': True, E 'R': 'ji-2d5lDGV', E 'K5vdi': False, E 'TZs': False, E 'OgJ_DZ2j': False, E 'ovZjD3': -64297, E '9p': True, E '32f3nw8h2d54LPCzsV': 1733994327, E '4P': 2.896381722565434e-121}) E state.list_collections_with_limit_offset(limit=2, offset=0) E state.list_collections_with_limit_offset(limit=3, offset=0) E state.list_collections_with_limit_offset(limit=5, offset=5) E (v5,) = state.modify_coll(coll=v4, new_metadata=None, new_name=None) E (v6,) = state.get_or_create_coll(coll=Collection(name='A1w5m1l5I\n', id=UUID('606d59a6-6f66-456d-81ca-a8ea029c318c'), metadata={'3': '6Y'}, dimension=1544, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=), new_metadata=None) E (v7,) = state.get_or_create_coll(coll=v4, new_metadata={'01316': -0.0, '14UwVu': 81, 'C9eMDDdnB0oy': False, 'n964': '0a'}) E state.modify_coll(coll=v7, new_metadata={}, new_name='B-5Z2m2j52121') E state.get_or_create_coll(coll=Collection(name='E31\n', id=UUID('e67426e8-8595-4916-92a6-b2777b52f157'), metadata={'0Kr5Wp': -769, '9xT': 143980.04500299558, '8': True}, dimension=1800, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=True, embedding_function=), new_metadata={}) E state.list_collections_with_limit_offset(limit=2, offset=1) E state.list_collections_with_limit_offset(limit=2, offset=0) E state.list_collections_with_limit_offset(limit=1, offset=0) E state.list_collections_with_limit_offset(limit=1, offset=1) E (v8,) = 
state.get_or_create_coll(coll=Collection(name='A00\n', id=UUID('01522a4f-3383-4a58-8b18-0418e38e3ec6'), metadata=None, dimension=1032, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=), new_metadata=None) E (v9,) = state.get_or_create_coll(coll=v6, new_metadata=None) E state.list_collections_with_limit_offset(limit=3, offset=2) E (v10,) = state.modify_coll(coll=v3, new_metadata=None, new_name=None) E (v11,) = state.modify_coll(coll=v10, new_metadata=None, new_name=None) E state.modify_coll(coll=v9, new_metadata={}, new_name=None) E (v12,) = state.get_or_create_coll(coll=Collection(name='A10\n', id=UUID('01efb806-fffa-4ce6-b285-b9aae55f50af'), metadata={}, dimension=258, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=), new_metadata=None) E state.modify_coll(coll=v11, new_metadata={}, new_name='A01011110\n') E state.list_collections_with_limit_offset(limit=3, offset=1) ------ Problem start here ------ E (v13,) = state.get_or_create_coll(coll=Collection(name='C1030', id=UUID('7858d028-1295-4769-96c1-e58bf242b7bd'), metadata={}, dimension=2, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=), new_metadata=None) E (v14,) = state.get_or_create_coll(coll=Collection(name='A01200671\n', id=UUID('f77d01a4-e43f-4b17-9579-daadccad2f71'), metadata={'0': 'L', '01': -4}, dimension=1282, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=False, embedding_function=), new_metadata=None) E state.list_collections_with_limit_offset(limit=2, offset=1) E (v15,) = state.modify_coll(coll=v13, new_metadata={'0': '10', '40': '0', 'p1nviWeL7fO': 'qN', '7b': 'YS', 'VYWq4LEMWjCo': True}, new_name='OF5F0MzbQg\n') E (v16,) = 
state.get_or_create_coll(coll=Collection(name='VS0QGh', id=UUID('c6b85c1d-c3e9-4d37-b9ca-c4b4266193e9'), metadata={'h': 5.681951615025145e-227, 'A1': 61126, 'uhUhLEEMfeC_kN': 2147483647, 'weF': 'pSP', 'B3DSaP': False, '6H533K': 1.192092896e-07}, dimension=1915, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=), new_metadata={'xVW09xUpDZA': 31734, E 'g': 1.1, E 'n1dUTalF-MY': -1000000.0, E 'y': 'G3EtXTZ', E 'ugXZ_hK': 5494}) E state.list_collections_with_limit_offset(limit=4, offset=5) E state.modify_coll(coll=v16, new_metadata={'giC': -6.103515625e-05, E '45227B': '65', E 'Uy6KZu6abCD9Z': -72, E 'r3': -41479, E 'pO4': -0.0, E 'Bpyj': -675165.8688164671, E '8-tD9nJd': 4.8728578364902235e-208, E '7DjCkbusc-K': 'vc94', E '3': 'd71IL'}, new_name='OF5F0MzbQg\n') E state.list_collections_with_limit_offset(limit=4, offset=4) E (v17,) = state.modify_coll(coll=v15, new_metadata={'L35J2S': 'K0l026'}, new_name='Ai1\n') E (v18,) = state.get_or_create_coll(coll=v13, new_metadata=None) E state.list_collections_with_limit_offset(limit=3, offset=1) E (v19,) = state.modify_coll(coll=v14, new_metadata=None, new_name='F0K570\n') E (v20,) = state.get_or_create_coll(coll=Collection(name='Ad5m003\n', id=UUID('5e23b560-7f62-4f14-bf80-93f5ff4e906a'), metadata={'3M': 'q_'}, dimension=57, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=False, embedding_function=), new_metadata={'_000': 852410}) E (v21,) = state.get_or_create_coll(coll=v14, new_metadata=None) E state.list_collections_with_limit_offset(limit=4, offset=1) E (v22,) = state.modify_coll(coll=v21, new_metadata=None, new_name=None) E (v23,) = state.modify_coll(coll=v22, new_metadata=None, new_name=None) E state.list_collections_with_limit_offset(limit=1, offset=1) E state.get_or_create_coll(coll=Collection(name='VS0QGh', id=UUID('ca92837d-3425-436c-bf11-dba969f0f8c7'), metadata=None, 
dimension=326, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=False, embedding_function=), new_metadata=None) E state.teardown() ``` The problem starts in v13 where we create a new collection named `C1030` In v15 we modify the collection `C1030` and rename it to `OF5F0MzbQg\n` In v16 we create a new collection named `VS0QGh` We try to modify the collection `VS0QGh` and rename it to `OF5F0MzbQg\n`, which is the same name as the collection `C1030`; this fails, and we return empty from the rule. However we have already updated the model: ```python if new_metadata is not None: if len(new_metadata) == 0: with pytest.raises(Exception): c = self.api.get_or_create_collection( name=coll.name, metadata=new_metadata, embedding_function=coll.embedding_function, ) return multiple() coll.metadata = new_metadata self.set_model(coll.name, coll.metadata) # <--- here we update the metadata if new_name is not None: if new_name in self.model and new_name != coll.name: with pytest.raises(Exception): # <--- fail here to rename the collection to `OF5F0MzbQg\n` c.modify(metadata=new_metadata, name=new_name) return multiple() prev_metadata = self.model[coll.name] self.delete_from_model(coll.name) self.set_model(new_name, prev_metadata) coll.name = new_name ``` then in `E state.get_or_create_coll(coll=Collection(name='VS0QGh', id=UUID('ca92837d-3425-436c-bf11-dba969f0f8c7'), metadata=None, dimension=326, dtype=, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=False, embedding_function=), new_metadata=None)` We try to create or get collection `VS0QGh` which exists in API and in state. Metadata and new metadata are None so we fall into case 0. Existing collection with old metadata, but we take the metadata from the model, which has been updated after the failure above. So we have the API version of the metadata and partly updated model metadata, which causes the failure.
--- chromadb/test/property/test_collections.py | 33 ++++++++++++++-------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 844476aa8eaf..251dfa74f38b 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -14,7 +14,7 @@ run_state_machine_as_test, MultipleResults, ) -from typing import Dict, Optional +from typing import Dict, Optional, Any, Mapping class CollectionStateMachine(RuleBasedStateMachine): @@ -54,7 +54,7 @@ def create_coll( metadata=coll.metadata, embedding_function=coll.embedding_function, ) - self.set_model(coll.name, coll.metadata) + self.set_model(coll.name, coll.metadata, str(coll.id)) assert c.name == coll.name assert c.metadata == self.model[coll.name] @@ -85,7 +85,7 @@ def delete_coll(self, coll: strategies.Collection) -> None: @rule() def list_collections(self) -> None: colls = self.api.list_collections() - assert len(colls) == len(self.model) + assert len(colls) == len([c for c in self.model if not c.startswith("__id__")]) for c in colls: assert c.name in self.model @@ -163,7 +163,7 @@ def get_or_create_coll( coll.metadata = ( self.model[coll.name] if new_metadata is None else new_metadata ) - self.set_model(coll.name, coll.metadata) + self.set_model(coll.name, coll.metadata, str(coll.id)) # Update API c = self.api.get_or_create_collection( @@ -189,13 +189,17 @@ def modify_coll( new_metadata: types.Metadata, new_name: Optional[str], ) -> MultipleResults[strategies.Collection]: + # early exit if a col with name exists but with diff id, possibly in another tenant/db + if coll.name in self.model and f"__id__:{coll.id}" not in self.model: + return multiple() if coll.name not in self.model: with pytest.raises(Exception): c = self.api.get_collection(name=coll.name) return multiple() c = self.api.get_collection(name=coll.name) - + _metadata: Optional[Mapping[str, Any]] = coll.metadata + _name: 
str = coll.name if new_metadata is not None: if len(new_metadata) == 0: with pytest.raises(Exception): @@ -206,7 +210,7 @@ def modify_coll( ) return multiple() coll.metadata = new_metadata - self.set_model(coll.name, coll.metadata) + _metadata = new_metadata if new_name is not None: if new_name in self.model and new_name != coll.name: @@ -214,12 +218,12 @@ def modify_coll( c.modify(metadata=new_metadata, name=new_name) return multiple() - prev_metadata = self.model[coll.name] self.delete_from_model(coll.name) - self.set_model(new_name, prev_metadata) coll.name = new_name + _name = new_name + self.set_model(_name, _metadata, str(coll.id)) - c.modify(metadata=new_metadata, name=new_name) + c.modify(metadata=_metadata, name=_name) c = self.api.get_collection(name=coll.name) assert c.name == coll.name @@ -227,14 +231,21 @@ def modify_coll( return multiple(coll) def set_model( - self, name: str, metadata: Optional[types.CollectionMetadata] + self, + name: str, + metadata: Optional[types.CollectionMetadata], + id: Optional[str] = None, ) -> None: model = self.model model[name] = metadata + if id is not None: + model[f"__id__:{id}"] = metadata - def delete_from_model(self, name: str) -> None: + def delete_from_model(self, name: str, id: Optional[str] = None) -> None: model = self.model del model[name] + if id is not None: + del model[f"__id__:{id}"] @property def model(self) -> Dict[str, Optional[types.CollectionMetadata]]: From f96be93643bad5a1ac6f7c139ee886bb8663a744 Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Tue, 20 Feb 2024 09:55:51 -0800 Subject: [PATCH 4/9] [ENH] Basic blockfile implementation (#1726) ## Description of changes *Summarize the changes made by this PR.* - Improvements & Bug fixes - N/A - New functionality - This PR adds a basic HashMap based blockfile with the basic interfaces we need. It leaves some todos around for future cleanup, as this we can tackle in subsequent passes while we are building this out. This is to unblock @beggers. 
## Test plan *How are these changes tested?* - [x] Tests pass locally with `cargo test` ## Documentation Changes No public facing documentation changes are required. --- Cargo.lock | 411 ++++++++++++++- rust/worker/Cargo.toml | 2 + rust/worker/src/blockstore/mod.rs | 2 + .../positional_posting_list_value.rs | 122 +++++ rust/worker/src/blockstore/types.rs | 478 ++++++++++++++++++ rust/worker/src/lib.rs | 1 + 6 files changed, 1014 insertions(+), 2 deletions(-) create mode 100644 rust/worker/src/blockstore/mod.rs create mode 100644 rust/worker/src/blockstore/positional_posting_list_value.rs create mode 100644 rust/worker/src/blockstore/types.rs diff --git a/Cargo.lock b/Cargo.lock index 932b41154ab1..1b8e6f89aad2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,6 +24,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ "cfg-if", + "const-random", "getrandom", "once_cell", "version_check", @@ -66,6 +67,218 @@ version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +[[package]] +name = "arrow" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = 
"50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.3", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "base64 0.21.5", + "chrono", + "half", + "lexical-core", + "num", +] + +[[package]] +name = "arrow-csv" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", +] + +[[package]] +name = "arrow-json" +version = "50.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap 2.1.0", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", + "hashbrown 0.14.3", +] + +[[package]] +name = "arrow-schema" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" + +[[package]] +name = "arrow-select" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "num", + "regex", + "regex-syntax", +] + [[package]] name = "async-attributes" version = "1.1.2" @@ -878,9 +1091,9 @@ checksum = 
"7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "bytemuck" -version = "1.14.0" +version = "1.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +checksum = "a2ef034f05691a48569bd920a96c81b9d91bbad1ab5ac7c4616c1f6ef36cb79f" [[package]] name = "byteorder" @@ -950,6 +1163,26 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28c122c3980598d243d63d9a704629a2d748d101f278052ff068be5a4423ab6f" +[[package]] +name = "const-random" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -1041,6 +1274,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto-bigint" version = "0.4.9" @@ -1075,6 +1314,27 @@ dependencies = [ "typenum", ] +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + 
[[package]] name = "curve25519-dalek" version = "4.1.1" @@ -1427,6 +1687,16 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "flatbuffers" +version = "23.5.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + [[package]] name = "flate2" version = "1.0.28" @@ -1673,6 +1943,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -2122,6 +2403,70 @@ dependencies = [ "spin 0.5.2", ] +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.151" @@ -2288,6 +2633,20 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.4" @@ -2316,6 +2675,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-complex" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +dependencies = [ + "num-traits", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -2337,6 +2705,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.17" @@ -3112,6 +3492,16 @@ dependencies = [ 
"windows-sys 0.48.0", ] +[[package]] +name = "roaring" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1c77081a55300e016cb86f2864415b7518741879db925b8d488a0ee0d2da6bf" +dependencies = [ + "bytemuck", + "byteorder", +] + [[package]] name = "rsa" version = "0.9.6" @@ -3595,6 +3985,12 @@ dependencies = [ "der 0.7.8", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.10.0" @@ -3718,6 +4114,15 @@ dependencies = [ "time-core", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -4362,6 +4767,7 @@ dependencies = [ name = "worker" version = "0.1.0" dependencies = [ + "arrow", "async-trait", "aws-config", "aws-sdk-s3", @@ -4381,6 +4787,7 @@ dependencies = [ "pulsar", "rand", "rayon", + "roaring", "schemars", "serde", "serde_json", diff --git a/rust/worker/Cargo.toml b/rust/worker/Cargo.toml index 25a3b2d099ee..e3c916fe012d 100644 --- a/rust/worker/Cargo.toml +++ b/rust/worker/Cargo.toml @@ -35,6 +35,8 @@ parking_lot = "0.12.1" aws-sdk-s3 = "1.5.0" aws-smithy-types = "1.1.0" aws-config = { version = "1.1.2", features = ["behavior-version-latest"] } +arrow = "50.0.0" +roaring = "0.10.3" [build-dependencies] tonic-build = "0.10" diff --git a/rust/worker/src/blockstore/mod.rs b/rust/worker/src/blockstore/mod.rs new file mode 100644 index 000000000000..96be70e534a1 --- /dev/null +++ b/rust/worker/src/blockstore/mod.rs @@ -0,0 +1,2 @@ +mod positional_posting_list_value; +mod types; diff --git a/rust/worker/src/blockstore/positional_posting_list_value.rs 
b/rust/worker/src/blockstore/positional_posting_list_value.rs new file mode 100644 index 000000000000..8c790d17f4cc --- /dev/null +++ b/rust/worker/src/blockstore/positional_posting_list_value.rs @@ -0,0 +1,122 @@ +use arrow::{ + array::{AsArray, Int32Array, Int32Builder, ListArray, ListBuilder}, + datatypes::Int32Type, +}; +use thiserror::Error; + +use std::collections::HashSet; + +use crate::errors::{ChromaError, ErrorCodes}; + +#[derive(Debug, Clone)] +pub(crate) struct PositionalPostingList { + pub(crate) doc_ids: Int32Array, + pub(crate) positions: ListArray, +} + +pub(crate) struct PositionalPostingListBuilder { + doc_ids_builder: Int32Builder, + positions_builder: ListBuilder, + doc_id_set: HashSet, +} + +impl PositionalPostingListBuilder { + pub(crate) fn new() -> Self { + PositionalPostingListBuilder { + doc_ids_builder: Int32Builder::new(), + positions_builder: ListBuilder::new(Int32Builder::new()), + doc_id_set: HashSet::new(), + } + } +} + +impl PositionalPostingList { + pub(crate) fn get_doc_ids(&self) -> Int32Array { + return self.doc_ids.clone(); + } + + pub(crate) fn get_positions_for_doc_id(&self, doc_id: i32) -> Option { + let index = self.doc_ids.iter().position(|x| x == Some(doc_id)); + match index { + Some(index) => { + let target_positions = self.positions.value(index); + // Int32Array is composed of a Datatype, ScalarBuffer, and a null bitmap, these are all cheap to clone since the buffer is Arc'ed + let downcast = target_positions.as_primitive::().clone(); + return Some(downcast); + } + None => None, + } + } +} + +#[derive(Error, Debug)] +pub(crate) enum PositionalPostingListBuilderError { + #[error("Doc ID already exists in the list")] + DocIdAlreadyExists, +} + +impl ChromaError for PositionalPostingListBuilderError { + fn code(&self) -> ErrorCodes { + match self { + PositionalPostingListBuilderError::DocIdAlreadyExists => ErrorCodes::AlreadyExists, + } + } +} + +impl PositionalPostingListBuilder { + pub(crate) fn add_doc_id_and_positions( 
+ &mut self, + doc_id: i32, + positions: Vec, + ) -> Result<(), PositionalPostingListBuilderError> { + if self.doc_id_set.contains(&doc_id) { + return Err(PositionalPostingListBuilderError::DocIdAlreadyExists); + } + + self.doc_ids_builder.append_value(doc_id); + let positions = positions + .into_iter() + .map(Some) + .collect::>>(); + self.positions_builder.append_value(positions); + self.doc_id_set.insert(doc_id); + Ok(()) + } + + pub(crate) fn build(&mut self) -> PositionalPostingList { + let doc_ids = self.doc_ids_builder.finish(); + let positions = self.positions_builder.finish(); + PositionalPostingList { + doc_ids: doc_ids, + positions: positions, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_positional_posting_list() { + let mut builder = PositionalPostingListBuilder::new(); + + let _res = builder.add_doc_id_and_positions(1, vec![1, 2, 3]); + let _res = builder.add_doc_id_and_positions(2, vec![4, 5, 6]); + + let list = builder.build(); + assert_eq!(list.get_doc_ids().values()[0], 1); + assert_eq!(list.get_doc_ids().values()[1], 2); + assert_eq!( + list.get_positions_for_doc_id(1).unwrap(), + Int32Array::from(vec![1, 2, 3]) + ); + assert_eq!( + list.get_positions_for_doc_id(2).unwrap(), + Int32Array::from(vec![4, 5, 6]) + ); + + let res = builder.add_doc_id_and_positions(1, vec![1, 2, 3]); + assert!(res.is_err()); + } +} diff --git a/rust/worker/src/blockstore/types.rs b/rust/worker/src/blockstore/types.rs new file mode 100644 index 000000000000..b9c0021f334d --- /dev/null +++ b/rust/worker/src/blockstore/types.rs @@ -0,0 +1,478 @@ +use super::positional_posting_list_value::PositionalPostingList; +use crate::errors::ChromaError; +use arrow::array::Int32Array; +use roaring::RoaringBitmap; +use std::fmt::Display; +use std::hash::{Hash, Hasher}; + +// ===== Key Types ===== +#[derive(Clone)] +pub(crate) struct BlockfileKey { + pub(crate) prefix: String, + pub(crate) key: Key, +} + +#[derive(Clone, PartialEq, PartialOrd, Debug)] 
+pub(crate) enum Key { + String(String), + Float(f32), +} + +#[derive(Debug, Clone)] +pub(crate) enum KeyType { + String, + Float, +} + +impl Display for Key { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Key::String(s) => write!(f, "{}", s), + Key::Float(fl) => write!(f, "{}", fl), + } + } +} + +impl BlockfileKey { + pub(crate) fn new(prefix: String, key: Key) -> Self { + BlockfileKey { prefix, key } + } +} + +impl Hash for BlockfileKey { + // Hash is only used for the HashMap implementation, which is a test/reference implementation + // Therefore this hash implementation is not used in production and allowed to be + // hacky + fn hash(&self, state: &mut H) { + self.prefix.hash(state); + } +} + +impl PartialEq for BlockfileKey { + fn eq(&self, other: &Self) -> bool { + self.prefix == other.prefix && self.key == other.key + } +} + +impl PartialOrd for BlockfileKey { + fn partial_cmp(&self, other: &Self) -> Option { + if self.prefix == other.prefix { + self.key.partial_cmp(&other.key) + } else { + self.prefix.partial_cmp(&other.prefix) + } + } +} + +impl Eq for BlockfileKey {} + +impl Ord for BlockfileKey { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + if self.prefix == other.prefix { + match self.key { + Key::String(ref s1) => match &other.key { + Key::String(s2) => s1.cmp(s2), + _ => panic!("Cannot compare string to float"), + }, + Key::Float(f1) => match &other.key { + Key::Float(f2) => f1.partial_cmp(f2).unwrap(), + _ => panic!("Cannot compare float to string"), + }, + } + } else { + self.prefix.cmp(&other.prefix) + } + } +} + +// ===== Value Types ===== + +#[derive(Debug, Clone)] +pub(crate) enum Value { + Int32ArrayValue(Int32Array), + PositionalPostingListValue(PositionalPostingList), + StringValue(String), + RoaringBitmapValue(RoaringBitmap), +} + +#[derive(Debug, Clone)] +pub(crate) enum ValueType { + Int32Array, + PositionalPostingList, + RoaringBitmap, + String, +} + +pub(crate) trait Blockfile { + // 
===== Lifecycle methods ===== + fn open(path: &str) -> Result> + where + Self: Sized; + fn create( + path: &str, + key_type: KeyType, + value_type: ValueType, + ) -> Result> + where + Self: Sized; + + // ===== Transaction methods ===== + fn begin_transaction(&mut self) -> Result<(), Box>; + + fn commit_transaction(&mut self) -> Result<(), Box>; + + // ===== Data methods ===== + fn get(&self, key: BlockfileKey) -> Result>; + fn get_by_prefix( + &self, + prefix: String, + ) -> Result, Box>; + + fn set(&mut self, key: BlockfileKey, value: Value) -> Result<(), Box>; + + fn get_gt( + &self, + prefix: String, + key: Key, + ) -> Result, Box>; + + fn get_lt( + &self, + prefix: String, + key: Key, + ) -> Result, Box>; + + fn get_gte( + &self, + prefix: String, + key: Key, + ) -> Result, Box>; + + fn get_lte( + &self, + prefix: String, + key: Key, + ) -> Result, Box>; +} + +struct HashMapBlockfile { + map: std::collections::HashMap, +} + +impl Blockfile for HashMapBlockfile { + // TODO: change this to respect path instead of ignoring it and creating a new thing + fn open(_path: &str) -> Result> { + Ok(HashMapBlockfile { + map: std::collections::HashMap::new(), + }) + } + fn create( + path: &str, + key_type: KeyType, + value_type: ValueType, + ) -> Result> + where + Self: Sized, + { + Ok(HashMapBlockfile { + map: std::collections::HashMap::new(), + }) + } + fn get(&self, key: BlockfileKey) -> Result> { + match self.map.get(&key) { + Some(value) => Ok(value.clone()), + None => { + // TOOD: make error + panic!("Key not found"); + } + } + } + + fn get_by_prefix( + &self, + prefix: String, + ) -> Result, Box> { + let mut result = Vec::new(); + for (key, value) in self.map.iter() { + if key.prefix == prefix { + result.push((key.clone(), value.clone())); + } + } + Ok(result) + } + + fn set(&mut self, key: BlockfileKey, value: Value) -> Result<(), Box> { + self.map.insert(key, value); + Ok(()) + } + + fn get_gt( + &self, + prefix: String, + key: Key, + ) -> Result, Box> { + let mut 
result = Vec::new(); + for (k, v) in self.map.iter() { + if k.prefix == prefix && k.key > key { + result.push((k.clone(), v.clone())); + } + } + Ok(result) + } + + fn get_gte( + &self, + prefix: String, + key: Key, + ) -> Result, Box> { + let mut result = Vec::new(); + for (k, v) in self.map.iter() { + if k.prefix == prefix && k.key >= key { + result.push((k.clone(), v.clone())); + } + } + Ok(result) + } + + fn get_lt( + &self, + prefix: String, + key: Key, + ) -> Result, Box> { + let mut result = Vec::new(); + for (k, v) in self.map.iter() { + if k.prefix == prefix && k.key < key { + result.push((k.clone(), v.clone())); + } + } + Ok(result) + } + + fn get_lte( + &self, + prefix: String, + key: Key, + ) -> Result, Box> { + let mut result = Vec::new(); + for (k, v) in self.map.iter() { + if k.prefix == prefix && k.key <= key { + result.push((k.clone(), v.clone())); + } + } + Ok(result) + } + + fn begin_transaction(&mut self) -> Result<(), Box> { + Ok(()) + } + + fn commit_transaction(&mut self) -> Result<(), Box> { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::blockstore::positional_posting_list_value::PositionalPostingListBuilder; + use arrow::array::Array; + use std::fmt::Debug; + + impl Debug for BlockfileKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "BlockfileKey(prefix: {}, key: {})", + self.prefix, self.key + ) + } + } + + #[test] + fn test_blockfile_set_get() { + let mut blockfile = + HashMapBlockfile::create("test", KeyType::String, ValueType::Int32Array).unwrap(); + let key = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key1".to_string()), + }; + let _res = blockfile + .set( + key.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![1, 2, 3])), + ) + .unwrap(); + let value = blockfile.get(key); + // downcast to string + match value.unwrap() { + Value::Int32ArrayValue(arr) => assert_eq!(arr, Int32Array::from(vec![1, 2, 3])), + _ => panic!("Value is not a 
string"), + } + } + + #[test] + fn test_blockfile_get_by_prefix() { + let mut blockfile = HashMapBlockfile::open("test").unwrap(); + let key1 = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key1".to_string()), + }; + let key2 = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key2".to_string()), + }; + let _res = blockfile + .set( + key1.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![1, 2, 3])), + ) + .unwrap(); + let _res = blockfile + .set( + key2.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![4, 5, 6])), + ) + .unwrap(); + let values = blockfile.get_by_prefix("text_prefix".to_string()).unwrap(); + assert_eq!(values.len(), 2); + // May return values in any order + match &values[0].1 { + Value::Int32ArrayValue(arr) => assert!( + arr == &Int32Array::from(vec![1, 2, 3]) || arr == &Int32Array::from(vec![4, 5, 6]) + ), + _ => panic!("Value is not a string"), + } + match &values[1].1 { + Value::Int32ArrayValue(arr) => assert!( + arr == &Int32Array::from(vec![1, 2, 3]) || arr == &Int32Array::from(vec![4, 5, 6]) + ), + _ => panic!("Value is not a string"), + } + } + + #[test] + fn test_storing_arrow_in_blockfile() { + let mut blockfile = HashMapBlockfile::open("test").unwrap(); + let key = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key1".to_string()), + }; + let array = Value::Int32ArrayValue(Int32Array::from(vec![1, 2, 3])); + let _res = blockfile.set(key.clone(), array).unwrap(); + let value = blockfile.get(key).unwrap(); + match value { + Value::Int32ArrayValue(arr) => assert_eq!(arr, Int32Array::from(vec![1, 2, 3])), + _ => panic!("Value is not an arrow int32 array"), + } + } + + #[test] + fn test_blockfile_get_gt() { + let mut blockfile = HashMapBlockfile::open("test").unwrap(); + let key1 = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key1".to_string()), + }; + let key2 = BlockfileKey { + prefix: "text_prefix".to_string(), + key: 
Key::String("key2".to_string()), + }; + let key3 = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key3".to_string()), + }; + let _res = blockfile.set( + key1.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![1])), + ); + let _res = blockfile.set( + key2.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![2])), + ); + let _res = blockfile.set( + key3.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![3])), + ); + let values = blockfile + .get_gt("text_prefix".to_string(), Key::String("key1".to_string())) + .unwrap(); + assert_eq!(values.len(), 2); + match &values[0].0.key { + Key::String(s) => assert!(s == "key2" || s == "key3"), + _ => panic!("Key is not a string"), + } + match &values[1].0.key { + Key::String(s) => assert!(s == "key2" || s == "key3"), + _ => panic!("Key is not a string"), + } + } + + #[test] + fn test_learning_arrow_struct() { + let mut builder = PositionalPostingListBuilder::new(); + let _res = builder.add_doc_id_and_positions(1, vec![0]); + let _res = builder.add_doc_id_and_positions(2, vec![0, 1]); + let _res = builder.add_doc_id_and_positions(3, vec![0, 1, 2]); + let list_term_1 = builder.build(); + + // Example of how to use the struct array, which is one value for a term + let mut blockfile = HashMapBlockfile::open("test").unwrap(); + let key = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("term1".to_string()), + }; + let _res = blockfile + .set(key.clone(), Value::PositionalPostingListValue(list_term_1)) + .unwrap(); + let posting_list = blockfile.get(key).unwrap(); + let posting_list = match posting_list { + Value::PositionalPostingListValue(arr) => arr, + _ => panic!("Value is not an arrow struct array"), + }; + + let ids = posting_list.get_doc_ids(); + let ids = ids.as_any().downcast_ref::().unwrap(); + // find index of target id + let target_id = 2; + + // imagine this is binary search instead of linear + for i in 0..ids.len() { + if ids.is_null(i) { + continue; + } + if 
ids.value(i) == target_id { + let pos_list = posting_list.get_positions_for_doc_id(target_id).unwrap(); + let pos_list = pos_list.as_any().downcast_ref::().unwrap(); + assert_eq!(pos_list.len(), 2); + assert_eq!(pos_list.value(0), 0); + assert_eq!(pos_list.value(1), 1); + break; + } + } + } + + #[test] + fn test_roaring_bitmap_example() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert(1); + bitmap.insert(2); + bitmap.insert(3); + let mut blockfile = HashMapBlockfile::open("test").unwrap(); + let key = BlockfileKey::new( + "text_prefix".to_string(), + Key::String("bitmap1".to_string()), + ); + let _res = blockfile + .set(key.clone(), Value::RoaringBitmapValue(bitmap)) + .unwrap(); + let value = blockfile.get(key).unwrap(); + match value { + Value::RoaringBitmapValue(bitmap) => { + assert!(bitmap.contains(1)); + assert!(bitmap.contains(2)); + assert!(bitmap.contains(3)); + } + _ => panic!("Value is not a roaring bitmap"), + } + } +} diff --git a/rust/worker/src/lib.rs b/rust/worker/src/lib.rs index ae7ea7dc7d52..b245f24df280 100644 --- a/rust/worker/src/lib.rs +++ b/rust/worker/src/lib.rs @@ -1,4 +1,5 @@ mod assignment; +mod blockstore; mod config; mod errors; mod index; From 8a0f67edd070774f0fd22fab5b721442ea7c9edc Mon Sep 17 00:00:00 2001 From: Ben Eggers <64657842+beggers@users.noreply.github.com> Date: Tue, 20 Feb 2024 13:04:25 -0800 Subject: [PATCH 5/9] [BUG] Make sure Client parameters are strings (#1577) ## Description of changes *Summarize the changes made by this PR.* - Improvements & Bug fixes - Stringify all paremeters to `Client`s which are meant to be strings. At present some parameters -- `port` in particular -- can be reasonably passed as integers which causes weird and unexpected behavior. - Fixes #1573 ## Test plan *How are these changes tested?* - [ ] Tests pass locally with `pytest` for python, `yarn test` for js ## Documentation Changes *Are all docstrings for user-facing APIs updated if required? 
Do we need to make documentation changes in the [docs repository](https://github.com/chroma-core/docs)?* --- chromadb/__init__.py | 32 ++++++++++++++++++++--- chromadb/api/fastapi.py | 2 +- chromadb/config.py | 8 +++--- chromadb/test/client/test_cloud_client.py | 6 ++--- chromadb/test/conftest.py | 4 +-- chromadb/test/test_chroma.py | 6 ++--- chromadb/test/test_client.py | 4 +-- 7 files changed, 44 insertions(+), 18 deletions(-) diff --git a/chromadb/__init__.py b/chromadb/__init__.py index 142ab78a05fc..8e5ba91d1f1b 100644 --- a/chromadb/__init__.py +++ b/chromadb/__init__.py @@ -112,6 +112,10 @@ def EphemeralClient( settings = Settings() settings.is_persistent = False + # Make sure paramaters are the correct types -- users can pass anything. + tenant = str(tenant) + database = str(database) + return ClientCreator(settings=settings, tenant=tenant, database=database) @@ -135,12 +139,16 @@ def PersistentClient( settings.persist_directory = path settings.is_persistent = True + # Make sure paramaters are the correct types -- users can pass anything. + tenant = str(tenant) + database = str(database) + return ClientCreator(tenant=tenant, database=database, settings=settings) def HttpClient( host: str = "localhost", - port: str = "8000", + port: int = 8000, ssl: bool = False, headers: Optional[Dict[str, str]] = None, settings: Optional[Settings] = None, @@ -165,6 +173,13 @@ def HttpClient( if settings is None: settings = Settings() + # Make sure paramaters are the correct types -- users can pass anything. + host = str(host) + port = int(port) + ssl = bool(ssl) + tenant = str(tenant) + database = str(database) + settings.chroma_api_impl = "chromadb.api.fastapi.FastAPI" if settings.chroma_server_host and settings.chroma_server_host != host: raise ValueError( @@ -189,7 +204,7 @@ def CloudClient( settings: Optional[Settings] = None, *, # Following arguments are keyword-only, intended for testing only. 
cloud_host: str = "api.trychroma.com", - cloud_port: str = "8000", + cloud_port: int = 8000, enable_ssl: bool = True, ) -> ClientAPI: """ @@ -217,6 +232,14 @@ def CloudClient( if settings is None: settings = Settings() + # Make sure paramaters are the correct types -- users can pass anything. + tenant = str(tenant) + database = str(database) + api_key = str(api_key) + cloud_host = str(cloud_host) + cloud_port = int(cloud_port) + enable_ssl = bool(enable_ssl) + settings.chroma_api_impl = "chromadb.api.fastapi.FastAPI" settings.chroma_server_host = cloud_host settings.chroma_server_http_port = cloud_port @@ -242,9 +265,12 @@ def Client( tenant: The tenant to use for this client. Defaults to the default tenant. database: The database to use for this client. Defaults to the default database. - """ + # Make sure paramaters are the correct types -- users can pass anything. + tenant = str(tenant) + database = str(database) + return ClientCreator(tenant=tenant, database=database, settings=settings) diff --git a/chromadb/api/fastapi.py b/chromadb/api/fastapi.py index a10fdfaf02d9..d01028c734f8 100644 --- a/chromadb/api/fastapi.py +++ b/chromadb/api/fastapi.py @@ -109,7 +109,7 @@ def __init__(self, system: System): self._api_url = FastAPI.resolve_url( chroma_server_host=str(system.settings.chroma_server_host), - chroma_server_http_port=int(str(system.settings.chroma_server_http_port)), + chroma_server_http_port=system.settings.chroma_server_http_port, chroma_server_ssl_enabled=system.settings.chroma_server_ssl_enabled, default_api_path=system.settings.chroma_server_api_default_path, ) diff --git a/chromadb/config.py b/chromadb/config.py index 98f4549e9f43..b4a78d5746cd 100644 --- a/chromadb/config.py +++ b/chromadb/config.py @@ -123,12 +123,12 @@ class Settings(BaseSettings): # type: ignore chroma_server_host: Optional[str] = None chroma_server_headers: Optional[Dict[str, str]] = None - chroma_server_http_port: Optional[str] = None + chroma_server_http_port: Optional[int] = 
None chroma_server_ssl_enabled: Optional[bool] = False # the below config value is only applicable to Chroma HTTP clients chroma_server_ssl_verify: Optional[Union[bool, str]] = None chroma_server_api_default_path: Optional[str] = "/api/v1" - chroma_server_grpc_port: Optional[str] = None + chroma_server_grpc_port: Optional[int] = None # eg ["http://localhost:3000"] chroma_server_cors_allow_origins: List[str] = [] @@ -141,8 +141,8 @@ def empty_str_to_none(cls, v: str) -> Optional[str]: chroma_server_nofile: Optional[int] = None pulsar_broker_url: Optional[str] = None - pulsar_admin_port: Optional[str] = "8080" - pulsar_broker_port: Optional[str] = "6650" + pulsar_admin_port: Optional[int] = 8080 + pulsar_broker_port: Optional[int] = 6650 chroma_server_auth_provider: Optional[str] = None diff --git a/chromadb/test/client/test_cloud_client.py b/chromadb/test/client/test_cloud_client.py index aee869ca1c57..48b0252789b7 100644 --- a/chromadb/test/client/test_cloud_client.py +++ b/chromadb/test/client/test_cloud_client.py @@ -61,7 +61,7 @@ def mock_cloud_server(valid_token: str) -> Generator[System, None, None]: settings = Settings( chroma_api_impl="chromadb.api.fastapi.FastAPI", chroma_server_host=TEST_CLOUD_HOST, - chroma_server_http_port=str(port), + chroma_server_http_port=port, chroma_client_auth_provider="chromadb.auth.token.TokenAuthClientProvider", chroma_client_auth_credentials=valid_token, chroma_client_auth_token_transport_header=TOKEN_TRANSPORT_HEADER, @@ -82,7 +82,7 @@ def test_valid_key(mock_cloud_server: System, valid_token: str) -> None: database=DEFAULT_DATABASE, api_key=valid_token, cloud_host=TEST_CLOUD_HOST, - cloud_port=mock_cloud_server.settings.chroma_server_http_port, # type: ignore + cloud_port=mock_cloud_server.settings.chroma_server_http_port or 8000, enable_ssl=False, ) @@ -98,7 +98,7 @@ def test_invalid_key(mock_cloud_server: System, valid_token: str) -> None: database=DEFAULT_DATABASE, api_key=invalid_token, cloud_host=TEST_CLOUD_HOST, - 
cloud_port=mock_cloud_server.settings.chroma_server_http_port, # type: ignore + cloud_port=mock_cloud_server.settings.chroma_server_http_port or 8000, enable_ssl=False, ) client.heartbeat() diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index 3e041cfe9a71..4e55ffc67498 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -246,7 +246,7 @@ def _fastapi_fixture( settings = Settings( chroma_api_impl="chromadb.api.fastapi.FastAPI", chroma_server_host="localhost", - chroma_server_http_port=str(port), + chroma_server_http_port=port, allow_reset=True, chroma_client_auth_provider=chroma_client_auth_provider, chroma_client_auth_credentials=chroma_client_auth_credentials, @@ -286,7 +286,7 @@ def fastapi_ssl() -> Generator[System, None, None]: def basic_http_client() -> Generator[System, None, None]: settings = Settings( chroma_api_impl="chromadb.api.fastapi.FastAPI", - chroma_server_http_port="8000", + chroma_server_http_port=8000, allow_reset=True, ) system = System(settings) diff --git a/chromadb/test/test_chroma.py b/chromadb/test/test_chroma.py index 9d88ea8cc492..89b4ae924eb0 100644 --- a/chromadb/test/test_chroma.py +++ b/chromadb/test/test_chroma.py @@ -66,7 +66,7 @@ def test_fastapi(self, mock: Mock) -> None: chroma_api_impl="chromadb.api.fastapi.FastAPI", persist_directory="./foo", chroma_server_host="foo", - chroma_server_http_port="80", + chroma_server_http_port=80, ) ) assert mock.called @@ -78,7 +78,7 @@ def test_settings_pass_to_fastapi(self, mock: Mock) -> None: settings = chromadb.config.Settings( chroma_api_impl="chromadb.api.fastapi.FastAPI", chroma_server_host="foo", - chroma_server_http_port="80", + chroma_server_http_port=80, chroma_server_headers={"foo": "bar"}, ) client = chromadb.Client(settings) @@ -106,7 +106,7 @@ def test_legacy_values() -> None: chroma_api_impl="chromadb.api.local.LocalAPI", persist_directory="./foo", chroma_server_host="foo", - chroma_server_http_port="80", + chroma_server_http_port=80, ) ) 
client.clear_system_cache() diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index f67293d85864..34dd2df14127 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -60,9 +60,9 @@ def test_http_client_with_inconsistent_host_settings() -> None: def test_http_client_with_inconsistent_port_settings() -> None: try: chromadb.HttpClient( - port="8002", + port=8002, settings=Settings( - chroma_server_http_port="8001", + chroma_server_http_port=8001, ), ) except ValueError as e: From 05fdd46e920cb45900caab13ea848a487e9358fe Mon Sep 17 00:00:00 2001 From: Weili Gu <3451471+weiligu@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:13:49 -0800 Subject: [PATCH 6/9] make collection_id primary key for segment, fix system tests (#1731) ## Description of changes - collection id should be primary key of segment table, for getSegments performance (there will be a follow up on fixing get Segment since we should push down collection_id) - https://linear.app/trychroma/issue/CHR-324/segment-table-should-have-collection-id-as-primary-key - fixing tests broken by https://github.com/chroma-core/chroma/commit/93194c8a6a2dde33031cb812af65acd4fada4662 ## Test plan *How are these changes tested?* - [x] passing existing tests --- Tiltfile | 4 +- chromadb/test/db/test_system.py | 11 +++- go/coordinator/go.mod | 1 + go/coordinator/go.sum | 5 +- go/coordinator/internal/common/errors.go | 1 + .../internal/coordinator/apis_test.go | 60 +++++++++++-------- go/coordinator/internal/coordinator/meta.go | 27 +++++++++ .../metastore/coordinator/table_catalog.go | 19 +++++- .../internal/metastore/db/dao/segment.go | 25 ++++---- .../internal/metastore/db/dbmodel/segment.go | 6 +- ...{20240215010425.sql => 20240216211350.sql} | 4 +- go/coordinator/migrations/atlas.sum | 4 +- 12 files changed, 117 insertions(+), 50 deletions(-) rename go/coordinator/migrations/{20240215010425.sql => 20240216211350.sql} (97%) diff --git a/Tiltfile b/Tiltfile index 
f1fa96af2ecb..0d0777199f24 100644 --- a/Tiltfile +++ b/Tiltfile @@ -34,8 +34,8 @@ k8s_resource('migration', resource_deps=['postgres'], labels=["chroma"]) k8s_yaml(['k8s/dev/server.yaml']) k8s_resource('server', resource_deps=['k8s_setup'],labels=["chroma"], port_forwards=8000 ) k8s_yaml(['k8s/dev/coordinator.yaml']) -k8s_resource('coordinator', resource_deps=['pulsar', 'server', 'migration'], labels=["chroma"]) +k8s_resource('coordinator', resource_deps=['pulsar', 'server', 'migration'], labels=["chroma"], port_forwards=50051 ) k8s_yaml(['k8s/dev/logservice.yaml']) -k8s_resource('logservice', resource_deps=['migration'], labels=["chroma"]) +k8s_resource('logservice', resource_deps=['migration'], labels=["chroma"], port_forwards='50052:50051') k8s_yaml(['k8s/dev/worker.yaml']) k8s_resource('worker', resource_deps=['coordinator'],labels=["chroma"]) diff --git a/chromadb/test/db/test_system.py b/chromadb/test/db/test_system.py index 3cd2a9954ec9..e65beeb5b62c 100644 --- a/chromadb/test/db/test_system.py +++ b/chromadb/test/db/test_system.py @@ -721,7 +721,7 @@ def test_update_segment(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, topic="test_topic_a", collection=sample_collections[0]["id"], - metadata=metadata + metadata=metadata, ) sysdb.reset_state() @@ -732,52 +732,61 @@ def test_update_segment(sysdb: SysDB) -> None: sysdb.create_segment(segment) + # TODO: revisit update segment - push collection id # Update topic to new value segment["topic"] = "new_topic" sysdb.update_segment(segment["id"], topic=segment["topic"]) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Update topic to None segment["topic"] = None sysdb.update_segment(segment["id"], topic=segment["topic"]) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Update collection to new value segment["collection"] = sample_collections[1]["id"] 
sysdb.update_segment(segment["id"], collection=segment["collection"]) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Update collection to None segment["collection"] = None sysdb.update_segment(segment["id"], collection=segment["collection"]) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Add a new metadata key metadata["test_str2"] = "str2" sysdb.update_segment(segment["id"], metadata={"test_str2": "str2"}) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Update a metadata key metadata["test_str"] = "str3" sysdb.update_segment(segment["id"], metadata={"test_str": "str3"}) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Delete a metadata key del metadata["test_str"] sysdb.update_segment(segment["id"], metadata={"test_str": None}) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Delete all metadata keys segment["metadata"] = None sysdb.update_segment(segment["id"], metadata=None) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] diff --git a/go/coordinator/go.mod b/go/coordinator/go.mod index 93b04935f57f..8c9317b439ea 100644 --- a/go/coordinator/go.mod +++ b/go/coordinator/go.mod @@ -6,6 +6,7 @@ require ( ariga.io/atlas-provider-gorm v0.1.1 github.com/apache/pulsar-client-go v0.9.1-0.20231030094548-620ecf4addfb github.com/google/uuid v1.3.1 + github.com/lib/pq v1.10.7 github.com/pingcap/log v1.1.0 github.com/rs/zerolog v1.31.0 github.com/spf13/cobra v1.7.0 diff --git a/go/coordinator/go.sum b/go/coordinator/go.sum index 1977a3665238..adb6bb095083 100644 --- a/go/coordinator/go.sum +++ b/go/coordinator/go.sum @@ -12,8 
+12,6 @@ github.com/AthenZ/athenz v1.10.39/go.mod h1:3Tg8HLsiQZp81BJY58JBeU2BR6B/H4/0MQGf github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DataDog/zstd v1.5.0 h1:+K/VEwIAaPcHiMtQvpLD4lqW7f0Gk3xdYZmI1hD+CXo= github.com/DataDog/zstd v1.5.0/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= -github.com/alecthomas/kong v0.7.1 h1:azoTh0IOfwlAX3qN9sHWTxACE2oV8Bg2gAwBsMwDQY4= -github.com/alecthomas/kong v0.7.1/go.mod h1:n1iCIO2xS46oE8ZfYCNDqdR0b0wZNrXAIAqro/2132U= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= @@ -156,6 +154,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lib/pq v1.10.7 h1:p7ZhMD+KsSRozJr34udlUrhboJwWAgCg34+/ZZNvZZw= +github.com/lib/pq v1.10.7/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/linkedin/goavro/v2 v2.9.8 h1:jN50elxBsGBDGVDEKqUlDuU1cFwJ11K/yrJCBMe/7Wg= github.com/linkedin/goavro/v2 v2.9.8/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= @@ -346,7 +346,6 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.10.0 
h1:tvDr/iQoUqNdohiYm0LmmKcBk+q86lb9EprIUFhHHGg= -golang.org/x/tools v0.10.0/go.mod h1:UJwyiVBsOA2uwvK/e5OY3GTpDUJriEd+/YlqAwLPmyM= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/go/coordinator/internal/common/errors.go b/go/coordinator/internal/common/errors.go index 0275e2b6574b..5ba4284410f9 100644 --- a/go/coordinator/internal/common/errors.go +++ b/go/coordinator/internal/common/errors.go @@ -31,6 +31,7 @@ var ( ErrInvalidCollectionUpdate = errors.New("invalid collection update, reset collection true and collection value not empty") ErrSegmentUniqueConstraintViolation = errors.New("unique constraint violation") ErrSegmentDeleteNonExistingSegment = errors.New("delete non existing segment") + ErrSegmentUpdateNonExistingSegment = errors.New("update non existing segment") // Segment metadata errors ErrUnknownSegmentMetadataType = errors.New("segment metadata value type not supported") diff --git a/go/coordinator/internal/coordinator/apis_test.go b/go/coordinator/internal/coordinator/apis_test.go index 62ff01ecec05..3f780c258c32 100644 --- a/go/coordinator/internal/coordinator/apis_test.go +++ b/go/coordinator/internal/coordinator/apis_test.go @@ -872,11 +872,13 @@ func TestUpdateSegment(t *testing.T) { }) // Update topic to new value + collectionID := segment.CollectionID.String() newTopic := "new_topic" segment.Topic = &newTopic c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Topic: segment.Topic, + Collection: &collectionID, + ID: segment.ID, + Topic: segment.Topic, }) result, err := c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) assert.NoError(t, err) @@ -885,6 +887,7 @@ func TestUpdateSegment(t *testing.T) { // Update topic to None 
segment.Topic = nil c.UpdateSegment(ctx, &model.UpdateSegment{ + Collection: &collectionID, ID: segment.ID, Topic: segment.Topic, ResetTopic: true, @@ -893,33 +896,35 @@ func TestUpdateSegment(t *testing.T) { assert.NoError(t, err) assert.Equal(t, []*model.Segment{segment}, result) + // TODO: revisit why we need this // Update collection to new value - segment.CollectionID = sampleCollections[1].ID - newCollecionID := segment.CollectionID.String() - c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Collection: &newCollecionID, - }) - result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) - assert.NoError(t, err) - assert.Equal(t, []*model.Segment{segment}, result) + //segment.CollectionID = sampleCollections[1].ID + //newCollecionID := segment.CollectionID.String() + //c.UpdateSegment(ctx, &model.UpdateSegment{ + // ID: segment.ID, + // Collection: &newCollecionID, + //}) + //result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) + //assert.NoError(t, err) + //assert.Equal(t, []*model.Segment{segment}, result) // Update collection to None - segment.CollectionID = types.NilUniqueID() - c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Collection: nil, - ResetCollection: true, - }) - result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) - assert.NoError(t, err) - assert.Equal(t, []*model.Segment{segment}, result) + //segment.CollectionID = types.NilUniqueID() + //c.UpdateSegment(ctx, &model.UpdateSegment{ + // ID: segment.ID, + // Collection: nil, + // ResetCollection: true, + //}) + //result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) + //assert.NoError(t, err) + //assert.Equal(t, []*model.Segment{segment}, result) // Add a new metadata key segment.Metadata.Set("test_str2", &model.SegmentMetadataValueStringType{Value: "str2"}) c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Metadata: segment.Metadata}) + Collection: 
&collectionID, + ID: segment.ID, + Metadata: segment.Metadata}) result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) assert.NoError(t, err) assert.Equal(t, []*model.Segment{segment}, result) @@ -927,8 +932,9 @@ func TestUpdateSegment(t *testing.T) { // Update a metadata key segment.Metadata.Set("test_str", &model.SegmentMetadataValueStringType{Value: "str3"}) c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Metadata: segment.Metadata}) + Collection: &collectionID, + ID: segment.ID, + Metadata: segment.Metadata}) result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) assert.NoError(t, err) assert.Equal(t, []*model.Segment{segment}, result) @@ -938,8 +944,9 @@ func TestUpdateSegment(t *testing.T) { newMetadata := model.NewSegmentMetadata[model.SegmentMetadataValueType]() newMetadata.Set("test_str", nil) c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Metadata: newMetadata}) + Collection: &collectionID, + ID: segment.ID, + Metadata: newMetadata}) result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) assert.NoError(t, err) assert.Equal(t, []*model.Segment{segment}, result) @@ -947,6 +954,7 @@ func TestUpdateSegment(t *testing.T) { // Delete all metadata keys segment.Metadata = nil c.UpdateSegment(ctx, &model.UpdateSegment{ + Collection: &collectionID, ID: segment.ID, Metadata: segment.Metadata, ResetMetadata: true}, diff --git a/go/coordinator/internal/coordinator/meta.go b/go/coordinator/internal/coordinator/meta.go index f6f2df7584e4..720eb877388a 100644 --- a/go/coordinator/internal/coordinator/meta.go +++ b/go/coordinator/internal/coordinator/meta.go @@ -2,6 +2,8 @@ package coordinator import ( "context" + "errors" + "github.com/jackc/pgx/v5/pgconn" "sync" "github.com/chroma/chroma-coordinator/internal/common" @@ -222,6 +224,18 @@ func (mt *MetaTable) AddCollection(ctx context.Context, createCollection *model. 
collection, err := mt.catalog.CreateCollection(ctx, createCollection, createCollection.Ts) if err != nil { log.Error("create collection failed", zap.Error(err)) + var pgErr *pgconn.PgError + ok := errors.As(err, &pgErr) + if ok { + log.Error("Postgres Error") + switch pgErr.Code { + case "23505": + log.Error("collection id already exists") + return nil, common.ErrCollectionUniqueConstraintViolation + default: + return nil, err + } + } return nil, err } mt.tenantDatabaseCollectionCache[tenantID][databaseName][collection.ID] = collection @@ -361,6 +375,19 @@ func (mt *MetaTable) AddSegment(ctx context.Context, createSegment *model.Create segment, err := mt.catalog.CreateSegment(ctx, createSegment, createSegment.Ts) if err != nil { + log.Error("create segment failed", zap.Error(err)) + var pgErr *pgconn.PgError + ok := errors.As(err, &pgErr) + if ok { + log.Error("Postgres Error") + switch pgErr.Code { + case "23505": + log.Error("segment id already exists") + return common.ErrSegmentUniqueConstraintViolation + default: + return err + } + } return err } mt.segmentsCache[createSegment.ID] = segment diff --git a/go/coordinator/internal/metastore/coordinator/table_catalog.go b/go/coordinator/internal/metastore/coordinator/table_catalog.go index 4bd0d7f1244f..f8ae8a84e287 100644 --- a/go/coordinator/internal/metastore/coordinator/table_catalog.go +++ b/go/coordinator/internal/metastore/coordinator/table_catalog.go @@ -2,7 +2,6 @@ package coordinator import ( "context" - "github.com/chroma/chroma-coordinator/internal/common" "github.com/chroma/chroma-coordinator/internal/metastore" "github.com/chroma/chroma-coordinator/internal/metastore/db/dbmodel" @@ -222,7 +221,7 @@ func (tc *Catalog) CreateCollection(ctx context.Context, createCollection *model } collectionName := createCollection.Name - existing, err := tc.metaDomain.CollectionDb(txCtx).GetCollections(types.FromUniqueID(createCollection.ID), &collectionName, nil, tenantID, databaseName) + existing, err := 
tc.metaDomain.CollectionDb(txCtx).GetCollections(nil, &collectionName, nil, tenantID, databaseName) if err != nil { log.Error("error getting collection", zap.Error(err)) return err @@ -492,6 +491,22 @@ func (tc *Catalog) UpdateSegment(ctx context.Context, updateSegment *model.Updat var result *model.Segment err := tc.txImpl.Transaction(ctx, func(txCtx context.Context) error { + // TODO: we should push in collection_id here, add a GET to fix test for now + if updateSegment.Collection == nil { + results, err := tc.metaDomain.SegmentDb(txCtx).GetSegments(updateSegment.ID, nil, nil, nil, types.NilUniqueID()) + if err != nil { + return err + } + if results == nil || len(results) == 0 { + return common.ErrSegmentUpdateNonExistingSegment + } + if results != nil && len(results) > 1 { + // TODO: fix this error + return common.ErrInvalidCollectionUpdate + } + updateSegment.Collection = results[0].Segment.CollectionID + } + // update segment dbSegment := &dbmodel.UpdateSegment{ ID: updateSegment.ID.String(), diff --git a/go/coordinator/internal/metastore/db/dao/segment.go b/go/coordinator/internal/metastore/db/dao/segment.go index c4c3842e2784..5d57e6f941a6 100644 --- a/go/coordinator/internal/metastore/db/dao/segment.go +++ b/go/coordinator/internal/metastore/db/dao/segment.go @@ -165,20 +165,23 @@ func generateSegmentUpdatesWithoutID(in *dbmodel.UpdateSegment) map[string]inter } } - if in.ResetCollection { - if in.Collection == nil { - ret["collection_id"] = nil - } - } else { - if in.Collection != nil { - ret["collection_id"] = *in.Collection - } - } - log.Info("generate segment updates without id", zap.Any("updates", ret)) + // TODO: check this + //if in.ResetCollection { + // if in.Collection == nil { + // ret["collection_id"] = nil + // } + //} else { + // if in.Collection != nil { + // ret["collection_id"] = *in.Collection + // } + //} + //log.Info("generate segment updates without id", zap.Any("updates", ret)) return ret } func (s *segmentDb) Update(in 
*dbmodel.UpdateSegment) error { updates := generateSegmentUpdatesWithoutID(in) - return s.db.Model(&dbmodel.Segment{}).Where("id = ?", in.ID).Updates(updates).Error + return s.db.Model(&dbmodel.Segment{}). + Where("collection_id = ?", &in.Collection). + Where("id = ?", in.ID).Updates(updates).Error } diff --git a/go/coordinator/internal/metastore/db/dbmodel/segment.go b/go/coordinator/internal/metastore/db/dbmodel/segment.go index 0967436e11e8..50fe84ec7cc2 100644 --- a/go/coordinator/internal/metastore/db/dbmodel/segment.go +++ b/go/coordinator/internal/metastore/db/dbmodel/segment.go @@ -7,6 +7,11 @@ import ( ) type Segment struct { + /* Making CollectionID the primary key allows fast search when we have CollectionID. + This requires us to push down CollectionID from the caller. We don't think there is + need to modify CollectionID in the near future. Each Segment should always have a + collection as a parent and cannot be modified. */ + CollectionID *string `gorm:"collection_id;primaryKey"` ID string `gorm:"id;primaryKey"` Type string `gorm:"type;type:string;not null"` Scope string `gorm:"scope"` @@ -15,7 +20,6 @@ type Segment struct { IsDeleted bool `gorm:"is_deleted;type:bool;default:false"` CreatedAt time.Time `gorm:"created_at;type:timestamp;not null;default:current_timestamp"` UpdatedAt time.Time `gorm:"updated_at;type:timestamp;not null;default:current_timestamp"` - CollectionID *string `gorm:"collection_id"` } func (s Segment) TableName() string { diff --git a/go/coordinator/migrations/20240215010425.sql b/go/coordinator/migrations/20240216211350.sql similarity index 97% rename from go/coordinator/migrations/20240215010425.sql rename to go/coordinator/migrations/20240216211350.sql index 378c5d630e5a..2d4b286c681a 100644 --- a/go/coordinator/migrations/20240215010425.sql +++ b/go/coordinator/migrations/20240216211350.sql @@ -68,6 +68,7 @@ CREATE TABLE "public"."segment_metadata" ( ); -- Create "segments" table CREATE TABLE "public"."segments" ( + 
"collection_id" text NOT NULL, "id" text NOT NULL, "type" text NOT NULL, "scope" text NULL, @@ -76,8 +77,7 @@ CREATE TABLE "public"."segments" ( "is_deleted" boolean NULL DEFAULT false, "created_at" timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, "updated_at" timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, - "collection_id" text NULL, - PRIMARY KEY ("id") + PRIMARY KEY ("collection_id", "id") ); -- Create "tenants" table CREATE TABLE "public"."tenants" ( diff --git a/go/coordinator/migrations/atlas.sum b/go/coordinator/migrations/atlas.sum index 624c7eabe3aa..6d1a0e5baaa9 100644 --- a/go/coordinator/migrations/atlas.sum +++ b/go/coordinator/migrations/atlas.sum @@ -1,2 +1,2 @@ -h1:OoMkQddKcFi1jQ4pCp2i8IJAIEDHjQpI3mw+sHoQ1fI= -20240215010425.sql h1:U4h0i9epzZOrFesFlcMJ8250n3SoY5Uv0AejgcZCTTw= +h1:0AmSHt0xnRVJjHv8/LoOph5FzyVC5io1/O1lOY/Ihdo= +20240216211350.sql h1:yoz9m9lOVG1g7JPG0sWW+PXOb5sNg1W7Y5kLqhibGqg= From 887d0b54a314ee0ce1d4c441f7eb4ab409433234 Mon Sep 17 00:00:00 2001 From: Anton Troynikov Date: Wed, 21 Feb 2024 10:52:51 -0800 Subject: [PATCH 7/9] [ENH] Upgrade tests and release to Python 3.12 (#1715) ## Description of changes Chroma did not support Python 3.12 because of our dependency on the ONNX runtime for our default embedding function. As of version 1.17.0, ONNX supports python 3.12: https://github.com/microsoft/onnxruntime/issues/17842#issuecomment-1936484800 This already automatically fixes the issue for Chroma users when they install the new version of ONNX / reinstall Chroma. This PR is just to update our test and release actions to also use python 3.12. ## Test plan These are changes to test workers. 
## Documentation Changes N/A --- .../chroma-client-integration-test.yml | 2 +- .../chroma-release-python-client.yml | 2 +- .github/workflows/chroma-test.yml | 2 +- DEVELOP.md | 6 ++---- requirements.txt | 20 +++++++++---------- 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/.github/workflows/chroma-client-integration-test.yml b/.github/workflows/chroma-client-integration-test.yml index 5724959c2549..e525f3a70787 100644 --- a/.github/workflows/chroma-client-integration-test.yml +++ b/.github/workflows/chroma-client-integration-test.yml @@ -15,7 +15,7 @@ jobs: timeout-minutes: 90 strategy: matrix: - python: ['3.8', '3.9', '3.10', '3.11'] + python: ['3.8', '3.9', '3.10', '3.11', '3.12'] platform: [ubuntu-latest, windows-latest] runs-on: ${{ matrix.platform }} steps: diff --git a/.github/workflows/chroma-release-python-client.yml b/.github/workflows/chroma-release-python-client.yml index 2abc0d524aba..c4f2a2990a95 100644 --- a/.github/workflows/chroma-release-python-client.yml +++ b/.github/workflows/chroma-release-python-client.yml @@ -33,7 +33,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.12' - name: Install Client Dev Dependencies run: python -m pip install -r ./clients/python/requirements.txt && python -m pip install -r ./clients/python/requirements_dev.txt - name: Build Client diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index 12a5de4b6eda..14dc63624e91 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -16,7 +16,7 @@ jobs: timeout-minutes: 90 strategy: matrix: - python: ['3.8', '3.9', '3.10', '3.11'] + python: ['3.8', '3.9', '3.10', '3.11', '3.12'] platform: [ubuntu-latest, windows-latest] testfile: ["--ignore-glob 'chromadb/test/property/*' --ignore-glob 'chromadb/test/stress/*' --ignore='chromadb/test/auth/test_simple_rbac_authz.py'", "chromadb/test/auth/test_simple_rbac_authz.py", diff --git 
a/DEVELOP.md b/DEVELOP.md index 05357f29e60a..c9550e639f46 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -6,8 +6,6 @@ https://packaging.python.org. ## Setup -Because of the dependencies it relies on (like `pytorch`), this project does not support Python version >3.10.0. - Set up a virtual environment and install the project's requirements and dev requirements: @@ -51,14 +49,14 @@ api = chromadb.HttpClient(host="localhost", port="8000") print(api.heartbeat()) ``` ## Local dev setup for distributed chroma -We use tilt for providing local dev setup. Tilt is an open source project +We use tilt for providing local dev setup. Tilt is an open source project ##### Requirement - Docker - Local Kubernetes cluster (Recommended: [OrbStack](https://orbstack.dev/) for mac, [Kind](https://kind.sigs.k8s.io/) for linux) - [Tilt](https://docs.tilt.dev/) For starting the distributed Chroma in the workspace, use `tilt up`. It will create all the required resources and build the necessary Docker image in the current kubectl context. -Once done, it will expose Chroma on port 8000. You can also visit the Tilt dashboard UI at http://localhost:10350/. To clean and remove all the resources created by Tilt, use `tilt down`. +Once done, it will expose Chroma on port 8000. You can also visit the Tilt dashboard UI at http://localhost:10350/. To clean and remove all the resources created by Tilt, use `tilt down`. 
## Testing diff --git a/requirements.txt b/requirements.txt index 6a1b1fb966f2..0ed94e5033ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -bcrypt==4.0.1 -chroma-hnswlib==0.7.3 +bcrypt>=4.0.1 +chroma-hnswlib>=0.7.3 fastapi>=0.95.2 graphlib_backport==1.0.3; python_version < '3.9' grpcio>=1.58.0 @@ -12,17 +12,17 @@ opentelemetry-api>=1.2.0 opentelemetry-exporter-otlp-proto-grpc>=1.2.0 opentelemetry-instrumentation-fastapi>=0.41b0 opentelemetry-sdk>=1.2.0 -overrides==7.3.1 -posthog==2.4.0 -pulsar-client==3.1.0 +overrides>=7.3.1 +posthog>=2.4.0 +pulsar-client>=3.1.0 pydantic>=1.9 -pypika==0.48.9 +pypika>=0.48.9 PyYAML>=6.0.0 -requests==2.28.1 +requests>=2.28.1 tenacity>=8.2.3 -tokenizers==0.13.2 +tokenizers>=0.13.2 tqdm>=4.65.0 typer>=0.9.0 typing_extensions>=4.5.0 -uvicorn[standard]==0.18.3 -orjson>=3.9.12 \ No newline at end of file +uvicorn[standard]>=0.18.3 +orjson>=3.9.12 From 12ad9e615300aad521eb8bc5589c74e2fa4d7480 Mon Sep 17 00:00:00 2001 From: Anton Troynikov Date: Wed, 21 Feb 2024 13:37:25 -0800 Subject: [PATCH 8/9] [ENH] Remove ONNX Logspam (#1747) ## Description of changes After 1.17, ONNXRuntime produces scary warnings on mac platforms, because it tries to put our default embedding function into the CoreML execution environment, where it doesn't fit. This PR suppresses warnings from ONNX within the default embedding function so that users don't see scary warnings. ## Test plan Locally tested via the `start_here` notebook. 
## Documentation Changes N/A --- chromadb/utils/embedding_functions.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/chromadb/utils/embedding_functions.py b/chromadb/utils/embedding_functions.py index ec5fc05e3ee9..f54ab88c42e3 100644 --- a/chromadb/utils/embedding_functions.py +++ b/chromadb/utils/embedding_functions.py @@ -506,11 +506,17 @@ def model(self) -> "InferenceSession": raise ValueError( f"Preferred providers must be subset of available providers: {self.ort.get_available_providers()}" ) + + # Suppress onnxruntime warnings. This produces logspew, mainly when onnx tries to use CoreML, which doesn't fit this model. + so = self.ort.SessionOptions() + so.log_severity_level = 3 + return self.ort.InferenceSession( os.path.join(self.DOWNLOAD_PATH, self.EXTRACTED_FOLDER_NAME, "model.onnx"), # Since 1.9 onnyx runtime requires providers to be specified when there are multiple available - https://onnxruntime.ai/docs/api/python/api_summary.html # This is probably not ideal but will improve DX as no exceptions will be raised in multi-provider envs providers=self._preferred_providers, + sess_options=so, ) def __call__(self, input: Documents) -> Embeddings: From d9a8c28055ca1aa4c602560c0117f7608858d3f0 Mon Sep 17 00:00:00 2001 From: nicolasgere Date: Wed, 21 Feb 2024 17:23:22 -0800 Subject: [PATCH 9/9] [ENH]: update coordinator docker for faster build (#1729) ## Description of changes *Summarize the changes made by this PR.* - Improvements & Bug fixes - Make dockerfile build faster for coordinator ## Test plan *How are these changes tested?* With tilt, locally Co-authored-by: nicolas --- go/coordinator/Dockerfile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/go/coordinator/Dockerfile b/go/coordinator/Dockerfile index 554da75f93ad..59da87fdb60e 100644 --- a/go/coordinator/Dockerfile +++ b/go/coordinator/Dockerfile @@ -1,12 +1,15 @@ FROM golang:1.20-alpine3.18 as build - +WORKDIR /src/chroma-coordinator RUN apk add --no-cache make 
git build-base bash +ADD ./go/coordinator/go.mod ./go.mod +ADD ./go/coordinator/go.sum ./go.sum ENV PATH=$PATH:/go/bin -ADD ./go/coordinator /src/chroma-coordinator +RUN go mod download -RUN cd /src/chroma-coordinator \ - && make +ADD ./go/coordinator ./ +ENV GOCACHE=/root/.cache/go-build +RUN --mount=type=cache,target="/root/.cache/go-build" make FROM alpine:3.17.3