From 5348b7ff9ea00a4ba06198aabeaeddec9d9731af Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Fri, 23 Feb 2024 09:54:51 +0200 Subject: [PATCH 01/27] feat: Adding close() method to clients - WIP Refs: #1756 --- chromadb/api/__init__.py | 10 ++++++++++ chromadb/api/client.py | 5 +++++ chromadb/api/fastapi.py | 5 +++++ chromadb/api/segment.py | 6 ++++++ chromadb/segment/impl/vector/local_persistent_hnsw.py | 6 ++++++ 5 files changed, 32 insertions(+) diff --git a/chromadb/api/__init__.py b/chromadb/api/__init__.py index b6d5b769afc..ab6cae93bb0 100644 --- a/chromadb/api/__init__.py +++ b/chromadb/api/__init__.py @@ -439,6 +439,11 @@ def max_batch_size(self) -> int: to submit_embeddings.""" pass + @abstractmethod + def close(self) -> None: + """Close the client and release any resources.""" + pass + class ClientAPI(BaseAPI, ABC): tenant: str @@ -473,6 +478,11 @@ def clear_system_cache() -> None: This should only be used for testing purposes.""" pass + @abstractmethod + def close(self) -> None: + """Close the client and release any resources.""" + pass + class AdminAPI(ABC): @abstractmethod diff --git a/chromadb/api/client.py b/chromadb/api/client.py index ba797677e46..f7bc40cff3f 100644 --- a/chromadb/api/client.py +++ b/chromadb/api/client.py @@ -459,6 +459,11 @@ def _validate_tenant_database(self, tenant: str, database: str) -> None: f"Could not connect to database {database} for tenant {tenant}. Are you sure it exists?" ) + @override + def close(self) -> None: + if self.get_settings().is_persistent: + self._server.close() + # endregion diff --git a/chromadb/api/fastapi.py b/chromadb/api/fastapi.py index d01028c734f..d01528b840c 100644 --- a/chromadb/api/fastapi.py +++ b/chromadb/api/fastapi.py @@ -629,6 +629,11 @@ def max_batch_size(self) -> int: self._max_batch_size = cast(int, json.loads(resp.text)["max_batch_size"]) return self._max_batch_size + @trace_method("FastAPI.close", OpenTelemetryGranularity.OPERATION) + @override + def close(self) -> None: + self._session.close() + def raise_chroma_error(resp: requests.Response) -> None: """Raises an error if the response is not ok, using a ChromaError if possible""" diff --git a/chromadb/api/segment.py b/chromadb/api/segment.py index f92e7607d6d..5e0e566c1d5 100644 --- a/chromadb/api/segment.py +++ b/chromadb/api/segment.py @@ -99,6 +99,7 @@ class SegmentAPI(ServerAPI): def __init__(self, system: System): super().__init__(system) + self._system = system self._settings = system.settings self._sysdb = self.require(SysDB) self._manager = self.require(SegmentManager) @@ -841,6 +842,11 @@ def _get_collection(self, collection_id: UUID) -> t.Collection: self._collection_cache[collection_id] = collections[0] return self._collection_cache[collection_id] + @trace_method("SegmentAPI.close", OpenTelemetryGranularity.ALL) + @override + def close(self) -> None: + self._system.stop() + def _records( operation: t.Operation, diff --git a/chromadb/segment/impl/vector/local_persistent_hnsw.py b/chromadb/segment/impl/vector/local_persistent_hnsw.py index 4ab60a1725d..b3332664f82 100644 --- a/chromadb/segment/impl/vector/local_persistent_hnsw.py +++ b/chromadb/segment/impl/vector/local_persistent_hnsw.py @@ -127,6 +127,12 @@ def __init__(self, system: System, segment: Segment): self._id_to_seq_id, ) + @trace_method("LocalHnswSegment.stop", OpenTelemetryGranularity.ALL) + @override + def stop(self) -> None: + super().stop() + self.close_persistent_index() + @staticmethod @override def propagate_collection_metadata(metadata: Metadata) -> Optional[Metadata]: From 6a4cd702df897f341bbcd312a95bbed202473c4e Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Fri, 15 Mar 2024 21:28:04 +0200 Subject: [PATCH 02/27] feat: Added tests --- chromadb/api/client.py | 3 +- chromadb/test/test_client.py | 57 ++++++++++++++++++++++++++++++++++++ requirements_dev.txt | 1 + 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/chromadb/api/client.py b/chromadb/api/client.py index f7bc40cff3f..5ff921c9b85 100644 --- a/chromadb/api/client.py +++ b/chromadb/api/client.py @@ -461,8 +461,7 @@ def _validate_tenant_database(self, tenant: str, database: str) -> None: @override def close(self) -> None: - if self.get_settings().is_persistent: - self._server.close() + self._server.close() # endregion diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 34dd2df1412..b6ab32ef5f4 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -1,5 +1,10 @@ +import json +import shutil from typing import Generator from unittest.mock import patch +from pytest_httpserver import HTTPServer +import psutil + import chromadb from chromadb.config import Settings from chromadb.api import ClientAPI @@ -22,6 +27,7 @@ def persistent_api() -> Generator[ClientAPI, None, None]: ) yield client client.clear_system_cache() + shutil.rmtree(tempfile.gettempdir() + "/test_server", ignore_errors=True) @pytest.fixture @@ -70,3 +76,54 @@ def test_http_client_with_inconsistent_port_settings() -> None: str(e) == "Chroma server http port provided in settings[8001] is different to the one provided in HttpClient: [8002]" ) + + +def test_persistent_client_close(persistent_api: ClientAPI) -> None: + current_process = psutil.Process() + col = persistent_api.create_collection("test") + col.add(ids=["1"], documents=["test"]) + open_files = current_process.open_files() + assert any(["test_server/chroma.sqlite3" in file.path for file in open_files]) + assert any(["data_level0.bin" in file.path for file in open_files]) + persistent_api.close() + open_files = current_process.open_files() + assert all(["test_server/chroma.sqlite3" not in file.path for file in open_files]) + assert all(["data_level0.bin" not in file.path for file in open_files]) + + +def test_http_client_close(http_api: ClientAPI) -> None: + with HTTPServer(port=8000) as httpserver: + # Define the response + httpserver.expect_request("/api/v1/tenants/default_tenant").respond_with_data( + "default_tenant" + ) + httpserver.expect_request( + "/api/v1/databases/default_database?tenant=default_tenant" + ).respond_with_data(json.dumps({"version": "0.0.1"})) + httpserver.expect_request("/api/v1/collections").respond_with_data( + json.dumps( + { + "name": "x", + "id": "4ca8f010-b535-4778-9262-c6f3812e17b6", + "metadata": None, + "tenant": "default_tenant", + "database": "default_database", + } + ) + ) + httpserver.expect_request("/api/v1/pre-flight-checks").respond_with_data( + json.dumps( + { + "max_batch_size": 10000, + } + ) + ) + httpserver.expect_request( + "/api/v1/collections/4ca8f010-b535-4778-9262-c6f3812e17b6/add" + ).respond_with_data(json.dumps({})) + col = http_api.create_collection("test") + col.add(ids=["1"], documents=["test"]) + _pool_manager = http_api._server._session.get_adapter("http://").poolmanager # type: ignore + assert len(_pool_manager.pools._container) > 0 + http_api.close() + assert len(_pool_manager.pools._container) == 0 diff --git a/requirements_dev.txt b/requirements_dev.txt index 4df73fb8c56..03123bdcfc8 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -8,6 +8,7 @@ mypy-protobuf pre-commit pytest pytest-asyncio +pytest-httpserver setuptools_scm types-protobuf types-requests==2.30.0.0 From 0a79168a658d3ea0f83da1e797d26c1b5495bbbf Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Fri, 15 Mar 2024 21:32:44 +0200 Subject: [PATCH 03/27] fix: Added psutil to dev deps --- requirements_dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements_dev.txt b/requirements_dev.txt index 03123bdcfc8..32cd152ff54 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -6,6 +6,7 @@ hypothesis<=6.98.9 # > Than this version has API changes we don't currently supp hypothesis[numpy]<=6.98.9 mypy-protobuf pre-commit +psutil pytest pytest-asyncio pytest-httpserver From 1602fdb049ac941a725d45448258cad6476af48b Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Sat, 16 Mar 2024 12:26:15 +0200 Subject: [PATCH 04/27] fix: Fixing for integration tests --- chromadb/test/test_client.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index b6ab32ef5f4..14c88c6b9b3 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -1,4 +1,5 @@ import json +import os import shutil from typing import Generator from unittest.mock import patch @@ -79,19 +80,28 @@ def test_http_client_with_inconsistent_port_settings() -> None: def test_persistent_client_close(persistent_api: ClientAPI) -> None: + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY") == "1": + pytest.skip( + "Skipping test that closes the persistent client in integration test" + ) current_process = psutil.Process() col = persistent_api.create_collection("test") col.add(ids=["1"], documents=["test"]) open_files = current_process.open_files() - assert any(["test_server/chroma.sqlite3" in file.path for file in open_files]) + print("OPEN FILES", open_files) + assert any(["chroma.sqlite3" in file.path for file in open_files]) assert any(["data_level0.bin" in file.path for file in open_files]) persistent_api.close() open_files = current_process.open_files() - assert all(["test_server/chroma.sqlite3" not in file.path for file in open_files]) + assert all(["chroma.sqlite3" not in file.path for file in open_files]) assert all(["data_level0.bin" not in file.path for file in open_files]) def test_http_client_close(http_api: ClientAPI) -> None: + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY") == "1": + pytest.skip( + "Skipping test that closes the persistent client in integration test" + ) with HTTPServer(port=8000) as httpserver: # Define the response httpserver.expect_request("/api/v1/tenants/default_tenant").respond_with_data( From 20ca1359dd09977856d4ebd7abd5608acec15ef1 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Sat, 16 Mar 2024 12:54:39 +0200 Subject: [PATCH 05/27] fix: Adding debug to tests --- chromadb/test/test_client.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 14c88c6b9b3..91a4d4b8bba 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -86,13 +86,16 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: ) current_process = psutil.Process() col = persistent_api.create_collection("test") + col1 = persistent_api.create_collection("test1") col.add(ids=["1"], documents=["test"]) + col1.add(ids=["1"], documents=["test1"]) open_files = current_process.open_files() - print("OPEN FILES", open_files) + print("OPEN FILES BEFORE", open_files) assert any(["chroma.sqlite3" in file.path for file in open_files]) assert any(["data_level0.bin" in file.path for file in open_files]) persistent_api.close() open_files = current_process.open_files() + print("OPEN FILES AFTER", open_files) assert all(["chroma.sqlite3" not in file.path for file in open_files]) assert all(["data_level0.bin" not in file.path for file in open_files]) From 883082af59f25cd52d28fa0cd84c8a18c9867ae7 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Wed, 20 Mar 2024 17:40:19 +0100 Subject: [PATCH 06/27] fix: Checking for persistent path in test. --- chromadb/test/test_client.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 91a4d4b8bba..30b4053735b 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -1,5 +1,6 @@ import json import os +import re import shutil from typing import Generator from unittest.mock import patch @@ -86,18 +87,19 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: ) current_process = psutil.Process() col = persistent_api.create_collection("test") + temp_persist_dir=persistent_api.get_settings().persist_directory col1 = persistent_api.create_collection("test1") col.add(ids=["1"], documents=["test"]) col1.add(ids=["1"], documents=["test1"]) open_files = current_process.open_files() print("OPEN FILES BEFORE", open_files) - assert any(["chroma.sqlite3" in file.path for file in open_files]) - assert any(["data_level0.bin" in file.path for file in open_files]) + assert any([re.search(fr'{temp_persist_dir}.*chroma.sqlite3', file.path) is not None for file in open_files]) + assert any([re.search(fr'{temp_persist_dir}.*data_level0.bin',file.path) is not None for file in open_files]) persistent_api.close() open_files = current_process.open_files() print("OPEN FILES AFTER", open_files) - assert all(["chroma.sqlite3" not in file.path for file in open_files]) - assert all(["data_level0.bin" not in file.path for file in open_files]) + assert all([re.search(fr'{temp_persist_dir}.*chroma.sqlite3', file.path) is None for file in open_files]) + assert all([re.search(fr'{temp_persist_dir}.*data_level0.bin',file.path) is None for file in open_files]) def test_http_client_close(http_api: ClientAPI) -> None: From 4e4a50e352b5d1f036e6ed508d9f3bf8226ac926 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 21 Mar 2024 11:30:50 +0100 Subject: [PATCH 07/27] test: Added test for double close and use after close --- chromadb/api/segment.py | 48 +++++++ chromadb/test/test_client.py | 260 ++++++++++++++++++++++++++++++----- 2 files changed, 277 insertions(+), 31 deletions(-) diff --git a/chromadb/api/segment.py b/chromadb/api/segment.py index 5e0e566c1d5..f70a9378ed9 100644 --- a/chromadb/api/segment.py +++ b/chromadb/api/segment.py @@ -109,13 +109,21 @@ def __init__(self, system: System): self._producer = self.require(Producer) self._collection_cache = {} + @override + def start(self) -> None: + super().start() + @override def heartbeat(self) -> int: + if not self._running: + raise RuntimeError("Component not running or already closed") return int(time.time_ns()) @trace_method("SegmentAPI.create_database", OpenTelemetryGranularity.OPERATION) @override def create_database(self, name: str, tenant: str = DEFAULT_TENANT) -> None: + if not self._running: + raise RuntimeError("Component not running or already closed") if len(name) < 3: raise ValueError("Database name must be at least 3 characters long") @@ -127,10 +135,14 @@ def create_database(self, name: str, tenant: str = DEFAULT_TENANT) -> None: @trace_method("SegmentAPI.get_database", OpenTelemetryGranularity.OPERATION) @override def get_database(self, name: str, tenant: str = DEFAULT_TENANT) -> t.Database: + if not self._running: + raise RuntimeError("Component not running or already closed") return self._sysdb.get_database(name=name, tenant=tenant) @trace_method("SegmentAPI.create_tenant", OpenTelemetryGranularity.OPERATION) @override def create_tenant(self, name: str) -> None: + if not self._running: + raise RuntimeError("Component not running or already closed") if len(name) < 3: raise ValueError("Tenant name must be at least 3 characters long") @@ -140,6 +152,8 @@ def create_tenant(self, name: str) -> None: @trace_method("SegmentAPI.get_tenant", OpenTelemetryGranularity.OPERATION) @override def get_tenant(self, name: str) -> t.Tenant: + if not self._running: + raise RuntimeError("Component not running or already closed") return self._sysdb.get_tenant(name=name) # TODO: Actually fix CollectionMetadata type to remove type: ignore flags. This is @@ -159,6 +173,8 @@ def create_collection( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Collection: + if not self._running: + raise RuntimeError("Component not running or already closed") if metadata is not None: validate_metadata(metadata) @@ -220,6 +236,8 @@ def get_or_create_collection( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Collection: + if not self._running: + raise RuntimeError("Component not running or already closed") return self.create_collection( # type: ignore name=name, metadata=metadata, @@ -246,6 +264,8 @@ def get_collection( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Collection: + if not self._running: + raise RuntimeError("Component not running or already closed") if id is None and name is None or (id is not None and name is not None): raise ValueError("Name or id must be specified, but not both") existing = self._sysdb.get_collections( @@ -275,6 +295,8 @@ def list_collections( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Sequence[Collection]: + if not self._running: + raise RuntimeError("Component not running or already closed") collections = [] db_collections = self._sysdb.get_collections( limit=limit, offset=offset, tenant=tenant, database=database @@ -299,6 +321,8 @@ def count_collections( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> int: + if not self._running: + raise RuntimeError("Component not running or already closed") collection_count = len( self._sysdb.get_collections(tenant=tenant, database=database) ) @@ -313,6 +337,8 @@ def _modify( new_name: Optional[str] = None, new_metadata: Optional[CollectionMetadata] = None, ) -> None: + if not self._running: + raise RuntimeError("Component not running or already closed") if new_name: # backwards compatibility in naming requirements (for now) check_index_name(new_name) @@ -337,6 +363,8 @@ def delete_collection( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> None: + if not self._running: + raise RuntimeError("Component not running or already closed") existing = self._sysdb.get_collections( name=name, tenant=tenant, database=database ) @@ -364,6 +392,8 @@ def _add( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: + if not self._running: + raise RuntimeError("Component not running or already closed") self._quota.static_check(metadatas, documents, embeddings, str(collection_id)) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.ADD) @@ -407,6 +437,8 @@ def _update( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: + if not self._running: + raise RuntimeError("Component not running or already closed") self._quota.static_check(metadatas, documents, embeddings, str(collection_id)) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.UPDATE) @@ -452,6 +484,8 @@ def _upsert( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: + if not self._running: + raise RuntimeError("Component not running or already closed") self._quota.static_check(metadatas, documents, embeddings, str(collection_id)) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.UPSERT) @@ -491,6 +525,8 @@ def _get( where_document: Optional[WhereDocument] = {}, include: Include = ["embeddings", "metadatas", "documents"], ) -> GetResult: + if not self._running: + raise RuntimeError("Component not running or already closed") add_attributes_to_current_span( { "collection_id": str(collection_id), @@ -585,6 +621,8 @@ def _delete( where: Optional[Where] = None, where_document: Optional[WhereDocument] = None, ) -> IDs: + if not self._running: + raise RuntimeError("Component not running or already closed") add_attributes_to_current_span( { "collection_id": str(collection_id), @@ -650,6 +688,8 @@ def _delete( @trace_method("SegmentAPI._count", OpenTelemetryGranularity.OPERATION) @override def _count(self, collection_id: UUID) -> int: + if not self._running: + raise RuntimeError("Component not running or already closed") add_attributes_to_current_span({"collection_id": str(collection_id)}) metadata_segment = self._manager.get_segment(collection_id, MetadataReader) return metadata_segment.count() @@ -666,6 +706,8 @@ def _query( where_document: WhereDocument = {}, include: Include = ["documents", "metadatas", "distances"], ) -> QueryResult: + if not self._running: + raise RuntimeError("Component not running or already closed") add_attributes_to_current_span( { "collection_id": str(collection_id), @@ -782,10 +824,14 @@ def get_version(self) -> str: @override def reset_state(self) -> None: + if not self._running: + raise RuntimeError("Component not running or already closed") self._collection_cache = {} @override def reset(self) -> bool: + if not self._running: + raise RuntimeError("Component not running or already closed") self._system.reset_state() return True @@ -796,6 +842,8 @@ def get_settings(self) -> Settings: @property @override def max_batch_size(self) -> int: + if not self._running: + raise RuntimeError("Component not running or already closed") return self._producer.max_batch_size # TODO: This could potentially cause race conditions in a distributed version of the diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 30b4053735b..b0b4e66ad4a 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -2,6 +2,7 @@ import os import re import shutil +import time from typing import Generator from unittest.mock import patch from pytest_httpserver import HTTPServer @@ -87,19 +88,185 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: ) current_process = psutil.Process() col = persistent_api.create_collection("test") - temp_persist_dir=persistent_api.get_settings().persist_directory + temp_persist_dir = persistent_api.get_settings().persist_directory col1 = persistent_api.create_collection("test1") col.add(ids=["1"], documents=["test"]) col1.add(ids=["1"], documents=["test1"]) open_files = current_process.open_files() print("OPEN FILES BEFORE", open_files) - assert any([re.search(fr'{temp_persist_dir}.*chroma.sqlite3', file.path) is not None for file in open_files]) - assert any([re.search(fr'{temp_persist_dir}.*data_level0.bin',file.path) is not None for file in open_files]) + assert any( + [ + re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is not None + for file in open_files + ] + ) + assert any( + [ + re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is not None + for file in open_files + ] + ) persistent_api.close() open_files = current_process.open_files() print("OPEN FILES AFTER", open_files) - assert all([re.search(fr'{temp_persist_dir}.*chroma.sqlite3', file.path) is None for file in open_files]) - assert all([re.search(fr'{temp_persist_dir}.*data_level0.bin',file.path) is None for file in open_files]) + assert all( + [ + re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is None + for file in open_files + ] + ) + assert all( + [ + re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is None + for file in open_files + ] + ) + + +def test_persistent_client_double_close(persistent_api: ClientAPI) -> None: + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY") == "1": + pytest.skip( + "Skipping test that closes the persistent client in integration test" + ) + current_process = psutil.Process() + col = persistent_api.create_collection("test") + temp_persist_dir = persistent_api.get_settings().persist_directory + col.add(ids=["1"], documents=["test"]) + open_files = current_process.open_files() + assert any( + [ + re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is not None + for file in open_files + ] + ) + assert any( + [ + re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is not None + for file in open_files + ] + ) + persistent_api.close() + open_files = current_process.open_files() + assert all( + [ + re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is None + for file in open_files + ] + ) + assert all( + [ + re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is None + for file in open_files + ] + ) + persistent_api.close() + assert all( + [ + re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is None + for file in open_files + ] + ) + assert all( + [ + re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is None + for file in open_files + ] + ) + + +def test_persistent_client_use_after_close(persistent_api: ClientAPI) -> None: + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY") == "1": + pytest.skip( + "Skipping test that closes the persistent client in integration test" + ) + current_process = psutil.Process() + col = persistent_api.create_collection("test") + temp_persist_dir = persistent_api.get_settings().persist_directory + col.add(ids=["1"], documents=["test"]) + open_files = current_process.open_files() + assert any( + [ + re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is not None + for file in open_files + ] + ) + assert any( + [ + re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is not None + for file in open_files + ] + ) + persistent_api.close() + open_files = current_process.open_files() + assert all( + [ + re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is None + for file in open_files + ] + ) + assert all( + [ + re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is None + for file in open_files + ] + ) + with pytest.raises(RuntimeError, match="Component not running"): + col.add(ids=["1"], documents=["test"]) + with pytest.raises(RuntimeError, match="Component not running"): + col.delete(ids=["1"]) + with pytest.raises(RuntimeError, match="Component not running"): + col.update(ids=["1"], documents=["test1231"]) + with pytest.raises(RuntimeError, match="Component not running"): + col.upsert(ids=["1"], documents=["test1231"]) + with pytest.raises(RuntimeError, match="Component not running"): + col.count() + with pytest.raises(RuntimeError, match="Component not running"): + persistent_api.create_collection("test1") + with pytest.raises(RuntimeError, match="Component not running"): + persistent_api.get_collection("test") + with pytest.raises(RuntimeError, match="Component not running"): + persistent_api.get_or_create_collection("test") + with pytest.raises(RuntimeError, match="Component not running"): + persistent_api.list_collections() + with pytest.raises(RuntimeError, match="Component not running"): + persistent_api.delete_collection("test") + with pytest.raises(RuntimeError, match="Component not running"): + persistent_api.count_collections() + with pytest.raises(RuntimeError, match="Component not running"): + persistent_api.heartbeat() + + +def _instrument_http_server(httpserver: HTTPServer) -> None: + httpserver.expect_request("/api/v1/tenants/default_tenant").respond_with_data( + "default_tenant" + ) + httpserver.expect_request( + "/api/v1/databases/default_database?tenant=default_tenant" + ).respond_with_data(json.dumps({"version": "0.0.1"})) + httpserver.expect_request("/api/v1/collections").respond_with_data( + json.dumps( + { + "name": "x", + "id": "4ca8f010-b535-4778-9262-c6f3812e17b6", + "metadata": None, + "tenant": "default_tenant", + "database": "default_database", + } + ) + ) + httpserver.expect_request("/api/v1/pre-flight-checks").respond_with_data( + json.dumps( + { + "max_batch_size": 10000, + } + ) + ) + httpserver.expect_request( + "/api/v1/collections/4ca8f010-b535-4778-9262-c6f3812e17b6/add" + ).respond_with_data(json.dumps({})) + httpserver.expect_request("/api/v1").respond_with_data( + json.dumps({"nanosecond heartbeat": time.time_ns()}) + ) def test_http_client_close(http_api: ClientAPI) -> None: @@ -108,37 +275,68 @@ def test_http_client_close(http_api: ClientAPI) -> None: "Skipping test that closes the persistent client in integration test" ) with HTTPServer(port=8000) as httpserver: - # Define the response - httpserver.expect_request("/api/v1/tenants/default_tenant").respond_with_data( - "default_tenant" - ) - httpserver.expect_request( - "/api/v1/databases/default_database?tenant=default_tenant" - ).respond_with_data(json.dumps({"version": "0.0.1"})) - httpserver.expect_request("/api/v1/collections").respond_with_data( - json.dumps( - { - "name": "x", - "id": "4ca8f010-b535-4778-9262-c6f3812e17b6", - "metadata": None, - "tenant": "default_tenant", - "database": "default_database", - } - ) + _instrument_http_server(httpserver) + col = http_api.create_collection("test") + col.add(ids=["1"], documents=["test"]) + _pool_manager = http_api._server._session.get_adapter("http://").poolmanager # type: ignore + assert len(_pool_manager.pools._container) > 0 + http_api.close() + assert len(_pool_manager.pools._container) == 0 + + +def test_http_client_double_close(http_api: ClientAPI) -> None: + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY") == "1": + pytest.skip( + "Skipping test that closes the persistent client in integration test" ) - httpserver.expect_request("/api/v1/pre-flight-checks").respond_with_data( - json.dumps( - { - "max_batch_size": 10000, - } - ) + with HTTPServer(port=8000) as httpserver: + _instrument_http_server(httpserver) + http_api.heartbeat() + _pool_manager = http_api._server._session.get_adapter("http://").poolmanager # type: ignore + assert len(_pool_manager.pools._container) > 0 + http_api.close() + assert len(_pool_manager.pools._container) == 0 + + +def test_http_client_use_after_close(http_api: ClientAPI) -> None: + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY") == "1": + pytest.skip( + "Skipping test that closes the persistent client in integration test" ) - httpserver.expect_request( - "/api/v1/collections/4ca8f010-b535-4778-9262-c6f3812e17b6/add" - ).respond_with_data(json.dumps({})) + with HTTPServer(port=8000) as httpserver: + _instrument_http_server(httpserver) + http_api.heartbeat() col = http_api.create_collection("test") col.add(ids=["1"], documents=["test"]) _pool_manager = http_api._server._session.get_adapter("http://").poolmanager # type: ignore assert len(_pool_manager.pools._container) > 0 http_api.close() assert len(_pool_manager.pools._container) == 0 + http_api.heartbeat() + assert len(_pool_manager.pools._container) > 0 + # with pytest.raises(RuntimeError,match="Component not running"): + # http_api.heartbeat() + # with pytest.raises(RuntimeError,match="Component not running"): + # col.add(ids=["1"], documents=["test"]) + # with pytest.raises(RuntimeError,match="Component not running"): + # col.delete(ids=["1"]) + # with pytest.raises(RuntimeError,match="Component not running"): + # col.update(ids=["1"], documents=["test1231"]) + # with pytest.raises(RuntimeError,match="Component not running"): + # col.upsert(ids=["1"], documents=["test1231"]) + # with pytest.raises(RuntimeError,match="Component not running"): + # col.count() + # with pytest.raises(RuntimeError,match="Component not running"): + # http_api.create_collection("test1") + # with pytest.raises(RuntimeError,match="Component not running"): + # http_api.get_collection("test") + # with pytest.raises(RuntimeError,match="Component not running"): + # http_api.get_or_create_collection("test") + # with pytest.raises(RuntimeError,match="Component not running"): + # http_api.list_collections() + # with pytest.raises(RuntimeError,match="Component not running"): + # http_api.delete_collection("test") + # with pytest.raises(RuntimeError,match="Component not running"): + # http_api.count_collections() + # with pytest.raises(RuntimeError,match="Component not running"): + # http_api.heartbeat() From 0e35813302149ca2146bef37b971c00a3f4f17dc Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 21 Mar 2024 14:01:16 +0100 Subject: [PATCH 08/27] test: Update fastapi for raising error after close + tests --- chromadb/api/fastapi.py | 34 +++++++++++++++++ chromadb/api/segment.py | 73 ++++++++++++++---------------------- chromadb/test/test_client.py | 58 ++++++++++++++-------------- 3 files changed, 93 insertions(+), 72 deletions(-) diff --git a/chromadb/api/fastapi.py b/chromadb/api/fastapi.py index d01528b840c..f3b3c81d175 100644 --- a/chromadb/api/fastapi.py +++ b/chromadb/api/fastapi.py @@ -51,6 +51,10 @@ class FastAPI(ServerAPI): _settings: Settings _max_batch_size: int = -1 + @override + def start(self) -> None: + super().start() + @staticmethod def _validate_host(host: str) -> None: parsed = urlparse(host) @@ -141,10 +145,15 @@ def __init__(self, system: System): if self._settings.chroma_server_ssl_verify is not None: self._session.verify = self._settings.chroma_server_ssl_verify + def _raise_for_running(self) -> None: + if not self._running: + raise RuntimeError("Component not running or already closed") + @trace_method("FastAPI.heartbeat", OpenTelemetryGranularity.OPERATION) @override def heartbeat(self) -> int: """Returns the current server time in nanoseconds to check if the server is alive""" + self._raise_for_running() resp = self._session.get(self._api_url) raise_chroma_error(resp) return int(json.loads(resp.text)["nanosecond heartbeat"]) @@ -157,6 +166,7 @@ def create_database( tenant: str = DEFAULT_TENANT, ) -> None: """Creates a database""" + self._raise_for_running() resp = self._session.post( self._api_url + "/databases", data=json.dumps({"name": name}), @@ -172,6 +182,7 @@ def get_database( tenant: str = DEFAULT_TENANT, ) -> Database: """Returns a database""" + self._raise_for_running() resp = self._session.get( self._api_url + "/databases/" + name, params={"tenant": tenant}, @@ -185,6 +196,7 @@ def get_database( @trace_method("FastAPI.create_tenant", OpenTelemetryGranularity.OPERATION) @override def create_tenant(self, name: str) -> None: + self._raise_for_running() resp = self._session.post( self._api_url + "/tenants", data=json.dumps({"name": name}), @@ -194,6 +206,7 @@ def create_tenant(self, name: str) -> None: @trace_method("FastAPI.get_tenant", OpenTelemetryGranularity.OPERATION) @override def get_tenant(self, name: str) -> Tenant: + self._raise_for_running() resp = self._session.get( self._api_url + "/tenants/" + name, ) @@ -211,6 +224,7 @@ def list_collections( database: str = DEFAULT_DATABASE, ) -> Sequence[Collection]: """Returns a list of all collections""" + self._raise_for_running() resp = self._session.get( self._api_url + "/collections", params={ @@ -234,6 +248,7 @@ def count_collections( self, tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE ) -> int: """Returns a count of collections""" + self._raise_for_running() resp = self._session.get( self._api_url + "/count_collections", params={"tenant": tenant, "database": database}, @@ -256,6 +271,7 @@ def create_collection( database: str = DEFAULT_DATABASE, ) -> Collection: """Creates a collection""" + self._raise_for_running() resp = self._session.post( self._api_url + "/collections", data=json.dumps( @@ -292,6 +308,7 @@ def get_collection( database: str = DEFAULT_DATABASE, ) -> Collection: """Returns a collection""" + self._raise_for_running() if (name is None and id is None) or (name is not None and id is not None): raise ValueError("Name or id must be specified, but not both") @@ -327,6 +344,7 @@ def get_or_create_collection( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Collection: + self._raise_for_running() return cast( Collection, self.create_collection( @@ -349,6 +367,7 @@ def _modify( new_metadata: Optional[CollectionMetadata] = None, ) -> None: """Updates a collection""" + self._raise_for_running() resp = self._session.put( self._api_url + "/collections/" + str(id), data=json.dumps({"new_metadata": new_metadata, "new_name": new_name}), @@ -364,6 +383,7 @@ def delete_collection( database: str = DEFAULT_DATABASE, ) -> None: """Deletes a collection""" + self._raise_for_running() resp = self._session.delete( self._api_url + "/collections/" + name, params={"tenant": tenant, "database": database}, @@ -377,6 +397,7 @@ def _count( collection_id: UUID, ) -> int: """Returns the number of embeddings in the database""" + self._raise_for_running() resp = self._session.get( self._api_url + "/collections/" + str(collection_id) + "/count" ) @@ -390,6 +411,7 @@ def _peek( collection_id: UUID, n: int = 10, ) -> GetResult: + self._raise_for_running() return cast( GetResult, self._get( @@ -414,6 +436,7 @@ def _get( where_document: Optional[WhereDocument] = {}, include: Include = ["metadatas", "documents"], ) -> GetResult: + self._raise_for_running() if page and page_size: offset = (page - 1) * page_size limit = page_size @@ -454,6 +477,7 @@ def _delete( where_document: Optional[WhereDocument] = {}, ) -> IDs: """Deletes embeddings from the database""" + self._raise_for_running() resp = self._session.post( self._api_url + "/collections/" + str(collection_id) + "/delete", data=json.dumps( @@ -479,6 +503,7 @@ def _submit_batch( """ Submits a batch of embeddings to the database """ + self._raise_for_running() resp = self._session.post( self._api_url + url, data=json.dumps( @@ -508,6 +533,7 @@ def _add( Adds a batch of embeddings to the database - pass in column oriented data lists """ + self._raise_for_running() batch = (ids, embeddings, metadatas, documents, uris) validate_batch(batch, {"max_batch_size": self.max_batch_size}) resp = self._submit_batch(batch, "/collections/" + str(collection_id) + "/add") @@ -529,6 +555,7 @@ def _update( Updates a batch of embeddings in the database - pass in column oriented data lists """ + self._raise_for_running() batch = (ids, embeddings, metadatas, documents, uris) validate_batch(batch, {"max_batch_size": self.max_batch_size}) resp = self._submit_batch( @@ -552,6 +579,7 @@ def _upsert( Upserts a batch of embeddings in the database - pass in column oriented data lists """ + self._raise_for_running() batch = (ids, embeddings, metadatas, documents, uris) validate_batch(batch, {"max_batch_size": self.max_batch_size}) resp = self._submit_batch( @@ -572,6 +600,7 @@ def _query( include: Include = ["metadatas", "documents", "distances"], ) -> QueryResult: """Gets the nearest neighbors of a single embedding""" + self._raise_for_running() resp = self._session.post( self._api_url + "/collections/" + str(collection_id) + "/query", data=json.dumps( @@ -602,6 +631,7 @@ def _query( @override def reset(self) -> bool: """Resets the database""" + self._raise_for_running() resp = self._session.post(self._api_url + "/reset") raise_chroma_error(resp) return cast(bool, json.loads(resp.text)) @@ -610,6 +640,7 @@ def reset(self) -> bool: @override def get_version(self) -> str: """Returns the version of the server""" + self._raise_for_running() resp = self._session.get(self._api_url + "/version") raise_chroma_error(resp) return cast(str, json.loads(resp.text)) @@ -623,6 +654,7 @@ def get_settings(self) -> Settings: @trace_method("FastAPI.max_batch_size", OpenTelemetryGranularity.OPERATION) @override def max_batch_size(self) -> int: + self._raise_for_running() if self._max_batch_size == -1: resp = self._session.get(self._api_url + "/pre-flight-checks") raise_chroma_error(resp) @@ -632,7 +664,9 @@ def max_batch_size(self) -> int: @trace_method("FastAPI.close", OpenTelemetryGranularity.OPERATION) @override def close(self) -> None: + self._raise_for_running() self._session.close() + self._system.stop() def raise_chroma_error(resp: requests.Response) -> None: diff --git a/chromadb/api/segment.py b/chromadb/api/segment.py index f70a9378ed9..23d23c5e17e 100644 --- a/chromadb/api/segment.py +++ b/chromadb/api/segment.py @@ -109,21 +109,22 @@ def __init__(self, system: System): self._producer = self.require(Producer) self._collection_cache = {} + def _raise_for_running(self) -> None: + self._raise_for_running() + @override def start(self) -> None: super().start() @override def heartbeat(self) -> int: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() return int(time.time_ns()) @trace_method("SegmentAPI.create_database", OpenTelemetryGranularity.OPERATION) @override def create_database(self, name: str, tenant: str = DEFAULT_TENANT) -> None: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() if len(name) < 3: raise ValueError("Database name must be at least 3 characters long") @@ -132,28 +133,28 @@ def create_database(self, name: str, tenant: str = DEFAULT_TENANT) -> None: name=name, tenant=tenant, ) + @trace_method("SegmentAPI.get_database", OpenTelemetryGranularity.OPERATION) @override def get_database(self, name: str, tenant: str = DEFAULT_TENANT) -> t.Database: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() return self._sysdb.get_database(name=name, tenant=tenant) + @trace_method("SegmentAPI.create_tenant", OpenTelemetryGranularity.OPERATION) @override def create_tenant(self, name: str) -> None: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() if len(name) < 3: raise ValueError("Tenant name must be at least 3 characters long") self._sysdb.create_tenant( name=name, ) + @trace_method("SegmentAPI.get_tenant", OpenTelemetryGranularity.OPERATION) @override def get_tenant(self, name: str) -> t.Tenant: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() return self._sysdb.get_tenant(name=name) # TODO: Actually fix CollectionMetadata type to remove type: ignore flags. This is @@ -173,8 +174,7 @@ def create_collection( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Collection: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() if metadata is not None: validate_metadata(metadata) @@ -236,8 +236,7 @@ def get_or_create_collection( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Collection: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() return self.create_collection( # type: ignore name=name, metadata=metadata, @@ -264,8 +263,7 @@ def get_collection( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Collection: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() if id is None and name is None or (id is not None and name is not None): raise ValueError("Name or id must be specified, but not both") existing = self._sysdb.get_collections( @@ -295,8 +293,7 @@ def list_collections( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Sequence[Collection]: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() collections = [] db_collections = self._sysdb.get_collections( limit=limit, offset=offset, tenant=tenant, database=database @@ -321,8 +318,7 @@ def count_collections( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> int: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() collection_count = len( self._sysdb.get_collections(tenant=tenant, database=database) ) @@ -337,8 +333,7 @@ def _modify( new_name: Optional[str] = None, new_metadata: Optional[CollectionMetadata] = None, ) -> None: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() if new_name: # backwards compatibility in naming requirements (for now) check_index_name(new_name) @@ -363,8 +358,7 @@ def delete_collection( tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> None: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() existing = self._sysdb.get_collections( name=name, tenant=tenant, database=database ) @@ -392,8 +386,7 @@ def _add( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() self._quota.static_check(metadatas, documents, embeddings, str(collection_id)) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.ADD) @@ -437,8 +430,7 @@ def _update( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() self._quota.static_check(metadatas, documents, embeddings, str(collection_id)) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.UPDATE) @@ -484,8 +476,7 @@ def _upsert( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() self._quota.static_check(metadatas, documents, embeddings, str(collection_id)) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.UPSERT) @@ -525,8 +516,7 @@ def _get( where_document: Optional[WhereDocument] = {}, include: Include = ["embeddings", "metadatas", "documents"], ) -> GetResult: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() add_attributes_to_current_span( { "collection_id": str(collection_id), @@ -621,8 +611,7 @@ def _delete( where: Optional[Where] = None, where_document: Optional[WhereDocument] = None, ) -> IDs: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() add_attributes_to_current_span( { "collection_id": str(collection_id), @@ -688,8 +677,7 @@ def _delete( @trace_method("SegmentAPI._count", OpenTelemetryGranularity.OPERATION) @override def _count(self, collection_id: UUID) -> int: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() add_attributes_to_current_span({"collection_id": str(collection_id)}) metadata_segment = self._manager.get_segment(collection_id, MetadataReader) return metadata_segment.count() @@ -706,8 +694,7 @@ def _query( where_document: WhereDocument = {}, include: Include = ["documents", "metadatas", "distances"], ) -> QueryResult: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() add_attributes_to_current_span( { "collection_id": str(collection_id), @@ -824,14 +811,12 @@ def get_version(self) -> str: @override def reset_state(self) -> None: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() self._collection_cache = {} @override def reset(self) -> bool: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() self._system.reset_state() return True @@ -842,8 +827,7 @@ def get_settings(self) -> Settings: @property @override def max_batch_size(self) -> int: - if not self._running: - raise RuntimeError("Component not running or already closed") + self._raise_for_running() return self._producer.max_batch_size # TODO: This could potentially cause race conditions in a distributed version of the @@ -893,6 +877,7 @@ def _get_collection(self, collection_id: UUID) -> t.Collection: @trace_method("SegmentAPI.close", OpenTelemetryGranularity.ALL) @override def close(self) -> None: + self._raise_for_running() self._system.stop() diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index b0b4e66ad4a..6365111c08e 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -296,6 +296,10 @@ def test_http_client_double_close(http_api: ClientAPI) -> None: assert len(_pool_manager.pools._container) > 0 http_api.close() assert len(_pool_manager.pools._container) == 0 + with pytest.raises( + RuntimeError, match="Component not running or already closed" + ): + http_api.close() def test_http_client_use_after_close(http_api: ClientAPI) -> None: @@ -312,31 +316,29 @@ def test_http_client_use_after_close(http_api: ClientAPI) -> None: assert len(_pool_manager.pools._container) > 0 http_api.close() assert len(_pool_manager.pools._container) == 0 - http_api.heartbeat() - assert len(_pool_manager.pools._container) > 0 - # with pytest.raises(RuntimeError,match="Component not running"): - # http_api.heartbeat() - # with pytest.raises(RuntimeError,match="Component not running"): - # col.add(ids=["1"], documents=["test"]) - # with pytest.raises(RuntimeError,match="Component not running"): - # col.delete(ids=["1"]) - # with pytest.raises(RuntimeError,match="Component not running"): - # col.update(ids=["1"], documents=["test1231"]) - # with pytest.raises(RuntimeError,match="Component not running"): - # col.upsert(ids=["1"], documents=["test1231"]) - # with pytest.raises(RuntimeError,match="Component not running"): - # col.count() - # with pytest.raises(RuntimeError,match="Component not running"): - # http_api.create_collection("test1") - # with pytest.raises(RuntimeError,match="Component not running"): - # http_api.get_collection("test") - # with pytest.raises(RuntimeError,match="Component not running"): - # http_api.get_or_create_collection("test") - # with pytest.raises(RuntimeError,match="Component not running"): - # http_api.list_collections() - # with pytest.raises(RuntimeError,match="Component not running"): - # http_api.delete_collection("test") - # with pytest.raises(RuntimeError,match="Component not running"): - # http_api.count_collections() - # with pytest.raises(RuntimeError,match="Component not running"): - # http_api.heartbeat() + with pytest.raises(RuntimeError, match="Component not running"): + http_api.heartbeat() + with pytest.raises(RuntimeError, match="Component not running"): + col.add(ids=["1"], documents=["test"]) + with pytest.raises(RuntimeError, match="Component not running"): + col.delete(ids=["1"]) + with pytest.raises(RuntimeError, match="Component not running"): + col.update(ids=["1"], documents=["test1231"]) + with pytest.raises(RuntimeError, match="Component not running"): + col.upsert(ids=["1"], documents=["test1231"]) + with pytest.raises(RuntimeError, match="Component not running"): + col.count() + with pytest.raises(RuntimeError, match="Component not running"): + http_api.create_collection("test1") + with pytest.raises(RuntimeError, match="Component not running"): + http_api.get_collection("test") + with pytest.raises(RuntimeError, match="Component not running"): + http_api.get_or_create_collection("test") + with pytest.raises(RuntimeError, match="Component not running"): + http_api.list_collections() + with pytest.raises(RuntimeError, match="Component not running"): + http_api.delete_collection("test") + with pytest.raises(RuntimeError, match="Component not running"): + http_api.count_collections() + with pytest.raises(RuntimeError, match="Component not running"): + http_api.heartbeat() From 4837dc305b9095d7f312faefa3cc7f9bf967adfe Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 21 Mar 2024 14:14:54 +0100 Subject: [PATCH 09/27] fix: Fixed a recursion and a failing test for double close --- chromadb/api/segment.py | 3 ++- chromadb/test/test_client.py | 17 ++--------------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/chromadb/api/segment.py b/chromadb/api/segment.py index 23d23c5e17e..7edd72b8f12 100644 --- a/chromadb/api/segment.py +++ b/chromadb/api/segment.py @@ -110,7 +110,8 @@ def __init__(self, system: System): self._collection_cache = {} def _raise_for_running(self) -> None: - self._raise_for_running() + if not self._running: + raise RuntimeError("Component not running or already closed") @override def start(self) -> None: diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 6365111c08e..04b589c1b18 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -93,7 +93,6 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: col.add(ids=["1"], documents=["test"]) col1.add(ids=["1"], documents=["test1"]) open_files = current_process.open_files() - print("OPEN FILES BEFORE", open_files) assert any( [ re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is not None @@ -108,7 +107,6 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: ) persistent_api.close() open_files = current_process.open_files() - print("OPEN FILES AFTER", open_files) assert all( [ re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is None @@ -159,19 +157,8 @@ def test_persistent_client_double_close(persistent_api: ClientAPI) -> None: for file in open_files ] ) - persistent_api.close() - assert all( - [ - re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is None - for file in open_files - ] - ) - assert all( - [ - re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is None - for file in open_files - ] - ) + with pytest.raises(RuntimeError, match="Component not running or already closed"): + persistent_api.close() def test_persistent_client_use_after_close(persistent_api: ClientAPI) -> None: From 4dd37aa7f84b7dc5e21c6b5ea515118d9eb444db Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 21 Mar 2024 14:48:10 +0100 Subject: [PATCH 10/27] fix: Added api component start in tests --- chromadb/test/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index 8a8cd979072..3cb2c2b0506 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -292,6 +292,7 @@ def basic_http_client() -> Generator[System, None, None]: ) system = System(settings) api = system.instance(ServerAPI) + api.start() _await_server(api) system.start() yield system From 8ef16a0b13c49b7a51a4bc5bbba0a1ee9aee6265 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 21 Mar 2024 15:27:22 +0100 Subject: [PATCH 11/27] fix: Added open file filtering to prevent failing test due to multiple persistent clients. --- chromadb/test/test_client.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 04b589c1b18..55b75801d05 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -93,32 +93,33 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: col.add(ids=["1"], documents=["test"]) col1.add(ids=["1"], documents=["test1"]) open_files = current_process.open_files() + filtered_open_files = [ + file + for file in open_files + if re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) + or re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) + ] assert any( [ re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is not None - for file in open_files + for file in filtered_open_files ] ) assert any( [ re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is not None - for file in open_files + for file in filtered_open_files ] ) persistent_api.close() open_files = current_process.open_files() - assert all( - [ - re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is None - for file in open_files - ] - ) - assert all( - [ - re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is None - for file in open_files - ] - ) + post_filtered_open_files = [ + file + for file in open_files + if re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) + or re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) + ] + assert len(post_filtered_open_files) == 0 def test_persistent_client_double_close(persistent_api: ClientAPI) -> None: From c208914949f13cd9be78f342954d5f6a993c5593 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 21 Mar 2024 15:29:16 +0100 Subject: [PATCH 12/27] fix: Simplified assertions --- chromadb/test/test_client.py | 51 +++++++++++------------------------- 1 file changed, 15 insertions(+), 36 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 55b75801d05..8da10006024 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -99,18 +99,7 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: if re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) or re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) ] - assert any( - [ - re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is not None - for file in filtered_open_files - ] - ) - assert any( - [ - re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is not None - for file in filtered_open_files - ] - ) + assert len(filtered_open_files) > 0 persistent_api.close() open_files = current_process.open_files() post_filtered_open_files = [ @@ -132,32 +121,22 @@ def test_persistent_client_double_close(persistent_api: ClientAPI) -> None: temp_persist_dir = persistent_api.get_settings().persist_directory col.add(ids=["1"], documents=["test"]) open_files = current_process.open_files() - assert any( - [ - re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is not None - for file in open_files - ] - ) - assert any( - [ - re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is not None - for file in open_files - ] - ) + filtered_open_files = [ + file + for file in open_files + if re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) + or re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) + ] + assert len(filtered_open_files) > 0 persistent_api.close() open_files = current_process.open_files() - assert all( - [ - re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is None - for file in open_files - ] - ) - assert all( - [ - re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is None - for file in open_files - ] - ) + post_filtered_open_files = [ + file + for file in open_files + if re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) + or re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) + ] + assert len(post_filtered_open_files) == 0 with pytest.raises(RuntimeError, match="Component not running or already closed"): persistent_api.close() From 53c0b9642e7ebd13309b0c06be5edcd4c54455e7 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 21 Mar 2024 16:18:15 +0100 Subject: [PATCH 13/27] fix: Added reset --- chromadb/test/test_client.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 8da10006024..513908eb4a5 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -27,6 +27,9 @@ def ephemeral_api() -> Generator[ClientAPI, None, None]: def persistent_api() -> Generator[ClientAPI, None, None]: client = chromadb.PersistentClient( path=tempfile.gettempdir() + "/test_server", + settings=Settings( + allow_reset=True, + ), ) yield client client.clear_system_cache() @@ -86,6 +89,7 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: pytest.skip( "Skipping test that closes the persistent client in integration test" ) + persistent_api.reset() current_process = psutil.Process() col = persistent_api.create_collection("test") temp_persist_dir = persistent_api.get_settings().persist_directory @@ -116,6 +120,7 @@ def test_persistent_client_double_close(persistent_api: ClientAPI) -> None: pytest.skip( "Skipping test that closes the persistent client in integration test" ) + persistent_api.reset() current_process = psutil.Process() col = persistent_api.create_collection("test") temp_persist_dir = persistent_api.get_settings().persist_directory @@ -146,6 +151,7 @@ def test_persistent_client_use_after_close(persistent_api: ClientAPI) -> None: pytest.skip( "Skipping test that closes the persistent client in integration test" ) + persistent_api.reset() current_process = psutil.Process() col = persistent_api.create_collection("test") temp_persist_dir = persistent_api.get_settings().persist_directory From a26fb03b36784c086c67741316222275ea28dfc1 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 21 Mar 2024 16:36:13 +0100 Subject: [PATCH 14/27] fix: Added a fix for Windows paths escape --- chromadb/test/test_client.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 513908eb4a5..871b504f2d8 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -92,7 +92,9 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: persistent_api.reset() current_process = psutil.Process() col = persistent_api.create_collection("test") - temp_persist_dir = persistent_api.get_settings().persist_directory + temp_persist_dir = persistent_api.get_settings().persist_directory.replace( + "\\", "\\\\" + ) col1 = persistent_api.create_collection("test1") col.add(ids=["1"], documents=["test"]) col1.add(ids=["1"], documents=["test1"]) @@ -123,7 +125,9 @@ def test_persistent_client_double_close(persistent_api: ClientAPI) -> None: persistent_api.reset() current_process = psutil.Process() col = persistent_api.create_collection("test") - temp_persist_dir = persistent_api.get_settings().persist_directory + temp_persist_dir = persistent_api.get_settings().persist_directory.replace( + "\\", "\\\\" + ) col.add(ids=["1"], documents=["test"]) open_files = current_process.open_files() filtered_open_files = [ @@ -154,7 +158,9 @@ def test_persistent_client_use_after_close(persistent_api: ClientAPI) -> None: persistent_api.reset() current_process = psutil.Process() col = persistent_api.create_collection("test") - temp_persist_dir = persistent_api.get_settings().persist_directory + temp_persist_dir = persistent_api.get_settings().persist_directory.replace( + "\\", "\\\\" + ) col.add(ids=["1"], documents=["test"]) open_files = current_process.open_files() assert any( From 7dd6683305ad28601565912b5f84c6caf9e84a46 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 21 Mar 2024 23:18:49 +0100 Subject: [PATCH 15/27] fix: Added debug for windows failures --- chromadb/test/test_client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 871b504f2d8..0f8ab1c706f 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -99,6 +99,7 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: col.add(ids=["1"], documents=["test"]) col1.add(ids=["1"], documents=["test1"]) open_files = current_process.open_files() + print("OPEN FILES", open_files) filtered_open_files = [ file for file in open_files From 1114eaa96e03c2b4f793837590bedfb8afe320b8 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Fri, 22 Mar 2024 00:01:00 +0100 Subject: [PATCH 16/27] fix: Added debug for windows failures --- chromadb/test/test_client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 0f8ab1c706f..2ca04ac022a 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -106,6 +106,7 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: if re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) or re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) ] + print("FILTERED OPEN FILES", filtered_open_files) assert len(filtered_open_files) > 0 persistent_api.close() open_files = current_process.open_files() From fcc06ab20bc7acb64b239b94da5cba0d4cf24894 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Fri, 22 Mar 2024 01:24:18 +0100 Subject: [PATCH 17/27] fix: Create persistent client for each test (no fixture) --- chromadb/test/test_client.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 2ca04ac022a..c3d5f8995a6 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -3,6 +3,7 @@ import re import shutil import time +import uuid from typing import Generator from unittest.mock import patch from pytest_httpserver import HTTPServer @@ -84,12 +85,16 @@ def test_http_client_with_inconsistent_port_settings() -> None: ) -def test_persistent_client_close(persistent_api: ClientAPI) -> None: +def test_persistent_client_close() -> None: if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY") == "1": pytest.skip( "Skipping test that closes the persistent client in integration test" ) - persistent_api.reset() + persistent_api = chromadb.PersistentClient( + path=tempfile.gettempdir() + "/test_server-"+uuid.uuid4().hex, + settings=Settings( + ), + ) current_process = psutil.Process() col = persistent_api.create_collection("test") temp_persist_dir = persistent_api.get_settings().persist_directory.replace( @@ -119,12 +124,15 @@ def test_persistent_client_close(persistent_api: ClientAPI) -> None: assert len(post_filtered_open_files) == 0 -def test_persistent_client_double_close(persistent_api: ClientAPI) -> None: +def test_persistent_client_double_close() -> None: if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY") == "1": pytest.skip( "Skipping test that closes the persistent client in integration test" ) - persistent_api.reset() + persistent_api = chromadb.PersistentClient( + path=tempfile.gettempdir() + "/test_server-"+uuid.uuid4().hex, + settings=Settings(), + ) current_process = psutil.Process() col = persistent_api.create_collection("test") temp_persist_dir = persistent_api.get_settings().persist_directory.replace( @@ -152,12 +160,15 @@ def test_persistent_client_double_close(persistent_api: ClientAPI) -> None: persistent_api.close() -def test_persistent_client_use_after_close(persistent_api: ClientAPI) -> None: +def test_persistent_client_use_after_close() -> None: if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY") == "1": pytest.skip( "Skipping test that closes the persistent client in integration test" ) - persistent_api.reset() + persistent_api = chromadb.PersistentClient( + path=tempfile.gettempdir() + "/test_server-"+uuid.uuid4().hex, + settings=Settings(), + ) current_process = psutil.Process() col = persistent_api.create_collection("test") temp_persist_dir = persistent_api.get_settings().persist_directory.replace( From 2bc23ef9d6afec535e2832f0a22d56c975186242 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Fri, 22 Mar 2024 09:23:52 +0100 Subject: [PATCH 18/27] fix: Simplifying regex in test --- chromadb/test/test_client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index c3d5f8995a6..d29b253c83c 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -108,8 +108,7 @@ def test_persistent_client_close() -> None: filtered_open_files = [ file for file in open_files - if re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) - or re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) + if re.search(temp_persist_dir, file.path) ] print("FILTERED OPEN FILES", filtered_open_files) assert len(filtered_open_files) > 0 From f87b92c118b7092bc2369f76dfb833b06c8777a1 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Tue, 26 Mar 2024 10:58:03 +0200 Subject: [PATCH 19/27] fix: Changing the regexp match --- chromadb/test/test_client.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index d29b253c83c..63bcf57b21d 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -91,9 +91,8 @@ def test_persistent_client_close() -> None: "Skipping test that closes the persistent client in integration test" ) persistent_api = chromadb.PersistentClient( - path=tempfile.gettempdir() + "/test_server-"+uuid.uuid4().hex, - settings=Settings( - ), + path=tempfile.gettempdir() + "/test_server-" + uuid.uuid4().hex, + settings=Settings(), ) current_process = psutil.Process() col = persistent_api.create_collection("test") @@ -106,9 +105,7 @@ def test_persistent_client_close() -> None: open_files = current_process.open_files() print("OPEN FILES", open_files) filtered_open_files = [ - file - for file in open_files - if re.search(temp_persist_dir, file.path) + file for file in open_files if re.search(temp_persist_dir, file.path) ] print("FILTERED OPEN FILES", filtered_open_files) assert len(filtered_open_files) > 0 @@ -117,8 +114,8 @@ def test_persistent_client_close() -> None: post_filtered_open_files = [ file for file in open_files - if re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) - or re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) + if re.search(temp_persist_dir + ".*chroma.sqlite3", file.path) + or re.search(temp_persist_dir + ".*data_level0.bin", file.path) ] assert len(post_filtered_open_files) == 0 @@ -129,7 +126,7 @@ def test_persistent_client_double_close() -> None: "Skipping test that closes the persistent client in integration test" ) persistent_api = chromadb.PersistentClient( - path=tempfile.gettempdir() + "/test_server-"+uuid.uuid4().hex, + path=tempfile.gettempdir() + "/test_server-" + uuid.uuid4().hex, settings=Settings(), ) current_process = psutil.Process() @@ -142,8 +139,8 @@ def test_persistent_client_double_close() -> None: filtered_open_files = [ file for file in open_files - if re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) - or re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) + if re.search(temp_persist_dir + ".*chroma.sqlite3", file.path) + or re.search(temp_persist_dir + ".*data_level0.bin", file.path) ] assert len(filtered_open_files) > 0 persistent_api.close() @@ -165,7 +162,7 @@ def test_persistent_client_use_after_close() -> None: "Skipping test that closes the persistent client in integration test" ) persistent_api = chromadb.PersistentClient( - path=tempfile.gettempdir() + "/test_server-"+uuid.uuid4().hex, + path=tempfile.gettempdir() + "/test_server-" + uuid.uuid4().hex, settings=Settings(), ) current_process = psutil.Process() From cf1b0b8d64adaea522dd973a4209c53fbf17e6dc Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Tue, 26 Mar 2024 11:18:26 +0200 Subject: [PATCH 20/27] chore: Adding random collection name --- chromadb/test/test_client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 63bcf57b21d..e75dfb55f8f 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -99,7 +99,7 @@ def test_persistent_client_close() -> None: temp_persist_dir = persistent_api.get_settings().persist_directory.replace( "\\", "\\\\" ) - col1 = persistent_api.create_collection("test1") + col1 = persistent_api.create_collection("test1"+uuid.uuid4().hex) col.add(ids=["1"], documents=["test"]) col1.add(ids=["1"], documents=["test1"]) open_files = current_process.open_files() @@ -130,7 +130,7 @@ def test_persistent_client_double_close() -> None: settings=Settings(), ) current_process = psutil.Process() - col = persistent_api.create_collection("test") + col = persistent_api.create_collection("test"+uuid.uuid4().hex) temp_persist_dir = persistent_api.get_settings().persist_directory.replace( "\\", "\\\\" ) @@ -166,7 +166,7 @@ def test_persistent_client_use_after_close() -> None: settings=Settings(), ) current_process = psutil.Process() - col = persistent_api.create_collection("test") + col = persistent_api.create_collection("test"+uuid.uuid4().hex) temp_persist_dir = persistent_api.get_settings().persist_directory.replace( "\\", "\\\\" ) @@ -264,7 +264,7 @@ def test_http_client_close(http_api: ClientAPI) -> None: ) with HTTPServer(port=8000) as httpserver: _instrument_http_server(httpserver) - col = http_api.create_collection("test") + col = http_api.create_collection("test"+uuid.uuid4().hex) col.add(ids=["1"], documents=["test"]) _pool_manager = http_api._server._session.get_adapter("http://").poolmanager # type: ignore assert len(_pool_manager.pools._container) > 0 @@ -298,7 +298,7 @@ def test_http_client_use_after_close(http_api: ClientAPI) -> None: with HTTPServer(port=8000) as httpserver: _instrument_http_server(httpserver) http_api.heartbeat() - col = http_api.create_collection("test") + col = http_api.create_collection("test"+uuid.uuid4().hex) col.add(ids=["1"], documents=["test"]) _pool_manager = http_api._server._session.get_adapter("http://").poolmanager # type: ignore assert len(_pool_manager.pools._container) > 0 From cb31780ed1f112d37676040742c30eab43f4ced6 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Tue, 26 Mar 2024 11:18:31 +0200 Subject: [PATCH 21/27] chore: Adding random collection name --- chromadb/test/test_client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index e75dfb55f8f..c92e6faca9e 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -99,7 +99,7 @@ def test_persistent_client_close() -> None: temp_persist_dir = persistent_api.get_settings().persist_directory.replace( "\\", "\\\\" ) - col1 = persistent_api.create_collection("test1"+uuid.uuid4().hex) + col1 = persistent_api.create_collection("test1" + uuid.uuid4().hex) col.add(ids=["1"], documents=["test"]) col1.add(ids=["1"], documents=["test1"]) open_files = current_process.open_files() @@ -130,7 +130,7 @@ def test_persistent_client_double_close() -> None: settings=Settings(), ) current_process = psutil.Process() - col = persistent_api.create_collection("test"+uuid.uuid4().hex) + col = persistent_api.create_collection("test" + uuid.uuid4().hex) temp_persist_dir = persistent_api.get_settings().persist_directory.replace( "\\", "\\\\" ) @@ -166,7 +166,7 @@ def test_persistent_client_use_after_close() -> None: settings=Settings(), ) current_process = psutil.Process() - col = persistent_api.create_collection("test"+uuid.uuid4().hex) + col = persistent_api.create_collection("test" + uuid.uuid4().hex) temp_persist_dir = persistent_api.get_settings().persist_directory.replace( "\\", "\\\\" ) @@ -264,7 +264,7 @@ def test_http_client_close(http_api: ClientAPI) -> None: ) with HTTPServer(port=8000) as httpserver: _instrument_http_server(httpserver) - col = http_api.create_collection("test"+uuid.uuid4().hex) + col = http_api.create_collection("test" + uuid.uuid4().hex) col.add(ids=["1"], documents=["test"]) _pool_manager = http_api._server._session.get_adapter("http://").poolmanager # type: ignore assert len(_pool_manager.pools._container) > 0 @@ -298,7 +298,7 @@ def test_http_client_use_after_close(http_api: ClientAPI) -> None: with HTTPServer(port=8000) as httpserver: _instrument_http_server(httpserver) http_api.heartbeat() - col = http_api.create_collection("test"+uuid.uuid4().hex) + col = http_api.create_collection("test" + uuid.uuid4().hex) col.add(ids=["1"], documents=["test"]) _pool_manager = http_api._server._session.get_adapter("http://").poolmanager # type: ignore assert len(_pool_manager.pools._container) > 0 From d353f91d013b1ed3c8e6c68349fbc4298ad386b2 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Wed, 27 Mar 2024 18:18:21 +0200 Subject: [PATCH 22/27] test: Removing backslash escapes --- chromadb/test/test_client.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index c92e6faca9e..fce80d1b66a 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -96,9 +96,7 @@ def test_persistent_client_close() -> None: ) current_process = psutil.Process() col = persistent_api.create_collection("test") - temp_persist_dir = persistent_api.get_settings().persist_directory.replace( - "\\", "\\\\" - ) + temp_persist_dir = persistent_api.get_settings().persist_directory col1 = persistent_api.create_collection("test1" + uuid.uuid4().hex) col.add(ids=["1"], documents=["test"]) col1.add(ids=["1"], documents=["test1"]) @@ -131,9 +129,7 @@ def test_persistent_client_double_close() -> None: ) current_process = psutil.Process() col = persistent_api.create_collection("test" + uuid.uuid4().hex) - temp_persist_dir = persistent_api.get_settings().persist_directory.replace( - "\\", "\\\\" - ) + temp_persist_dir = persistent_api.get_settings().persist_directory col.add(ids=["1"], documents=["test"]) open_files = current_process.open_files() filtered_open_files = [ @@ -167,9 +163,7 @@ def test_persistent_client_use_after_close() -> None: ) current_process = psutil.Process() col = persistent_api.create_collection("test" + uuid.uuid4().hex) - temp_persist_dir = persistent_api.get_settings().persist_directory.replace( - "\\", "\\\\" - ) + temp_persist_dir = persistent_api.get_settings().persist_directory col.add(ids=["1"], documents=["test"]) open_files = current_process.open_files() assert any( From f9264ca597dceb145209bf47f607efcbf77189e7 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Wed, 27 Mar 2024 18:52:21 +0200 Subject: [PATCH 23/27] test: Regex on windows are the bane of my existence!!! --- chromadb/test/test_client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index fce80d1b66a..d032b79dcb7 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -103,7 +103,7 @@ def test_persistent_client_close() -> None: open_files = current_process.open_files() print("OPEN FILES", open_files) filtered_open_files = [ - file for file in open_files if re.search(temp_persist_dir, file.path) + file for file in open_files if re.search(re.escape(temp_persist_dir), file.path) ] print("FILTERED OPEN FILES", filtered_open_files) assert len(filtered_open_files) > 0 @@ -112,8 +112,8 @@ def test_persistent_client_close() -> None: post_filtered_open_files = [ file for file in open_files - if re.search(temp_persist_dir + ".*chroma.sqlite3", file.path) - or re.search(temp_persist_dir + ".*data_level0.bin", file.path) + if re.search(re.escape(temp_persist_dir + ".*chroma.sqlite3"), file.path) + or re.search(re.escape(temp_persist_dir + ".*data_level0.bin"), file.path) ] assert len(post_filtered_open_files) == 0 From b0b8b6d329921424b48f9bd9a188e0739faad21d Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Wed, 27 Mar 2024 19:19:50 +0200 Subject: [PATCH 24/27] test: Regex, why! --- chromadb/test/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index d032b79dcb7..2e3be52ab2a 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -112,8 +112,8 @@ def test_persistent_client_close() -> None: post_filtered_open_files = [ file for file in open_files - if re.search(re.escape(temp_persist_dir + ".*chroma.sqlite3"), file.path) - or re.search(re.escape(temp_persist_dir + ".*data_level0.bin"), file.path) + if re.search(re.escape(temp_persist_dir)+ ".*chroma.sqlite3", file.path) + or re.search(re.escape(temp_persist_dir)+ ".*data_level0.bin", file.path) ] assert len(post_filtered_open_files) == 0 From 628587aa15a8f447185a6adfd66e2ef6092cd4ff Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Wed, 27 Mar 2024 19:20:44 +0200 Subject: [PATCH 25/27] test: Regex, why! --- chromadb/test/test_client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index 2e3be52ab2a..cf6d7f12ac4 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -102,6 +102,7 @@ def test_persistent_client_close() -> None: col1.add(ids=["1"], documents=["test1"]) open_files = current_process.open_files() print("OPEN FILES", open_files) + print(re.escape(temp_persist_dir)) filtered_open_files = [ file for file in open_files if re.search(re.escape(temp_persist_dir), file.path) ] From 78dd080c9f5aaec911c615fe648708843fa6f8e0 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Wed, 27 Mar 2024 20:00:37 +0200 Subject: [PATCH 26/27] test: Weird persistent dir under windows --- chromadb/test/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index cf6d7f12ac4..af25f0beb09 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -91,7 +91,7 @@ def test_persistent_client_close() -> None: "Skipping test that closes the persistent client in integration test" ) persistent_api = chromadb.PersistentClient( - path=tempfile.gettempdir() + "/test_server-" + uuid.uuid4().hex, + path=os.path.join(tempfile.gettempdir(),"test_server-" + uuid.uuid4().hex), settings=Settings(), ) current_process = psutil.Process() From b57fb4406266724ae03eb2ca26372335978e6563 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Wed, 27 Mar 2024 20:21:19 +0200 Subject: [PATCH 27/27] test: Me - 1, Windows Paths - 0 --- chromadb/test/test_client.py | 56 +++++++++++++++--------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index af25f0beb09..1a2b4662db2 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -91,7 +91,7 @@ def test_persistent_client_close() -> None: "Skipping test that closes the persistent client in integration test" ) persistent_api = chromadb.PersistentClient( - path=os.path.join(tempfile.gettempdir(),"test_server-" + uuid.uuid4().hex), + path=os.path.join(tempfile.gettempdir(), "test_server-" + uuid.uuid4().hex), settings=Settings(), ) current_process = psutil.Process() @@ -113,8 +113,8 @@ def test_persistent_client_close() -> None: post_filtered_open_files = [ file for file in open_files - if re.search(re.escape(temp_persist_dir)+ ".*chroma.sqlite3", file.path) - or re.search(re.escape(temp_persist_dir)+ ".*data_level0.bin", file.path) + if re.search(re.escape(temp_persist_dir) + ".*chroma.sqlite3", file.path) + or re.search(re.escape(temp_persist_dir) + ".*data_level0.bin", file.path) ] assert len(post_filtered_open_files) == 0 @@ -125,7 +125,7 @@ def test_persistent_client_double_close() -> None: "Skipping test that closes the persistent client in integration test" ) persistent_api = chromadb.PersistentClient( - path=tempfile.gettempdir() + "/test_server-" + uuid.uuid4().hex, + path=os.path.join(tempfile.gettempdir(), "test_server-" + uuid.uuid4().hex), settings=Settings(), ) current_process = psutil.Process() @@ -136,8 +136,8 @@ def test_persistent_client_double_close() -> None: filtered_open_files = [ file for file in open_files - if re.search(temp_persist_dir + ".*chroma.sqlite3", file.path) - or re.search(temp_persist_dir + ".*data_level0.bin", file.path) + if re.search(re.escape(temp_persist_dir) + ".*chroma.sqlite3", file.path) + or re.search(re.escape(temp_persist_dir) + ".*data_level0.bin", file.path) ] assert len(filtered_open_files) > 0 persistent_api.close() @@ -145,8 +145,8 @@ def test_persistent_client_double_close() -> None: post_filtered_open_files = [ file for file in open_files - if re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) - or re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) + if re.search(re.escape(temp_persist_dir) + ".*chroma.sqlite3", file.path) + or re.search(re.escape(temp_persist_dir) + ".*data_level0.bin", file.path) ] assert len(post_filtered_open_files) == 0 with pytest.raises(RuntimeError, match="Component not running or already closed"): @@ -159,7 +159,7 @@ def test_persistent_client_use_after_close() -> None: "Skipping test that closes the persistent client in integration test" ) persistent_api = chromadb.PersistentClient( - path=tempfile.gettempdir() + "/test_server-" + uuid.uuid4().hex, + path=os.path.join(tempfile.gettempdir(), "test_server-" + uuid.uuid4().hex), settings=Settings(), ) current_process = psutil.Process() @@ -167,32 +167,22 @@ def test_persistent_client_use_after_close() -> None: temp_persist_dir = persistent_api.get_settings().persist_directory col.add(ids=["1"], documents=["test"]) open_files = current_process.open_files() - assert any( - [ - re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is not None - for file in open_files - ] - ) - assert any( - [ - re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is not None - for file in open_files - ] - ) + filtered_open_files = [ + file + for file in open_files + if re.search(re.escape(temp_persist_dir) + ".*chroma.sqlite3", file.path) + or re.search(re.escape(temp_persist_dir) + ".*data_level0.bin", file.path) + ] + assert len(filtered_open_files) > 0 persistent_api.close() open_files = current_process.open_files() - assert all( - [ - re.search(rf"{temp_persist_dir}.*chroma.sqlite3", file.path) is None - for file in open_files - ] - ) - assert all( - [ - re.search(rf"{temp_persist_dir}.*data_level0.bin", file.path) is None - for file in open_files - ] - ) + post_filtered_open_files = [ + file + for file in open_files + if re.search(re.escape(temp_persist_dir) + ".*chroma.sqlite3", file.path) + or re.search(re.escape(temp_persist_dir) + ".*data_level0.bin", file.path) + ] + assert len(post_filtered_open_files) == 0 with pytest.raises(RuntimeError, match="Component not running"): col.add(ids=["1"], documents=["test"]) with pytest.raises(RuntimeError, match="Component not running"):