diff --git a/chromadb/api/types.py b/chromadb/api/types.py index 0054f283e8d..347461718fd 100644 --- a/chromadb/api/types.py +++ b/chromadb/api/types.py @@ -20,7 +20,7 @@ # Re-export types from chromadb.types __all__ = ["Metadata", "Where", "WhereDocument", "UpdateCollectionMetadata"] - +META_KEY_CHROMA_DOCUMENT = "chroma:document" T = TypeVar("T") OneOrMany = Union[T, List[T]] @@ -265,6 +265,10 @@ def validate_metadata(metadata: Metadata) -> Metadata: if len(metadata) == 0: raise ValueError(f"Expected metadata to be a non-empty dict, got {metadata}") for key, value in metadata.items(): + if key == META_KEY_CHROMA_DOCUMENT: + raise ValueError( + f"Expected metadata to not contain the reserved key {META_KEY_CHROMA_DOCUMENT}" + ) if not isinstance(key, str): raise TypeError( f"Expected metadata key to be a str, got {key} which is a {type(key)}" @@ -476,7 +480,7 @@ def validate_embeddings(embeddings: Embeddings) -> Embeddings: raise ValueError( f"Expected each embedding in the embeddings to be a list, got {embeddings}" ) - for i,embedding in enumerate(embeddings): + for i, embedding in enumerate(embeddings): if len(embedding) == 0: raise ValueError( f"Expected each embedding in the embeddings to be a non-empty list, got empty embedding at pos {i}" diff --git a/chromadb/test/segment/test_metadata.py b/chromadb/test/segment/test_metadata.py index 1f03d6350f4..2126c6d1feb 100644 --- a/chromadb/test/segment/test_metadata.py +++ b/chromadb/test/segment/test_metadata.py @@ -3,6 +3,8 @@ import tempfile import pytest from typing import Generator, List, Callable, Iterator, Dict, Optional, Union, Sequence + +from chromadb.api.types import validate_metadata from chromadb.config import System, Settings from chromadb.db.base import ParameterValue, get_sql from chromadb.db.impl.sqlite import SqliteDB @@ -677,3 +679,10 @@ def test_delete_segment( res = cur.execute(sql, params) # assert that all FTS rows are gone assert len(res.fetchall()) == 0 + + +def test_metadata_validation_forbidden_key() -> None: + with pytest.raises(ValueError, match="chroma:document"): + validate_metadata( + {"chroma:document": "this is not the document you are looking for"} + )