Skip to content

Commit

Permalink
feat: CIP-5: Large Batch Handling Improvements Proposal
Browse files Browse the repository at this point in the history
- Minor improvement suggested by @imartinez: pass the API object to the create_batches utility function instead of a raw max_batch_size value.

Refs: chroma-core#1049
  • Loading branch information
tazarov committed Sep 5, 2023
1 parent 4f3c6b5 commit ee4ae63
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 8 deletions.
2 changes: 1 addition & 1 deletion chromadb/test/property/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def test_add_large(api: API, collection: strategies.Collection) -> None:
with pytest.raises(Exception):
coll.add(**normalized_record_set)
return
- for batch in create_batches(api.max_batch_size, **record_set):
+ for batch in create_batches(api, **record_set):
coll.add(*batch)
invariants.count(coll, cast(strategies.RecordSet, normalized_record_set))

Expand Down
15 changes: 8 additions & 7 deletions chromadb/utils/batch_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Optional, Tuple, List

+ from chromadb.api import API
from chromadb.api.types import (
Documents,
Embeddings,
Expand All @@ -9,7 +10,7 @@


def create_batches(
- max_batch_size: int,
+ api: API,
ids: IDs,
embeddings: Optional[Embeddings] = None,
metadatas: Optional[Metadatas] = None,
Expand All @@ -18,15 +19,15 @@ def create_batches(
_batches: List[
Tuple[IDs, Embeddings, Optional[Metadatas], Optional[Documents]]
] = []
- if len(ids) > max_batch_size:
+ if len(ids) > api.max_batch_size:
# create split batches
- for i in range(0, len(ids), max_batch_size):
+ for i in range(0, len(ids), api.max_batch_size):
_batches.append(
( # type: ignore
- ids[i : i + max_batch_size],
- embeddings[i : i + max_batch_size] if embeddings else None,
- metadatas[i : i + max_batch_size] if metadatas else None,
- documents[i : i + max_batch_size] if documents else None,
+ ids[i : i + api.max_batch_size],
+ embeddings[i : i + api.max_batch_size] if embeddings else None,
+ metadatas[i : i + api.max_batch_size] if metadatas else None,
+ documents[i : i + api.max_batch_size] if documents else None,
)
)
else:
Expand Down

0 comments on commit ee4ae63

Please sign in to comment.