Skip to content

Commit

Permalink
fix: de-duplicate IDs before inserting into ChromaDB (#1001)
Browse files: browse the repository at this point in the history
  • Loading branch information
sarahwooders authored Feb 14, 2024
1 parent 8d4f698 commit 3b76c63
Showing 1 changed file with 18 additions and 5 deletions.
23 changes: 18 additions & 5 deletions memgpt/agent_store/chroma.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -126,13 +126,26 @@ def get(self, id: uuid.UUID) -> Optional[RecordType]:

def format_records(self, records: List[RecordType]):
assert all([isinstance(r, Passage) for r in records])
recs = [cast(Passage, r) for r in records]
metadatas = []
ids = [str(record.id) for record in recs]
documents = [record.text for record in recs]
embeddings = [record.embedding for record in recs]

recs = []
ids = []
documents = []
embeddings = []

# de-duplication of ids
exist_ids = set()
for i in range(len(records)):
record = records[i]
if record.id in exist_ids:
continue
exist_ids.add(record.id)
recs.append(cast(Passage, record))
ids.append(str(record.id))
documents.append(record.text)
embeddings.append(record.embedding)

# collect/format record metadata
metadatas = []
for record in recs:
metadata = vars(record)
metadata.pop("id")
Expand Down

0 comments on commit 3b76c63

Please sign in to comment.