From ed29d6becb243f6d55764ea1297c05ea734a9132 Mon Sep 17 00:00:00 2001 From: MJDeligan <48515433+MJDeligan@users.noreply.github.com> Date: Thu, 21 Dec 2023 03:06:22 +0100 Subject: [PATCH] community[minor]: Adds optional IDs parameter to PGVectorStore add-* methods (#3692) * added delete method to pgvectorstore * added tests for pgvectorstore delete method * fix comments * Add example * add ids param to pgvector add methods * update doc comments * add test for id insertion * fix doc comments * Change options arg for consistency with base class * Change port to integration test default --------- Co-authored-by: jacoblee93 Co-authored-by: bracesproul --- .../src/vectorstores/pgvector.ts | 38 ++++++++++++++++--- .../tests/pgvector/pgvector.int.test.ts | 21 +++++++++- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/libs/langchain-community/src/vectorstores/pgvector.ts b/libs/langchain-community/src/vectorstores/pgvector.ts index 1b06d1a21048..58c7cb98cc2a 100644 --- a/libs/langchain-community/src/vectorstores/pgvector.ts +++ b/libs/langchain-community/src/vectorstores/pgvector.ts @@ -31,6 +31,7 @@ export interface PGVectorStoreArgs { * @default 500 */ chunkSize?: number; + ids?: string[]; } /** @@ -130,14 +131,19 @@ export class PGVectorStore extends VectorStore { * vectors, and adds them to the store. * * @param documents - Array of `Document` instances. + * @param options - Optional arguments for adding documents * @returns Promise that resolves when the documents have been added. */ - async addDocuments(documents: Document[]): Promise<void> { + async addDocuments( + documents: Document[], + options?: { ids?: string[] } + ): Promise<void> { const texts = documents.map(({ pageContent }) => pageContent); return this.addVectors( await this.embeddings.embedDocuments(texts), - documents + documents, + options ); } @@ -222,13 +228,18 @@ export class PGVectorStore extends VectorStore { columns.push("collection_id"); } + // Check if we have added ids to the rows. 
+ if (rows.length !== 0 && columns.length === rows[0].length - 1) { + columns.push(this.idColumnName); + } + const valuesPlaceholders = rows .map((_, j) => this.generatePlaceholderForRowAt(j, columns.length)) .join(", "); const text = ` INSERT INTO ${this.tableName}( - ${columns} + ${columns.map((column) => `"${column}"`).join(", ")} ) VALUES ${valuesPlaceholders} `; @@ -241,9 +252,23 @@ export class PGVectorStore extends VectorStore { * * @param vectors - Array of vectors. * @param documents - Array of `Document` instances. + * @param options - Optional arguments for adding documents * @returns Promise that resolves when the vectors have been added. */ - async addVectors(vectors: number[][], documents: Document[]): Promise<void> { + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] } + ): Promise<void> { + const ids = options?.ids; + + // Either all documents have ids or none of them do to avoid confusion. + if (ids !== undefined && ids.length !== vectors.length) { + throw new Error( + "The number of ids must match the number of vectors provided." 
+ ); + } + const rows = []; let collectionId; if (this.collectionTableName) { @@ -262,6 +287,9 @@ export class PGVectorStore extends VectorStore { if (collectionId) { values.push(collectionId); } + if (ids) { + values.push(ids[i]); + } rows.push(values); } @@ -512,7 +540,7 @@ export class PGVectorStore extends VectorStore { dbConfig: PGVectorStoreArgs ): Promise<PGVectorStore> { const instance = await PGVectorStore.initialize(embeddings, dbConfig); - await instance.addDocuments(docs); + await instance.addDocuments(docs, { ids: dbConfig.ids }); return instance; } diff --git a/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts index 66525863a5c8..9bacaecef641 100644 --- a/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts @@ -12,7 +12,7 @@ describe("PGVectorStore", () => { postgresConnectionOptions: { type: "postgres", host: "127.0.0.1", - port: 5432, + port: 5433, user: "myuser", password: "ChangeMe", database: "api", @@ -88,6 +88,25 @@ describe("PGVectorStore", () => { } }); + test("PGvector can save documents with ids", async () => { + const id1 = "d8e70e98-19ab-4438-9c14-4bb2bb21a1f9"; + const id2 = "2bbb4b73-efec-4d5e-80ea-df94a4ed3aa3"; + + const documents = [ + { pageContent: "Lorem Ipsum", metadata: { a: 1 } }, + { pageContent: "Lorem Ipsum", metadata: { a: 2 } }, + ]; + + await pgvectorVectorStore.addDocuments(documents, { ids: [id1, id2] }); + + const result = await pgvectorVectorStore.pool.query( + `SELECT id FROM "${tableName}" WHERE id = $1`, + [id1] + ); + + expect(result.rowCount).toEqual(1); + }); + test("PGvector can delete document by id", async () => { try { const documents = [