From c4e7e4265fa2b789e402509b52a7b9f706dd04c8 Mon Sep 17 00:00:00 2001 From: Tomaz Bratanic Date: Thu, 14 Dec 2023 09:52:53 +0100 Subject: [PATCH 1/2] Fix RRF normalization and lucene characters for neo4j vector --- .../src/vectorstores/neo4j_vector.ts | 36 ++++++++++++++- .../tests/neo4j_vector.int.test.ts | 46 ++++++++++++++++++- 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/libs/langchain-community/src/vectorstores/neo4j_vector.ts b/libs/langchain-community/src/vectorstores/neo4j_vector.ts index e35f24fde603..6b951ddf94b6 100644 --- a/libs/langchain-community/src/vectorstores/neo4j_vector.ts +++ b/libs/langchain-community/src/vectorstores/neo4j_vector.ts @@ -614,7 +614,7 @@ export class Neo4jVectorStore extends VectorStore { k: Number(k), embedding: vector, keyword_index: this.keywordIndexName, - query, + query: removeLuceneChars(query), }; const results = await this.query(readQuery, parameters); @@ -717,7 +717,10 @@ function getSearchIndexQuery(searchType: SearchType): string { hybrid: ` CALL { CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score - RETURN node, score UNION + WITH collect({node:node, score:score}) AS nodes, max(score) AS max + UNWIND nodes AS n + // We use 0 as min + RETURN n.node AS node, (n.score / max) AS score UNION CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) YIELD node, score WITH collect({node: node, score: score}) AS nodes, max(score) AS max UNWIND nodes AS n @@ -729,3 +732,32 @@ function getSearchIndexQuery(searchType: SearchType): string { return typeToQueryMap[searchType]; } + +function removeLuceneChars(text: string): string { + // Remove Lucene special characters + const specialChars = [ + "+", + "-", + "&", + "|", + "!", + "(", + ")", + "{", + "}", + "[", + "]", + "^", + '"', + "~", + "*", + "?", + ":", + "\\", + ]; + let modifiedText = text; + for (const char of specialChars) { + modifiedText = modifiedText.split(char).join(" "); + } + return modifiedText.trim(); +} diff --git a/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts index 7aed797d4cc3..1d74860b9a09 100644 --- a/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts @@ -5,7 +5,7 @@ import { Neo4jVectorStore } from "../neo4j_vector.js"; const OS_TOKEN_COUNT = 1536; -const texts = ["foo", "bar", "baz"]; +const texts = ["foo", "bar", "baz", "This is the end of the world!"]; class FakeEmbeddingsWithOsDimension extends FakeEmbeddings { async embedDocuments(documents: string[]): Promise { @@ -469,3 +469,47 @@ test.skip("Test fromExistingGraph multiple properties hybrid", async () => { await neo4jVectorStore.close(); await existingGraph.close(); }); + +test.skip("Test escape lucene characters", async () => { + const url = process.env.NEO4J_URI as string; + const username = process.env.NEO4J_USERNAME as string; + const password = process.env.NEO4J_PASSWORD as string; + + expect(url).toBeDefined(); + expect(username).toBeDefined(); + expect(password).toBeDefined(); + + const embeddings = new FakeEmbeddingsWithOsDimension(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const metadatas: any[] = []; + + const neo4jVectorStore = await Neo4jVectorStore.fromTexts( + texts, + metadatas, + embeddings, + { + url, + username, + password, + preDeleteCollection: true, + searchType: "hybrid", + } + ); + + const output = await neo4jVectorStore.similaritySearch("This is the end of the world!", 2); + console.log(output) + const expectedResult = [ + new Document({ + pageContent: "This is the end of the world!", + metadata: {}, + }), + new Document({ + pageContent: "baz", + metadata: {}, + }), + ]; + + expect(output).toStrictEqual(expectedResult); + await dropVectorIndexes(neo4jVectorStore); + await neo4jVectorStore.close(); +}); \ No newline at end of file From c36f5d3d50ef697d7a37d8fdc29f7e5a949c23f8 Mon Sep 17 00:00:00 2001 From: Tomaz Bratanic Date: Thu, 14 Dec 2023 21:19:38 +0100 Subject: [PATCH 2/2] Formatting --- .../src/vectorstores/neo4j_vector.ts | 36 +++++++++---------- .../tests/neo4j_vector.int.test.ts | 9 +++-- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/libs/langchain-community/src/vectorstores/neo4j_vector.ts b/libs/langchain-community/src/vectorstores/neo4j_vector.ts index 6b951ddf94b6..3837dced1e66 100644 --- a/libs/langchain-community/src/vectorstores/neo4j_vector.ts +++ b/libs/langchain-community/src/vectorstores/neo4j_vector.ts @@ -736,24 +736,24 @@ function getSearchIndexQuery(searchType: SearchType): string { function removeLuceneChars(text: string): string { // Remove Lucene special characters const specialChars = [ - "+", - "-", - "&", - "|", - "!", - "(", - ")", - "{", - "}", - "[", - "]", - "^", - '"', - "~", - "*", - "?", - ":", - "\\", + "+", + "-", + "&", + "|", + "!", + "(", + ")", + "{", + "}", + "[", + "]", + "^", + '"', + "~", + "*", + "?", + ":", + "\\", ]; let modifiedText = text; for (const char of specialChars) { diff --git a/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts index 1d74860b9a09..6b5257379d74 100644 --- a/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts @@ -496,8 +496,11 @@ test.skip("Test escape lucene characters", async () => { } ); - const output = await neo4jVectorStore.similaritySearch("This is the end of the world!", 2); - console.log(output) + const output = await neo4jVectorStore.similaritySearch( + "This is the end of the world!", + 2 + ); + console.log(output); const expectedResult = [ new Document({ pageContent: "This is the end of the world!", @@ -512,4 +515,4 @@ test.skip("Test escape lucene characters", async () => { expect(output).toStrictEqual(expectedResult); await dropVectorIndexes(neo4jVectorStore); await neo4jVectorStore.close(); -}); \ No newline at end of file +});