From 1d67741372c826266b761de379a45c91204d65de Mon Sep 17 00:00:00 2001 From: Tommaso Bolis Date: Tue, 12 Nov 2024 23:42:10 +0100 Subject: [PATCH] Implements list sources operation for chroma directly through apis. Block other vector store from using it until fully tested. --- pom.xml | 2 +- .../helper/EmbeddingOperationValidator.java | 2 +- .../internal/store/chroma/ChromaStore.java | 72 ++++++++++++++++++- 3 files changed, 73 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 113ed50..2c5c5e2 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 com.mule.mulechain mulechain-vectors - 0.1.88-SNAPSHOT + 0.1.89-SNAPSHOT mule-extension MAC Vectors diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingOperationValidator.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingOperationValidator.java index 9f03659..173f4cb 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingOperationValidator.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingOperationValidator.java @@ -81,7 +81,7 @@ public class EmbeddingOperationValidator { // Constants.VECTOR_STORE_ELASTICSEARCH, // Needs to be tested // Constants.VECTOR_STORE_OPENSEARCH, // Needs to be tested Constants.VECTOR_STORE_MILVUS, - // Constants.VECTOR_STORE_CHROMA, // Needs to be tested + Constants.VECTOR_STORE_CHROMA, // Constants.VECTOR_STORE_PINECONE, // Do not support GTE with strings. 
Constants.VECTOR_STORE_AI_SEARCH // Needs to be tested ))); diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java index 56d52f6..2b0910d 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java @@ -1,5 +1,6 @@ package org.mule.extension.mulechain.vectors.internal.store.chroma; +import org.json.JSONArray; import org.json.JSONObject; import org.mule.extension.mulechain.vectors.internal.config.Configuration; import org.mule.extension.mulechain.vectors.internal.constant.Constants; @@ -10,6 +11,7 @@ import java.io.BufferedReader; import java.io.InputStreamReader; +import java.io.OutputStream; import java.net.HttpURLConnection; import java.net.URL; import java.util.HashMap; @@ -36,7 +38,7 @@ public ChromaStore(String storeName, Configuration configuration, QueryParameter super(storeName, configuration, queryParams, modelParams); JSONObject config = JsonUtils.readConfigFile(configuration.getConfigFilePath()); - JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_ELASTICSEARCH); + JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_CHROMA); this.url = vectorStoreConfig.getString("CHROMA_URL"); } @@ -62,6 +64,14 @@ public JSONObject listSources() { while(offset < segmentCount) { + JSONArray metadataObjects = getMetadataObjects(collectionId, offset, queryParams.embeddingPageSize()); + for(int i = 0; i< metadataObjects.length(); i++) { + + JSONObject metadataObject = metadataObjects.getJSONObject(i); + JSONObject sourceObject = getSourceObject(metadataObject); + addOrUpdateSourceObjectIntoSourceObjectMap(sourceObjectMap, sourceObject); + } + offset = offset + metadataObjects.length(); } } catch (Exception e) { @@ -76,6 +86,66 @@ public JSONObject 
listSources() { return jsonObject; }
+  /**
+   * Requests one page of records from the collection's "/get" endpoint, asking only for metadata.
+   * NOTE(review): listSources adds the returned length to its offset, so an empty result from a
+   * failed request leaves the offset unchanged; confirm that loop terminates in that case.
+   *
+   * @param collectionId id of the collection, inserted into the request path
+   * @param offset value sent as "offset" in the request body
+   * @param limit value sent as "limit" in the request body
+   * @return the "metadatas" array of a 200 response; an empty array on any other status or error
+   */
+  private JSONArray getMetadataObjects(String collectionId, long offset, long limit) {
+
+    JSONArray metadataObjects = new JSONArray();
+    try {
+
+      URL endpoint = new URL(url + "/api/v1/collections/" + collectionId + "/get");
+
+      // Open connection and configure HTTP request
+      HttpURLConnection connection = (HttpURLConnection) endpoint.openConnection();
+      connection.setRequestMethod("POST");
+      connection.setRequestProperty("Content-Type", "application/json");
+      connection.setDoOutput(true); // Enable output for the connection
+
+      JSONObject jsonRequest = new JSONObject();
+      jsonRequest.put("limit", limit);
+      jsonRequest.put("offset", offset);
+      jsonRequest.put("include", new JSONArray().put("metadatas"));
+
+      // Write JSON body to the request output stream
+      try (OutputStream os = connection.getOutputStream()) {
+        os.write(jsonRequest.toString().getBytes("utf-8"));
+      }
+
+      // Check the response code and handle accordingly
+      if (connection.getResponseCode() == 200) {
+
+        // Reader is closed even if reading fails; body is decoded as UTF-8, not the platform default
+        StringBuilder responseBuilder = new StringBuilder();
+        try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8"))) {
+          for (String line = in.readLine(); line != null; line = in.readLine()) {
+            responseBuilder.append(line);
+          }
+        }
+
+        // Parse JSON response
+        metadataObjects = new JSONObject(responseBuilder.toString()).getJSONArray("metadatas");
+
+      } else {
+
+        // Log any error responses from the server
+        LOGGER.error("Error: " + connection.getResponseCode() + " " + connection.getResponseMessage());
+      }
+
+    } catch (Exception e) {
+
+      LOGGER.error("Error getting collection segments", e);
+    }
+    return metadataObjects;
+  }
+
/** * Retrieves the total number of segments in the specified
collection. *