Skip to content

Commit

Permalink
Merge pull request #27 from MuleSoft-AI-Chain-Project/bugfix/list-sou…
Browse files Browse the repository at this point in the history
…rces

Bugfix/list sources
  • Loading branch information
tbolis-at-mulesoft authored Nov 11, 2024
2 parents 24b97c5 + 764ea93 commit 0e14155
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 24 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.mule.mulechain</groupId>
<artifactId>mulechain-vectors</artifactId>
<version>0.1.86-SNAPSHOT</version>
<version>0.1.87-SNAPSHOT</version>
<packaging>mule-extension</packaging>
<name>MAC Vectors</name>

<parent>
<parent>
<groupId>org.mule.extensions</groupId>
<artifactId>mule-modules-parent</artifactId>
<version>1.1.3</version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public class EmbeddingOperationValidator {
new HashSet<>(Arrays.asList(
Constants.VECTOR_STORE_PGVECTOR,
Constants.VECTOR_STORE_ELASTICSEARCH,
Constants.VECTOR_STORE_OPENSEARCH,
Constants.VECTOR_STORE_OPENSEARCH,
Constants.VECTOR_STORE_MILVUS,
Constants.VECTOR_STORE_CHROMA,
Constants.VECTOR_STORE_PINECONE,
Expand All @@ -56,7 +56,7 @@ public class EmbeddingOperationValidator {
new HashSet<>(Arrays.asList(
Constants.VECTOR_STORE_PGVECTOR,
Constants.VECTOR_STORE_ELASTICSEARCH,
Constants.VECTOR_STORE_OPENSEARCH,
Constants.VECTOR_STORE_OPENSEARCH,
Constants.VECTOR_STORE_MILVUS,
Constants.VECTOR_STORE_CHROMA,
Constants.VECTOR_STORE_PINECONE,
Expand All @@ -67,7 +67,7 @@ public class EmbeddingOperationValidator {
new HashSet<>(Arrays.asList(
Constants.VECTOR_STORE_PGVECTOR,
Constants.VECTOR_STORE_ELASTICSEARCH,
Constants.VECTOR_STORE_OPENSEARCH,
// Constants.VECTOR_STORE_OPENSEARCH, // Not supported yet.
Constants.VECTOR_STORE_MILVUS,
Constants.VECTOR_STORE_CHROMA,
Constants.VECTOR_STORE_PINECONE,
Expand All @@ -79,10 +79,11 @@ public class EmbeddingOperationValidator {
new HashSet<>(Arrays.asList(
Constants.VECTOR_STORE_PGVECTOR,
// Constants.VECTOR_STORE_ELASTICSEARCH, // Needs to be tested
Constants.VECTOR_STORE_MILVUS
// Constants.VECTOR_STORE_OPENSEARCH, // Needs to be tested
Constants.VECTOR_STORE_MILVUS,
// Constants.VECTOR_STORE_CHROMA, // Needs to be tested
// Constants.VECTOR_STORE_PINECONE, // Do not support GTE with strings.
// Constants.VECTOR_STORE_AI_SEARCH // Needs to be tested
Constants.VECTOR_STORE_AI_SEARCH // Needs to be tested
)));

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ public class MetadataKeyProvider implements ValueProvider {
public Set<Value> resolve() throws ValueResolvingException {

return ValueBuilder.getValuesFor(
Constants.METADATA_KEY_SOURCE_ID,
Constants.METADATA_KEY_FILE_NAME,
Constants.METADATA_KEY_FILE_TYPE,
Constants.METADATA_KEY_URL,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
import org.mule.extension.mulechain.vectors.internal.helper.factory.EmbeddingStoreFactory;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters;
import org.mule.extension.mulechain.vectors.internal.helper.store.milvus.MilvusVectorStore;
import org.mule.extension.mulechain.vectors.internal.helper.store.milvus.PGVectorVectorStore;
import org.mule.extension.mulechain.vectors.internal.helper.store.aisearch.AISearchStore;
import org.mule.extension.mulechain.vectors.internal.helper.store.milvus.MilvusStore;
import org.mule.extension.mulechain.vectors.internal.helper.store.pgvector.PGVectorStore;
import org.mule.extension.mulechain.vectors.internal.util.JsonUtils;
import org.mule.runtime.module.extension.internal.runtime.operation.IllegalOperationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -194,11 +194,13 @@ protected String getSourceUniqueKey(JSONObject sourceObject) {

String url = sourceObject.has(Constants.METADATA_KEY_URL) ? sourceObject.getString(Constants.METADATA_KEY_URL) : "";
String fullPath = sourceObject.has(Constants.METADATA_KEY_FULL_PATH) ? sourceObject.getString(Constants.METADATA_KEY_FULL_PATH) : "";
String source = sourceObject.has(Constants.METADATA_KEY_SOURCE) ? sourceObject.getString(Constants.METADATA_KEY_SOURCE) : "";
String ingestionDatetime = sourceObject.has(Constants.METADATA_KEY_INGESTION_DATETIME) ? sourceObject.getString(Constants.METADATA_KEY_INGESTION_DATETIME) : "";

String alternativeKey =
((fullPath != null && !fullPath.isEmpty()) ? fullPath :
(url != null && !url.isEmpty()) ? url : "") +
((url != null && !url.isEmpty()) ? url :
(source != null && !source.isEmpty()) ? source : "")) +
((ingestionDatetime != null && !ingestionDatetime.isEmpty()) ? ingestionDatetime : "");

return !sourceId.isEmpty() ? sourceId : alternativeKey;
Expand All @@ -211,13 +213,15 @@ protected JSONObject getSourceObject(JSONObject metadataObject) {
String fileName = metadataObject.has(Constants.METADATA_KEY_FILE_NAME) ? metadataObject.getString(Constants.METADATA_KEY_FILE_NAME) : null;
String url = metadataObject.has(Constants.METADATA_KEY_URL) ? metadataObject.getString(Constants.METADATA_KEY_URL) : null;
String fullPath = metadataObject.has(Constants.METADATA_KEY_FULL_PATH) ? metadataObject.getString(Constants.METADATA_KEY_FULL_PATH) : null;
String source = metadataObject.has(Constants.METADATA_KEY_SOURCE) ? metadataObject.getString(Constants.METADATA_KEY_SOURCE) : null;
String absoluteDirectoryPath = metadataObject.has(Constants.METADATA_KEY_ABSOLUTE_DIRECTORY_PATH) ? metadataObject.getString(Constants.METADATA_KEY_ABSOLUTE_DIRECTORY_PATH) : null;
String ingestionDatetime = metadataObject.has(Constants.METADATA_KEY_INGESTION_DATETIME) ? metadataObject.getString(Constants.METADATA_KEY_INGESTION_DATETIME) : null;

JSONObject sourceObject = new JSONObject();
sourceObject.put("segmentCount", Integer.parseInt(index) + 1);
sourceObject.put(Constants.METADATA_KEY_SOURCE_ID, sourceId);
sourceObject.put(Constants.METADATA_KEY_ABSOLUTE_DIRECTORY_PATH, absoluteDirectoryPath);
sourceObject.put(Constants.METADATA_KEY_SOURCE, source);
sourceObject.put(Constants.METADATA_KEY_FULL_PATH, fullPath);
sourceObject.put(Constants.METADATA_KEY_FILE_NAME, fileName);
sourceObject.put(Constants.METADATA_KEY_URL, url);
Expand Down Expand Up @@ -276,16 +280,19 @@ public VectorStore build() {

case Constants.VECTOR_STORE_MILVUS:

embeddingStore = new MilvusVectorStore(storeName, configuration, queryParams, modelParams);
embeddingStore = new MilvusStore(storeName, configuration, queryParams, modelParams);
break;

case Constants.VECTOR_STORE_PGVECTOR:

embeddingStore = new PGVectorVectorStore(storeName, configuration, queryParams, modelParams);
embeddingStore = new PGVectorStore(storeName, configuration, queryParams, modelParams);
break;

case Constants.VECTOR_STORE_AI_SEARCH:

embeddingStore = new AISearchStore(storeName, configuration, queryParams, modelParams);
break;

case Constants.VECTOR_STORE_CHROMA:

case Constants.VECTOR_STORE_PINECONE:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
package org.mule.extension.mulechain.vectors.internal.helper.store.aisearch;

import org.json.JSONArray;
import org.json.JSONObject;
import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters;
import org.mule.extension.mulechain.vectors.internal.helper.store.VectorStore;
import org.mule.extension.mulechain.vectors.internal.util.JsonUtils;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashMap;

import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;

public class AISearchStore extends VectorStore {

private static final String API_VERSION = "2024-07-01";

private String apiKey;
private String url;

public AISearchStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModelNameParameters modelParams) {

super(storeName, configuration, queryParams, modelParams);

JSONObject config = readConfigFile(configuration.getConfigFilePath());
JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_AI_SEARCH);
this.apiKey = vectorStoreConfig.getString("AI_SEARCH_KEY");
this.url = vectorStoreConfig.getString("AI_SEARCH_URL");
}

public JSONObject listSources() {

HashMap<String, JSONObject> sourcesJSONObjectHashMap = new HashMap<String, JSONObject>();

JSONObject jsonObject = new JSONObject();
jsonObject.put("storeName", storeName);

int segmentCount = 0; // Counter to track the number of documents processed
int offset = 0; // Initialize offset for pagination

try {

boolean hasMore = true; // Flag to check if more pages are available

// Loop to process pages until no more documents are available
do {
// Construct the URL with $top and $skip for pagination
String urlString = this.url + "/indexes/" + storeName + "/docs?search=*&$top=" + queryParams.embeddingPageSize() +
"&$skip=" + offset + "&$select=id,metadata&api-version=" + API_VERSION;

// Nested loop to handle each page of results
while (urlString != null) {

URL url = new URL(urlString);

// Open connection and configure HTTP request
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setRequestMethod("GET");
connection.setRequestProperty("Content-Type", "application/json");
connection.setRequestProperty("api-key", apiKey);

// Check the response code and handle accordingly
if (connection.getResponseCode() == 200) {
BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
StringBuilder responseBuilder = new StringBuilder();
String line;

// Read response line by line
while ((line = in.readLine()) != null) {
responseBuilder.append(line);
}
in.close();

// Parse JSON response
JSONObject jsonResponse = new JSONObject(responseBuilder.toString());
JSONArray documents = jsonResponse.getJSONArray("value");

// Iterate over each document in the current page
for (int i = 0; i < documents.length(); i++) {

JSONObject document = documents.getJSONObject(i);
String id = document.getString("id"); // Document ID
JSONObject metadata = document.optJSONObject("metadata"); // Metadata of the document

if (metadata != null) {

// Extract metadata attributes if available
JSONArray attributes = metadata.optJSONArray("attributes");

JSONObject metadataObject = new JSONObject(); // Object to store key-value pairs from attributes

int index =0;
// Iterate over attributes array to populate sourceObject
for (int j = 0; j < attributes.length(); j++) {
JSONObject attribute = attributes.getJSONObject(j);
metadataObject.put(attribute.getString("key"), attribute.get("value"));
if(attribute.getString("key").compareTo(Constants.METADATA_KEY_INDEX) == 0) {
index = Integer.parseInt(metadataObject.getString("index"));
}
}

JSONObject sourceObject = getSourceObject(metadataObject);
String sourceUniqueKey = getSourceUniqueKey(sourceObject);

// Add sourceObject to sources only if it has at least one key-value pair and it's possible to generate a key
if (!sourceObject.isEmpty() && sourceUniqueKey != null && !sourceUniqueKey.isEmpty()) {
// Overwrite sourceObject if current one has a greater index (greatest index represents the number of segments)
if(sourcesJSONObjectHashMap.containsKey(sourceUniqueKey)){
// Get current index
int currentSegmentCount = index + 1;
// Get previously stored index
int storedSegmentCount = (int) sourcesJSONObjectHashMap.get(sourceUniqueKey).get("segmentCount");
// Check if object need to be updated
if(currentSegmentCount > storedSegmentCount) {
sourcesJSONObjectHashMap.put(sourceUniqueKey, sourceObject);
}
} else {
sourcesJSONObjectHashMap.put(sourceUniqueKey, sourceObject);
}
}

LOGGER.debug("sourceObject: " + sourceObject);
segmentCount++; // Increment document count
} else {
LOGGER.warn("No metadata available");
}
}

// Check for the next page link in the response
urlString = jsonResponse.optString("@odata.nextLink", null);

// If there is no next page, check if fewer documents were returned than PAGE_SIZE
if (urlString == null && documents.length() < queryParams.embeddingPageSize()) {
hasMore = false; // No more documents to retrieve
}

} else {
// Log any error responses from the server
LOGGER.error("Error: " + connection.getResponseCode() + " " + connection.getResponseMessage());
break;
}
}

// Increment offset to fetch the next segment of documents
offset += queryParams.embeddingPageSize();

} while (hasMore); // Continue if more pages are available

// Output total count of processed documents
LOGGER.debug("segmentCount: " + segmentCount);

} catch (Exception e) {

// Handle any exceptions that occur during the process
LOGGER.error("Error while listing sources", e);
}

jsonObject.put("sources", JsonUtils.jsonObjectCollectionToJsonArray(sourcesJSONObjectHashMap.values()));
jsonObject.put("sourceCount", sourcesJSONObjectHashMap.size());

return jsonObject;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,10 @@

import com.google.gson.JsonObject;
import io.milvus.client.MilvusServiceClient;
import io.milvus.grpc.QueryResults;
import io.milvus.orm.iterator.QueryIterator;
import io.milvus.param.ConnectParam;
import io.milvus.param.dml.QueryIteratorParam;
import io.milvus.param.dml.QueryParam;
import io.milvus.param.R;
import io.milvus.param.collection.LoadCollectionParam;
import io.milvus.response.QueryResultsWrapper;
import org.json.JSONArray;
import org.json.JSONObject;
Expand All @@ -26,11 +23,11 @@

import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;

public class MilvusVectorStore extends VectorStore {
public class MilvusStore extends VectorStore {

private String uri;

public MilvusVectorStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModelNameParameters modelParams) {
public MilvusStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModelNameParameters modelParams) {

super(storeName, configuration, queryParams, modelParams);

Expand All @@ -45,7 +42,6 @@ public JSONObject listSources() {

JSONObject jsonObject = new JSONObject();
jsonObject.put("storeName", storeName);
JSONArray sources = new JSONArray();

// Specify the host and port for the Milvus server
ConnectParam connectParam = ConnectParam.newBuilder()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.mule.extension.mulechain.vectors.internal.helper.store.milvus;
package org.mule.extension.mulechain.vectors.internal.helper.store.pgvector;

import org.json.JSONArray;
import org.json.JSONObject;
import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
Expand All @@ -26,9 +25,9 @@
* Represents a store for vector data using PostgreSQL with PGVector extension.
* This class is responsible for interacting with a PostgreSQL database to store and retrieve vector metadata.
*/
public class PGVectorVectorStore extends VectorStore {
public class PGVectorStore extends VectorStore {

private static final Logger LOGGER = LoggerFactory.getLogger(PGVectorVectorStore.class);
private static final Logger LOGGER = LoggerFactory.getLogger(PGVectorStore.class);

private String userName;
private String password;
Expand All @@ -44,7 +43,7 @@ public class PGVectorVectorStore extends VectorStore {
* @param queryParams Parameters related to query configurations.
* @param modelParams Parameters related to embedding model.
*/
public PGVectorVectorStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModelNameParameters modelParams) {
public PGVectorStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModelNameParameters modelParams) {

super(storeName, configuration, queryParams, modelParams);

Expand Down

0 comments on commit 0e14155

Please sign in to comment.