diff --git a/pom.xml b/pom.xml
index 9bd1c50..522c3b1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
4.0.0
com.mule.mulechain
mulechain-vectors
- 0.1.93-SNAPSHOT
+ 0.1.95-SNAPSHOT
mule-extension
MAC Vectors
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingStoreIngestorHelper.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingStoreIngestorHelper.java
deleted file mode 100644
index a60bae5..0000000
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingStoreIngestorHelper.java
+++ /dev/null
@@ -1,265 +0,0 @@
-package org.mule.extension.mulechain.vectors.internal.helper;
-
-import dev.langchain4j.data.document.Document;
-import dev.langchain4j.data.document.loader.UrlDocumentLoader;
-import dev.langchain4j.data.document.parser.TextDocumentParser;
-import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
-import dev.langchain4j.data.document.transformer.jsoup.HtmlToTextDocumentTransformer;
-import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
-import org.json.JSONObject;
-import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.FileTypeParameters;
-import org.mule.extension.mulechain.vectors.internal.storage.azure.AzureBlobStorage;
-import org.mule.extension.mulechain.vectors.internal.storage.s3.AWSS3Storage;
-import org.mule.extension.mulechain.vectors.internal.util.DocumentUtils;
-
-import org.mule.extension.mulechain.vectors.internal.util.Utils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.stream.Stream;
-
-import static dev.langchain4j.data.document.loader.FileSystemDocumentLoader.loadDocument;
-
-/**
- * Helper class for ingesting documents into an embedding store from various sources, including local files,
- * S3, and Azure storage containers.
- */
-public class EmbeddingStoreIngestorHelper {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(EmbeddingStoreIngestorHelper.class);
-
- private EmbeddingStoreIngestor ingestor;
- private String storeName;
-
- /**
- * Constructs a new EmbeddingStoreIngestorHelper.
- *
- * @param ingestor the embedding store ingestor used to store documents.
- * @param storeName the name of the store where documents are being ingested.
- */
- public EmbeddingStoreIngestorHelper(EmbeddingStoreIngestor ingestor, String storeName) {
-
- this.ingestor = ingestor;
- this.storeName = storeName;
- }
-
- /**
- * Ingests documents from a local folder into the embedding store.
- *
- * @param folderPath the path of the folder containing documents to ingest.
- * @param fileTypeParameters the type of files to ingest, as defined by FileTypeParameters.
- * @return a JSONObject with ingestion status and metadata.
- */
- public JSONObject ingestFromLocalFolder(String folderPath, FileTypeParameters fileTypeParameters) {
-
- long totalFiles = 0;
- try (Stream paths = Files.walk(Paths.get(folderPath))) {
- totalFiles = paths.filter(Files::isRegularFile).count();
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- LOGGER.info("Total number of files to process: " + totalFiles);
- AtomicInteger fileCounter = new AtomicInteger(0);
- try (Stream paths = Files.walk(Paths.get(folderPath))) {
- paths.filter(Files::isRegularFile).forEach(path -> {
- int currentFileCounter = fileCounter.incrementAndGet();
- LOGGER.info("Processing file " + currentFileCounter + ": " + path.getFileName());
-
- Document document;
- switch (fileTypeParameters.getFileType()) {
- case Constants.FILE_TYPE_CRAWL:
- document = loadDocument(path.toString(), new TextDocumentParser());
- DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_CRAWL, path);
- ingestor.ingest(document);
- break;
- case Constants.FILE_TYPE_TEXT:
- document = loadDocument(path.toString(), new TextDocumentParser());
- DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_TEXT, path.getFileName().toString(), folderPath + path.getFileName());
- ingestor.ingest(document);
- break;
- case Constants.FILE_TYPE_ANY:
- document = loadDocument(path.toString(), new ApacheTikaDocumentParser());
- DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_ANY, path.getFileName().toString(),folderPath + path.getFileName());
- ingestor.ingest(document);
- break;
- default:
- throw new IllegalArgumentException("Unsupported File Type: " + fileTypeParameters.getFileType());
- }
- });
- } catch (IOException e) {
- e.printStackTrace();
- }
- LOGGER.info("Processing complete");
-
- return createFolderIngestionStatusObject(totalFiles, folderPath);
- }
-
- /**
- * Ingests a single local file into the embedding store.
- *
- * @param filePath the path of the file to ingest.
- * @param fileTypeParameters the type of file, as defined by FileTypeParameters.
- * @return a JSONObject with ingestion status and metadata.
- */
- public JSONObject ingestFromLocalFile(String filePath, FileTypeParameters fileTypeParameters) {
-
- Path path = Paths.get(filePath);
-
- Document document;
- switch (fileTypeParameters.getFileType()) {
- case Constants.FILE_TYPE_CRAWL:
- document = loadDocument(path.toString(), new TextDocumentParser());
- DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_CRAWL, path);
- break;
- case Constants.FILE_TYPE_TEXT:
- document = loadDocument(path.toString(), new TextDocumentParser());
- DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_TEXT, Utils.getFileNameFromPath(filePath), filePath);
- break;
- case Constants.FILE_TYPE_ANY:
- document = loadDocument(path.toString(), new ApacheTikaDocumentParser());
- DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_ANY, Utils.getFileNameFromPath(filePath), filePath);
- break;
- case Constants.FILE_TYPE_URL:
- document = loadUrlDocument(filePath);
- break;
- default:
- throw new IllegalArgumentException("Unsupported File Type: " + fileTypeParameters.getFileType());
- }
- ingestor.ingest(document);
- return createFileIngestionStatusObject(fileTypeParameters.getFileType(), filePath);
- }
-
- /**
- * Ingests documents from an S3 folder into the embedding store.
- *
- * @param folderPath the path of the folder in S3.
- * @param fileTypeParameters the type of files to ingest, as defined by FileTypeParameters.
- * @param awsKey AWS access key.
- * @param awsSecret AWS secret key.
- * @param awsRegion AWS region.
- * @param s3Bucket the name of the S3 bucket.
- * @return a JSONObject with ingestion status and metadata.
- */
- public JSONObject ingestFromS3Folder(String folderPath, FileTypeParameters fileTypeParameters, String awsKey, String awsSecret, String awsRegion, String s3Bucket) {
-
- AWSS3Storage awsS3Storage = new AWSS3Storage(s3Bucket, awsKey, awsSecret, awsRegion);
- long totalFiles = awsS3Storage.readAllFiles(folderPath, ingestor, fileTypeParameters);
- return createFolderIngestionStatusObject(totalFiles, folderPath);
- }
-
- /**
- * Ingests a single file from an S3 bucket into the embedding store.
- *
- * @param filePath the path of the file in the S3 bucket.
- * @param fileTypeParameters the type of file to ingest, as defined by FileTypeParameters.
- * @param awsKey AWS access key.
- * @param awsSecret AWS secret key.
- * @param awsRegion AWS region.
- * @param s3Bucket the name of the S3 bucket.
- * @return a JSONObject with ingestion status and metadata.
- */
- public JSONObject ingestFromS3File(String filePath, FileTypeParameters fileTypeParameters, String awsKey, String awsSecret, String awsRegion, String s3Bucket) {
-
- AWSS3Storage awsS3Storage = new AWSS3Storage(s3Bucket, awsKey, awsSecret, awsRegion);
- awsS3Storage.readFile(filePath, fileTypeParameters, ingestor);
- return createFileIngestionStatusObject(fileTypeParameters.getFileType(), filePath);
- }
-
- /**
- * Ingests documents from an Azure storage container into the embedding store.
- *
- * @param containerName the name of the Azure container.
- * @param fileType the type of files to ingest, as defined by FileTypeParameters.
- * @param azureName the Azure storage account name.
- * @param azureKey the Azure storage account key.
- * @return a JSONObject with ingestion status and metadata.
- */
- public JSONObject ingestFromAZContainer(String containerName, FileTypeParameters fileType, String azureName, String azureKey) {
-
- AzureBlobStorage azureBlobStorage = new AzureBlobStorage(azureName, azureKey);
- long totalFiles = azureBlobStorage.readAllFiles(containerName, ingestor, fileType);
- return createFolderIngestionStatusObject(totalFiles, containerName);
- }
-
- /**
- * Ingests a single file from an Azure storage container into the embedding store.
- *
- * @param containerName the name of the Azure container.
- * @param blobName the name of the file in the Azure container.
- * @param fileType the type of file to ingest, as defined by FileTypeParameters.
- * @param azureName the Azure storage account name.
- * @param azureKey the Azure storage account key.
- * @return a JSONObject with ingestion status and metadata.
- */
- public JSONObject ingestFromAZFile(String containerName, String blobName, FileTypeParameters fileType, String azureName, String azureKey) {
-
- AzureBlobStorage azureBlobStorage = new AzureBlobStorage(azureName, azureKey);
- azureBlobStorage.readFile(containerName, blobName, fileType, ingestor);
- return createFileIngestionStatusObject(fileType.getFileType(), containerName);
- }
-
- /**
- * Loads and transforms a document from a URL into a text document.
- *
- * @param contextPath the URL to load.
- * @return the transformed document.
- */
- private Document loadUrlDocument(String contextPath) {
-
- Document document;
- try {
- URL url = new URL(contextPath);
- Document htmlDocument = UrlDocumentLoader.load(url, new TextDocumentParser());
- HtmlToTextDocumentTransformer transformer = new HtmlToTextDocumentTransformer(null, null, true);
- document = transformer.transform(htmlDocument);
- document.metadata().put(Constants.METADATA_KEY_URL, contextPath);
- } catch (MalformedURLException e) {
- throw new RuntimeException("Invalid URL: " + contextPath, e);
- }
- return document;
- }
-
- /**
- * Creates a JSONObject representing the ingestion status.
- *
- * @param fileType the type of the ingested file.
- * @param folderPath the path of the ingested file or folder.
- * @return a JSONObject containing ingestion status metadata.
- */
- private JSONObject createFileIngestionStatusObject(String fileType, String folderPath) {
-
- JSONObject jsonObject = new JSONObject();
- jsonObject.put("fileType", fileType);
- jsonObject.put("filePath", folderPath);
- jsonObject.put("storeName", storeName);
- jsonObject.put("status", "updated");
- return jsonObject;
- }
-
- /**
- * Creates a JSONObject representing the ingestion status of a folder or set of files.
- *
- * @param totalFiles the total number of files processed.
- * @param folderPath the path of the processed folder.
- * @return a JSONObject containing the ingestion status with file count, folder path, store name, and status.
- */
- private JSONObject createFolderIngestionStatusObject(Long totalFiles, String folderPath) {
-
- JSONObject jsonObject = new JSONObject();
- jsonObject.put("filesCount", totalFiles);
- jsonObject.put("folderPath", folderPath);
- jsonObject.put("storeName", storeName);
- jsonObject.put("status", "updated");
- return jsonObject;
- }
-
-}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/factory/EmbeddingModelFactory.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/factory/EmbeddingModelFactory.java
deleted file mode 100644
index 97ff985..0000000
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/factory/EmbeddingModelFactory.java
+++ /dev/null
@@ -1,106 +0,0 @@
-package org.mule.extension.mulechain.vectors.internal.helper.factory;
-
-import dev.langchain4j.model.azure.AzureOpenAiEmbeddingModel;
-import dev.langchain4j.model.embedding.EmbeddingModel;
-import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;
-import dev.langchain4j.model.mistralai.MistralAiEmbeddingModel;
-import dev.langchain4j.model.nomic.NomicEmbeddingModel;
-import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
-import org.json.JSONObject;
-import org.mule.extension.mulechain.vectors.internal.config.Configuration;
-import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters;
-
-import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;
-
-public class EmbeddingModelFactory {
-
- public static EmbeddingModel createModel(Configuration configuration, EmbeddingModelNameParameters modelParams) {
-
- EmbeddingModel model = null;
- JSONObject config = readConfigFile(configuration.getConfigFilePath());
- JSONObject llmType;
- String llmTypeKey;
- String llmTypeEndpoint;
-
- switch (configuration.getEmbeddingModelService()) {
- case Constants.EMBEDDING_MODEL_SERVICE_AZURE_OPENAI:
- llmType = config.getJSONObject("AZURE_OPENAI");
- llmTypeKey = llmType.getString("AZURE_OPENAI_KEY");
- llmTypeEndpoint = llmType.getString("AZURE_OPENAI_ENDPOINT");
- model = createAzureOpenAiModel(llmTypeKey, llmTypeEndpoint, modelParams);
- break;
-
- case Constants.EMBEDDING_MODEL_SERVICE_OPENAI:
- llmType = config.getJSONObject("OPENAI");
- llmTypeKey = llmType.getString("OPENAI_API_KEY");
- model = createOpenAiModel(llmTypeKey, modelParams);
- break;
-
- case Constants.EMBEDDING_MODEL_SERVICE_MISTRAL_AI:
- llmType = config.getJSONObject("MISTRAL_AI");
- llmTypeKey = llmType.getString("MISTRAL_AI_API_KEY");
- model = createMistralAIModel(llmTypeKey, modelParams);
- break;
-
- case Constants.EMBEDDING_MODEL_SERVICE_NOMIC:
- llmType = config.getJSONObject("NOMIC");
- llmTypeKey = llmType.getString("NOMIC_API_KEY");
- model = createNomicModel(llmTypeKey, modelParams);
-
- break;
- case Constants.EMBEDDING_MODEL_SERVICE_HUGGING_FACE:
- llmType = config.getJSONObject("HUGGING_FACE");
- llmTypeKey = llmType.getString("HUGGING_FACE_API_KEY");
- model = createHuggingFaceModel(llmTypeKey, modelParams);
-
- break;
- default:
- throw new IllegalArgumentException("Unsupported Embedding Model: " + configuration.getEmbeddingModelService());
- }
- return model;
- }
-
- private static EmbeddingModel createAzureOpenAiModel(String llmTypeKey, String llmTypeEndpoint, EmbeddingModelNameParameters modelParams) {
- System.out.println("Inside createAzureOpenAiModel");
- return AzureOpenAiEmbeddingModel.builder()
- .apiKey(llmTypeKey)
- .endpoint(llmTypeEndpoint)
- .deploymentName(modelParams.getEmbeddingModelName())
- .build();
- }
-
- private static EmbeddingModel createOpenAiModel(String llmTypeKey, EmbeddingModelNameParameters modelParams) {
- return OpenAiEmbeddingModel.builder()
- .apiKey(llmTypeKey)
- .modelName(modelParams.getEmbeddingModelName())
- .build();
- }
-
- private static EmbeddingModel createMistralAIModel(String llmTypeKey, EmbeddingModelNameParameters modelParams) {
- return MistralAiEmbeddingModel.builder()
- .apiKey(llmTypeKey)
- .modelName(modelParams.getEmbeddingModelName())
- .build();
- }
-
- private static EmbeddingModel createNomicModel(String llmTypeKey, EmbeddingModelNameParameters modelParams) {
- return NomicEmbeddingModel.builder()
- //.baseUrl("https://api-atlas.nomic.ai/v1/")
- .apiKey(llmTypeKey)
- .modelName(modelParams.getEmbeddingModelName())
- //.taskType("clustering")
- .maxRetries(2)
- .logRequests(true)
- .logResponses(true)
- .build();
- }
-
- private static EmbeddingModel createHuggingFaceModel(String llmTypeKey, EmbeddingModelNameParameters modelParams) {
- return HuggingFaceEmbeddingModel.builder()
- .accessToken(llmTypeKey)
- .modelId(modelParams.getEmbeddingModelName())
- .build();
- }
-
-}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelNameParameters.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelParameters.java
similarity index 93%
rename from src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelNameParameters.java
rename to src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelParameters.java
index ce415ac..7a7d1c8 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelNameParameters.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelParameters.java
@@ -6,7 +6,7 @@
import org.mule.runtime.extension.api.annotation.param.Parameter;
import org.mule.runtime.extension.api.annotation.values.OfValues;
-public class EmbeddingModelNameParameters {
+public class EmbeddingModelParameters {
@Parameter
@Expression(ExpressionSupport.SUPPORTED)
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/BaseModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/BaseModel.java
new file mode 100644
index 0000000..ae7c7bf
--- /dev/null
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/BaseModel.java
@@ -0,0 +1,91 @@
+package org.mule.extension.mulechain.vectors.internal.model;
+
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import org.mule.extension.mulechain.vectors.internal.config.Configuration;
+import org.mule.extension.mulechain.vectors.internal.constant.Constants;
+import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters;
+import org.mule.extension.mulechain.vectors.internal.model.azureopenai.AzureOpenAIModel;
+import org.mule.extension.mulechain.vectors.internal.model.huggingface.HuggingFaceModel;
+import org.mule.extension.mulechain.vectors.internal.model.mistralai.MistralAIModel;
+import org.mule.extension.mulechain.vectors.internal.model.nomic.NomicModel;
+import org.mule.extension.mulechain.vectors.internal.model.openai.OpenAIModel;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class BaseModel {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(BaseModel.class);
+
+ protected Configuration configuration;
+ protected EmbeddingModelParameters embeddingModelParameters;
+
+ public BaseModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) {
+
+ this.configuration = configuration;
+ this.embeddingModelParameters = embeddingModelParameters;
+ }
+
+ public EmbeddingModel buildEmbeddingModel() {
+
+ throw new UnsupportedOperationException("This method should be overridden by subclasses");
+ }
+
+ public static BaseModel.Builder builder() {
+
+ return new BaseModel.Builder();
+ }
+
+ public static class Builder {
+
+ private Configuration configuration;
+ private EmbeddingModelParameters embeddingModelParameters;
+
+ public Builder() {
+
+ }
+
+ public BaseModel.Builder configuration(Configuration configuration) {
+ this.configuration = configuration;
+ return this;
+ }
+
+ public BaseModel.Builder embeddingModelParameters(EmbeddingModelParameters embeddingModelParameters) {
+ this.embeddingModelParameters = embeddingModelParameters;
+ return this;
+ }
+
+ public BaseModel build() {
+
+ BaseModel baseModel;
+
+ LOGGER.debug("Embedding Model Service: " + configuration.getEmbeddingModelService());
+ switch (configuration.getEmbeddingModelService()) {
+
+ case Constants.EMBEDDING_MODEL_SERVICE_AZURE_OPENAI:
+ baseModel = new AzureOpenAIModel(configuration, embeddingModelParameters);
+ break;
+
+ case Constants.EMBEDDING_MODEL_SERVICE_OPENAI:
+ baseModel = new OpenAIModel(configuration, embeddingModelParameters);
+ break;
+
+ case Constants.EMBEDDING_MODEL_SERVICE_MISTRAL_AI:
+ baseModel = new MistralAIModel(configuration, embeddingModelParameters);
+ break;
+
+ case Constants.EMBEDDING_MODEL_SERVICE_NOMIC:
+ baseModel = new NomicModel(configuration, embeddingModelParameters);
+ break;
+
+ case Constants.EMBEDDING_MODEL_SERVICE_HUGGING_FACE:
+ baseModel = new HuggingFaceModel(configuration, embeddingModelParameters);
+ break;
+
+ default:
+ //throw new IllegalOperationException("Unsupported Vector Store: " + configuration.getVectorStore());
+ baseModel = null;
+ }
+ return baseModel;
+ }
+ }
+}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/azureopenai/AzureOpenAIModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/azureopenai/AzureOpenAIModel.java
new file mode 100644
index 0000000..5750bbe
--- /dev/null
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/azureopenai/AzureOpenAIModel.java
@@ -0,0 +1,35 @@
+package org.mule.extension.mulechain.vectors.internal.model.azureopenai;
+
+import dev.langchain4j.model.azure.AzureOpenAiEmbeddingModel;
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import org.json.JSONObject;
+import org.mule.extension.mulechain.vectors.internal.config.Configuration;
+import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters;
+import org.mule.extension.mulechain.vectors.internal.model.BaseModel;
+
+import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;
+
+public class AzureOpenAIModel extends BaseModel {
+
+ private final String apiKey;
+ private final String endpoint;
+
+ public AzureOpenAIModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) {
+
+ super(configuration,embeddingModelParameters);
+ JSONObject config = readConfigFile(configuration.getConfigFilePath());
+ assert config != null;
+ JSONObject modelConfig = config.getJSONObject("AZURE_OPENAI");
+ this.apiKey = modelConfig.getString("AZURE_OPENAI_KEY");
+ this.endpoint = modelConfig.getString("AZURE_OPENAI_ENDPOINT");
+ }
+
+ public EmbeddingModel buildEmbeddingModel() {
+
+ return AzureOpenAiEmbeddingModel.builder()
+ .apiKey(apiKey)
+ .endpoint(endpoint)
+ .deploymentName(embeddingModelParameters.getEmbeddingModelName())
+ .build();
+ }
+}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/huggingface/HuggingFaceModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/huggingface/HuggingFaceModel.java
new file mode 100644
index 0000000..7223847
--- /dev/null
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/huggingface/HuggingFaceModel.java
@@ -0,0 +1,32 @@
+package org.mule.extension.mulechain.vectors.internal.model.huggingface;
+
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;
+import org.json.JSONObject;
+import org.mule.extension.mulechain.vectors.internal.config.Configuration;
+import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters;
+import org.mule.extension.mulechain.vectors.internal.model.BaseModel;
+
+import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;
+
+public class HuggingFaceModel extends BaseModel {
+
+ private final String apiKey;
+
+ public HuggingFaceModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) {
+
+ super(configuration,embeddingModelParameters);
+ JSONObject config = readConfigFile(configuration.getConfigFilePath());
+ assert config != null;
+ JSONObject modelConfig = config.getJSONObject("HUGGING_FACE");
+ this.apiKey = modelConfig.getString("HUGGING_FACE_API_KEY");
+ }
+
+ public EmbeddingModel buildEmbeddingModel() {
+
+ return HuggingFaceEmbeddingModel.builder()
+ .accessToken(apiKey)
+ .modelId(embeddingModelParameters.getEmbeddingModelName())
+ .build();
+ }
+}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/mistralai/MistralAIModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/mistralai/MistralAIModel.java
new file mode 100644
index 0000000..7089f2f
--- /dev/null
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/mistralai/MistralAIModel.java
@@ -0,0 +1,32 @@
+package org.mule.extension.mulechain.vectors.internal.model.mistralai;
+
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import dev.langchain4j.model.mistralai.MistralAiEmbeddingModel;
+import org.json.JSONObject;
+import org.mule.extension.mulechain.vectors.internal.config.Configuration;
+import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters;
+import org.mule.extension.mulechain.vectors.internal.model.BaseModel;
+
+import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;
+
+public class MistralAIModel extends BaseModel {
+
+ private final String apiKey;
+
+ public MistralAIModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) {
+
+ super(configuration,embeddingModelParameters);
+ JSONObject config = readConfigFile(configuration.getConfigFilePath());
+ assert config != null;
+ JSONObject modelConfig = config.getJSONObject("MISTRAL_AI");
+ this.apiKey = modelConfig.getString("MISTRAL_AI_API_KEY");
+ }
+
+ public EmbeddingModel buildEmbeddingModel() {
+
+ return MistralAiEmbeddingModel.builder()
+ .apiKey(apiKey)
+ .modelName(embeddingModelParameters.getEmbeddingModelName())
+ .build();
+ }
+}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/nomic/NomicModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/nomic/NomicModel.java
new file mode 100644
index 0000000..b00d61b
--- /dev/null
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/nomic/NomicModel.java
@@ -0,0 +1,37 @@
+package org.mule.extension.mulechain.vectors.internal.model.nomic;
+
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import dev.langchain4j.model.nomic.NomicEmbeddingModel;
+import org.json.JSONObject;
+import org.mule.extension.mulechain.vectors.internal.config.Configuration;
+import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters;
+import org.mule.extension.mulechain.vectors.internal.model.BaseModel;
+
+import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;
+
+public class NomicModel extends BaseModel {
+
+ private final String apiKey;
+
+ public NomicModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) {
+
+ super(configuration,embeddingModelParameters);
+ JSONObject config = readConfigFile(configuration.getConfigFilePath());
+ assert config != null;
+ JSONObject modelConfig = config.getJSONObject("NOMIC");
+ this.apiKey = modelConfig.getString("NOMIC_API_KEY");
+ }
+
+ public EmbeddingModel buildEmbeddingModel() {
+
+ return NomicEmbeddingModel.builder()
+ //.baseUrl("https://api-atlas.nomic.ai/v1/")
+ .apiKey(apiKey)
+ .modelName(embeddingModelParameters.getEmbeddingModelName())
+ //.taskType("clustering")
+ .maxRetries(2)
+ .logRequests(true)
+ .logResponses(true)
+ .build();
+ }
+}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/openai/OpenAIModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/openai/OpenAIModel.java
new file mode 100644
index 0000000..43d10e9
--- /dev/null
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/openai/OpenAIModel.java
@@ -0,0 +1,32 @@
+package org.mule.extension.mulechain.vectors.internal.model.openai;
+
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
+import org.json.JSONObject;
+import org.mule.extension.mulechain.vectors.internal.config.Configuration;
+import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters;
+import org.mule.extension.mulechain.vectors.internal.model.BaseModel;
+
+import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;
+
+public class OpenAIModel extends BaseModel {
+
+ private final String apiKey;
+
+ public OpenAIModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) {
+
+ super(configuration,embeddingModelParameters);
+ JSONObject config = readConfigFile(configuration.getConfigFilePath());
+ assert config != null;
+ JSONObject modelConfig = config.getJSONObject("OPENAI");
+ this.apiKey = modelConfig.getString("OPENAI_API_KEY");
+ }
+
+ public EmbeddingModel buildEmbeddingModel() {
+
+ return OpenAiEmbeddingModel.builder()
+ .apiKey(apiKey)
+ .modelName(embeddingModelParameters.getEmbeddingModelName())
+ .build();
+ }
+}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/operation/EmbeddingOperations.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/operation/EmbeddingOperations.java
index a84bdff..664d9ea 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/operation/EmbeddingOperations.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/operation/EmbeddingOperations.java
@@ -1,7 +1,6 @@
package org.mule.extension.mulechain.vectors.internal.operation;
import static org.apache.commons.io.IOUtils.toInputStream;
-import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;
import static org.mule.runtime.extension.api.annotation.param.MediaType.APPLICATION_JSON;
import java.io.InputStream;
@@ -10,14 +9,14 @@
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
import org.mule.extension.mulechain.vectors.internal.helper.EmbeddingOperationValidator;
-import org.mule.extension.mulechain.vectors.internal.helper.EmbeddingStoreIngestorHelper;
-import org.mule.extension.mulechain.vectors.internal.helper.factory.EmbeddingModelFactory;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.*;
import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import dev.langchain4j.store.embedding.*;
import dev.langchain4j.store.embedding.filter.Filter;
import org.json.JSONArray;
import org.json.JSONObject;
+import org.mule.extension.mulechain.vectors.internal.model.BaseModel;
+import org.mule.extension.mulechain.vectors.internal.storage.BaseStorage;
import org.mule.extension.mulechain.vectors.internal.store.BaseStore;
import org.mule.runtime.extension.api.annotation.Alias;
import org.mule.runtime.extension.api.annotation.param.*;
@@ -44,14 +43,18 @@ public class EmbeddingOperations {
*/
@MediaType(value = APPLICATION_JSON, strict = false)
@Alias("Embedding-add-text-to-store")
- public InputStream addTextToStore(String storeName, String textToAdd, @Config Configuration configuration, @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams){
+ public InputStream addTextToStore(String storeName, String textToAdd, @Config Configuration configuration, @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams){
- EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams);
+ BaseModel baseModel = BaseModel.builder()
+ .configuration(configuration)
+ .embeddingModelParameters(modelParams)
+ .build();
+
+ EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel();
BaseStore baseStore = BaseStore.builder()
.storeName(storeName)
.configuration(configuration)
- .embeddingModel(embeddingModel)
.dimension(embeddingModel.dimension())
.build();
@@ -75,9 +78,14 @@ public InputStream addTextToStore(String storeName, String textToAdd, @Config Co
*/
@MediaType(value = APPLICATION_JSON, strict = false)
@Alias("Embedding-generate-from-text")
- public InputStream generateEmbedding(String textToAdd, @Config Configuration configuration, @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams){
+ public InputStream generateEmbedding(String textToAdd, @Config Configuration configuration, @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams){
+
+ BaseModel baseModel = BaseModel.builder()
+ .configuration(configuration)
+ .embeddingModelParameters(modelParams)
+ .build();
- EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams);
+ EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel();
TextSegment textSegment = TextSegment.from(textToAdd);
Embedding textEmbedding = embeddingModel.embed(textSegment).content();
@@ -100,43 +108,40 @@ public InputStream addFolderToStore(String storeName, String folderPath, @Config
@ParameterGroup(name = "Context") FileTypeParameters fileType,
@ParameterGroup(name = "Storage") StorageTypeParameters storageType,
int maxSegmentSizeInChars, int maxOverlapSizeInChars,
- @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams){
+ @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams){
EmbeddingOperationValidator.validateOperationType(
Constants.EMBEDDING_OPERATION_TYPE_STORE_METADATA,configuration.getVectorStore());
- EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams);
+ BaseModel baseModel = BaseModel.builder()
+ .configuration(configuration)
+ .embeddingModelParameters(modelParams)
+ .build();
+
+ EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel();
BaseStore baseStore = BaseStore.builder()
.storeName(storeName)
.configuration(configuration)
- .embeddingModel(embeddingModel)
.dimension(embeddingModel.dimension())
.build();
EmbeddingStore embeddingStore = baseStore.buildEmbeddingStore();
- EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder()
+ EmbeddingStoreIngestor embeddingStoreIngestor = EmbeddingStoreIngestor.builder()
.documentSplitter(DocumentSplitters.recursive(maxSegmentSizeInChars, maxOverlapSizeInChars))
.embeddingModel(embeddingModel)
.embeddingStore(embeddingStore)
.build();
- EmbeddingStoreIngestorHelper embeddingStoreIngestorHelper = new EmbeddingStoreIngestorHelper(ingestor, storeName);
+ BaseStorage baseStorage = BaseStorage.builder()
+ .storeName(storeName)
+ .configuration(configuration)
+ .storageType(storageType.getStorageType())
+ .embeddingStoreIngestor(embeddingStoreIngestor)
+ .build();
- JSONObject config = readConfigFile(configuration.getConfigFilePath());
- JSONObject jsonObject = new JSONObject();
- System.out.println("Storage Type: " + storageType.getStorageType());
- if (storageType.getStorageType().equals("S3") && !fileType.getFileType().equals("url")) {
- JSONObject s3Json = config.getJSONObject("S3");
- String awsKey = s3Json.getString("AWS_ACCESS_KEY_ID");
- String awsSecret = s3Json.getString("AWS_SECRET_ACCESS_KEY");
- String awsRegion = s3Json.getString("AWS_DEFAULT_REGION");
- String s3Bucket = s3Json.getString("AWS_S3_BUCKET");
- jsonObject = embeddingStoreIngestorHelper.ingestFromS3Folder(folderPath, fileType, awsKey, awsSecret, awsRegion, s3Bucket);
- } else {
- jsonObject = embeddingStoreIngestorHelper.ingestFromLocalFolder(folderPath, fileType);
- }
+ JSONObject jsonObject = baseStorage.readAndIngestAllFiles(folderPath, fileType.getFileType());
return toInputStream(jsonObject.toString(), StandardCharsets.UTF_8);
}
@@ -151,49 +156,40 @@ public InputStream addFileEmbedding(String storeName, String contextPath, @Confi
@ParameterGroup(name = "Context") FileTypeParameters fileType,
@ParameterGroup(name = "Storage") StorageTypeParameters storageType,
int maxSegmentSizeInChars, int maxOverlapSizeInChars,
- @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams) {
+ @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams) {
EmbeddingOperationValidator.validateOperationType(
Constants.EMBEDDING_OPERATION_TYPE_STORE_METADATA,configuration.getVectorStore());
-
- EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams);
+
+ BaseModel baseModel = BaseModel.builder()
+ .configuration(configuration)
+ .embeddingModelParameters(modelParams)
+ .build();
+
+ EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel();
BaseStore baseStore = BaseStore.builder()
.storeName(storeName)
.configuration(configuration)
- .embeddingModel(embeddingModel)
.dimension(embeddingModel.dimension())
.build();
EmbeddingStore embeddingStore = baseStore.buildEmbeddingStore();
- EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder()
+ EmbeddingStoreIngestor embeddingStoreIngestor = EmbeddingStoreIngestor.builder()
.documentSplitter(DocumentSplitters.recursive(maxSegmentSizeInChars, maxOverlapSizeInChars))
.embeddingModel(embeddingModel)
.embeddingStore(embeddingStore)
.build();
- EmbeddingStoreIngestorHelper embeddingStoreIngestorHelper = new EmbeddingStoreIngestorHelper(ingestor, storeName);
+ BaseStorage baseStorage = BaseStorage.builder()
+ .storeName(storeName)
+ .configuration(configuration)
+ .storageType(storageType.getStorageType())
+ .embeddingStoreIngestor(embeddingStoreIngestor)
+ .build();
- JSONObject config = readConfigFile(configuration.getConfigFilePath());
- JSONObject jsonObject = new JSONObject();
- System.out.println("Storage Type: " + storageType.getStorageType());
- if (storageType.getStorageType().equals("S3") && !fileType.getFileType().equals("url")) {
- JSONObject s3Json = config.getJSONObject("S3");
- String awsKey = s3Json.getString("AWS_ACCESS_KEY_ID");
- String awsSecret = s3Json.getString("AWS_SECRET_ACCESS_KEY");
- String awsRegion = s3Json.getString("AWS_DEFAULT_REGION");
- String s3Bucket = s3Json.getString("AWS_S3_BUCKET");
- jsonObject = embeddingStoreIngestorHelper.ingestFromS3File(contextPath, fileType, awsKey, awsSecret, awsRegion, s3Bucket);
- } else if (storageType.getStorageType().equals("AZURE_BLOB") && !fileType.getFileType().equals("url")) {
- JSONObject azJson = config.getJSONObject("AZURE_BLOB");
- String azureName = azJson.getString("AZURE_BLOB_ACCOUNT_NAME");
- String azureKey = azJson.getString("AZURE_BLOB_ACCOUNT_KEY");
- String[] parts = contextPath.split("/", 2);
- jsonObject = embeddingStoreIngestorHelper.ingestFromAZFile(parts[0], parts[1], fileType, azureName, azureKey);
- } else {
- jsonObject = embeddingStoreIngestorHelper.ingestFromLocalFile(contextPath, fileType);
- }
+ JSONObject jsonObject = baseStorage.readAndIngestFile(contextPath, fileType.getFileType());
return toInputStream(jsonObject.toString(), StandardCharsets.UTF_8);
}
@@ -205,18 +201,22 @@ public InputStream addFileEmbedding(String storeName, String contextPath, @Confi
@Alias("EMBEDDING-query-from-store")
public InputStream queryFromEmbedding(String storeName, String question, Number maxResults, Double minScore,
@Config Configuration configuration,
- @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams) {
+ @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams) {
int maximumResults = (int) maxResults;
if (minScore == null) { //|| minScore == 0) {
minScore = Constants.EMBEDDING_SEARCH_REQUEST_DEFAULT_MIN_SCORE;
}
- EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams);
+ BaseModel baseModel = BaseModel.builder()
+ .configuration(configuration)
+ .embeddingModelParameters(modelParams)
+ .build();
+
+ EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel();
BaseStore baseStore = BaseStore.builder()
.storeName(storeName)
.configuration(configuration)
- .embeddingModel(embeddingModel)
.dimension(embeddingModel.dimension())
.build();
@@ -244,7 +244,6 @@ public InputStream queryFromEmbedding(String storeName, String question, Number
JSONArray sources = new JSONArray();
JSONObject contentObject;
- String fullPath;
for (EmbeddingMatch match : embeddingMatches) {
Metadata matchMetadata = match.embedded().metadata();
@@ -280,7 +279,7 @@ public InputStream queryFromEmbedding(String storeName, String question, Number
public InputStream queryByFilterFromEmbedding(String storeName, String question, Number maxResults, Double minScore,
@Config Configuration configuration,
@ParameterGroup(name = "Filter") MetadataFilterParameters.SearchFilterParameters searchFilterParams,
- @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams) {
+ @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams) {
EmbeddingOperationValidator.validateOperationType(
Constants.EMBEDDING_OPERATION_TYPE_FILTER_BY_METADATA,configuration.getVectorStore());
@@ -290,12 +289,16 @@ public InputStream queryByFilterFromEmbedding(String storeName, String question,
minScore = Constants.EMBEDDING_SEARCH_REQUEST_DEFAULT_MIN_SCORE;
}
- EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams);
+ BaseModel baseModel = BaseModel.builder()
+ .configuration(configuration)
+ .embeddingModelParameters(modelParams)
+ .build();
+
+ EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel();
BaseStore baseStore = BaseStore.builder()
.storeName(storeName)
.configuration(configuration)
- .embeddingModel(embeddingModel)
.dimension(embeddingModel.dimension())
.build();
@@ -380,7 +383,7 @@ public InputStream queryByFilterFromEmbedding(String storeName, String question,
public InputStream listSourcesFromStore(String storeName,
@Config Configuration configuration,
@ParameterGroup(name = "Querying Strategy") QueryParameters queryParams,
- @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams
+ @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams
) {
EmbeddingOperationValidator.validateOperationType(
@@ -408,19 +411,23 @@ public InputStream listSourcesFromStore(String storeName,
public InputStream removeEmbeddingsByFilter(String storeName,
@Config Configuration configuration,
@ParameterGroup(name = "Filter") MetadataFilterParameters.RemoveFilterParameters removeFilterParams,
- @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams) {
+ @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams) {
EmbeddingOperationValidator.validateOperationType(
Constants.EMBEDDING_OPERATION_TYPE_REMOVE_EMBEDDINGS,configuration.getVectorStore());
EmbeddingOperationValidator.validateOperationType(
Constants.EMBEDDING_OPERATION_TYPE_FILTER_BY_METADATA,configuration.getVectorStore());
- EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams);
+ BaseModel baseModel = BaseModel.builder()
+ .configuration(configuration)
+ .embeddingModelParameters(modelParams)
+ .build();
+
+ EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel();
BaseStore baseStore = BaseStore.builder()
.storeName(storeName)
.configuration(configuration)
- .embeddingModel(embeddingModel)
.dimension(embeddingModel.dimension())
.build();
@@ -429,6 +436,7 @@ public InputStream removeEmbeddingsByFilter(String storeName,
Filter filter = removeFilterParams.buildMetadataFilter();
embeddingStore.removeAll(filter);
+
JSONObject jsonObject = new JSONObject();
jsonObject.put("storeName", storeName);
jsonObject.put("filter", removeFilterParams.getFilterJSONObject());
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/BaseStorage.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/BaseStorage.java
index 9a6ce5e..f58d71b 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/BaseStorage.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/BaseStorage.java
@@ -1,17 +1,136 @@
package org.mule.extension.mulechain.vectors.internal.storage;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.FileTypeParameters;
+import org.json.JSONObject;
+import org.mule.extension.mulechain.vectors.internal.config.Configuration;
+import org.mule.extension.mulechain.vectors.internal.constant.Constants;
+import org.mule.extension.mulechain.vectors.internal.storage.azureblob.AzureBlobStorage;
+import org.mule.extension.mulechain.vectors.internal.storage.local.LocalStorage;
+import org.mule.extension.mulechain.vectors.internal.storage.s3.AWSS3Storage;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public abstract class BaseStorage {
- protected EmbeddingStoreIngestor ingestor;
+ private static final Logger LOGGER = LoggerFactory.getLogger(BaseStorage.class);
- public BaseStorage(EmbeddingStoreIngestor ingestor) {
- this.ingestor = ingestor;
+ protected String storeName;
+ protected Configuration configuration;
+ protected EmbeddingStoreIngestor embeddingStoreIngestor;
+
+ public BaseStorage(Configuration configuration, String storeName, EmbeddingStoreIngestor embeddingStoreIngestor) {
+
+ this.storeName = storeName;
+ this.configuration = configuration;
+ this.embeddingStoreIngestor = embeddingStoreIngestor;
+ }
+
+ public JSONObject readAndIngestAllFiles(String contextPath, String fileType) {
+
+ throw new UnsupportedOperationException("This method should be overridden by subclasses");
+ }
+
+ public JSONObject readAndIngestFile(String contextPath, String fileType) {
+
+ throw new UnsupportedOperationException("This method should be overridden by subclasses");
+ }
+
+ /**
+ * Creates a JSONObject representing the ingestion status.
+ *
+ * @param fileType the type of the ingested file.
+ * @param contextPath the path of the ingested file or folder.
+ * @return a JSONObject containing ingestion status metadata.
+ */
+ protected JSONObject createFileIngestionStatusObject(String fileType, String contextPath) {
+
+ JSONObject jsonObject = new JSONObject();
+ jsonObject.put("fileType", fileType);
+ jsonObject.put("filePath", contextPath);
+ jsonObject.put("storeName", storeName);
+ jsonObject.put("status", "updated");
+ return jsonObject;
+ }
+
+ /**
+ * Creates a JSONObject representing the ingestion status of a folder or set of files.
+ *
+ * @param totalFiles the total number of files processed.
+ * @param contextPath the path of the processed folder.
+ * @return a JSONObject containing the ingestion status with file count, folder path, store name, and status.
+ */
+ protected JSONObject createFolderIngestionStatusObject(Long totalFiles, String contextPath) {
+
+ JSONObject jsonObject = new JSONObject();
+ jsonObject.put("filesCount", totalFiles);
+ jsonObject.put("folderPath", contextPath);
+ jsonObject.put("storeName", storeName);
+ jsonObject.put("status", "updated");
+ return jsonObject;
+ }
+
+ public static BaseStorage.Builder builder() {
+
+ return new BaseStorage.Builder();
}
- public abstract long readAllFiles(String contextPath, FileTypeParameters fileType);
+ public static class Builder {
- public abstract long readFile(String contextPath, FileTypeParameters fileType);
+ private String storeName;
+ private Configuration configuration;
+ private String storageType;
+ private EmbeddingStoreIngestor embeddingStoreIngestor;
+
+ public Builder() {
+
+ }
+
+ public BaseStorage.Builder storeName(String storeName) {
+ this.storeName = storeName;
+ return this;
+ }
+
+ public BaseStorage.Builder configuration(Configuration configuration) {
+ this.configuration = configuration;
+ return this;
+ }
+
+ public BaseStorage.Builder storageType(String storageType) {
+ this.storageType = storageType;
+ return this;
+ }
+
+ public BaseStorage.Builder embeddingStoreIngestor(EmbeddingStoreIngestor embeddingStoreIngestor) {
+ this.embeddingStoreIngestor = embeddingStoreIngestor;
+ return this;
+ }
+
+ public BaseStorage build() {
+
+ BaseStorage baseStorage;
+
+ LOGGER.debug("Storage Type: " + storageType);
+ switch (storageType) {
+
+ case Constants.STORAGE_TYPE_LOCAL:
+
+ baseStorage = new LocalStorage(configuration, storeName, embeddingStoreIngestor);
+ break;
+ case Constants.STORAGE_TYPE_S3:
+
+ baseStorage = new AWSS3Storage(configuration, storeName, embeddingStoreIngestor);
+ break;
+
+ case Constants.STORAGE_TYPE_AZURE_BLOB:
+
+ baseStorage = new AzureBlobStorage(configuration, storeName, embeddingStoreIngestor);
+ break;
+
+ default:
+ //throw new IllegalOperationException("Unsupported Vector Store: " + configuration.getVectorStore());
+ baseStorage = null;
+ }
+ return baseStorage;
+ }
+ }
}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azure/AzureBlobStorage.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azureblob/AzureBlobStorage.java
similarity index 50%
rename from src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azure/AzureBlobStorage.java
rename to src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azureblob/AzureBlobStorage.java
index e0e86a8..aadc5e3 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azure/AzureBlobStorage.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azureblob/AzureBlobStorage.java
@@ -1,4 +1,4 @@
-package org.mule.extension.mulechain.vectors.internal.storage.azure;
+package org.mule.extension.mulechain.vectors.internal.storage.azureblob;
import com.azure.storage.blob.BlobServiceClient;
import com.azure.storage.blob.BlobServiceClientBuilder;
@@ -9,49 +9,60 @@
import dev.langchain4j.data.document.DocumentParser;
import java.util.List;
+import org.json.JSONObject;
+import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.FileTypeParameters;
import dev.langchain4j.data.document.parser.TextDocumentParser;
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
import dev.langchain4j.data.document.Document;
+import org.mule.extension.mulechain.vectors.internal.storage.BaseStorage;
import org.mule.extension.mulechain.vectors.internal.util.DocumentUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class AzureBlobStorage {
+import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;
+
+public class AzureBlobStorage extends BaseStorage {
private static final Logger LOGGER = LoggerFactory.getLogger(AzureBlobStorage.class);
- private final AzureBlobStorageDocumentLoader loader;
+ private final String azureName;
+ private final String azureKey;
+
+ private StorageSharedKeyCredential getCredentials() {
+ return new StorageSharedKeyCredential(azureName, azureKey);
+ }
+
+ private AzureBlobStorageDocumentLoader loader;
+
+ private AzureBlobStorageDocumentLoader getLoader() {
- public AzureBlobStorage(String azureName, String azureKey) {
- StorageSharedKeyCredential credential = new StorageSharedKeyCredential(azureName, azureKey);
+ if(this.loader == null) {
- // Azure SDK client builders accept the credential as a parameter
- BlobServiceClient blobServiceClient = new BlobServiceClientBuilder()
+ // Azure SDK client builders accept the credential as a parameter
+ BlobServiceClient blobServiceClient = new BlobServiceClientBuilder()
.endpoint(String.format("https://%s.blob.core.windows.net/", azureName))
- .credential(credential)
+ .credential(getCredentials())
.buildClient();
- this.loader = new AzureBlobStorageDocumentLoader(blobServiceClient);
+ this.loader = new AzureBlobStorageDocumentLoader(blobServiceClient);
+ }
+ return this.loader;
}
- public static BlobServiceClient GetBlobServiceClientAccountKey(String accountName, String accountKey) {
- StorageSharedKeyCredential credential = new StorageSharedKeyCredential(accountName, accountKey);
+ public AzureBlobStorage(Configuration configuration, String storeName, EmbeddingStoreIngestor embeddingStoreIngestor) {
- // Azure SDK client builders accept the credential as a parameter
- BlobServiceClient blobServiceClient = new BlobServiceClientBuilder()
- .endpoint(String.format("https://%s.blob.core.windows.net/", accountName))
- .credential(credential)
- .buildClient();
-
- return blobServiceClient;
+ super(configuration, storeName, embeddingStoreIngestor);
+ JSONObject config = readConfigFile(configuration.getConfigFilePath());
+ assert config != null;
+ JSONObject storageConfig = config.getJSONObject("AZURE_BLOB");
+ this.azureName = storageConfig.getString("AZURE_BLOB_ACCOUNT_NAME");
+ this.azureKey = storageConfig.getString("AZURE_BLOB_ACCOUNT_KEY");
}
- public long readAllFiles(String containerName, EmbeddingStoreIngestor ingestor, FileTypeParameters fileType)
- {
+ public JSONObject readAndIngestAllFiles(String containerName, String fileType) {
DocumentParser parser = null;
- switch (fileType.getFileType()){
+ switch (fileType){
case Constants.FILE_TYPE_TEXT:
case Constants.FILE_TYPE_CRAWL:
parser = new TextDocumentParser();
@@ -60,10 +71,10 @@ public long readAllFiles(String containerName, EmbeddingStoreIngestor ingestor,
parser = new ApacheTikaDocumentParser();
break;
default:
- throw new IllegalArgumentException("Unsupported File Type: " + fileType.getFileType());
+ throw new IllegalArgumentException("Unsupported File Type: " + fileType);
}
- List documents = loader.loadDocuments(containerName, parser);
+ List documents = getLoader().loadDocuments(containerName, parser);
int fileCount = documents.size();
LOGGER.debug("Total number of files in '" + containerName + "': " + fileCount);
@@ -71,20 +82,25 @@ public long readAllFiles(String containerName, EmbeddingStoreIngestor ingestor,
for (Document document : documents) {
totalFiles += 1;
- if (fileType.getFileType().equals(Constants.FILE_TYPE_CRAWL)){
+ if (fileType.equals(Constants.FILE_TYPE_CRAWL)){
DocumentUtils.addMetadataToDocument(document);
}
LOGGER.debug("Ingesting File " + totalFiles + ": " + document.metadata().toMap().get("source"));
- ingestor.ingest(document);
+ embeddingStoreIngestor.ingest(document);
}
LOGGER.debug("Total number of files processed: " + totalFiles);
- return totalFiles;
+ return createFolderIngestionStatusObject(totalFiles, fileType);
}
- public void readFile(String containerName, String blobName, FileTypeParameters fileType, EmbeddingStoreIngestor ingestor) {
+ public JSONObject readAndIngestFile(String contextPath, String fileType) {
+
+ String[] parts = contextPath.split("/", 2);
+ String containerName = parts[0];
+ String blobName = parts[1];
+
DocumentParser parser = null;
- switch (fileType.getFileType()){
+ switch (fileType){
case Constants.FILE_TYPE_TEXT:
case Constants.FILE_TYPE_CRAWL:
parser = new TextDocumentParser();
@@ -93,16 +109,16 @@ public void readFile(String containerName, String blobName, FileTypeParameters f
parser = new ApacheTikaDocumentParser();
break;
default:
- throw new IllegalArgumentException("Unsupported File Type: " + fileType.getFileType());
+ throw new IllegalArgumentException("Unsupported File Type: " + fileType);
}
- Document document = loader.loadDocument(containerName, blobName, parser);
- System.out.println("Ready to add metadata: " + fileType.getFileType());
+ Document document = getLoader().loadDocument(containerName, blobName, parser);
+ System.out.println("Ready to add metadata: " + fileType);
- if (fileType.getFileType().equals(Constants.FILE_TYPE_CRAWL)){
+ if (fileType.equals(Constants.FILE_TYPE_CRAWL)){
DocumentUtils.addMetadataToDocument(document);
}
- ingestor.ingest(document);
-
+ embeddingStoreIngestor.ingest(document);
+ return createFileIngestionStatusObject(fileType, containerName);
}
}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/local/LocalStorage.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/local/LocalStorage.java
new file mode 100644
index 0000000..dd43321
--- /dev/null
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/local/LocalStorage.java
@@ -0,0 +1,129 @@
+package org.mule.extension.mulechain.vectors.internal.storage.local;
+
+import dev.langchain4j.data.document.Document;
+import dev.langchain4j.data.document.DocumentParser;
+import dev.langchain4j.data.document.loader.UrlDocumentLoader;
+import dev.langchain4j.data.document.parser.TextDocumentParser;
+import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
+import dev.langchain4j.data.document.transformer.jsoup.HtmlToTextDocumentTransformer;
+import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
+import org.json.JSONObject;
+import org.mule.extension.mulechain.vectors.internal.config.Configuration;
+import org.mule.extension.mulechain.vectors.internal.constant.Constants;
+import org.mule.extension.mulechain.vectors.internal.storage.BaseStorage;
+import org.mule.extension.mulechain.vectors.internal.storage.s3.AWSS3Storage;
+import org.mule.extension.mulechain.vectors.internal.util.DocumentUtils;
+import org.mule.extension.mulechain.vectors.internal.util.Utils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Stream;
+
+import static dev.langchain4j.data.document.loader.FileSystemDocumentLoader.loadDocument;
+import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;
+
+public class LocalStorage extends BaseStorage {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(LocalStorage.class);
+
+ public LocalStorage(Configuration configuration, String storeName, EmbeddingStoreIngestor embeddingStoreIngestor) {
+
+ super(configuration, storeName, embeddingStoreIngestor);
+ }
+
+ public JSONObject readAndIngestAllFiles(String folderPath, String fileType) {
+
+ long totalFiles = 0;
+ try (Stream paths = Files.walk(Paths.get(folderPath))) {
+ totalFiles = paths.filter(Files::isRegularFile).count();
+ } catch (IOException e) {
+ LOGGER.error(Arrays.toString(e.getStackTrace()));
+ }
+
+ LOGGER.info("Total number of files to process: " + totalFiles);
+ AtomicInteger fileCounter = new AtomicInteger(0);
+ try (Stream paths = Files.walk(Paths.get(folderPath))) {
+ paths.filter(Files::isRegularFile).forEach(path -> {
+ int currentFileCounter = fileCounter.incrementAndGet();
+ LOGGER.info("Processing file " + currentFileCounter + ": " + path.getFileName());
+
+ Document document;
+ switch (fileType) {
+ case Constants.FILE_TYPE_CRAWL:
+ document = loadDocument(path.toString(), new TextDocumentParser());
+ DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_CRAWL, path);
+ embeddingStoreIngestor.ingest(document);
+ break;
+ case Constants.FILE_TYPE_TEXT:
+ document = loadDocument(path.toString(), new TextDocumentParser());
+ DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_TEXT, path.getFileName().toString(), folderPath + path.getFileName());
+ embeddingStoreIngestor.ingest(document);
+ break;
+ case Constants.FILE_TYPE_ANY:
+ document = loadDocument(path.toString(), new ApacheTikaDocumentParser());
+ DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_ANY, path.getFileName().toString(),folderPath + path.getFileName());
+ embeddingStoreIngestor.ingest(document);
+ break;
+ default:
+ throw new IllegalArgumentException("Unsupported File Type: " + fileType);
+ }
+ });
+ } catch (IOException e) {
+ LOGGER.error(Arrays.toString(e.getStackTrace()));
+ }
+ LOGGER.info("Processing complete");
+ return createFolderIngestionStatusObject(totalFiles, fileType);
+ }
+
+ public JSONObject readAndIngestFile(String filePath, String fileType) {
+
+ Path path = Paths.get(filePath);
+
+ Document document;
+ switch (fileType) {
+ case Constants.FILE_TYPE_CRAWL:
+ document = loadDocument(path.toString(), new TextDocumentParser());
+ DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_CRAWL, path);
+ break;
+ case Constants.FILE_TYPE_TEXT:
+ document = loadDocument(path.toString(), new TextDocumentParser());
+ DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_TEXT, Utils.getFileNameFromPath(filePath), filePath);
+ break;
+ case Constants.FILE_TYPE_ANY:
+ document = loadDocument(path.toString(), new ApacheTikaDocumentParser());
+ DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_ANY, Utils.getFileNameFromPath(filePath), filePath);
+ break;
+ case Constants.FILE_TYPE_URL:
+ document = loadUrlDocument(filePath);
+ break;
+ default:
+ throw new IllegalArgumentException("Unsupported File Type: " + fileType);
+ }
+ embeddingStoreIngestor.ingest(document);
+ return createFileIngestionStatusObject(fileType, filePath);
+ }
+
+ private Document loadUrlDocument(String contextPath) {
+
+ Document document;
+ try {
+ URL url = new URL(contextPath);
+ Document htmlDocument = UrlDocumentLoader.load(url, new TextDocumentParser());
+ HtmlToTextDocumentTransformer transformer = new HtmlToTextDocumentTransformer(null, null, true);
+ document = transformer.transform(htmlDocument);
+ document.metadata().put(Constants.METADATA_KEY_URL, contextPath);
+ } catch (MalformedURLException e) {
+ throw new RuntimeException("Invalid URL: " + contextPath, e);
+ }
+ return document;
+ }
+}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/s3/AWSS3Storage.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/s3/AWSS3Storage.java
index cef396b..85763cf 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/s3/AWSS3Storage.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/s3/AWSS3Storage.java
@@ -6,37 +6,62 @@
import dev.langchain4j.data.document.DocumentParser;
import java.util.List;
+import org.json.JSONObject;
+import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.FileTypeParameters;
import dev.langchain4j.data.document.parser.TextDocumentParser;
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
import dev.langchain4j.data.document.Document;
+import org.mule.extension.mulechain.vectors.internal.storage.BaseStorage;
import org.mule.extension.mulechain.vectors.internal.util.DocumentUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static dev.langchain4j.data.document.loader.FileSystemDocumentLoader.loadDocument;
+import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile;
-public class AWSS3Storage {
+public class AWSS3Storage extends BaseStorage {
private static final Logger LOGGER = LoggerFactory.getLogger(AWSS3Storage.class);
- private final String bucketName;
- private final AmazonS3DocumentLoader loader;
+ private final String awsAccessKeyId;
+ private final String awsSecretAccessKey;
+ private final String awsRegion;
+ private final String awsS3Bucket;
- public AWSS3Storage(String bucketName, String awsKey, String awsSecret, String awsRegion) {
- this.bucketName = bucketName;
- AwsCredentials creds = new AwsCredentials(awsKey, awsSecret);
- this.loader = AmazonS3DocumentLoader.builder()
- .region(awsRegion)
- .awsCredentials(creds)
- .build();
+ private AwsCredentials getCredentials() {
+ return new AwsCredentials(awsAccessKeyId, awsSecretAccessKey);
}
- public long readAllFiles(String folderPath, EmbeddingStoreIngestor ingestor, FileTypeParameters fileType)
- {
+ private AmazonS3DocumentLoader loader;
+
+ private AmazonS3DocumentLoader getLoader() {
+
+ if(loader == null) {
+
+ loader = AmazonS3DocumentLoader.builder()
+ .region(awsRegion)
+ .awsCredentials(getCredentials())
+ .build();
+ }
+ return loader;
+ }
+
+ public AWSS3Storage(Configuration configuration, String storeName, EmbeddingStoreIngestor embeddingStoreIngestor) {
+
+ super(configuration, storeName, embeddingStoreIngestor);
+ JSONObject config = readConfigFile(configuration.getConfigFilePath());
+ assert config != null;
+ JSONObject storageConfig = config.getJSONObject("S3");
+ this.awsAccessKeyId = storageConfig.getString("AWS_ACCESS_KEY_ID");
+ this.awsSecretAccessKey = storageConfig.getString("AWS_SECRET_ACCESS_KEY");
+ this.awsRegion = storageConfig.getString("AWS_DEFAULT_REGION");
+ this.awsS3Bucket = storageConfig.getString("AWS_S3_BUCKET");
+ }
+
+ public JSONObject readAndIngestAllFiles(String folderPath, String fileType) {
+
DocumentParser parser = null;
- switch (fileType.getFileType()){
+ switch (fileType){
case Constants.FILE_TYPE_TEXT:
parser = new TextDocumentParser();
break;
@@ -44,26 +69,26 @@ public long readAllFiles(String folderPath, EmbeddingStoreIngestor ingestor, Fil
parser = new ApacheTikaDocumentParser();
break;
default:
- throw new IllegalArgumentException("Unsupported File Type: " + fileType.getFileType());
+ throw new IllegalArgumentException("Unsupported File Type: " + fileType);
}
- List documents = loader.loadDocuments(bucketName, folderPath, parser);
+ List documents = getLoader().loadDocuments(awsS3Bucket, folderPath, parser);
int fileCount = documents.size();
LOGGER.debug("Total number of files in '" + folderPath + "': " + fileCount);
long totalFiles = 0;
for (Document document : documents) {
- ingestor.ingest(document);
+ embeddingStoreIngestor.ingest(document);
totalFiles += 1;
LOGGER.debug("Ingesting File " + totalFiles + ": " + document.metadata().toMap().get("source"));
}
LOGGER.debug("Total number of files processed: " + totalFiles);
- return totalFiles;
+ return createFolderIngestionStatusObject(totalFiles, fileType);
}
- public void readFile(String key, FileTypeParameters fileType, EmbeddingStoreIngestor ingestor) {
+ public JSONObject readAndIngestFile(String key, String fileType) {
DocumentParser parser = null;
- switch (fileType.getFileType()){
+ switch (fileType){
case Constants.FILE_TYPE_TEXT:
case Constants.FILE_TYPE_CRAWL:
parser = new TextDocumentParser();
@@ -72,12 +97,13 @@ public void readFile(String key, FileTypeParameters fileType, EmbeddingStoreInge
parser = new ApacheTikaDocumentParser();
break;
default:
- throw new IllegalArgumentException("Unsupported File Type: " + fileType.getFileType());
+ throw new IllegalArgumentException("Unsupported File Type: " + fileType);
}
- Document document = loader.loadDocument(bucketName, key, parser);
- if (fileType.getFileType().equals(Constants.FILE_TYPE_CRAWL)){
+ Document document = getLoader().loadDocument(awsS3Bucket, key, parser);
+ if (fileType.equals(Constants.FILE_TYPE_CRAWL)){
DocumentUtils.addMetadataToDocument(document);
}
- ingestor.ingest(document);
+ embeddingStoreIngestor.ingest(document);
+ return createFileIngestionStatusObject(fileType, key);
}
}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java
index 93778f1..4959961 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java
@@ -38,15 +38,13 @@ public class BaseStore {
protected String storeName;
protected Configuration configuration;
protected QueryParameters queryParams;
- protected EmbeddingModel embeddingModel;
protected int dimension;
- public BaseStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) {
+ public BaseStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) {
this.storeName = storeName;
this.configuration = configuration;
this.queryParams = queryParams;
- this.embeddingModel = embeddingModel;
this.dimension = dimension;
}
@@ -205,7 +203,6 @@ public static class Builder {
private String storeName;
private Configuration configuration;
private QueryParameters queryParams;
- private EmbeddingModel embeddingModel;
private int dimension;
public Builder() {
@@ -245,17 +242,6 @@ public Builder queryParams(QueryParameters queryParams) {
return this;
}
- /**
- * Sets a pre-configured embedding model for the {@code BaseStore}.
- *
- * @param embeddingModel the embedding model to use.
- * @return the {@code Builder} instance, for method chaining.
- */
- public Builder embeddingModel(EmbeddingModel embeddingModel) {
- this.embeddingModel = embeddingModel;
- return this;
- }
-
public Builder dimension(int dimension) {
this.dimension = dimension;
return this;
@@ -274,55 +260,55 @@ public Builder dimension(int dimension) {
*/
public BaseStore build() {
- BaseStore embeddingStore;
+ BaseStore baseStore;
switch (configuration.getVectorStore()) {
case Constants.VECTOR_STORE_MILVUS:
- embeddingStore = new MilvusStore(storeName, configuration, queryParams, embeddingModel, dimension);
+ baseStore = new MilvusStore(storeName, configuration, queryParams, dimension);
break;
case Constants.VECTOR_STORE_PGVECTOR:
- embeddingStore = new PGVectorStore(storeName, configuration, queryParams, embeddingModel, dimension);
+ baseStore = new PGVectorStore(storeName, configuration, queryParams, dimension);
break;
case Constants.VECTOR_STORE_AI_SEARCH:
- embeddingStore = new AISearchStore(storeName, configuration, queryParams, embeddingModel, dimension);
+ baseStore = new AISearchStore(storeName, configuration, queryParams, dimension);
break;
case Constants.VECTOR_STORE_WEAVIATE:
- embeddingStore = new WeaviateStore(storeName, configuration, queryParams, embeddingModel, dimension);
+ baseStore = new WeaviateStore(storeName, configuration, queryParams, dimension);
break;
case Constants.VECTOR_STORE_CHROMA:
- embeddingStore = new ChromaStore(storeName, configuration, queryParams, embeddingModel, dimension);
+ baseStore = new ChromaStore(storeName, configuration, queryParams, dimension);
break;
case Constants.VECTOR_STORE_PINECONE:
- embeddingStore = new PineconeStore(storeName, configuration, queryParams, embeddingModel, dimension);
+ baseStore = new PineconeStore(storeName, configuration, queryParams, dimension);
break;
case Constants.VECTOR_STORE_ELASTICSEARCH:
- embeddingStore = new ElasticsearchStore(storeName, configuration, queryParams, embeddingModel, dimension);
+ baseStore = new ElasticsearchStore(storeName, configuration, queryParams, dimension);
break;
case Constants.VECTOR_STORE_OPENSEARCH:
- embeddingStore = new OpenSearchStore(storeName, configuration, queryParams, embeddingModel, dimension);
+ baseStore = new OpenSearchStore(storeName, configuration, queryParams, dimension);
break;
default:
//throw new IllegalOperationException("Unsupported Vector Store: " + configuration.getVectorStore());
- embeddingStore = null;
+ baseStore = null;
}
- return embeddingStore;
+ return baseStore;
}
}
}
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/aisearch/AISearchStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/aisearch/AISearchStore.java
index 0f62233..f172617 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/aisearch/AISearchStore.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/aisearch/AISearchStore.java
@@ -1,14 +1,12 @@
package org.mule.extension.mulechain.vectors.internal.store.aisearch;
import dev.langchain4j.data.segment.TextSegment;
-import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.azure.search.AzureAiSearchEmbeddingStore;
import org.json.JSONArray;
import org.json.JSONObject;
import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters;
import org.mule.extension.mulechain.vectors.internal.store.BaseStore;
import org.mule.extension.mulechain.vectors.internal.util.JsonUtils;
@@ -28,9 +26,9 @@ public class AISearchStore extends BaseStore {
private String apiKey;
private String url;
- public AISearchStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) {
+ public AISearchStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) {
- super(storeName, configuration, queryParams, embeddingModel, dimension);
+ super(storeName, configuration, queryParams, dimension);
JSONObject config = readConfigFile(configuration.getConfigFilePath());
JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_AI_SEARCH);
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java
index b1d45e0..d074bcd 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java
@@ -2,13 +2,11 @@
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.store.embedding.EmbeddingStore;
-import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.chroma.ChromaEmbeddingStore;
import org.json.JSONArray;
import org.json.JSONObject;
import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters;
import org.mule.extension.mulechain.vectors.internal.store.BaseStore;
import org.mule.extension.mulechain.vectors.internal.util.JsonUtils;
@@ -35,11 +33,10 @@ public class ChromaStore extends BaseStore {
* @param storeName the name of the vector store.
* @param configuration the configuration object containing necessary settings.
* @param queryParams parameters related to query configurations.
- * @param embeddingModel embedding model.
*/
- public ChromaStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) {
+ public ChromaStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) {
- super(storeName, configuration, queryParams, embeddingModel, dimension);
+ super(storeName, configuration, queryParams, dimension);
JSONObject config = JsonUtils.readConfigFile(configuration.getConfigFilePath());
JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_CHROMA);
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/elasticsearch/ElasticsearchStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/elasticsearch/ElasticsearchStore.java
index 03382ad..f021777 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/elasticsearch/ElasticsearchStore.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/elasticsearch/ElasticsearchStore.java
@@ -7,7 +7,6 @@
import org.json.JSONObject;
import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters;
import org.mule.extension.mulechain.vectors.internal.store.BaseStore;
import org.mule.extension.mulechain.vectors.internal.util.JsonUtils;
@@ -18,9 +17,9 @@ public class ElasticsearchStore extends BaseStore {
private String userName;
private String password;
- public ElasticsearchStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) {
+ public ElasticsearchStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) {
- super(storeName, configuration, queryParams, embeddingModel, dimension);
+ super(storeName, configuration, queryParams, dimension);
JSONObject config = JsonUtils.readConfigFile(configuration.getConfigFilePath());
JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_ELASTICSEARCH);
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/milvus/MilvusStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/milvus/MilvusStore.java
index c39a615..074efed 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/milvus/MilvusStore.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/milvus/MilvusStore.java
@@ -2,7 +2,6 @@
import com.google.gson.JsonObject;
import dev.langchain4j.data.segment.TextSegment;
-import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore;
import io.milvus.client.MilvusServiceClient;
@@ -14,7 +13,6 @@
import org.json.JSONObject;
import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters;
import org.mule.extension.mulechain.vectors.internal.store.BaseStore;
import org.mule.extension.mulechain.vectors.internal.util.JsonUtils;
@@ -30,9 +28,9 @@ public class MilvusStore extends BaseStore {
private String uri;
- public MilvusStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) {
+ public MilvusStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) {
- super(storeName, configuration, queryParams, embeddingModel, dimension);
+ super(storeName, configuration, queryParams, dimension);
JSONObject config = readConfigFile(configuration.getConfigFilePath());
JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_MILVUS);
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/opensearch/OpenSearchStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/opensearch/OpenSearchStore.java
index 5adcf67..cc34640 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/opensearch/OpenSearchStore.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/opensearch/OpenSearchStore.java
@@ -7,7 +7,6 @@
import org.json.JSONObject;
import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters;
import org.mule.extension.mulechain.vectors.internal.store.BaseStore;
import org.mule.extension.mulechain.vectors.internal.util.JsonUtils;
@@ -18,9 +17,9 @@ public class OpenSearchStore extends BaseStore {
private String userName;
private String password;
- public OpenSearchStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) {
+ public OpenSearchStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) {
- super(storeName, configuration, queryParams, embeddingModel, dimension);
+ super(storeName, configuration, queryParams, dimension);
JSONObject config = JsonUtils.readConfigFile(configuration.getConfigFilePath());
JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_OPENSEARCH);
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pgvector/PGVectorStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pgvector/PGVectorStore.java
index 902f1ba..3f10247 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pgvector/PGVectorStore.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pgvector/PGVectorStore.java
@@ -2,12 +2,10 @@
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.store.embedding.EmbeddingStore;
-import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
import org.json.JSONObject;
import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters;
import org.mule.extension.mulechain.vectors.internal.store.BaseStore;
import org.mule.extension.mulechain.vectors.internal.util.JsonUtils;
@@ -45,11 +43,10 @@ public class PGVectorStore extends BaseStore {
* @param storeName The name of the store.
* @param configuration The configuration for connecting to the store.
* @param queryParams Parameters related to query configurations.
- * @param embeddingModel Embedding model.
*/
- public PGVectorStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) {
+ public PGVectorStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) {
- super(storeName, configuration, queryParams, embeddingModel, dimension);
+ super(storeName, configuration, queryParams, dimension);
JSONObject config = readConfigFile(configuration.getConfigFilePath());
JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_PGVECTOR);
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pinecone/PineconeStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pinecone/PineconeStore.java
index fc8862f..29ca64a 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pinecone/PineconeStore.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pinecone/PineconeStore.java
@@ -1,14 +1,12 @@
package org.mule.extension.mulechain.vectors.internal.store.pinecone;
import dev.langchain4j.data.segment.TextSegment;
-import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.pinecone.PineconeEmbeddingStore;
import dev.langchain4j.store.embedding.pinecone.PineconeServerlessIndexConfig;
import org.json.JSONObject;
import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters;
import org.mule.extension.mulechain.vectors.internal.store.BaseStore;
import org.mule.extension.mulechain.vectors.internal.util.JsonUtils;
@@ -19,9 +17,9 @@ public class PineconeStore extends BaseStore {
private String cloud;
private String region;
- public PineconeStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) {
+ public PineconeStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) {
- super(storeName, configuration, queryParams, embeddingModel, dimension);
+ super(storeName, configuration, queryParams, dimension);
JSONObject config = JsonUtils.readConfigFile(configuration.getConfigFilePath());
JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_PINECONE);
diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/weviate/WeaviateStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/weviate/WeaviateStore.java
index 1c2b9ff..5738078 100644
--- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/weviate/WeaviateStore.java
+++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/weviate/WeaviateStore.java
@@ -1,13 +1,11 @@
package org.mule.extension.mulechain.vectors.internal.store.weviate;
import dev.langchain4j.data.segment.TextSegment;
-import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore;
import org.json.JSONObject;
import org.mule.extension.mulechain.vectors.internal.config.Configuration;
import org.mule.extension.mulechain.vectors.internal.constant.Constants;
-import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters;
import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters;
import org.mule.extension.mulechain.vectors.internal.store.BaseStore;
@@ -19,9 +17,9 @@ public class WeaviateStore extends BaseStore {
private String protocol;
private String apiKey;
- public WeaviateStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) {
+ public WeaviateStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) {
- super(storeName, configuration, queryParams, embeddingModel, dimension);
+ super(storeName, configuration, queryParams, dimension);
JSONObject config = readConfigFile(configuration.getConfigFilePath());
JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_WEAVIATE);