diff --git a/pom.xml b/pom.xml index 9bd1c50..522c3b1 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 com.mule.mulechain mulechain-vectors - 0.1.93-SNAPSHOT + 0.1.95-SNAPSHOT mule-extension MAC Vectors diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingStoreIngestorHelper.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingStoreIngestorHelper.java deleted file mode 100644 index a60bae5..0000000 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingStoreIngestorHelper.java +++ /dev/null @@ -1,265 +0,0 @@ -package org.mule.extension.mulechain.vectors.internal.helper; - -import dev.langchain4j.data.document.Document; -import dev.langchain4j.data.document.loader.UrlDocumentLoader; -import dev.langchain4j.data.document.parser.TextDocumentParser; -import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser; -import dev.langchain4j.data.document.transformer.jsoup.HtmlToTextDocumentTransformer; -import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; -import org.json.JSONObject; -import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.FileTypeParameters; -import org.mule.extension.mulechain.vectors.internal.storage.azure.AzureBlobStorage; -import org.mule.extension.mulechain.vectors.internal.storage.s3.AWSS3Storage; -import org.mule.extension.mulechain.vectors.internal.util.DocumentUtils; - -import org.mule.extension.mulechain.vectors.internal.util.Utils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Stream; - -import static dev.langchain4j.data.document.loader.FileSystemDocumentLoader.loadDocument; - -/** - * Helper class for ingesting documents into an embedding store from various sources, including local files, - * S3, and Azure storage containers. - */ -public class EmbeddingStoreIngestorHelper { - - private static final Logger LOGGER = LoggerFactory.getLogger(EmbeddingStoreIngestorHelper.class); - - private EmbeddingStoreIngestor ingestor; - private String storeName; - - /** - * Constructs a new EmbeddingStoreIngestorHelper. - * - * @param ingestor the embedding store ingestor used to store documents. - * @param storeName the name of the store where documents are being ingested. - */ - public EmbeddingStoreIngestorHelper(EmbeddingStoreIngestor ingestor, String storeName) { - - this.ingestor = ingestor; - this.storeName = storeName; - } - - /** - * Ingests documents from a local folder into the embedding store. - * - * @param folderPath the path of the folder containing documents to ingest. - * @param fileTypeParameters the type of files to ingest, as defined by FileTypeParameters. - * @return a JSONObject with ingestion status and metadata. - */ - public JSONObject ingestFromLocalFolder(String folderPath, FileTypeParameters fileTypeParameters) { - - long totalFiles = 0; - try (Stream paths = Files.walk(Paths.get(folderPath))) { - totalFiles = paths.filter(Files::isRegularFile).count(); - } catch (IOException e) { - e.printStackTrace(); - } - - LOGGER.info("Total number of files to process: " + totalFiles); - AtomicInteger fileCounter = new AtomicInteger(0); - try (Stream paths = Files.walk(Paths.get(folderPath))) { - paths.filter(Files::isRegularFile).forEach(path -> { - int currentFileCounter = fileCounter.incrementAndGet(); - LOGGER.info("Processing file " + currentFileCounter + ": " + path.getFileName()); - - Document document; - switch (fileTypeParameters.getFileType()) { - case Constants.FILE_TYPE_CRAWL: - document = loadDocument(path.toString(), new TextDocumentParser()); - DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_CRAWL, path); - ingestor.ingest(document); - break; - case Constants.FILE_TYPE_TEXT: - document = loadDocument(path.toString(), new TextDocumentParser()); - DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_TEXT, path.getFileName().toString(), folderPath + path.getFileName()); - ingestor.ingest(document); - break; - case Constants.FILE_TYPE_ANY: - document = loadDocument(path.toString(), new ApacheTikaDocumentParser()); - DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_ANY, path.getFileName().toString(),folderPath + path.getFileName()); - ingestor.ingest(document); - break; - default: - throw new IllegalArgumentException("Unsupported File Type: " + fileTypeParameters.getFileType()); - } - }); - } catch (IOException e) { - e.printStackTrace(); - } - LOGGER.info("Processing complete"); - - return createFolderIngestionStatusObject(totalFiles, folderPath); - } - - /** - * Ingests a single local file into the embedding store. - * - * @param filePath the path of the file to ingest. - * @param fileTypeParameters the type of file, as defined by FileTypeParameters. - * @return a JSONObject with ingestion status and metadata. - */ - public JSONObject ingestFromLocalFile(String filePath, FileTypeParameters fileTypeParameters) { - - Path path = Paths.get(filePath); - - Document document; - switch (fileTypeParameters.getFileType()) { - case Constants.FILE_TYPE_CRAWL: - document = loadDocument(path.toString(), new TextDocumentParser()); - DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_CRAWL, path); - break; - case Constants.FILE_TYPE_TEXT: - document = loadDocument(path.toString(), new TextDocumentParser()); - DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_TEXT, Utils.getFileNameFromPath(filePath), filePath); - break; - case Constants.FILE_TYPE_ANY: - document = loadDocument(path.toString(), new ApacheTikaDocumentParser()); - DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_ANY, Utils.getFileNameFromPath(filePath), filePath); - break; - case Constants.FILE_TYPE_URL: - document = loadUrlDocument(filePath); - break; - default: - throw new IllegalArgumentException("Unsupported File Type: " + fileTypeParameters.getFileType()); - } - ingestor.ingest(document); - return createFileIngestionStatusObject(fileTypeParameters.getFileType(), filePath); - } - - /** - * Ingests documents from an S3 folder into the embedding store. - * - * @param folderPath the path of the folder in S3. - * @param fileTypeParameters the type of files to ingest, as defined by FileTypeParameters. - * @param awsKey AWS access key. - * @param awsSecret AWS secret key. - * @param awsRegion AWS region. - * @param s3Bucket the name of the S3 bucket. - * @return a JSONObject with ingestion status and metadata. - */ - public JSONObject ingestFromS3Folder(String folderPath, FileTypeParameters fileTypeParameters, String awsKey, String awsSecret, String awsRegion, String s3Bucket) { - - AWSS3Storage awsS3Storage = new AWSS3Storage(s3Bucket, awsKey, awsSecret, awsRegion); - long totalFiles = awsS3Storage.readAllFiles(folderPath, ingestor, fileTypeParameters); - return createFolderIngestionStatusObject(totalFiles, folderPath); - } - - /** - * Ingests a single file from an S3 bucket into the embedding store. - * - * @param filePath the path of the file in the S3 bucket. - * @param fileTypeParameters the type of file to ingest, as defined by FileTypeParameters. - * @param awsKey AWS access key. - * @param awsSecret AWS secret key. - * @param awsRegion AWS region. - * @param s3Bucket the name of the S3 bucket. - * @return a JSONObject with ingestion status and metadata. - */ - public JSONObject ingestFromS3File(String filePath, FileTypeParameters fileTypeParameters, String awsKey, String awsSecret, String awsRegion, String s3Bucket) { - - AWSS3Storage awsS3Storage = new AWSS3Storage(s3Bucket, awsKey, awsSecret, awsRegion); - awsS3Storage.readFile(filePath, fileTypeParameters, ingestor); - return createFileIngestionStatusObject(fileTypeParameters.getFileType(), filePath); - } - - /** - * Ingests documents from an Azure storage container into the embedding store. - * - * @param containerName the name of the Azure container. - * @param fileType the type of files to ingest, as defined by FileTypeParameters. - * @param azureName the Azure storage account name. - * @param azureKey the Azure storage account key. - * @return a JSONObject with ingestion status and metadata. - */ - public JSONObject ingestFromAZContainer(String containerName, FileTypeParameters fileType, String azureName, String azureKey) { - - AzureBlobStorage azureBlobStorage = new AzureBlobStorage(azureName, azureKey); - long totalFiles = azureBlobStorage.readAllFiles(containerName, ingestor, fileType); - return createFolderIngestionStatusObject(totalFiles, containerName); - } - - /** - * Ingests a single file from an Azure storage container into the embedding store. - * - * @param containerName the name of the Azure container. - * @param blobName the name of the file in the Azure container. - * @param fileType the type of file to ingest, as defined by FileTypeParameters. - * @param azureName the Azure storage account name. - * @param azureKey the Azure storage account key. - * @return a JSONObject with ingestion status and metadata. - */ - public JSONObject ingestFromAZFile(String containerName, String blobName, FileTypeParameters fileType, String azureName, String azureKey) { - - AzureBlobStorage azureBlobStorage = new AzureBlobStorage(azureName, azureKey); - azureBlobStorage.readFile(containerName, blobName, fileType, ingestor); - return createFileIngestionStatusObject(fileType.getFileType(), containerName); - } - - /** - * Loads and transforms a document from a URL into a text document. - * - * @param contextPath the URL to load. - * @return the transformed document. - */ - private Document loadUrlDocument(String contextPath) { - - Document document; - try { - URL url = new URL(contextPath); - Document htmlDocument = UrlDocumentLoader.load(url, new TextDocumentParser()); - HtmlToTextDocumentTransformer transformer = new HtmlToTextDocumentTransformer(null, null, true); - document = transformer.transform(htmlDocument); - document.metadata().put(Constants.METADATA_KEY_URL, contextPath); - } catch (MalformedURLException e) { - throw new RuntimeException("Invalid URL: " + contextPath, e); - } - return document; - } - - /** - * Creates a JSONObject representing the ingestion status. - * - * @param fileType the type of the ingested file. - * @param folderPath the path of the ingested file or folder. - * @return a JSONObject containing ingestion status metadata. - */ - private JSONObject createFileIngestionStatusObject(String fileType, String folderPath) { - - JSONObject jsonObject = new JSONObject(); - jsonObject.put("fileType", fileType); - jsonObject.put("filePath", folderPath); - jsonObject.put("storeName", storeName); - jsonObject.put("status", "updated"); - return jsonObject; - } - - /** - * Creates a JSONObject representing the ingestion status of a folder or set of files. - * - * @param totalFiles the total number of files processed. - * @param folderPath the path of the processed folder. - * @return a JSONObject containing the ingestion status with file count, folder path, store name, and status. - */ - private JSONObject createFolderIngestionStatusObject(Long totalFiles, String folderPath) { - - JSONObject jsonObject = new JSONObject(); - jsonObject.put("filesCount", totalFiles); - jsonObject.put("folderPath", folderPath); - jsonObject.put("storeName", storeName); - jsonObject.put("status", "updated"); - return jsonObject; - } - -} diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/factory/EmbeddingModelFactory.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/factory/EmbeddingModelFactory.java deleted file mode 100644 index 97ff985..0000000 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/factory/EmbeddingModelFactory.java +++ /dev/null @@ -1,106 +0,0 @@ -package org.mule.extension.mulechain.vectors.internal.helper.factory; - -import dev.langchain4j.model.azure.AzureOpenAiEmbeddingModel; -import dev.langchain4j.model.embedding.EmbeddingModel; -import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel; -import dev.langchain4j.model.mistralai.MistralAiEmbeddingModel; -import dev.langchain4j.model.nomic.NomicEmbeddingModel; -import dev.langchain4j.model.openai.OpenAiEmbeddingModel; -import org.json.JSONObject; -import org.mule.extension.mulechain.vectors.internal.config.Configuration; -import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters; - -import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile; - -public class EmbeddingModelFactory { - - public static EmbeddingModel createModel(Configuration configuration, EmbeddingModelNameParameters modelParams) { - - EmbeddingModel model = null; - JSONObject config = readConfigFile(configuration.getConfigFilePath()); - JSONObject llmType; - String llmTypeKey; - String llmTypeEndpoint; - - switch (configuration.getEmbeddingModelService()) { - case Constants.EMBEDDING_MODEL_SERVICE_AZURE_OPENAI: - llmType = config.getJSONObject("AZURE_OPENAI"); - llmTypeKey = llmType.getString("AZURE_OPENAI_KEY"); - llmTypeEndpoint = llmType.getString("AZURE_OPENAI_ENDPOINT"); - model = createAzureOpenAiModel(llmTypeKey, llmTypeEndpoint, modelParams); - break; - - case Constants.EMBEDDING_MODEL_SERVICE_OPENAI: - llmType = config.getJSONObject("OPENAI"); - llmTypeKey = llmType.getString("OPENAI_API_KEY"); - model = createOpenAiModel(llmTypeKey, modelParams); - break; - - case Constants.EMBEDDING_MODEL_SERVICE_MISTRAL_AI: - llmType = config.getJSONObject("MISTRAL_AI"); - llmTypeKey = llmType.getString("MISTRAL_AI_API_KEY"); - model = createMistralAIModel(llmTypeKey, modelParams); - break; - - case Constants.EMBEDDING_MODEL_SERVICE_NOMIC: - llmType = config.getJSONObject("NOMIC"); - llmTypeKey = llmType.getString("NOMIC_API_KEY"); - model = createNomicModel(llmTypeKey, modelParams); - - break; - case Constants.EMBEDDING_MODEL_SERVICE_HUGGING_FACE: - llmType = config.getJSONObject("HUGGING_FACE"); - llmTypeKey = llmType.getString("HUGGING_FACE_API_KEY"); - model = createHuggingFaceModel(llmTypeKey, modelParams); - - break; - default: - throw new IllegalArgumentException("Unsupported Embedding Model: " + configuration.getEmbeddingModelService()); - } - return model; - } - - private static EmbeddingModel createAzureOpenAiModel(String llmTypeKey, String llmTypeEndpoint, EmbeddingModelNameParameters modelParams) { - System.out.println("Inside createAzureOpenAiModel"); - return AzureOpenAiEmbeddingModel.builder() - .apiKey(llmTypeKey) - .endpoint(llmTypeEndpoint) - .deploymentName(modelParams.getEmbeddingModelName()) - .build(); - } - - private static EmbeddingModel createOpenAiModel(String llmTypeKey, EmbeddingModelNameParameters modelParams) { - return OpenAiEmbeddingModel.builder() - .apiKey(llmTypeKey) - .modelName(modelParams.getEmbeddingModelName()) - .build(); - } - - private static EmbeddingModel createMistralAIModel(String llmTypeKey, EmbeddingModelNameParameters modelParams) { - return MistralAiEmbeddingModel.builder() - .apiKey(llmTypeKey) - .modelName(modelParams.getEmbeddingModelName()) - .build(); - } - - private static EmbeddingModel createNomicModel(String llmTypeKey, EmbeddingModelNameParameters modelParams) { - return NomicEmbeddingModel.builder() - //.baseUrl("https://api-atlas.nomic.ai/v1/") - .apiKey(llmTypeKey) - .modelName(modelParams.getEmbeddingModelName()) - //.taskType("clustering") - .maxRetries(2) - .logRequests(true) - .logResponses(true) - .build(); - } - - private static EmbeddingModel createHuggingFaceModel(String llmTypeKey, EmbeddingModelNameParameters modelParams) { - return HuggingFaceEmbeddingModel.builder() - .accessToken(llmTypeKey) - .modelId(modelParams.getEmbeddingModelName()) - .build(); - } - -} diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelNameParameters.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelParameters.java similarity index 93% rename from src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelNameParameters.java rename to src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelParameters.java index ce415ac..7a7d1c8 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelNameParameters.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/parameter/EmbeddingModelParameters.java @@ -6,7 +6,7 @@ import org.mule.runtime.extension.api.annotation.param.Parameter; import org.mule.runtime.extension.api.annotation.values.OfValues; -public class EmbeddingModelNameParameters { +public class EmbeddingModelParameters { @Parameter @Expression(ExpressionSupport.SUPPORTED) diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/BaseModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/BaseModel.java new file mode 100644 index 0000000..ae7c7bf --- /dev/null +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/BaseModel.java @@ -0,0 +1,91 @@ +package org.mule.extension.mulechain.vectors.internal.model; + +import dev.langchain4j.model.embedding.EmbeddingModel; +import org.mule.extension.mulechain.vectors.internal.config.Configuration; +import org.mule.extension.mulechain.vectors.internal.constant.Constants; +import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters; +import org.mule.extension.mulechain.vectors.internal.model.azureopenai.AzureOpenAIModel; +import org.mule.extension.mulechain.vectors.internal.model.huggingface.HuggingFaceModel; +import org.mule.extension.mulechain.vectors.internal.model.mistralai.MistralAIModel; +import org.mule.extension.mulechain.vectors.internal.model.nomic.NomicModel; +import org.mule.extension.mulechain.vectors.internal.model.openai.OpenAIModel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class BaseModel { + + private static final Logger LOGGER = LoggerFactory.getLogger(BaseModel.class); + + protected Configuration configuration; + protected EmbeddingModelParameters embeddingModelParameters; + + public BaseModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) { + + this.configuration = configuration; + this.embeddingModelParameters = embeddingModelParameters; + } + + public EmbeddingModel buildEmbeddingModel() { + + throw new UnsupportedOperationException("This method should be overridden by subclasses"); + } + + public static BaseModel.Builder builder() { + + return new BaseModel.Builder(); + } + + public static class Builder { + + private Configuration configuration; + private EmbeddingModelParameters embeddingModelParameters; + + public Builder() { + + } + + public BaseModel.Builder configuration(Configuration configuration) { + this.configuration = configuration; + return this; + } + + public BaseModel.Builder embeddingModelParameters(EmbeddingModelParameters embeddingModelParameters) { + this.embeddingModelParameters = embeddingModelParameters; + return this; + } + + public BaseModel build() { + + BaseModel baseModel; + + LOGGER.debug("Embedding Model Service: " + configuration.getEmbeddingModelService()); + switch (configuration.getEmbeddingModelService()) { + + case Constants.EMBEDDING_MODEL_SERVICE_AZURE_OPENAI: + baseModel = new AzureOpenAIModel(configuration, embeddingModelParameters); + break; + + case Constants.EMBEDDING_MODEL_SERVICE_OPENAI: + baseModel = new OpenAIModel(configuration, embeddingModelParameters); + break; + + case Constants.EMBEDDING_MODEL_SERVICE_MISTRAL_AI: + baseModel = new MistralAIModel(configuration, embeddingModelParameters); + break; + + case Constants.EMBEDDING_MODEL_SERVICE_NOMIC: + baseModel = new NomicModel(configuration, embeddingModelParameters); + break; + + case Constants.EMBEDDING_MODEL_SERVICE_HUGGING_FACE: + baseModel = new HuggingFaceModel(configuration, embeddingModelParameters); + break; + + default: + //throw new IllegalOperationException("Unsupported Vector Store: " + configuration.getVectorStore()); + baseModel = null; + } + return baseModel; + } + } +} diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/azureopenai/AzureOpenAIModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/azureopenai/AzureOpenAIModel.java new file mode 100644 index 0000000..5750bbe --- /dev/null +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/azureopenai/AzureOpenAIModel.java @@ -0,0 +1,35 @@ +package org.mule.extension.mulechain.vectors.internal.model.azureopenai; + +import dev.langchain4j.model.azure.AzureOpenAiEmbeddingModel; +import dev.langchain4j.model.embedding.EmbeddingModel; +import org.json.JSONObject; +import org.mule.extension.mulechain.vectors.internal.config.Configuration; +import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters; +import org.mule.extension.mulechain.vectors.internal.model.BaseModel; + +import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile; + +public class AzureOpenAIModel extends BaseModel { + + private final String apiKey; + private final String endpoint; + + public AzureOpenAIModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) { + + super(configuration,embeddingModelParameters); + JSONObject config = readConfigFile(configuration.getConfigFilePath()); + assert config != null; + JSONObject modelConfig = config.getJSONObject("AZURE_OPENAI"); + this.apiKey = modelConfig.getString("AZURE_OPENAI_KEY"); + this.endpoint = modelConfig.getString("AZURE_OPENAI_ENDPOINT"); + } + + public EmbeddingModel buildEmbeddingModel() { + + return AzureOpenAiEmbeddingModel.builder() + .apiKey(apiKey) + .endpoint(endpoint) + .deploymentName(embeddingModelParameters.getEmbeddingModelName()) + .build(); + } +} diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/huggingface/HuggingFaceModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/huggingface/HuggingFaceModel.java new file mode 100644 index 0000000..7223847 --- /dev/null +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/huggingface/HuggingFaceModel.java @@ -0,0 +1,32 @@ +package org.mule.extension.mulechain.vectors.internal.model.huggingface; + +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel; +import org.json.JSONObject; +import org.mule.extension.mulechain.vectors.internal.config.Configuration; +import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters; +import org.mule.extension.mulechain.vectors.internal.model.BaseModel; + +import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile; + +public class HuggingFaceModel extends BaseModel { + + private final String apiKey; + + public HuggingFaceModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) { + + super(configuration,embeddingModelParameters); + JSONObject config = readConfigFile(configuration.getConfigFilePath()); + assert config != null; + JSONObject modelConfig = config.getJSONObject("HUGGING_FACE"); + this.apiKey = modelConfig.getString("HUGGING_FACE_API_KEY"); + } + + public EmbeddingModel buildEmbeddingModel() { + + return HuggingFaceEmbeddingModel.builder() + .accessToken(apiKey) + .modelId(embeddingModelParameters.getEmbeddingModelName()) + .build(); + } +} diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/mistralai/MistralAIModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/mistralai/MistralAIModel.java new file mode 100644 index 0000000..7089f2f --- /dev/null +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/mistralai/MistralAIModel.java @@ -0,0 +1,32 @@ +package org.mule.extension.mulechain.vectors.internal.model.mistralai; + +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.mistralai.MistralAiEmbeddingModel; +import org.json.JSONObject; +import org.mule.extension.mulechain.vectors.internal.config.Configuration; +import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters; +import org.mule.extension.mulechain.vectors.internal.model.BaseModel; + +import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile; + +public class MistralAIModel extends BaseModel { + + private final String apiKey; + + public MistralAIModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) { + + super(configuration,embeddingModelParameters); + JSONObject config = readConfigFile(configuration.getConfigFilePath()); + assert config != null; + JSONObject modelConfig = config.getJSONObject("MISTRAL_AI"); + this.apiKey = modelConfig.getString("MISTRAL_AI_API_KEY"); + } + + public EmbeddingModel buildEmbeddingModel() { + + return MistralAiEmbeddingModel.builder() + .apiKey(apiKey) + .modelName(embeddingModelParameters.getEmbeddingModelName()) + .build(); + } +} diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/nomic/NomicModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/nomic/NomicModel.java new file mode 100644 index 0000000..b00d61b --- /dev/null +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/nomic/NomicModel.java @@ -0,0 +1,37 @@ +package org.mule.extension.mulechain.vectors.internal.model.nomic; + +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.nomic.NomicEmbeddingModel; +import org.json.JSONObject; +import org.mule.extension.mulechain.vectors.internal.config.Configuration; +import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters; +import org.mule.extension.mulechain.vectors.internal.model.BaseModel; + +import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile; + +public class NomicModel extends BaseModel { + + private final String apiKey; + + public NomicModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) { + + super(configuration,embeddingModelParameters); + JSONObject config = readConfigFile(configuration.getConfigFilePath()); + assert config != null; + JSONObject modelConfig = config.getJSONObject("NOMIC"); + this.apiKey = modelConfig.getString("NOMIC_API_KEY"); + } + + public EmbeddingModel buildEmbeddingModel() { + + return NomicEmbeddingModel.builder() + //.baseUrl("https://api-atlas.nomic.ai/v1/") + .apiKey(apiKey) + .modelName(embeddingModelParameters.getEmbeddingModelName()) + //.taskType("clustering") + .maxRetries(2) + .logRequests(true) + .logResponses(true) + .build(); + } +} diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/model/openai/OpenAIModel.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/openai/OpenAIModel.java new file mode 100644 index 0000000..43d10e9 --- /dev/null +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/model/openai/OpenAIModel.java @@ -0,0 +1,32 @@ +package org.mule.extension.mulechain.vectors.internal.model.openai; + +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.openai.OpenAiEmbeddingModel; +import org.json.JSONObject; +import org.mule.extension.mulechain.vectors.internal.config.Configuration; +import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelParameters; +import org.mule.extension.mulechain.vectors.internal.model.BaseModel; + +import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile; + +public class OpenAIModel extends BaseModel { + + private final String apiKey; + + public OpenAIModel(Configuration configuration, EmbeddingModelParameters embeddingModelParameters) { + + super(configuration,embeddingModelParameters); + JSONObject config = readConfigFile(configuration.getConfigFilePath()); + assert config != null; + JSONObject modelConfig = config.getJSONObject("OPENAI"); + this.apiKey = modelConfig.getString("OPENAI_API_KEY"); + } + + public EmbeddingModel buildEmbeddingModel() { + + return OpenAiEmbeddingModel.builder() + .apiKey(apiKey) + .modelName(embeddingModelParameters.getEmbeddingModelName()) + .build(); + } +} diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/operation/EmbeddingOperations.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/operation/EmbeddingOperations.java index a84bdff..664d9ea 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/operation/EmbeddingOperations.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/operation/EmbeddingOperations.java @@ -1,7 +1,6 @@ package org.mule.extension.mulechain.vectors.internal.operation; import static org.apache.commons.io.IOUtils.toInputStream; -import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile; import static org.mule.runtime.extension.api.annotation.param.MediaType.APPLICATION_JSON; import java.io.InputStream; @@ -10,14 +9,14 @@ import org.mule.extension.mulechain.vectors.internal.constant.Constants; import org.mule.extension.mulechain.vectors.internal.helper.EmbeddingOperationValidator; -import org.mule.extension.mulechain.vectors.internal.helper.EmbeddingStoreIngestorHelper; -import org.mule.extension.mulechain.vectors.internal.helper.factory.EmbeddingModelFactory; import org.mule.extension.mulechain.vectors.internal.helper.parameter.*; import org.mule.extension.mulechain.vectors.internal.config.Configuration; import dev.langchain4j.store.embedding.*; import dev.langchain4j.store.embedding.filter.Filter; import org.json.JSONArray; import org.json.JSONObject; +import org.mule.extension.mulechain.vectors.internal.model.BaseModel; +import org.mule.extension.mulechain.vectors.internal.storage.BaseStorage; import org.mule.extension.mulechain.vectors.internal.store.BaseStore; import org.mule.runtime.extension.api.annotation.Alias; import org.mule.runtime.extension.api.annotation.param.*; @@ -44,14 +43,18 @@ public class EmbeddingOperations { */ @MediaType(value = APPLICATION_JSON, strict = false) @Alias("Embedding-add-text-to-store") - public InputStream addTextToStore(String storeName, String textToAdd, @Config Configuration configuration, @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams){ + public InputStream addTextToStore(String storeName, String textToAdd, @Config Configuration configuration, @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams){ - EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams); + BaseModel baseModel = BaseModel.builder() + .configuration(configuration) + .embeddingModelParameters(modelParams) + .build(); + + EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel(); BaseStore baseStore = BaseStore.builder() .storeName(storeName) .configuration(configuration) - .embeddingModel(embeddingModel) .dimension(embeddingModel.dimension()) .build(); @@ -75,9 +78,14 @@ public InputStream addTextToStore(String storeName, String textToAdd, @Config Co */ @MediaType(value = APPLICATION_JSON, strict = false) @Alias("Embedding-generate-from-text") - public InputStream generateEmbedding(String textToAdd, @Config Configuration configuration, @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams){ + public InputStream generateEmbedding(String textToAdd, @Config Configuration configuration, @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams){ + + BaseModel baseModel = BaseModel.builder() + .configuration(configuration) + .embeddingModelParameters(modelParams) + .build(); - EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams); + EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel(); TextSegment textSegment = TextSegment.from(textToAdd); Embedding textEmbedding = embeddingModel.embed(textSegment).content(); @@ -100,43 +108,40 @@ public InputStream addFolderToStore(String storeName, String folderPath, @Config @ParameterGroup(name = "Context") FileTypeParameters fileType, @ParameterGroup(name = "Storage") StorageTypeParameters storageType, int maxSegmentSizeInChars, int maxOverlapSizeInChars, - @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams){ + @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams){ EmbeddingOperationValidator.validateOperationType( Constants.EMBEDDING_OPERATION_TYPE_STORE_METADATA,configuration.getVectorStore()); - EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams); + BaseModel baseModel = BaseModel.builder() + .configuration(configuration) + .embeddingModelParameters(modelParams) + .build(); + + EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel(); BaseStore baseStore = BaseStore.builder() .storeName(storeName) .configuration(configuration) - .embeddingModel(embeddingModel) .dimension(embeddingModel.dimension()) .build(); EmbeddingStore embeddingStore = baseStore.buildEmbeddingStore(); - EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder() + EmbeddingStoreIngestor embeddingStoreIngestor = EmbeddingStoreIngestor.builder() .documentSplitter(DocumentSplitters.recursive(maxSegmentSizeInChars, maxOverlapSizeInChars)) .embeddingModel(embeddingModel) .embeddingStore(embeddingStore) .build(); - EmbeddingStoreIngestorHelper embeddingStoreIngestorHelper = new EmbeddingStoreIngestorHelper(ingestor, storeName); + BaseStorage baseStorage = BaseStorage.builder() + .storeName(storeName) + .configuration(configuration) + .storageType(storageType.getStorageType()) + .embeddingStoreIngestor(embeddingStoreIngestor) + .build(); - JSONObject config = readConfigFile(configuration.getConfigFilePath()); - JSONObject jsonObject = new JSONObject(); - System.out.println("Storage Type: " + storageType.getStorageType()); - if (storageType.getStorageType().equals("S3") && !fileType.getFileType().equals("url")) { - JSONObject s3Json = config.getJSONObject("S3"); - String awsKey = s3Json.getString("AWS_ACCESS_KEY_ID"); - String awsSecret = s3Json.getString("AWS_SECRET_ACCESS_KEY"); - String awsRegion = s3Json.getString("AWS_DEFAULT_REGION"); - String s3Bucket = s3Json.getString("AWS_S3_BUCKET"); - jsonObject = embeddingStoreIngestorHelper.ingestFromS3Folder(folderPath, fileType, awsKey, awsSecret, awsRegion, s3Bucket); - } else { - jsonObject = embeddingStoreIngestorHelper.ingestFromLocalFolder(folderPath, fileType); - } + JSONObject jsonObject = baseStorage.readAndIngestAllFiles(folderPath, fileType.getFileType()); return toInputStream(jsonObject.toString(), StandardCharsets.UTF_8); } @@ -151,49 +156,40 @@ public InputStream addFileEmbedding(String storeName, String contextPath, @Confi @ParameterGroup(name = "Context") FileTypeParameters fileType, @ParameterGroup(name = "Storage") StorageTypeParameters storageType, int maxSegmentSizeInChars, int maxOverlapSizeInChars, - @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams) { + @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams) { EmbeddingOperationValidator.validateOperationType( Constants.EMBEDDING_OPERATION_TYPE_STORE_METADATA,configuration.getVectorStore()); - - EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams); + + BaseModel baseModel = BaseModel.builder() + .configuration(configuration) + .embeddingModelParameters(modelParams) + .build(); + + EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel(); BaseStore baseStore = BaseStore.builder() .storeName(storeName) .configuration(configuration) - .embeddingModel(embeddingModel) .dimension(embeddingModel.dimension()) .build(); EmbeddingStore embeddingStore = baseStore.buildEmbeddingStore(); - EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder() + EmbeddingStoreIngestor embeddingStoreIngestor = EmbeddingStoreIngestor.builder() .documentSplitter(DocumentSplitters.recursive(maxSegmentSizeInChars, maxOverlapSizeInChars)) .embeddingModel(embeddingModel) .embeddingStore(embeddingStore) .build(); - EmbeddingStoreIngestorHelper embeddingStoreIngestorHelper = new EmbeddingStoreIngestorHelper(ingestor, storeName); + BaseStorage baseStorage = BaseStorage.builder() + .storeName(storeName) + .configuration(configuration) + .storageType(storageType.getStorageType()) + .embeddingStoreIngestor(embeddingStoreIngestor) + .build(); - JSONObject config = readConfigFile(configuration.getConfigFilePath()); - JSONObject jsonObject = new JSONObject(); - System.out.println("Storage Type: " + storageType.getStorageType()); - if (storageType.getStorageType().equals("S3") && !fileType.getFileType().equals("url")) { - JSONObject s3Json = config.getJSONObject("S3"); - String awsKey = s3Json.getString("AWS_ACCESS_KEY_ID"); - String awsSecret = s3Json.getString("AWS_SECRET_ACCESS_KEY"); - String awsRegion = s3Json.getString("AWS_DEFAULT_REGION"); - String s3Bucket = s3Json.getString("AWS_S3_BUCKET"); - jsonObject = embeddingStoreIngestorHelper.ingestFromS3File(contextPath, fileType, awsKey, awsSecret, awsRegion, s3Bucket); - } else if (storageType.getStorageType().equals("AZURE_BLOB") && !fileType.getFileType().equals("url")) { - JSONObject azJson = config.getJSONObject("AZURE_BLOB"); - String azureName = azJson.getString("AZURE_BLOB_ACCOUNT_NAME"); - String azureKey = azJson.getString("AZURE_BLOB_ACCOUNT_KEY"); - String[] parts = contextPath.split("/", 2); - jsonObject = embeddingStoreIngestorHelper.ingestFromAZFile(parts[0], parts[1], fileType, azureName, azureKey); - } else { - jsonObject = embeddingStoreIngestorHelper.ingestFromLocalFile(contextPath, fileType); - } + JSONObject jsonObject = baseStorage.readAndIngestFile(contextPath, fileType.getFileType()); return toInputStream(jsonObject.toString(), StandardCharsets.UTF_8); } @@ -205,18 +201,22 @@ public InputStream addFileEmbedding(String storeName, String contextPath, @Confi @Alias("EMBEDDING-query-from-store") public InputStream queryFromEmbedding(String storeName, String question, Number maxResults, Double minScore, @Config Configuration configuration, - @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams) { + @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams) { int maximumResults = (int) maxResults; if (minScore == null) { //|| minScore == 0) { minScore = Constants.EMBEDDING_SEARCH_REQUEST_DEFAULT_MIN_SCORE; } - EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams); + BaseModel baseModel = BaseModel.builder() + .configuration(configuration) + .embeddingModelParameters(modelParams) + .build(); + + EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel(); BaseStore baseStore = BaseStore.builder() .storeName(storeName) .configuration(configuration) - .embeddingModel(embeddingModel) .dimension(embeddingModel.dimension()) .build(); @@ -244,7 +244,6 @@ public InputStream queryFromEmbedding(String storeName, String question, Number JSONArray sources = new JSONArray(); JSONObject contentObject; - String fullPath; for (EmbeddingMatch match : embeddingMatches) { Metadata matchMetadata = match.embedded().metadata(); @@ -280,7 +279,7 @@ public InputStream queryFromEmbedding(String storeName, String question, Number public InputStream queryByFilterFromEmbedding(String storeName, String question, Number maxResults, Double minScore, @Config Configuration configuration, @ParameterGroup(name = "Filter") MetadataFilterParameters.SearchFilterParameters searchFilterParams, - @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams) { + @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams) { EmbeddingOperationValidator.validateOperationType( Constants.EMBEDDING_OPERATION_TYPE_FILTER_BY_METADATA,configuration.getVectorStore()); @@ -290,12 +289,16 @@ public InputStream queryByFilterFromEmbedding(String storeName, String question, minScore = Constants.EMBEDDING_SEARCH_REQUEST_DEFAULT_MIN_SCORE; } - EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams); + BaseModel baseModel = BaseModel.builder() + .configuration(configuration) + .embeddingModelParameters(modelParams) + .build(); + + EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel(); BaseStore baseStore = BaseStore.builder() .storeName(storeName) .configuration(configuration) - .embeddingModel(embeddingModel) .dimension(embeddingModel.dimension()) .build(); @@ -380,7 +383,7 @@ public InputStream queryByFilterFromEmbedding(String storeName, String question, public InputStream listSourcesFromStore(String storeName, @Config Configuration configuration, @ParameterGroup(name = "Querying Strategy") QueryParameters queryParams, - @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams + @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams ) { EmbeddingOperationValidator.validateOperationType( @@ -408,19 +411,23 @@ public InputStream listSourcesFromStore(String storeName, public InputStream removeEmbeddingsByFilter(String storeName, @Config Configuration configuration, @ParameterGroup(name = "Filter") MetadataFilterParameters.RemoveFilterParameters removeFilterParams, - @ParameterGroup(name = "Additional Properties") EmbeddingModelNameParameters modelParams) { + @ParameterGroup(name = "Additional Properties") EmbeddingModelParameters modelParams) { EmbeddingOperationValidator.validateOperationType( Constants.EMBEDDING_OPERATION_TYPE_REMOVE_EMBEDDINGS,configuration.getVectorStore()); EmbeddingOperationValidator.validateOperationType( Constants.EMBEDDING_OPERATION_TYPE_FILTER_BY_METADATA,configuration.getVectorStore()); - EmbeddingModel embeddingModel = EmbeddingModelFactory.createModel(configuration, modelParams); + BaseModel baseModel = BaseModel.builder() + .configuration(configuration) + .embeddingModelParameters(modelParams) + .build(); + + EmbeddingModel embeddingModel = baseModel.buildEmbeddingModel(); BaseStore baseStore = BaseStore.builder() .storeName(storeName) .configuration(configuration) - .embeddingModel(embeddingModel) .dimension(embeddingModel.dimension()) .build(); @@ -429,6 +436,7 @@ public InputStream removeEmbeddingsByFilter(String storeName, Filter filter = removeFilterParams.buildMetadataFilter(); embeddingStore.removeAll(filter); + JSONObject jsonObject = new JSONObject(); jsonObject.put("storeName", storeName); jsonObject.put("filter", removeFilterParams.getFilterJSONObject()); diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/BaseStorage.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/BaseStorage.java index 9a6ce5e..f58d71b 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/BaseStorage.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/BaseStorage.java @@ -1,17 +1,136 @@ package org.mule.extension.mulechain.vectors.internal.storage; import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.FileTypeParameters; +import org.json.JSONObject; +import org.mule.extension.mulechain.vectors.internal.config.Configuration; +import org.mule.extension.mulechain.vectors.internal.constant.Constants; +import org.mule.extension.mulechain.vectors.internal.storage.azureblob.AzureBlobStorage; +import org.mule.extension.mulechain.vectors.internal.storage.local.LocalStorage; +import org.mule.extension.mulechain.vectors.internal.storage.s3.AWSS3Storage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public abstract class BaseStorage { - protected EmbeddingStoreIngestor ingestor; + private static final Logger LOGGER = LoggerFactory.getLogger(BaseStorage.class); - public BaseStorage(EmbeddingStoreIngestor ingestor) { - this.ingestor = ingestor; + protected String storeName; + protected Configuration configuration; + protected EmbeddingStoreIngestor embeddingStoreIngestor; + + public BaseStorage(Configuration configuration, String storeName, EmbeddingStoreIngestor embeddingStoreIngestor) { + + this.storeName = storeName; + this.configuration = configuration; + this.embeddingStoreIngestor = embeddingStoreIngestor; + } + + public JSONObject readAndIngestAllFiles(String contextPath, String fileType) { + + throw new UnsupportedOperationException("This method should be overridden by subclasses"); + } + + public JSONObject readAndIngestFile(String contextPath, String fileType) { + + throw new UnsupportedOperationException("This method should be overridden by subclasses"); + } + + /** + * Creates a JSONObject representing the ingestion status. + * + * @param fileType the type of the ingested file. + * @param contextPath the path of the ingested file or folder. + * @return a JSONObject containing ingestion status metadata. + */ + protected JSONObject createFileIngestionStatusObject(String fileType, String contextPath) { + + JSONObject jsonObject = new JSONObject(); + jsonObject.put("fileType", fileType); + jsonObject.put("filePath", contextPath); + jsonObject.put("storeName", storeName); + jsonObject.put("status", "updated"); + return jsonObject; + } + + /** + * Creates a JSONObject representing the ingestion status of a folder or set of files. + * + * @param totalFiles the total number of files processed. + * @param contextPath the path of the processed folder. + * @return a JSONObject containing the ingestion status with file count, folder path, store name, and status. + */ + protected JSONObject createFolderIngestionStatusObject(Long totalFiles, String contextPath) { + + JSONObject jsonObject = new JSONObject(); + jsonObject.put("filesCount", totalFiles); + jsonObject.put("folderPath", contextPath); + jsonObject.put("storeName", storeName); + jsonObject.put("status", "updated"); + return jsonObject; + } + + public static BaseStorage.Builder builder() { + + return new BaseStorage.Builder(); } - public abstract long readAllFiles(String contextPath, FileTypeParameters fileType); + public static class Builder { - public abstract long readFile(String contextPath, FileTypeParameters fileType); + private String storeName; + private Configuration configuration; + private String storageType; + private EmbeddingStoreIngestor embeddingStoreIngestor; + + public Builder() { + + } + + public BaseStorage.Builder storeName(String storeName) { + this.storeName = storeName; + return this; + } + + public BaseStorage.Builder configuration(Configuration configuration) { + this.configuration = configuration; + return this; + } + + public BaseStorage.Builder storageType(String storageType) { + this.storageType = storageType; + return this; + } + + public BaseStorage.Builder embeddingStoreIngestor(EmbeddingStoreIngestor embeddingStoreIngestor) { + this.embeddingStoreIngestor = embeddingStoreIngestor; + return this; + } + + public BaseStorage build() { + + BaseStorage baseStorage; + + LOGGER.debug("Storage Type: " + storageType); + switch (storageType) { + + case Constants.STORAGE_TYPE_LOCAL: + + baseStorage = new LocalStorage(configuration, storeName, embeddingStoreIngestor); + break; + case Constants.STORAGE_TYPE_S3: + + baseStorage = new AWSS3Storage(configuration, storeName, embeddingStoreIngestor); + break; + + case Constants.STORAGE_TYPE_AZURE_BLOB: + + baseStorage = new AzureBlobStorage(configuration, storeName, embeddingStoreIngestor); + break; + + default: + //throw new IllegalOperationException("Unsupported Vector Store: " + configuration.getVectorStore()); + baseStorage = null; + } + return baseStorage; + } + } } diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azure/AzureBlobStorage.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azureblob/AzureBlobStorage.java similarity index 50% rename from src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azure/AzureBlobStorage.java rename to src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azureblob/AzureBlobStorage.java index e0e86a8..aadc5e3 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azure/AzureBlobStorage.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/azureblob/AzureBlobStorage.java @@ -1,4 +1,4 @@ -package org.mule.extension.mulechain.vectors.internal.storage.azure; +package org.mule.extension.mulechain.vectors.internal.storage.azureblob; import com.azure.storage.blob.BlobServiceClient; import com.azure.storage.blob.BlobServiceClientBuilder; @@ -9,49 +9,60 @@ import dev.langchain4j.data.document.DocumentParser; import java.util.List; +import org.json.JSONObject; +import org.mule.extension.mulechain.vectors.internal.config.Configuration; import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.FileTypeParameters; import dev.langchain4j.data.document.parser.TextDocumentParser; import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser; import dev.langchain4j.data.document.Document; +import org.mule.extension.mulechain.vectors.internal.storage.BaseStorage; import org.mule.extension.mulechain.vectors.internal.util.DocumentUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class AzureBlobStorage { +import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile; + +public class AzureBlobStorage extends BaseStorage { private static final Logger LOGGER = LoggerFactory.getLogger(AzureBlobStorage.class); - private final AzureBlobStorageDocumentLoader loader; + private final String azureName; + private final String azureKey; + + private StorageSharedKeyCredential getCredentials() { + return new StorageSharedKeyCredential(azureName, azureKey); + } + + private AzureBlobStorageDocumentLoader loader; + + private AzureBlobStorageDocumentLoader getLoader() { - public AzureBlobStorage(String azureName, String azureKey) { - StorageSharedKeyCredential credential = new StorageSharedKeyCredential(azureName, azureKey); + if(this.loader == null) { - // Azure SDK client builders accept the credential as a parameter - BlobServiceClient blobServiceClient = new BlobServiceClientBuilder() + // Azure SDK client builders accept the credential as a parameter + BlobServiceClient blobServiceClient = new BlobServiceClientBuilder() .endpoint(String.format("https://%s.blob.core.windows.net/", azureName)) - .credential(credential) + .credential(getCredentials()) .buildClient(); - this.loader = new AzureBlobStorageDocumentLoader(blobServiceClient); + this.loader = new AzureBlobStorageDocumentLoader(blobServiceClient); + } + return this.loader; } - public static BlobServiceClient GetBlobServiceClientAccountKey(String accountName, String accountKey) { - StorageSharedKeyCredential credential = new StorageSharedKeyCredential(accountName, accountKey); + public AzureBlobStorage(Configuration configuration, String storeName, EmbeddingStoreIngestor embeddingStoreIngestor) { - // Azure SDK client builders accept the credential as a parameter - BlobServiceClient blobServiceClient = new BlobServiceClientBuilder() - .endpoint(String.format("https://%s.blob.core.windows.net/", accountName)) - .credential(credential) - .buildClient(); - - return blobServiceClient; + super(configuration, storeName, embeddingStoreIngestor); + JSONObject config = readConfigFile(configuration.getConfigFilePath()); + assert config != null; + JSONObject storageConfig = config.getJSONObject("AZURE_BLOB"); + this.azureName = storageConfig.getString("AZURE_BLOB_ACCOUNT_NAME"); + this.azureKey = storageConfig.getString("AZURE_BLOB_ACCOUNT_KEY"); } - public long readAllFiles(String containerName, EmbeddingStoreIngestor ingestor, FileTypeParameters fileType) - { + public JSONObject readAndIngestAllFiles(String containerName, String fileType) { DocumentParser parser = null; - switch (fileType.getFileType()){ + switch (fileType){ case Constants.FILE_TYPE_TEXT: case Constants.FILE_TYPE_CRAWL: parser = new TextDocumentParser(); @@ -60,10 +71,10 @@ public long readAllFiles(String containerName, EmbeddingStoreIngestor ingestor, parser = new ApacheTikaDocumentParser(); break; default: - throw new IllegalArgumentException("Unsupported File Type: " + fileType.getFileType()); + throw new IllegalArgumentException("Unsupported File Type: " + fileType); } - List documents = loader.loadDocuments(containerName, parser); + List documents = getLoader().loadDocuments(containerName, parser); int fileCount = documents.size(); LOGGER.debug("Total number of files in '" + containerName + "': " + fileCount); @@ -71,20 +82,25 @@ public long readAllFiles(String containerName, EmbeddingStoreIngestor ingestor, for (Document document : documents) { totalFiles += 1; - if (fileType.getFileType().equals(Constants.FILE_TYPE_CRAWL)){ + if (fileType.equals(Constants.FILE_TYPE_CRAWL)){ DocumentUtils.addMetadataToDocument(document); } LOGGER.debug("Ingesting File " + totalFiles + ": " + document.metadata().toMap().get("source")); - ingestor.ingest(document); + embeddingStoreIngestor.ingest(document); } LOGGER.debug("Total number of files processed: " + totalFiles); - return totalFiles; + return createFolderIngestionStatusObject(totalFiles, fileType); } - public void readFile(String containerName, String blobName, FileTypeParameters fileType, EmbeddingStoreIngestor ingestor) { + public JSONObject readAndIngestFile(String contextPath, String fileType) { + + String[] parts = contextPath.split("/", 2); + String containerName = parts[0]; + String blobName = parts[1]; + DocumentParser parser = null; - switch (fileType.getFileType()){ + switch (fileType){ case Constants.FILE_TYPE_TEXT: case Constants.FILE_TYPE_CRAWL: parser = new TextDocumentParser(); @@ -93,16 +109,16 @@ public void readFile(String containerName, String blobName, FileTypeParameters f parser = new ApacheTikaDocumentParser(); break; default: - throw new IllegalArgumentException("Unsupported File Type: " + fileType.getFileType()); + throw new IllegalArgumentException("Unsupported File Type: " + fileType); } - Document document = loader.loadDocument(containerName, blobName, parser); - System.out.println("Ready to add metadata: " + fileType.getFileType()); + Document document = getLoader().loadDocument(containerName, blobName, parser); + System.out.println("Ready to add metadata: " + fileType); - if (fileType.getFileType().equals(Constants.FILE_TYPE_CRAWL)){ + if (fileType.equals(Constants.FILE_TYPE_CRAWL)){ DocumentUtils.addMetadataToDocument(document); } - ingestor.ingest(document); - + embeddingStoreIngestor.ingest(document); + return createFileIngestionStatusObject(fileType, containerName); } } diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/local/LocalStorage.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/local/LocalStorage.java new file mode 100644 index 0000000..dd43321 --- /dev/null +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/local/LocalStorage.java @@ -0,0 +1,129 @@ +package org.mule.extension.mulechain.vectors.internal.storage.local; + +import dev.langchain4j.data.document.Document; +import dev.langchain4j.data.document.DocumentParser; +import dev.langchain4j.data.document.loader.UrlDocumentLoader; +import dev.langchain4j.data.document.parser.TextDocumentParser; +import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser; +import dev.langchain4j.data.document.transformer.jsoup.HtmlToTextDocumentTransformer; +import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; +import org.json.JSONObject; +import org.mule.extension.mulechain.vectors.internal.config.Configuration; +import org.mule.extension.mulechain.vectors.internal.constant.Constants; +import org.mule.extension.mulechain.vectors.internal.storage.BaseStorage; +import org.mule.extension.mulechain.vectors.internal.storage.s3.AWSS3Storage; +import org.mule.extension.mulechain.vectors.internal.util.DocumentUtils; +import org.mule.extension.mulechain.vectors.internal.util.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; + +import static dev.langchain4j.data.document.loader.FileSystemDocumentLoader.loadDocument; +import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile; + +public class LocalStorage extends BaseStorage { + + private static final Logger LOGGER = LoggerFactory.getLogger(LocalStorage.class); + + public LocalStorage(Configuration configuration, String storeName, EmbeddingStoreIngestor embeddingStoreIngestor) { + + super(configuration, storeName, embeddingStoreIngestor); + } + + public JSONObject readAndIngestAllFiles(String folderPath, String fileType) { + + long totalFiles = 0; + try (Stream paths = Files.walk(Paths.get(folderPath))) { + totalFiles = paths.filter(Files::isRegularFile).count(); + } catch (IOException e) { + LOGGER.error(Arrays.toString(e.getStackTrace())); + } + + LOGGER.info("Total number of files to process: " + totalFiles); + AtomicInteger fileCounter = new AtomicInteger(0); + try (Stream paths = Files.walk(Paths.get(folderPath))) { + paths.filter(Files::isRegularFile).forEach(path -> { + int currentFileCounter = fileCounter.incrementAndGet(); + LOGGER.info("Processing file " + currentFileCounter + ": " + path.getFileName()); + + Document document; + switch (fileType) { + case Constants.FILE_TYPE_CRAWL: + document = loadDocument(path.toString(), new TextDocumentParser()); + DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_CRAWL, path); + embeddingStoreIngestor.ingest(document); + break; + case Constants.FILE_TYPE_TEXT: + document = loadDocument(path.toString(), new TextDocumentParser()); + DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_TEXT, path.getFileName().toString(), folderPath + path.getFileName()); + embeddingStoreIngestor.ingest(document); + break; + case Constants.FILE_TYPE_ANY: + document = loadDocument(path.toString(), new ApacheTikaDocumentParser()); + DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_ANY, path.getFileName().toString(),folderPath + path.getFileName()); + embeddingStoreIngestor.ingest(document); + break; + default: + throw new IllegalArgumentException("Unsupported File Type: " + fileType); + } + }); + } catch (IOException e) { + LOGGER.error(Arrays.toString(e.getStackTrace())); + } + LOGGER.info("Processing complete"); + return createFolderIngestionStatusObject(totalFiles, fileType); + } + + public JSONObject readAndIngestFile(String filePath, String fileType) { + + Path path = Paths.get(filePath); + + Document document; + switch (fileType) { + case Constants.FILE_TYPE_CRAWL: + document = loadDocument(path.toString(), new TextDocumentParser()); + DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_CRAWL, path); + break; + case Constants.FILE_TYPE_TEXT: + document = loadDocument(path.toString(), new TextDocumentParser()); + DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_TEXT, Utils.getFileNameFromPath(filePath), filePath); + break; + case Constants.FILE_TYPE_ANY: + document = loadDocument(path.toString(), new ApacheTikaDocumentParser()); + DocumentUtils.addMetadataToDocument(document, Constants.FILE_TYPE_ANY, Utils.getFileNameFromPath(filePath), filePath); + break; + case Constants.FILE_TYPE_URL: + document = loadUrlDocument(filePath); + break; + default: + throw new IllegalArgumentException("Unsupported File Type: " + fileType); + } + embeddingStoreIngestor.ingest(document); + return createFileIngestionStatusObject(fileType, filePath); + } + + private Document loadUrlDocument(String contextPath) { + + Document document; + try { + URL url = new URL(contextPath); + Document htmlDocument = UrlDocumentLoader.load(url, new TextDocumentParser()); + HtmlToTextDocumentTransformer transformer = new HtmlToTextDocumentTransformer(null, null, true); + document = transformer.transform(htmlDocument); + document.metadata().put(Constants.METADATA_KEY_URL, contextPath); + } catch (MalformedURLException e) { + throw new RuntimeException("Invalid URL: " + contextPath, e); + } + return document; + } +} diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/s3/AWSS3Storage.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/s3/AWSS3Storage.java index cef396b..85763cf 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/s3/AWSS3Storage.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/storage/s3/AWSS3Storage.java @@ -6,37 +6,62 @@ import dev.langchain4j.data.document.DocumentParser; import java.util.List; +import org.json.JSONObject; +import org.mule.extension.mulechain.vectors.internal.config.Configuration; import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.FileTypeParameters; import dev.langchain4j.data.document.parser.TextDocumentParser; import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser; import dev.langchain4j.data.document.Document; +import org.mule.extension.mulechain.vectors.internal.storage.BaseStorage; import org.mule.extension.mulechain.vectors.internal.util.DocumentUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static dev.langchain4j.data.document.loader.FileSystemDocumentLoader.loadDocument; +import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile; -public class AWSS3Storage { +public class AWSS3Storage extends BaseStorage { private static final Logger LOGGER = LoggerFactory.getLogger(AWSS3Storage.class); - private final String bucketName; - private final AmazonS3DocumentLoader loader; + private final String awsAccessKeyId; + private final String awsSecretAccessKey; + private final String awsRegion; + private final String awsS3Bucket; - public AWSS3Storage(String bucketName, String awsKey, String awsSecret, String awsRegion) { - this.bucketName = bucketName; - AwsCredentials creds = new AwsCredentials(awsKey, awsSecret); - this.loader = AmazonS3DocumentLoader.builder() - .region(awsRegion) - .awsCredentials(creds) - .build(); + private AwsCredentials getCredentials() { + return new AwsCredentials(awsAccessKeyId, awsSecretAccessKey); } - public long readAllFiles(String folderPath, EmbeddingStoreIngestor ingestor, FileTypeParameters fileType) - { + private AmazonS3DocumentLoader loader; + + private AmazonS3DocumentLoader getLoader() { + + if(loader == null) { + + loader = AmazonS3DocumentLoader.builder() + .region(awsRegion) + .awsCredentials(getCredentials()) + .build(); + } + return loader; + } + + public AWSS3Storage(Configuration configuration, String storeName, EmbeddingStoreIngestor embeddingStoreIngestor) { + + super(configuration, storeName, embeddingStoreIngestor); + JSONObject config = readConfigFile(configuration.getConfigFilePath()); + assert config != null; + JSONObject storageConfig = config.getJSONObject("S3"); + this.awsAccessKeyId = storageConfig.getString("AWS_ACCESS_KEY_ID"); + this.awsSecretAccessKey = storageConfig.getString("AWS_SECRET_ACCESS_KEY"); + this.awsRegion = storageConfig.getString("AWS_DEFAULT_REGION"); + this.awsS3Bucket = storageConfig.getString("AWS_S3_BUCKET"); + } + + public JSONObject readAndIngestAllFiles(String folderPath, String fileType) { + DocumentParser parser = null; - switch (fileType.getFileType()){ + switch (fileType){ case Constants.FILE_TYPE_TEXT: parser = new TextDocumentParser(); break; @@ -44,26 +69,26 @@ public long readAllFiles(String folderPath, EmbeddingStoreIngestor ingestor, Fil parser = new ApacheTikaDocumentParser(); break; default: - throw new IllegalArgumentException("Unsupported File Type: " + fileType.getFileType()); + throw new IllegalArgumentException("Unsupported File Type: " + fileType); } - List documents = loader.loadDocuments(bucketName, folderPath, parser); + List documents = getLoader().loadDocuments(awsS3Bucket, folderPath, parser); int fileCount = documents.size(); LOGGER.debug("Total number of files in '" + folderPath + "': " + fileCount); long totalFiles = 0; for (Document document : documents) { - ingestor.ingest(document); + embeddingStoreIngestor.ingest(document); totalFiles += 1; LOGGER.debug("Ingesting File " + totalFiles + ": " + document.metadata().toMap().get("source")); } LOGGER.debug("Total number of files processed: " + totalFiles); - return totalFiles; + return createFolderIngestionStatusObject(totalFiles, fileType); } - public void readFile(String key, FileTypeParameters fileType, EmbeddingStoreIngestor ingestor) { + public JSONObject readAndIngestFile(String key, String fileType) { DocumentParser parser = null; - switch (fileType.getFileType()){ + switch (fileType){ case Constants.FILE_TYPE_TEXT: case Constants.FILE_TYPE_CRAWL: parser = new TextDocumentParser(); @@ -72,12 +97,13 @@ public void readFile(String key, FileTypeParameters fileType, EmbeddingStoreInge parser = new ApacheTikaDocumentParser(); break; default: - throw new IllegalArgumentException("Unsupported File Type: " + fileType.getFileType()); + throw new IllegalArgumentException("Unsupported File Type: " + fileType); } - Document document = loader.loadDocument(bucketName, key, parser); - if (fileType.getFileType().equals(Constants.FILE_TYPE_CRAWL)){ + Document document = getLoader().loadDocument(awsS3Bucket, key, parser); + if (fileType.equals(Constants.FILE_TYPE_CRAWL)){ DocumentUtils.addMetadataToDocument(document); } - ingestor.ingest(document); + embeddingStoreIngestor.ingest(document); + return createFileIngestionStatusObject(fileType, key); } } diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java index 93778f1..4959961 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java @@ -38,15 +38,13 @@ public class BaseStore { protected String storeName; protected Configuration configuration; protected QueryParameters queryParams; - protected EmbeddingModel embeddingModel; protected int dimension; - public BaseStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) { + public BaseStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) { this.storeName = storeName; this.configuration = configuration; this.queryParams = queryParams; - this.embeddingModel = embeddingModel; this.dimension = dimension; } @@ -205,7 +203,6 @@ public static class Builder { private String storeName; private Configuration configuration; private QueryParameters queryParams; - private EmbeddingModel embeddingModel; private int dimension; public Builder() { @@ -245,17 +242,6 @@ public Builder queryParams(QueryParameters queryParams) { return this; } - /** - * Sets a pre-configured embedding model for the {@code BaseStore}. - * - * @param embeddingModel the embedding model to use. - * @return the {@code Builder} instance, for method chaining. - */ - public Builder embeddingModel(EmbeddingModel embeddingModel) { - this.embeddingModel = embeddingModel; - return this; - } - public Builder dimension(int dimension) { this.dimension = dimension; return this; @@ -274,55 +260,55 @@ public Builder dimension(int dimension) { */ public BaseStore build() { - BaseStore embeddingStore; + BaseStore baseStore; switch (configuration.getVectorStore()) { case Constants.VECTOR_STORE_MILVUS: - embeddingStore = new MilvusStore(storeName, configuration, queryParams, embeddingModel, dimension); + baseStore = new MilvusStore(storeName, configuration, queryParams, dimension); break; case Constants.VECTOR_STORE_PGVECTOR: - embeddingStore = new PGVectorStore(storeName, configuration, queryParams, embeddingModel, dimension); + baseStore = new PGVectorStore(storeName, configuration, queryParams, dimension); break; case Constants.VECTOR_STORE_AI_SEARCH: - embeddingStore = new AISearchStore(storeName, configuration, queryParams, embeddingModel, dimension); + baseStore = new AISearchStore(storeName, configuration, queryParams, dimension); break; case Constants.VECTOR_STORE_WEAVIATE: - embeddingStore = new WeaviateStore(storeName, configuration, queryParams, embeddingModel, dimension); + baseStore = new WeaviateStore(storeName, configuration, queryParams, dimension); break; case Constants.VECTOR_STORE_CHROMA: - embeddingStore = new ChromaStore(storeName, configuration, queryParams, embeddingModel, dimension); + baseStore = new ChromaStore(storeName, configuration, queryParams, dimension); break; case Constants.VECTOR_STORE_PINECONE: - embeddingStore = new PineconeStore(storeName, configuration, queryParams, embeddingModel, dimension); + baseStore = new PineconeStore(storeName, configuration, queryParams, dimension); break; case Constants.VECTOR_STORE_ELASTICSEARCH: - embeddingStore = new ElasticsearchStore(storeName, configuration, queryParams, embeddingModel, dimension); + baseStore = new ElasticsearchStore(storeName, configuration, queryParams, dimension); break; case Constants.VECTOR_STORE_OPENSEARCH: - embeddingStore = new OpenSearchStore(storeName, configuration, queryParams, embeddingModel, dimension); + baseStore = new OpenSearchStore(storeName, configuration, queryParams, dimension); break; default: //throw new IllegalOperationException("Unsupported Vector Store: " + configuration.getVectorStore()); - embeddingStore = null; + baseStore = null; } - return embeddingStore; + return baseStore; } } } diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/aisearch/AISearchStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/aisearch/AISearchStore.java index 0f62233..f172617 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/aisearch/AISearchStore.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/aisearch/AISearchStore.java @@ -1,14 +1,12 @@ package org.mule.extension.mulechain.vectors.internal.store.aisearch; import dev.langchain4j.data.segment.TextSegment; -import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.store.embedding.EmbeddingStore; import dev.langchain4j.store.embedding.azure.search.AzureAiSearchEmbeddingStore; import org.json.JSONArray; import org.json.JSONObject; import org.mule.extension.mulechain.vectors.internal.config.Configuration; import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters; import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters; import org.mule.extension.mulechain.vectors.internal.store.BaseStore; import org.mule.extension.mulechain.vectors.internal.util.JsonUtils; @@ -28,9 +26,9 @@ public class AISearchStore extends BaseStore { private String apiKey; private String url; - public AISearchStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) { + public AISearchStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) { - super(storeName, configuration, queryParams, embeddingModel, dimension); + super(storeName, configuration, queryParams, dimension); JSONObject config = readConfigFile(configuration.getConfigFilePath()); JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_AI_SEARCH); diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java index b1d45e0..d074bcd 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/chroma/ChromaStore.java @@ -2,13 +2,11 @@ import dev.langchain4j.data.segment.TextSegment; import dev.langchain4j.store.embedding.EmbeddingStore; -import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.store.embedding.chroma.ChromaEmbeddingStore; import org.json.JSONArray; import org.json.JSONObject; import org.mule.extension.mulechain.vectors.internal.config.Configuration; import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters; import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters; import org.mule.extension.mulechain.vectors.internal.store.BaseStore; import org.mule.extension.mulechain.vectors.internal.util.JsonUtils; @@ -35,11 +33,10 @@ public class ChromaStore extends BaseStore { * @param storeName the name of the vector store. * @param configuration the configuration object containing necessary settings. * @param queryParams parameters related to query configurations. - * @param embeddingModel embedding model. */ - public ChromaStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) { + public ChromaStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) { - super(storeName, configuration, queryParams, embeddingModel, dimension); + super(storeName, configuration, queryParams, dimension); JSONObject config = JsonUtils.readConfigFile(configuration.getConfigFilePath()); JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_CHROMA); diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/elasticsearch/ElasticsearchStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/elasticsearch/ElasticsearchStore.java index 03382ad..f021777 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/elasticsearch/ElasticsearchStore.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/elasticsearch/ElasticsearchStore.java @@ -7,7 +7,6 @@ import org.json.JSONObject; import org.mule.extension.mulechain.vectors.internal.config.Configuration; import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters; import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters; import org.mule.extension.mulechain.vectors.internal.store.BaseStore; import org.mule.extension.mulechain.vectors.internal.util.JsonUtils; @@ -18,9 +17,9 @@ public class ElasticsearchStore extends BaseStore { private String userName; private String password; - public ElasticsearchStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) { + public ElasticsearchStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) { - super(storeName, configuration, queryParams, embeddingModel, dimension); + super(storeName, configuration, queryParams, dimension); JSONObject config = JsonUtils.readConfigFile(configuration.getConfigFilePath()); JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_ELASTICSEARCH); diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/milvus/MilvusStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/milvus/MilvusStore.java index c39a615..074efed 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/milvus/MilvusStore.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/milvus/MilvusStore.java @@ -2,7 +2,6 @@ import com.google.gson.JsonObject; import dev.langchain4j.data.segment.TextSegment; -import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.store.embedding.EmbeddingStore; import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore; import io.milvus.client.MilvusServiceClient; @@ -14,7 +13,6 @@ import org.json.JSONObject; import org.mule.extension.mulechain.vectors.internal.config.Configuration; import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters; import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters; import org.mule.extension.mulechain.vectors.internal.store.BaseStore; import org.mule.extension.mulechain.vectors.internal.util.JsonUtils; @@ -30,9 +28,9 @@ public class MilvusStore extends BaseStore { private String uri; - public MilvusStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) { + public MilvusStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) { - super(storeName, configuration, queryParams, embeddingModel, dimension); + super(storeName, configuration, queryParams, dimension); JSONObject config = readConfigFile(configuration.getConfigFilePath()); JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_MILVUS); diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/opensearch/OpenSearchStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/opensearch/OpenSearchStore.java index 5adcf67..cc34640 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/opensearch/OpenSearchStore.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/opensearch/OpenSearchStore.java @@ -7,7 +7,6 @@ import org.json.JSONObject; import org.mule.extension.mulechain.vectors.internal.config.Configuration; import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters; import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters; import org.mule.extension.mulechain.vectors.internal.store.BaseStore; import org.mule.extension.mulechain.vectors.internal.util.JsonUtils; @@ -18,9 +17,9 @@ public class OpenSearchStore extends BaseStore { private String userName; private String password; - public OpenSearchStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) { + public OpenSearchStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) { - super(storeName, configuration, queryParams, embeddingModel, dimension); + super(storeName, configuration, queryParams, dimension); JSONObject config = JsonUtils.readConfigFile(configuration.getConfigFilePath()); JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_OPENSEARCH); diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pgvector/PGVectorStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pgvector/PGVectorStore.java index 902f1ba..3f10247 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pgvector/PGVectorStore.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pgvector/PGVectorStore.java @@ -2,12 +2,10 @@ import dev.langchain4j.data.segment.TextSegment; import dev.langchain4j.store.embedding.EmbeddingStore; -import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore; import org.json.JSONObject; import org.mule.extension.mulechain.vectors.internal.config.Configuration; import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters; import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters; import org.mule.extension.mulechain.vectors.internal.store.BaseStore; import org.mule.extension.mulechain.vectors.internal.util.JsonUtils; @@ -45,11 +43,10 @@ public class PGVectorStore extends BaseStore { * @param storeName The name of the store. * @param configuration The configuration for connecting to the store. * @param queryParams Parameters related to query configurations. - * @param embeddingModel Embedding model. */ - public PGVectorStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) { + public PGVectorStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) { - super(storeName, configuration, queryParams, embeddingModel, dimension); + super(storeName, configuration, queryParams, dimension); JSONObject config = readConfigFile(configuration.getConfigFilePath()); JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_PGVECTOR); diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pinecone/PineconeStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pinecone/PineconeStore.java index fc8862f..29ca64a 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pinecone/PineconeStore.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/pinecone/PineconeStore.java @@ -1,14 +1,12 @@ package org.mule.extension.mulechain.vectors.internal.store.pinecone; import dev.langchain4j.data.segment.TextSegment; -import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.store.embedding.EmbeddingStore; import dev.langchain4j.store.embedding.pinecone.PineconeEmbeddingStore; import dev.langchain4j.store.embedding.pinecone.PineconeServerlessIndexConfig; import org.json.JSONObject; import org.mule.extension.mulechain.vectors.internal.config.Configuration; import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters; import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters; import org.mule.extension.mulechain.vectors.internal.store.BaseStore; import org.mule.extension.mulechain.vectors.internal.util.JsonUtils; @@ -19,9 +17,9 @@ public class PineconeStore extends BaseStore { private String cloud; private String region; - public PineconeStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) { + public PineconeStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) { - super(storeName, configuration, queryParams, embeddingModel, dimension); + super(storeName, configuration, queryParams, dimension); JSONObject config = JsonUtils.readConfigFile(configuration.getConfigFilePath()); JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_PINECONE); diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/weviate/WeaviateStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/weviate/WeaviateStore.java index 1c2b9ff..5738078 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/weviate/WeaviateStore.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/weviate/WeaviateStore.java @@ -1,13 +1,11 @@ package org.mule.extension.mulechain.vectors.internal.store.weviate; import dev.langchain4j.data.segment.TextSegment; -import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.store.embedding.EmbeddingStore; import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore; import org.json.JSONObject; import org.mule.extension.mulechain.vectors.internal.config.Configuration; import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.EmbeddingModelNameParameters; import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters; import org.mule.extension.mulechain.vectors.internal.store.BaseStore; @@ -19,9 +17,9 @@ public class WeaviateStore extends BaseStore { private String protocol; private String apiKey; - public WeaviateStore(String storeName, Configuration configuration, QueryParameters queryParams, EmbeddingModel embeddingModel, int dimension) { + public WeaviateStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) { - super(storeName, configuration, queryParams, embeddingModel, dimension); + super(storeName, configuration, queryParams, dimension); JSONObject config = readConfigFile(configuration.getConfigFilePath()); JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_WEAVIATE);