diff --git a/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java b/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java index fcd4d95523..80c615cb10 100644 --- a/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java +++ b/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java @@ -34,6 +34,13 @@ public void validate(MLInput mlInput) { public RemoteInferenceInputDataSet process(MLInput mlInput) { TextDocsInputDataSet inputData = (TextDocsInputDataSet) mlInput.getInputDataset(); Map parametersMap = new HashMap<>(); + + /** + * Cohere multi-modal model expects either image or texts, not both. + * For image, customer can use this pre-process function. For texts, customer can use + * connector.pre_process.cohere.embedding + * Cohere expects An array of image data URIs for the model to embed. Maximum number of images per call is 1. + */ parametersMap.put("images", inputData.getDocs().getFirst()); return RemoteInferenceInputDataSet .builder()