diff --git a/build.gradle b/build.gradle index 45837ca39..e81d2b5e7 100644 --- a/build.gradle +++ b/build.gradle @@ -180,7 +180,6 @@ dependencies { // ZipArchive dependencies used for integration tests zipArchive group: 'org.opensearch.plugin', name:'opensearch-ml-plugin', version: "${opensearch_build}" - secureIntegTestPluginArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}" configurations.all { diff --git a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java index 8c12f7c43..71be18a65 100644 --- a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java +++ b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java @@ -21,21 +21,79 @@ public enum DefaultUseCases { /** defaults file and substitution ready template for OpenAI embedding model */ OPEN_AI_EMBEDDING_MODEL_DEPLOY( "open_ai_embedding_model_deploy", - "defaults/open-ai-embedding-defaults.json", + "defaults/openai-embedding-defaults.json", "substitutionTemplates/deploy-remote-model-template.json" ), - /** defaults file and substitution ready template for cohere embedding model */ + /** defaults file and substitution ready template for Cohere embedding model */ COHERE_EMBEDDING_MODEL_DEPLOY( "cohere-embedding_model_deploy", "defaults/cohere-embedding-defaults.json", "substitutionTemplates/deploy-remote-model-template-extra-params.json" ), + /** defaults file and substitution ready template for Bedrock Titan embedding model */ + BEDROCK_TITAN_EMBEDDING_MODEL_DEPLOY( + "bedrock-titan-embedding_model_deploy", + "defaults/bedrock-titan-embedding-defaults.json", + "substitutionTemplates/deploy-remote-bedrock-model-template.json" + ), + /** defaults file and substitution ready template for Bedrock Titan multimodal embedding model */ + BEDROCK_TITAN_MULTIMODAL_MODEL_DEPLOY( + "bedrock-titan-multimodal_model_deploy", + 
"defaults/bedrock-titan-multimodal-defaults.json", + "substitutionTemplates/deploy-remote-bedrock-model-template.json" + ), + /** defaults file and substitution ready template for Cohere chat model */ + COHERE_CHAT_MODEL_DEPLOY( + "cohere-chat_model_deploy", + "defaults/cohere-chat-defaults.json", + "substitutionTemplates/deploy-remote-model-chat-template.json" + ), + /** defaults file and substitution ready template for OpenAI chat model */ + OPENAI_CHAT_MODEL_DEPLOY( + "openai-chat_model_deploy", + "defaults/openai-chat-defaults.json", + "substitutionTemplates/deploy-remote-model-chat-template.json" + ), /** defaults file and substitution ready template for local neural sparse model and ingest pipeline*/ - LOCAL_NEURAL_SPARSE_SEARCH( - "local_neural_sparse_search", - "defaults/local-sparse-search-defaults.json", - "substitutionTemplates/neural-sparse-local-template.json" - ); + LOCAL_NEURAL_SPARSE_SEARCH_BI_ENCODER( + "local_neural_sparse_search_bi_encoder", + "defaults/local-sparse-search-biencoder-defaults.json", + "substitutionTemplates/neural-sparse-local-biencoder-template.json" + ), + /** defaults file and substitution ready template for semantic search, no model creation*/ + SEMANTIC_SEARCH("semantic_search", "defaults/semantic-search-defaults.json", "substitutionTemplates/semantic-search-template.json"), + /** defaults file and substitution ready template for multimodal search, no model creation*/ + MULTI_MODAL_SEARCH( + "multi_modal_search", + "defaults/multi-modal-search-defaults.json", + "substitutionTemplates/multi-modal-search-template.json" + ), + /** defaults file and substitution ready template for multimodal search with Bedrock Titan multimodal embedding model deployment*/ + MULTI_MODAL_SEARCH_WITH_BEDROCK_TITAN( + "multi_modal_search_with_bedrock_titan_multi_modal", + "defaults/multimodal-search-bedrock-titan-defaults.json", + "substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json" + ), + /** defaults file and substitution ready 
template for semantic search with 
query enricher processor attached, no model creation*/ + SEMANTIC_SEARCH_WITH_QUERY_ENRICHER( + "semantic_search_with_query_enricher", + "defaults/semantic-search-defaults.json", + "substitutionTemplates/semantic-search-with-query-enricher-template.json" + ), + /** defaults file and substitution ready template for semantic search with cohere embedding model*/ + SEMANTIC_SEARCH_WITH_COHERE_EMBEDDING( + "semantic_search_with_cohere_embedding", + "defaults/cohere-embedding-semantic-search-defaults.json", + "substitutionTemplates/semantic-search-with-model-template.json" + ), + /** defaults file and substitution ready template for semantic search with query enricher processor attached and cohere embedding model*/ + SEMANTIC_SEARCH_WITH_COHERE_EMBEDDING_AND_QUERY_ENRICHER( + "semantic_search_with_cohere_embedding_query_enricher", + "defaults/cohere-embedding-semantic-search-defaults.json", + "substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json" + ), + /** defaults file and substitution ready template for hybrid search, no model creation*/ + HYBRID_SEARCH("hybrid_search", "defaults/hybrid-search-defaults.json", "substitutionTemplates/hybrid-search-template.json"); private final String useCaseName; private final String defaultsFile; diff --git a/src/main/java/org/opensearch/flowframework/util/ParseUtils.java b/src/main/java/org/opensearch/flowframework/util/ParseUtils.java index 224304016..cc2c9390a 100644 --- a/src/main/java/org/opensearch/flowframework/util/ParseUtils.java +++ b/src/main/java/org/opensearch/flowframework/util/ParseUtils.java @@ -378,8 +378,11 @@ public static Object conditionallySubstitute(Object value, Map */ public static Map parseJsonFileToStringToStringMap(String path) throws IOException { String jsonContent = resourceToString(path); + logger.info("jsonContent: " + jsonContent); Map mappedJsonFile = mapper.readValue(jsonContent, Map.class); return mappedJsonFile; } diff --git 
a/src/main/java/org/opensearch/flowframework/workflow/AbstractCreatePipelineStep.java b/src/main/java/org/opensearch/flowframework/workflow/AbstractCreatePipelineStep.java index 14f51afa8..b00931b51 100644 --- a/src/main/java/org/opensearch/flowframework/workflow/AbstractCreatePipelineStep.java +++ b/src/main/java/org/opensearch/flowframework/workflow/AbstractCreatePipelineStep.java @@ -84,7 +84,12 @@ public PlainActionFuture execute( String pipelineId = (String) inputs.get(PIPELINE_ID); String configurations = (String) inputs.get(CONFIGURATIONS); - byte[] byteArr = configurations.getBytes(StandardCharsets.UTF_8); + // Special case for processors that have arrays that need to have the quotes removed + // (e.g. "weights": "[0.7, 0.3]" -> "weights": [0.7, 0.3]) + // Define a regular expression pattern to match stringified arrays + String transformedJsonString = configurations.replaceAll("\"\\[(.*?)]\"", "[$1]"); + + byte[] byteArr = transformedJsonString.getBytes(StandardCharsets.UTF_8); BytesReference configurationsBytes = new BytesArray(byteArr); String pipelineToBeCreated = this.getName(); diff --git a/src/main/java/org/opensearch/flowframework/workflow/CreateConnectorStep.java b/src/main/java/org/opensearch/flowframework/workflow/CreateConnectorStep.java index 403e26063..b26b28a6b 100644 --- a/src/main/java/org/opensearch/flowframework/workflow/CreateConnectorStep.java +++ b/src/main/java/org/opensearch/flowframework/workflow/CreateConnectorStep.java @@ -160,6 +160,7 @@ public void onFailure(Exception e) { parameters = getParameterMap(inputs.get(PARAMETERS_FIELD)); credentials = getStringToStringMap(inputs.get(CREDENTIAL_FIELD), CREDENTIAL_FIELD); actions = getConnectorActionList(inputs.get(ACTIONS_FIELD)); + // TODO: check for un-needed substitution? ${{create_connector. 
and remove field so we don't need almost duplicate templates } catch (IllegalArgumentException iae) { logger.error("IllegalArgumentException in connector configuration", iae); throw new FlowFrameworkException("IllegalArgumentException in connector configuration", RestStatus.BAD_REQUEST); diff --git a/src/main/java/org/opensearch/flowframework/workflow/RegisterLocalSparseEncodingModelStep.java b/src/main/java/org/opensearch/flowframework/workflow/RegisterLocalSparseEncodingModelStep.java index 5adcc4376..1f7e7d4b5 100644 --- a/src/main/java/org/opensearch/flowframework/workflow/RegisterLocalSparseEncodingModelStep.java +++ b/src/main/java/org/opensearch/flowframework/workflow/RegisterLocalSparseEncodingModelStep.java @@ -52,12 +52,12 @@ public RegisterLocalSparseEncodingModelStep( @Override protected Set getRequiredKeys() { - return Set.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL); + return Set.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT); } @Override protected Set getOptionalKeys() { - return Set.of(DESCRIPTION_FIELD, MODEL_GROUP_ID, DEPLOY_FIELD); + return Set.of(DESCRIPTION_FIELD, MODEL_GROUP_ID, DEPLOY_FIELD, MODEL_CONTENT_HASH_VALUE, URL, FUNCTION_NAME); } @Override diff --git a/src/main/java/org/opensearch/flowframework/workflow/WorkflowStepFactory.java b/src/main/java/org/opensearch/flowframework/workflow/WorkflowStepFactory.java index a47c7c5d8..224cbf1eb 100644 --- a/src/main/java/org/opensearch/flowframework/workflow/WorkflowStepFactory.java +++ b/src/main/java/org/opensearch/flowframework/workflow/WorkflowStepFactory.java @@ -156,8 +156,8 @@ public enum WorkflowSteps { /** Register Local Sparse Encoding Model Step */ REGISTER_LOCAL_SPARSE_ENCODING_MODEL( RegisterLocalSparseEncodingModelStep.NAME, - List.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL), - List.of(MODEL_ID, REGISTER_MODEL_STATUS), + List.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT), + List.of(MODEL_ID, 
REGISTER_MODEL_STATUS, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL), List.of(OPENSEARCH_ML), TimeValue.timeValueSeconds(60) ), diff --git a/src/main/resources/defaults/bedrock-titan-embedding-defaults.json b/src/main/resources/defaults/bedrock-titan-embedding-defaults.json new file mode 100644 index 000000000..20baf867b --- /dev/null +++ b/src/main/resources/defaults/bedrock-titan-embedding-defaults.json @@ -0,0 +1,17 @@ +{ + "template.name": "deploy-bedrock-titan-embedding-model", + "template.description": "Deploying Amazon Bedrock Titan embedding model ", + "create_connector.name": "Amazon Bedrock Connector: embedding", + "create_connector.description": "The connector to bedrock Titan embedding model", + "create_connector.region": "us-east-1", + "create_connector.endpoint": "api.openai.com", + "create_connector.credential.access_key": "123", + "create_connector.credential.secret_key": "123", + "create_connector.credential.session_token": "123", + "create_connector.actions.url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-text-v1/invoke", + "create_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText}\" }", + "create_connector.actions.pre_process_function": "\n StringBuilder builder = new StringBuilder();\n builder.append(\"\\\"\");\n String first = params.text_docs[0];\n builder.append(first);\n builder.append(\"\\\"\");\n def parameters = \"{\" +\"\\\"inputText\\\":\" + builder + \"}\";\n return \"{\" +\"\\\"parameters\\\":\" + parameters + \"}\";", + "create_connector.actions.post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return params.message;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n 
return json;\n ", + "register_remote_model.name": "Bedrock embedding model", + "register_remote_model.description": "bedrock-embedding-model" +} diff --git a/src/main/resources/defaults/bedrock-titan-multimodal-defaults.json b/src/main/resources/defaults/bedrock-titan-multimodal-defaults.json new file mode 100644 index 000000000..b1666bec5 --- /dev/null +++ b/src/main/resources/defaults/bedrock-titan-multimodal-defaults.json @@ -0,0 +1,18 @@ +{ + "template.name": "deploy-bedrock-titan-multimodal-embedding-model", + "template.description": "deploying Amazon Bedrock Titan multimodal embedding model ", + "create_connector.name": "Amazon Bedrock Connector: multi-modal embedding", + "create_connector.description": "The connector to bedrock Titan multi-modal embedding model", + "create_connector.region": "us-east-1", + "create_connector.input_docs_processed_step_size": 2, + "create_connector.endpoint": "api.openai.com", + "create_connector.credential.access_key": "123", + "create_connector.credential.secret_key": "123", + "create_connector.credential.session_token": "123", + "create_connector.actions.url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-image-v1/invoke", + "create_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText:-null}\", \"inputImage\": \"${parameters.inputImage:-null}\" }", + "create_connector.actions.pre_process_function": "\n StringBuilder parametersBuilder = new StringBuilder(\"{\");\n if (params.text_docs.length > 0 && params.text_docs[0] != null) {\n parametersBuilder.append(\"\\\"inputText\\\":\");\n parametersBuilder.append(\"\\\"\");\n parametersBuilder.append(params.text_docs[0]);\n parametersBuilder.append(\"\\\"\");\n \n if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n parametersBuilder.append(\",\");\n }\n }\n \n \n if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n parametersBuilder.append(\"\\\"inputImage\\\":\");\n 
parametersBuilder.append(\"\\\"\");\n parametersBuilder.append(params.text_docs[1]);\n parametersBuilder.append(\"\\\"\");\n }\n parametersBuilder.append(\"}\");\n \n return \"{\" +\"\\\"parameters\\\":\" + parametersBuilder + \"}\";", + "create_connector.actions.post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return null;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n ", + "register_remote_model.name": "Bedrock multi-modal embedding model", + "register_remote_model.description": "bedrock-multi-modal-embedding-model" +} diff --git a/src/main/resources/defaults/cohere-chat-defaults.json b/src/main/resources/defaults/cohere-chat-defaults.json new file mode 100644 index 000000000..bc200d335 --- /dev/null +++ b/src/main/resources/defaults/cohere-chat-defaults.json @@ -0,0 +1,14 @@ +{ + "template.name": "deploy-cohere-chat-model", + "template.description": "deploying cohere chat model", + "create_connector.name": "Cohere Chat Model", + "create_connector.description": "The connector to Cohere's public chat API", + "create_connector.protocol": "http", + "create_connector.model": "command", + "create_connector.endpoint": "api.cohere.ai", + "create_connector.credential.key": "123", + "create_connector.actions.url": "https://api.cohere.ai/v1/chat", + "create_connector.actions.request_body": "{ \"message\": \"${parameters.message}\", \"model\": \"${parameters.model}\" }", + "register_remote_model.name": "Cohere chat model", + "register_remote_model.description": "cohere-chat-model" +} diff --git a/src/main/resources/defaults/cohere-embedding-defaults.json b/src/main/resources/defaults/cohere-embedding-defaults.json index e36578b1c..53a402f60 
100644 --- a/src/main/resources/defaults/cohere-embedding-defaults.json +++ b/src/main/resources/defaults/cohere-embedding-defaults.json @@ -7,7 +7,6 @@ "create_connector.model": "embed-english-v3.0", "create_connector.input_type": "search_document", "create_connector.truncate": "end", - "create_connector.endpoint": "api.openai.com", "create_connector.credential.key": "123", "create_connector.actions.url": "https://api.cohere.ai/v1/embed", "create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }", diff --git a/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json b/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json new file mode 100644 index 000000000..439f905f2 --- /dev/null +++ b/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json @@ -0,0 +1,28 @@ +{ + "template.name": "semantic search with cohere embedding", + "template.description": "Setting up semantic search, with cohere embedding model", + "create_connector.name": "cohere-embedding-connector", + "create_connector.description": "The connector to Cohere's public embed API", + "create_connector.protocol": "http", + "create_connector.model": "embed-english-v3.0", + "create_connector.input_type": "search_document", + "create_connector.truncate": "end", + "create_connector.credential.key": "123", + "create_connector.actions.url": "https://api.cohere.ai/v1/embed", + "create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }", + "create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding", + "create_connector.actions.post_process_function": "connector.post_process.cohere.embedding", + "register_remote_model.name": "Cohere english embed 
model", + "register_remote_model.description": "cohere-embedding-model", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline", + "create_ingest_pipeline.description": "A text embedding pipeline", + "text_embedding.field_map.input": "passage_text", + "text_embedding.field_map.output": "passage_embedding", + "create_index.name": "my-nlp-index", + "create_index.settings.number_of_shards": "2", + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.space_type": "l2", + "create_index.mappings.method.name": "hnsw", + "text_embedding.field_map.output.dimension": "1024", + "create_search_pipeline.pipeline_id": "default_model_pipeline" +} diff --git a/src/main/resources/defaults/hybrid-search-defaults.json b/src/main/resources/defaults/hybrid-search-defaults.json new file mode 100644 index 000000000..cf9fb584b --- /dev/null +++ b/src/main/resources/defaults/hybrid-search-defaults.json @@ -0,0 +1,19 @@ +{ + "template.name": "hybrid-search", + "template.description": "Setting up hybrid search, ingest pipeline and index", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline", + "create_ingest_pipeline.description": "A text embedding pipeline", + "create_ingest_pipeline.model_id": "123", + "text_embedding.field_map.input": "passage_text", + "text_embedding.field_map.output": "passage_embedding", + "create_index.name": "my-nlp-index", + "create_index.settings.number_of_shards": "2", + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.space_type": "l2", + "create_index.mappings.method.name": "hnsw", + "text_embedding.field_map.output.dimension": "1024", + "create_search_pipeline.pipeline_id": "nlp-search-pipeline", + "normalization-processor.normalization.technique": "min_max", + "normalization-processor.combination.technique": "arithmetic_mean", + "normalization-processor.combination.parameters.weights": "[0.3, 0.7]" +} diff --git a/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json 
b/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json new file mode 100644 index 000000000..aa4ae0512 --- /dev/null +++ b/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json @@ -0,0 +1,14 @@ +{ + "template.name": "local-model-neural-sparse-search", + "template.description": "setting up neural sparse search with local model", + "register_local_sparse_encoding_model.name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v1", + "register_local_sparse_encoding_model.description": "This is a neural sparse encoding model", + "register_local_sparse_encoding_model.model_format": "TORCH_SCRIPT", + "register_local_sparse_encoding_model.deploy": "true", + "register_local_sparse_encoding_model.version": "1.0.1", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline-sparse", + "create_ingest_pipeline.description": "A sparse encoding ingest pipeline", + "create_ingest_pipeline.text_embedding.field_map.input": "passage_text", + "create_ingest_pipeline.text_embedding.field_map.output": "passage_embedding", + "create_index.name": "my-nlp-index" +} diff --git a/src/main/resources/defaults/local-sparse-search-defaults.json b/src/main/resources/defaults/local-sparse-search-defaults.json deleted file mode 100644 index cde9291f2..000000000 --- a/src/main/resources/defaults/local-sparse-search-defaults.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "template.name": "local-model-neural-sparse-search", - "template.description": "setting up neural sparse search with local model", - "register_local_sparse_encoding_model.name": "neural-sparse/opensearch-neural-sparse-tokenizer-v1-v2", - "register_local_sparse_encoding_model.description": "This is a neural sparse tokenizer model: It tokenize input sentence into tokens and assign pre-defined weight from IDF to each. 
It serves only in query.", - "register_local_sparse_encoding_model.node_timeout": "60s", - "register_local_sparse_encoding_model.model_format": "TORCH_SCRIPT", - "register_local_sparse_encoding_model.function_name": "SPARSE_TOKENIZE", - "register_local_sparse_encoding_model.model_content_hash_value": "b3487da9c58ac90541b720f3b367084f271d280c7f3bdc3e6d9c9a269fb31950", - "register_local_sparse_encoding_model.url": "https://artifacts.opensearch.org/models/ml-models/amazon/neural-sparse/opensearch-neural-sparse-tokenizer-v1/1.0.0/torch_script/opensearch-neural-sparse-tokenizer-v1-1.0.0.zip", - "register_local_sparse_encoding_model.deploy": "true", - "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline-sparse", - "create_ingest_pipeline.description": "A sparse encoding ingest pipeline", - "create_ingest_pipeline.text_embedding.field_map.input": "passage_text", - "create_ingest_pipeline.text_embedding.field_map.output": "passage_embedding", - "create_index.name": "my-nlp-index" -} diff --git a/src/main/resources/defaults/multi-modal-search-defaults.json b/src/main/resources/defaults/multi-modal-search-defaults.json new file mode 100644 index 000000000..3bd47f625 --- /dev/null +++ b/src/main/resources/defaults/multi-modal-search-defaults.json @@ -0,0 +1,15 @@ +{ + "template.name": "multi-modal-search", + "template.description": "Setting up multimodal search, ingest pipeline and index", + "create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline", + "create_ingest_pipeline.description": "A text/image embedding pipeline", + "create_ingest_pipeline.model_id": "123", + "create_ingest_pipeline.embedding": "vector_embedding", + "text_image_embedding.field_map.text": "image_description", + "text_image_embedding.field_map.image": "image_binary", + "create_index.name": "my-multimodal-nlp-index", + "create_index.settings.number_of_shards": 2, + "text_image_embedding.field_map.output.dimension": 1024, + "create_index.mappings.method.engine": "lucene", + 
"create_index.mappings.method.name": "hnsw" +} diff --git a/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json b/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json new file mode 100644 index 000000000..222053db1 --- /dev/null +++ b/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json @@ -0,0 +1,28 @@ +{ + "template.name": "multimodal-search-with-bedrock-titan-deployment", + "template.description": "Setting up multimodal search with Amazon Bedrock Titan embedding model deployment", + "create_connector.name": "Amazon Bedrock Connector: multi-modal embedding", + "create_connector.description": "The connector to bedrock Titan multi-modal embedding model", + "create_connector.region": "us-east-1", + "create_connector.input_docs_processed_step_size": 2, + "create_connector.endpoint": "api.openai.com", + "create_connector.credential.access_key": "123", + "create_connector.credential.secret_key": "123", + "create_connector.credential.session_token": "123", + "create_connector.actions.url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-image-v1/invoke", + "create_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText:-null}\", \"inputImage\": \"${parameters.inputImage:-null}\" }", + "create_connector.actions.pre_process_function": "\n StringBuilder parametersBuilder = new StringBuilder(\"{\");\n if (params.text_docs.length > 0 && params.text_docs[0] != null) {\n parametersBuilder.append(\"\\\"inputText\\\":\");\n parametersBuilder.append(\"\\\"\");\n parametersBuilder.append(params.text_docs[0]);\n parametersBuilder.append(\"\\\"\");\n \n if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n parametersBuilder.append(\",\");\n }\n }\n \n \n if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n parametersBuilder.append(\"\\\"inputImage\\\":\");\n parametersBuilder.append(\"\\\"\");\n parametersBuilder.append(params.text_docs[1]);\n 
parametersBuilder.append(\"\\\"\");\n }\n parametersBuilder.append(\"}\");\n \n return \"{\" +\"\\\"parameters\\\":\" + parametersBuilder + \"}\";", + "create_connector.actions.post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return null;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n ", + "register_remote_model.name": "Bedrock multi-modal embedding model", + "register_remote_model.description": "bedrock-multi-modal-embedding-model", + "create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline", + "create_ingest_pipeline.description": "A text/image embedding pipeline", + "create_ingest_pipeline.embedding": "vector_embedding", + "text_image_embedding.field_map.text": "image_description", + "text_image_embedding.field_map.image": "image_binary", + "create_index.name": "my-multimodal-nlp-index", + "create_index.settings.number_of_shards": 2, + "text_image_embedding.field_map.output.dimension": 1024, + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.name": "hnsw" +} diff --git a/src/main/resources/defaults/open-ai-embedding-defaults.json b/src/main/resources/defaults/open-ai-embedding-defaults.json deleted file mode 100644 index 59fed86de..000000000 --- a/src/main/resources/defaults/open-ai-embedding-defaults.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "open_ai_embedding_deploy": { - "template.name": "deploy-openai-model", - "template.description": "deploying openAI embedding model", - "create_connector.name": "OpenAI-embedding-connector", - "create_connector.description": "Connector to public OpenAI model", - "create_connector.protocol": "http", - "create_connector.model": "text-embedding-ada-002", 
- "create_connector.endpoint": "api.openai.com", - "create_connector.credential.key": "123", - "create_connector.actions.url": "https://api.openai.com/v1/embeddings", - "create_connector.actions.request_body": "{ \"input\": ${parameters.input}, \"model\": \"${parameters.model}\" }", - "create_connector.actions.pre_process_function": "connector.pre_process.openai.embedding", - "create_connector.actions.post_process_function": "connector.post_process.openai.embedding", - "register_remote_model_1.name": "OpenAI embedding model", - "register_remote_model_1.description": "openai-embedding-model" - } -} diff --git a/src/main/resources/defaults/openai-chat-defaults.json b/src/main/resources/defaults/openai-chat-defaults.json new file mode 100644 index 000000000..2b28088db --- /dev/null +++ b/src/main/resources/defaults/openai-chat-defaults.json @@ -0,0 +1,14 @@ +{ + "template.name": "deploy-openai-chat-model", + "template.description": "deploying openAI chat model", + "create_connector.name": "OpenAI Chat Connector", + "create_connector.description": "Connector to public OpenAI model", + "create_connector.protocol": "http", + "create_connector.model": "gpt-3.5-turbo", + "create_connector.endpoint": "api.openai.com", + "create_connector.credential.key": "123", + "create_connector.actions.url": "https://${parameters.endpoint}/v1/chat/completions", + "create_connector.actions.request_body": "{ \"model\": \"${parameters.model}\", \"messages\": ${parameters.messages} }", + "register_remote_model_1.name": "OpenAI chat model", + "register_remote_model_1.description": "openai-chat-model" +} diff --git a/src/main/resources/defaults/openai-embedding-defaults.json b/src/main/resources/defaults/openai-embedding-defaults.json new file mode 100644 index 000000000..4775e1c27 --- /dev/null +++ b/src/main/resources/defaults/openai-embedding-defaults.json @@ -0,0 +1,16 @@ +{ + "template.name": "deploy-openai-model", + "template.description": "deploying openAI embedding model", + 
"create_connector.name": "OpenAI-embedding-connector", + "create_connector.description": "Connector to public OpenAI model", + "create_connector.protocol": "http", + "create_connector.model": "text-embedding-ada-002", + "create_connector.endpoint": "api.openai.com", + "create_connector.credential.key": "123", + "create_connector.actions.url": "https://api.openai.com/v1/embeddings", + "create_connector.actions.request_body": "{ \"input\": ${parameters.input}, \"model\": \"${parameters.model}\" }", + "create_connector.actions.pre_process_function": "connector.pre_process.openai.embedding", + "create_connector.actions.post_process_function": "connector.post_process.openai.embedding", + "register_remote_model_1.name": "OpenAI embedding model", + "register_remote_model_1.description": "openai-embedding-model" +} diff --git a/src/main/resources/defaults/semantic-search-defaults.json b/src/main/resources/defaults/semantic-search-defaults.json new file mode 100644 index 000000000..18e800559 --- /dev/null +++ b/src/main/resources/defaults/semantic-search-defaults.json @@ -0,0 +1,15 @@ +{ + "template.name": "semantic-search", + "template.description": "Setting up semantic search, ingest pipeline and index", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline", + "create_ingest_pipeline.description": "A text embedding pipeline", + "create_ingest_pipeline.model_id": "123", + "text_embedding.field_map.input": "passage_text", + "text_embedding.field_map.output": "passage_embedding", + "create_index.name": "my-nlp-index", + "create_index.settings.number_of_shards": "2", + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.space_type": "l2", + "create_index.mappings.method.name": "hnsw", + "text_embedding.field_map.output.dimension": "1024" +} diff --git a/src/main/resources/defaults/semantic-search-query-enricher-defaults.json b/src/main/resources/defaults/semantic-search-query-enricher-defaults.json new file mode 100644 index 
000000000..304ba1044 --- /dev/null +++ b/src/main/resources/defaults/semantic-search-query-enricher-defaults.json @@ -0,0 +1,15 @@ +{ + "template.name": "semantic-search", + "template.description": "Setting up semantic search, ingest pipeline and index", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline", + "create_ingest_pipeline.description": "A text embedding pipeline", + "create_ingest_pipeline.model_id": "123", + "text_embedding.field_map.input": "passage_text", + "text_embedding.field_map.output": "passage_embedding", + "create_search_pipeline.pipeline_id": "default_model_pipeline", + "create_index.name": "my-nlp-index", + "create_index.settings.number_of_shards": "2", + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.space_type": "l2", + "create_index.mappings.method.name": "hnsw" +} diff --git a/src/main/resources/mappings/knn-text-search-default.json b/src/main/resources/mappings/knn-text-search-default.json deleted file mode 100644 index 5d7e20baf..000000000 --- a/src/main/resources/mappings/knn-text-search-default.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "properties": { - "id": { - "type": "text" - }, - "passage_embedding": { - "type": "knn_vector", - "dimension": 768, - "method": { - "engine": "lucene", - "space_type": "l2", - "name": "hnsw", - "parameters": {} - } - }, - "passage_text": { - "type": "text" - } - } -} diff --git a/src/main/resources/mappings/knn.json b/src/main/resources/mappings/knn.json deleted file mode 100644 index c31946e62..000000000 --- a/src/main/resources/mappings/knn.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "properties": { - "desc_v": { - "type": "keyword" - }, - "name_v": { - "type": "keyword" - }, - "description": { - "type": "keyword" - }, - "name": { - "type": "keyword" - } - } -} diff --git a/src/main/resources/mappings/open-ai-defaults.json b/src/main/resources/mappings/open-ai-defaults.json deleted file mode 100644 index 88f200e32..000000000 --- 
a/src/main/resources/mappings/open-ai-defaults.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "deploy-remote-model-defaults": [ - { - "openai_embedding_deploy": { - "template.name": "deploy-openai-model", - "template.description": "deploying openAI embedding model", - "create_connector_1.name": "OpenAI-embedding-connector", - "create_connector_1.description": "Connector to public AI model service for GPT 3.5", - "create_connector_1.protocol": "http", - "create_connector_1.model": "gpt-3.5-turbo", - "create_connector_1.endpoint": "api.openai.com", - "create_connector_1.credential.key": "123", - "create_connector_1.request_body": "{ \"input\": ${parameters.input}, \"model\": \"${parameters.model}\" }", - "create_connector_1.pre_process_function": "connector.pre_process.openai.embedding", - "create_connector_1.post_process_function": "connector.post_process.openai.embedding", - "register_remote_model_1.name": "test-description" - } - }, - { - "cohere_embedding_deploy": { - "template.name": "deploy-cohere-embedding-model", - "template.description": "deploying cohere embedding model", - "create_connector_1.name": "cohere-embedding-connector", - "create_connector_1.description": "Connector to public AI model service for GPT 3.5", - "create_connector_1.protocol": "http", - "create_connector_1.model": "gpt-3.5-turbo", - "create_connector_1.endpoint": "api.openai.com", - "create_connector_1.credential.key": "123", - "create_connector_1.request_body": "{ \"input\": ${parameters.input}, \"model\": \"${parameters.model}\" }", - "create_connector_1.pre_process_function": "connector.pre_process.openai.embedding", - "create_connector_1.post_process_function": "connector.post_process.openai.embedding", - "register_remote_model_1.name": "test-description" - } - } - ] -} diff --git a/src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json b/src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json new file mode 100644 index 
000000000..4f9ea5911 --- /dev/null +++ b/src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json @@ -0,0 +1,81 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "create_connector", + "type": "create_connector", + "user_inputs": { + "name": "${{create_connector.name}}", + "description": "${{create_connector.description}}", + "version": "1", + "protocol": "aws_sigv4", + "parameters": { + "region": "${{create_connector.region}}", + "service_name": "bedrock", + "input_docs_processed_step_size": "${{create_connector.input_docs_processed_step_size}}" + }, + "credential": { + "access_key": "${{create_connector.credential.access_key}}", + "secret_key": "${{create_connector.credential.secret_key}}", + "session_token": "${{create_connector.credential.session_token}}" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "url": "${{create_connector.actions.url}}", + "headers": { + "content-type": "application/json", + "x-amz-content-sha256": "required" + }, + "request_body": "${{create_connector.actions.request_body}}", + "pre_process_function": "${{create_connector.actions.pre_process_function}}", + "post_process_function": "${{create_connector.actions.post_process_function}}" + } + ] + } + }, + { + "id": "register_model", + "type": "register_remote_model", + "previous_node_inputs": { + "create_connector": "parameters" + }, + "user_inputs": { + "name": "${{register_remote_model.name}}", + "function_name": "remote", + "description": "${{register_remote_model.description}}" + } + }, + { + "id": "deploy_model", + "type": "deploy_model", + "previous_node_inputs": { + "register_model": "model_id" + } + } + ], + "edges": [ + { + "source": "create_connector", + "dest": "register_model" + }, + { + "source": "register_model", + "dest":
"deploy_model" + } + ] + } + } +} diff --git a/src/main/resources/mappings/deploy-remote-model-template-draft.json b/src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json similarity index 56% rename from src/main/resources/mappings/deploy-remote-model-template-draft.json rename to src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json index a2f80a8c4..6c2f7cc05 100644 --- a/src/main/resources/mappings/deploy-remote-model-template-draft.json +++ b/src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json @@ -1,7 +1,7 @@ { - "name": "{template.name}", - "description": "{template.description}", - "use_case": "DEPLOY_MODEL", + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "", "version": { "template": "1.0.0", "compatibility": [ @@ -16,28 +16,26 @@ "id": "create_connector", "type": "create_connector", "user_inputs": { - "name": "${{create_connector_1}}", - "description": "${{create_connector_1.description}}", + "name": "${{create_connector.name}}", + "description": "${{create_connector.description}}", "version": "1", - "protocol": "${{create_connector_1.protocol}}", + "protocol": "${{create_connector.protocol}}", "parameters": { - "endpoint": "${{create_connector_1.endpoint}}", - "model": "${{create_connector_1.model}}" + "endpoint": "${{create_connector.endpoint}}", + "model": "${{create_connector.model}}" }, "credential": { - "key": "${{create_connector_1.credential.key}}", + "key": "${{create_connector.credential.key}}" }, "actions": [ { "action_type": "predict", "method": "POST", - "url": "https://api.openai.com/v1/embeddings", + "url": "${{create_connector.actions.url}}", "headers": { - "Authorization": "Bearer ${credential.openAI_key}" + "Authorization": "Bearer ${credential.key}" }, - "request_body": "{ \"input\": ${parameters.input}, \"model\": \"${parameters.model}\" }", - "pre_process_function": "connector.pre_process.openai.embedding", -
"post_process_function": "connector.post_process.openai.embedding" + "request_body": "${{create_connector.actions.request_body}}" } ] } @@ -49,9 +47,9 @@ "create_connector_step_1": "parameters" }, "user_inputs": { - "name": "${register_remote_model.name}", + "name": "${{register_remote_model.name}}", "function_name": "remote", - "description": "${register_remote_model.description}" + "description": "${{register_remote_model.description}}" } }, { diff --git a/src/main/resources/substitutionTemplates/hybrid-search-template.json b/src/main/resources/substitutionTemplates/hybrid-search-template.json new file mode 100644 index 000000000..9e16f1d09 --- /dev/null +++ b/src/main/resources/substitutionTemplates/hybrid-search-template.json @@ -0,0 +1,103 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "HYBRID_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_embedding": { + "model_id": "${{create_ingest_pipeline.model_id}}", + "field_map": { + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index.knn": true, + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}", + "index.search.default_pipeline": "${{create_search_pipeline.pipeline_id}}" + }, + "mappings": { + "_doc": { + "properties": { + "id": { + "type": 
"text" + }, + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_embedding.field_map.input}}": { + "type": "text" + } + } + } + } + } + } + }, + { + "id": "create_search_pipeline", + "type": "create_search_pipeline", + "user_inputs": { + "pipeline_id": "${{create_search_pipeline.pipeline_id}}", + "configurations": { + "description": "Post processor for hybrid search", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": "${{normalization-processor.normalization.technique}}" + }, + "combination": { + "technique": "${{normalization-processor.combination.technique}}", + "parameters": { + "weights": "${{normalization-processor.combination.parameters.weights}}" + } + } + } + } + ] + } + } + } + ] + } + } +} diff --git a/src/main/resources/substitutionTemplates/multi-modal-search-template.json b/src/main/resources/substitutionTemplates/multi-modal-search-template.json new file mode 100644 index 000000000..ff8623a9e --- /dev/null +++ b/src/main/resources/substitutionTemplates/multi-modal-search-template.json @@ -0,0 +1,81 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "MULTIMODAL_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_image_embedding": { + "model_id": "${{create_ingest_pipeline.model_id}}", + "embedding": 
"${{create_ingest_pipeline.embedding}}", + "field_map": { + "text": "${{text_image_embedding.field_map.text}}", + "image": "${{text_image_embedding.field_map.image}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index.knn": true, + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}" + }, + "mappings": { + "_doc": { + "properties": { + "id": { + "type": "text" + }, + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_image_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_image_embedding.field_map.text}}": { + "type": "text" + }, + "${{text_image_embedding.field_map.image}}": { + "type": "binary" + } + } + } + } + } + } + } + ] + } + } +} diff --git a/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json b/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json new file mode 100644 index 000000000..87adf082e --- /dev/null +++ b/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json @@ -0,0 +1,131 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "MULTIMODAL_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "create_connector", + "type": "create_connector", + "user_inputs": { + "name": "${{create_connector.name}}", + "description": "${{create_connector.description}}", + "version": "1", + "protocol": "aws_sigv4", + "parameters": 
{ + "region": "${{create_connector.region}}", + "service_name": "bedrock", + "input_docs_processed_step_size": "${{create_connector.input_docs_processed_step_size}}" + }, + "credential": { + "access_key": "${{create_connector.credential.access_key}}", + "secret_key": "${{create_connector.credential.secret_key}}", + "session_token": "${{create_connector.credential.session_token}}" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "url": "${{create_connector.actions.url}}", + "headers": { + "content-type": "application/json", + "x-amz-content-sha256": "required" + }, + "request_body": "${{create_connector.actions.request_body}}", + "pre_process_function": "${{create_connector.actions.pre_process_function}}", + "post_process_function": "${{create_connector.actions.post_process_function}}" + } + ] + } + }, + { + "id": "register_model", + "type": "register_remote_model", + "previous_node_inputs": { + "create_connector": "parameters" + }, + "user_inputs": { + "name": "${{register_remote_model.name}}", + "function_name": "remote", + "description": "${{register_remote_model.description}}", + "deploy" : true + } + }, + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "previous_node_inputs": { + "register_model": "model_id" + }, + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_image_embedding": { + "model_id": "${{register_model.model_id}}", + "embedding": "${{create_ingest_pipeline.embedding}}", + "field_map": { + "text": "${{text_image_embedding.field_map.text}}", + "image": "${{text_image_embedding.field_map.image}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index.knn":
true, + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}" + }, + "mappings": { + "_doc": { + "properties": { + "id": { + "type": "text" + }, + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_image_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_image_embedding.field_map.text}}": { + "type": "text" + }, + "${{text_image_embedding.field_map.image}}": { + "type": "binary" + } + } + } + } + } + } + } + ] + } + } +} diff --git a/src/main/resources/substitutionTemplates/neural-sparse-local-template.json b/src/main/resources/substitutionTemplates/neural-sparse-local-biencoder-template.json similarity index 76% rename from src/main/resources/substitutionTemplates/neural-sparse-local-template.json rename to src/main/resources/substitutionTemplates/neural-sparse-local-biencoder-template.json index 372336bb8..603e462ee 100644 --- a/src/main/resources/substitutionTemplates/neural-sparse-local-template.json +++ b/src/main/resources/substitutionTemplates/neural-sparse-local-biencoder-template.json @@ -16,14 +16,10 @@ "id": "register_local_sparse_encoding_model", "type": "register_local_sparse_encoding_model", "user_inputs": { - "node_timeout": "60s", - "name": "neural-sparse/opensearch-neural-sparse-tokenizer-v1-v2", - "version": "1.0.0", - "description": "This is a neural sparse tokenizer model: It tokenize input sentence into tokens and assign pre-defined weight from IDF to each. 
It serves only in query.", - "model_format": "TORCH_SCRIPT", - "function_name": "SPARSE_TOKENIZE", - "model_content_hash_value": "b3487da9c58ac90541b720f3b367084f271d280c7f3bdc3e6d9c9a269fb31950", - "url": "https://artifacts.opensearch.org/models/ml-models/amazon/neural-sparse/opensearch-neural-sparse-tokenizer-v1/1.0.0/torch_script/opensearch-neural-sparse-tokenizer-v1-1.0.0.zip", + "name": "${{register_local_sparse_encoding_model.name}}", + "version": "${{register_local_sparse_encoding_model.version}}", + "description": "${{register_local_sparse_encoding_model.description}}", + "model_format": "${{register_local_sparse_encoding_model.model_format}}", "deploy": true } }, diff --git a/src/main/resources/substitutionTemplates/semantic-search-template.json b/src/main/resources/substitutionTemplates/semantic-search-template.json new file mode 100644 index 000000000..3aa7095e1 --- /dev/null +++ b/src/main/resources/substitutionTemplates/semantic-search-template.json @@ -0,0 +1,77 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "SEMANTIC_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_embedding": { + "model_id": "${{create_ingest_pipeline.model_id}}", + "field_map": { + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index.knn": true, + "default_pipeline": 
"${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}" + }, + "mappings": { + "_doc": { + "properties": { + "id": { + "type": "text" + }, + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_embedding.field_map.input}}": { + "type": "text" + } + } + } + } + } + } + } + ] + } + } +} diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json new file mode 100644 index 000000000..f75b58e06 --- /dev/null +++ b/src/main/resources/substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json @@ -0,0 +1,147 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "SEMANTIC_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "create_connector", + "type": "create_connector", + "user_inputs": { + "name": "${{create_connector.name}}", + "description": "${{create_connector.description}}", + "version": "1", + "protocol": "${{create_connector.protocol}}", + "parameters": { + "endpoint": "${{create_connector.endpoint}}", + "model": "${{create_connector.model}}", + "input_type": "search_document", + "truncate": "END" + }, + "credential": { + "key": "${{create_connector.credential.key}}" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "url": "${{create_connector.actions.url}}", + "headers": { + "Authorization": "Bearer ${credential.key}", + "Request-Source": "unspecified:opensearch" 
+ }, + "request_body": "${{create_connector.actions.request_body}}", + "pre_process_function": "${{create_connector.actions.pre_process_function}}", + "post_process_function": "${{create_connector.actions.post_process_function}}" + } + ] + } + }, + { + "id": "register_model", + "type": "register_remote_model", + "previous_node_inputs": { + "create_connector": "parameters" + }, + "user_inputs": { + "name": "${{register_remote_model.name}}", + "function_name": "remote", + "description": "${{register_remote_model.description}}", + "deploy": true + } + }, + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "previous_node_inputs": { + "register_model": "model_id" + }, + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_embedding": { + "model_id": "${{register_model.model_id}}", + "field_map": { + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index.knn": true, + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}", + "index.search.default_pipeline": "${{create_search_pipeline.pipeline_id}}" + }, + "mappings": { + "_doc": { + "properties": { + "id": { + "type": "text" + }, + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + 
"${{text_embedding.field_map.input}}": { + "type": "text" + } + } + } + } + } + } + }, + { + "id": "create_search_pipeline", + "type": "create_search_pipeline", + "previous_node_inputs": { + "register_model": "model_id" + }, + "user_inputs": { + "pipeline_id": "${{create_search_pipeline.pipeline_id}}", + "configurations": { + "request_processors": [ + { + "neural_query_enricher" : { + "description": "Sets the default model ID at index and field levels", + "default_model_id": "${{register_model.model_id}}" + } + } + ] + } + } + } + ] + } + } +} diff --git a/src/main/resources/substitutionTemplates/deploy-model-semantic-search-template-v1.json b/src/main/resources/substitutionTemplates/semantic-search-with-model-template.json similarity index 61% rename from src/main/resources/substitutionTemplates/deploy-model-semantic-search-template-v1.json rename to src/main/resources/substitutionTemplates/semantic-search-with-model-template.json index ae90693d3..f98c68659 100644 --- a/src/main/resources/substitutionTemplates/deploy-model-semantic-search-template-v1.json +++ b/src/main/resources/substitutionTemplates/semantic-search-with-model-template.json @@ -1,7 +1,7 @@ { "name": "${{template.name}}", "description": "${{template.description}}", - "use_case": "DEPLOY_MODEL", + "use_case": "SEMANTIC_SEARCH", "version": { "template": "1.0.0", "compatibility": [ @@ -16,18 +16,18 @@ "id": "create_connector", "type": "create_connector", "user_inputs": { - "name": "${{create_connector_1}}", - "description": "${{create_connector_1.description}}", + "name": "${{create_connector.name}}", + "description": "${{create_connector.description}}", "version": "1", - "protocol": "${{create_connector_1.protocol}}", + "protocol": "${{create_connector.protocol}}", "parameters": { - "endpoint": "${{create_connector_1.endpoint}}", - "model": "${{create_connector_1.model}}", + "endpoint": "${{create_connector.endpoint}}", + "model": "${{create_connector.model}}", "input_type": "search_document", 
"truncate": "END" }, "credential": { - "key": "${{create_connector_1.credential.key}}" + "key": "${{create_connector.credential.key}}" }, "actions": [ { @@ -49,26 +49,20 @@ "id": "register_model", "type": "register_remote_model", "previous_node_inputs": { - "create_connector_step_1": "parameters" + "create_connector": "parameters" }, "user_inputs": { - "name": "${register_remote_model.name}", + "name": "${{register_remote_model.name}}", "function_name": "remote", - "description": "${register_remote_model.description}" - } - }, - { - "id": "deploy_model", - "type": "deploy_model", - "previous_node_inputs": { - "register_model_1": "model_id" + "description": "${{register_remote_model.description}}", + "deploy": true } }, { "id": "create_ingest_pipeline", "type": "create_ingest_pipeline", "previous_node_inputs": { - "deploy_openai_model": "model_id" + "register_model": "model_id" }, "user_inputs": { "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", @@ -77,9 +71,9 @@ "processors": [ { "text_embedding": { - "model_id": "${{deploy_openai_model.model_id}}", + "model_id": "${{register_model.model_id}}", "field_map": { - "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.input}}" + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" } } } @@ -97,23 +91,31 @@ "index_name": "${{create_index.name}}", "configurations": { "settings": { - "index": { - "number_of_shards": 2, - "number_of_replicas": 1, - "search.default_pipeline" : "${{create_ingest_pipeline.pipeline_id}}" - } + "index.knn": true, + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}" }, "mappings": { "_doc": { "properties": { - "age": { - "type": "integer" + "id": { + "type": "text" + }, + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", 
+ "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_embedding.field_map.input}}": { + "type": "text" } } } - }, - "aliases": { - "sample-alias1": {} } } } diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-query-enricher-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-query-enricher-template.json new file mode 100644 index 000000000..4244cd791 --- /dev/null +++ b/src/main/resources/substitutionTemplates/semantic-search-with-query-enricher-template.json @@ -0,0 +1,95 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "SEMANTIC_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_embedding": { + "model_id": "${{create_ingest_pipeline.model_id}}", + "field_map": { + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" + } + } + } + ] + } + } + }, + { + "id": "create_search_pipeline", + "type": "create_search_pipeline", + "user_inputs": { + "pipeline_id": "${{create_search_pipeline.pipeline_id}}", + "configurations": { + "request_processors": [ + { + "neural_query_enricher" : { + "description": "Sets the default model ID at index and field levels", + "default_model_id": "${{create_ingest_pipeline.model_id}}" + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index.knn": true, + 
"default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}", + "index.search.default_pipeline": "${{create_search_pipeline.pipeline_id}}" + }, + "mappings": { + "_doc": { + "properties": { + "id": { + "type": "text" + }, + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_embedding.field_map.input}}": { + "type": "text" + } + } + } + } + } + } + } + ] + } + } +} diff --git a/src/test/java/org/opensearch/flowframework/common/DefaultUseCasesTests.java b/src/test/java/org/opensearch/flowframework/common/DefaultUseCasesTests.java index b6dc72ebb..cda1f1e29 100644 --- a/src/test/java/org/opensearch/flowframework/common/DefaultUseCasesTests.java +++ b/src/test/java/org/opensearch/flowframework/common/DefaultUseCasesTests.java @@ -20,7 +20,7 @@ public void setUp() throws Exception { public void testGetDefaultsFileByValidUseCaseName() throws FlowFrameworkException { String defaultsFile = DefaultUseCases.getDefaultsFileByUseCaseName("open_ai_embedding_model_deploy"); - assertEquals("defaults/open-ai-embedding-defaults.json", defaultsFile); + assertEquals("defaults/openai-embedding-defaults.json", defaultsFile); } public void testGetDefaultsFileByInvalidUseCaseName() throws FlowFrameworkException { diff --git a/src/test/java/org/opensearch/flowframework/model/WorkflowValidatorTests.java b/src/test/java/org/opensearch/flowframework/model/WorkflowValidatorTests.java index b5a87f5c2..80a9788c2 100644 --- a/src/test/java/org/opensearch/flowframework/model/WorkflowValidatorTests.java +++ b/src/test/java/org/opensearch/flowframework/model/WorkflowValidatorTests.java @@ -73,8 +73,8 @@ public void 
testParseWorkflowValidator() throws IOException { assertEquals(2, validator.getWorkflowStepValidators().get("register_local_custom_model").getOutputs().size()); assertTrue(validator.getWorkflowStepValidators().keySet().contains("register_local_sparse_encoding_model")); - assertEquals(6, validator.getWorkflowStepValidators().get("register_local_sparse_encoding_model").getInputs().size()); - assertEquals(2, validator.getWorkflowStepValidators().get("register_local_sparse_encoding_model").getOutputs().size()); + assertEquals(3, validator.getWorkflowStepValidators().get("register_local_sparse_encoding_model").getInputs().size()); + assertEquals(5, validator.getWorkflowStepValidators().get("register_local_sparse_encoding_model").getOutputs().size()); assertTrue(validator.getWorkflowStepValidators().keySet().contains("register_local_pretrained_model")); assertEquals(3, validator.getWorkflowStepValidators().get("register_local_pretrained_model").getInputs().size()); diff --git a/src/test/java/org/opensearch/flowframework/workflow/RegisterLocalSparseEncodingModelStepTests.java b/src/test/java/org/opensearch/flowframework/workflow/RegisterLocalSparseEncodingModelStepTests.java index d86f97023..607b0ebed 100644 --- a/src/test/java/org/opensearch/flowframework/workflow/RegisterLocalSparseEncodingModelStepTests.java +++ b/src/test/java/org/opensearch/flowframework/workflow/RegisterLocalSparseEncodingModelStepTests.java @@ -266,7 +266,7 @@ public void testMissingInputs() { ExecutionException ex = assertThrows(ExecutionException.class, () -> future.get().getContent()); assertTrue(ex.getCause() instanceof FlowFrameworkException); assertTrue(ex.getCause().getMessage().startsWith("Missing required inputs [")); - for (String s : new String[] { "model_format", "name", "function_name", "version", "url", "model_content_hash_value" }) { + for (String s : new String[] { "model_format", "name", "version", }) { assertTrue(ex.getCause().getMessage().contains(s)); } 
assertTrue(ex.getCause().getMessage().endsWith("] in workflow [test-id] node [test-node-id]"));