Skip to content

Commit

Permalink
addressed comments and cleaned up defaults/templates
Browse files Browse the repository at this point in the history
Signed-off-by: Amit Galitzky <[email protected]>
  • Loading branch information
amitgalitz committed Mar 20, 2024
1 parent a9913fc commit 6f8122f
Show file tree
Hide file tree
Showing 21 changed files with 81 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public enum DefaultUseCases {
COHERE_EMBEDDING_MODEL_DEPLOY(
"cohere-embedding_model_deploy",
"defaults/cohere-embedding-defaults.json",
"substitutionTemplates/deploy-remote-model-template-extra-params.json"
"substitutionTemplates/deploy-remote-model-extra-params-template.json"
),
/** defaults file and substitution ready template for Bedrock Titan embedding model */
BEDROCK_TITAN_EMBEDDING_MODEL_DEPLOY(
Expand Down
60 changes: 38 additions & 22 deletions src/main/java/org/opensearch/flowframework/util/ParseUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public class ParseUtils {
// Matches ${{ foo.bar }} (whitespace optional) with capturing groups 1=foo, 2=bar
// private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*(.+)\\.(.+?)\\s*\\}\\}");
private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*([\\w_]+)\\.([\\w_]+)\\s*\\}\\}");
private static final Pattern JSON_ARRAY_DOUBLE_QUOTES_PATTERN = Pattern.compile("\"\\[(.*?)]\"");

private ParseUtils() {}

Expand All @@ -70,7 +71,7 @@ private ParseUtils() {}
* @param json the json string
* @return The XContent parser for the json string
* @throws IOException on failure to create the parser
*/
*/
public static XContentParser jsonToParser(String json) throws IOException {
XContentParser parser = JsonXContent.jsonXContent.createParser(
NamedXContentRegistry.EMPTY,
Expand Down Expand Up @@ -104,7 +105,7 @@ public static String resourceToString(String path) throws IOException {
* Builds an XContent object representing a map of String keys to String values.
*
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
* @param map A map as key-value String pairs.
* @param map A map as key-value String pairs.
* @throws IOException on a build failure
*/
public static void buildStringToStringMap(XContentBuilder xContentBuilder, Map<?, ?> map) throws IOException {
Expand All @@ -119,7 +120,7 @@ public static void buildStringToStringMap(XContentBuilder xContentBuilder, Map<?
* Builds an XContent object representing a map of String keys to Object values.
*
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
* @param map A map as key-value String to Object.
* @param map A map as key-value String to Object.
* @throws IOException on a build failure
*/
public static void buildStringToObjectMap(XContentBuilder xContentBuilder, Map<?, ?> map) throws IOException {
Expand All @@ -138,7 +139,7 @@ public static void buildStringToObjectMap(XContentBuilder xContentBuilder, Map<?
* Builds an XContent object representing a LLMSpec.
*
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
* @param llm LLMSpec
* @param llm LLMSpec
* @throws IOException on a build failure
*/
public static void buildLLMMap(XContentBuilder xContentBuilder, LLMSpec llm) throws IOException {
Expand Down Expand Up @@ -171,6 +172,7 @@ public static Map<String, String> parseStringToStringMap(XContentParser parser)
* Parses an XContent object representing a map of String keys to Object values.
* The Object value here can either be a string or a map
* If an array is found in the given parser, we convert the array to a string representation of the array
*
* @param parser An XContent parser whose position is at the start of the map object to parse
* @return A map as identified by the key-value pairs in the XContent
* @throws IOException on a parse failure
Expand All @@ -189,10 +191,13 @@ public static Map<String, Object> parseStringToObjectMap(XContentParser parser)
// Handle array: convert it to a string representation
List<String> elements = new ArrayList<>();
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
elements.add("\"" + parser.text() + "\""); // Adding escaped quotes around each element
if (parser.currentToken().equals(XContentParser.Token.VALUE_NUMBER)) {
elements.add(String.valueOf(parser.numberValue())); // If number value don't add escaping quotes
} else {
elements.add("\"" + parser.text() + "\""); // Adding escaped quotes around each element
}
}
String arrayString = "[" + String.join(", ", elements) + "]";
map.put(fieldName, arrayString);
map.put(fieldName, elements.toString());
} else {
// Otherwise, parse it as a string
map.put(fieldName, parser.text());
Expand Down Expand Up @@ -220,6 +225,7 @@ public static Instant parseInstant(XContentParser parser) throws IOException {
* (e.g., john||own_index,testrole|__user__, no backend role so you see two vertical lines after john.).
* This is the user string format used internally in the OPENSEARCH_SECURITY_USER_INFO_THREAD_CONTEXT and may be
* parsed using User.parse(string).
*
* @param client Client containing user info. A public API request will fill in the user info in the thread context.
* @return parsed user object
*/
Expand All @@ -233,7 +239,7 @@ public static User getUserContext(Client client) {
* Creates a XContentParser from a given Registry
*
* @param xContentRegistry main registry for serializable content
* @param bytesReference given bytes to be parsed
* @param bytesReference given bytes to be parsed
* @return an {@link XContentParser} for the given bytes
* @throws IOException IOException if content can't be parsed correctly
*/
Expand All @@ -244,7 +250,8 @@ public static XContentParser createXContentParserFromRegistry(NamedXContentRegis

/**
* Generates a string to string Map
* @param map content map
*
* @param map content map
* @param fieldName fieldName
* @return instance of the map
*/
Expand All @@ -260,15 +267,15 @@ public static Map<String, String> getStringToStringMap(Object map, String fieldN
* Creates a map containing the specified input keys, with values derived from template data or previous node
* output.
*
* @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown
* @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map
* @param currentNodeInputs Input params and content for this node, from workflow parsing
* @param outputs WorkflowData content of previous steps
* @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown
* @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map
* @param currentNodeInputs Input params and content for this node, from workflow parsing
* @param outputs WorkflowData content of previous steps
* @param previousNodeInputs Input params for this node that come from previous steps
* @param params Params that came from REST path
* @param params Params that came from REST path
* @return A map containing the requiredInputKeys with their corresponding values,
* and optionalInputKeys with their corresponding values if present.
* Throws a {@link FlowFrameworkException} if a required key is not present.
* and optionalInputKeys with their corresponding values if present.
* Throws a {@link FlowFrameworkException} if a required key is not present.
*/
public static Map<String, Object> getInputsFromPreviousSteps(
Set<String> requiredInputKeys,
Expand Down Expand Up @@ -357,9 +364,10 @@ public static Map<String, Object> getInputsFromPreviousSteps(

/**
* Executes substitution on the given value by looking at any matching values in either the outputs or params map
* @param value the Object that will have the substitution done on
*
* @param value the Object that will have the substitution done on
* @param outputs potential location of values to be substituted in
* @param params potential location of values to be substituted in
* @param params potential location of values to be substituted in
* @return the substituted object back
*/
public static Object conditionallySubstitute(Object value, Map<String, WorkflowData> outputs, Map<String, String> params) {
Expand Down Expand Up @@ -403,6 +411,7 @@ public static Object conditionallySubstitute(Object value, Map<String, WorkflowD

/**
* Generates a string based on an arbitrary String to object map using Jackson
*
* @param map content map
* @return instance of the string
* @throws JsonProcessingException JsonProcessingException from Jackson for issues processing map
Expand All @@ -415,6 +424,7 @@ public static String parseArbitraryStringToObjectMapToString(Map<String, Object>

/**
* Generates a String to String map based on a Json File
*
* @param path file path
* @return instance of the String to String map
* @throws JsonProcessingException JsonProcessingException from Jackson for issues processing map
Expand All @@ -430,15 +440,21 @@ public static Map<String, String> parseJsonFileToStringToStringMap(String path)
* (e.g. "[\"text\", \"hello\"]" to "["text", "hello"]"), this is needed for processors that take in string arrays,
* This also removes the quotations around the array making the array valid to consume
* (e.g. "weights": "[0.7, 0.3]" to "weights": [0.7, 0.3])
*
* @param input The inputString given to be transformed
* @return the transformed string
*/
public static String removingBackslashesAndQuotesInArrayInJsonString(String input) {
return Pattern.compile("\"\\[(.*?)]\"").matcher(input).replaceAll(matchResult -> {
Matcher matcher = JSON_ARRAY_DOUBLE_QUOTES_PATTERN.matcher(input);
StringBuffer result = new StringBuffer();
while (matcher.find()) {
// Extract matched content and remove backslashes before quotes
String withoutEscapes = matchResult.group(1).replaceAll("\\\\\"", "\"");
String withoutEscapes = matcher.group(1).replaceAll("\\\\\"", "\"");
// Return the transformed string with the brackets but without the outer quotes
return "[" + withoutEscapes + "]";
});
matcher.appendReplacement(result, "[" + withoutEscapes + "]");
}
// Append remaining input after the last match
matcher.appendTail(result);
return result.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ public PlainActionFuture<WorkflowData> execute(

String configurations = (String) inputs.get(CONFIGURATIONS);

logger.info("configurations: " + configurations);

byte[] byteArr = configurations.getBytes(StandardCharsets.UTF_8);
BytesReference configurationsBytes = new BytesArray(byteArr);
CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
"create_connector.name": "Amazon Bedrock Connector: embedding",
"create_connector.description": "The connector to bedrock Titan embedding model",
"create_connector.region": "us-east-1",
"create_connector.endpoint": "api.openai.com",
"create_connector.credential.access_key": "123",
"create_connector.credential.secret_key": "123",
"create_connector.credential.session_token": "123",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
{
"template.name": "deploy-bedrock-titan-multimodal-embedding-model",
"template.description": "deploying Amazon Bedrock Titan multimodal embedding model ",
"template.description": "Deploying Amazon Bedrock Titan multimodal embedding model ",
"create_connector.name": "Amazon Bedrock Connector: multi-modal embedding",
"create_connector.description": "The connector to bedrock Titan multi-modal embedding model",
"create_connector.region": "us-east-1",
"create_connector.input_docs_processed_step_size": 2,
"create_connector.endpoint": "api.openai.com",
"create_connector.input_docs_processed_step_size": "2",
"create_connector.credential.access_key": "123",
"create_connector.credential.secret_key": "123",
"create_connector.credential.session_token": "123",
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/defaults/cohere-chat-defaults.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"template.name": "deploy-cohere-chat-model",
"template.description": "deploying cohere chat model",
"template.description": "Deploying a Cohere chat model",
"create_connector.name": "Cohere Chat Model",
"create_connector.description": "The connector to Cohere's public chat API",
"create_connector.protocol": "http",
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/defaults/cohere-embedding-defaults.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"template.name": "deploy-cohere-model",
"template.description": "deploying cohere embedding model",
"template.description": "Deploying a Cohere embedding model",
"create_connector.name": "cohere-embedding-connector",
"create_connector.description": "The connector to Cohere's public embed API",
"create_connector.protocol": "http",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"template.name": "semantic search with cohere embedding",
"template.description": "Setting up semantic search, with cohere embedding model",
"template.description": "Setting up semantic search, with a Cohere embedding model",
"create_connector.name": "cohere-embedding-connector",
"create_connector.description": "The connector to Cohere's public embed API",
"create_connector.protocol": "http",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"template.name": "deploy-cohere-chat-model",
"template.description": "deploying cohere chat model",
"template.description": "A template to deploy a Cohere chat model",
"create_connector.name": "Cohere Chat Model",
"create_connector.description": "The connector to Cohere's public chat API",
"create_connector.protocol": "http",
Expand All @@ -13,7 +13,7 @@
"register_remote_model.description": "cohere-chat-model",
"create_search_pipeline.pipeline_id": "rag-pipeline",
"create_search_pipeline.retrieval_augmented_generation.tag": "openai_pipeline_demo",
"create_search_pipeline.retrieval_augmented_generation.description": "Demo pipeline Using cohere Connector",
"create_search_pipeline.retrieval_augmented_generation.description": "Demo pipeline using a Cohere chat model",
"create_search_pipeline.retrieval_augmented_generation.context_field_list": "[\"text\"]",
"create_search_pipeline.retrieval_augmented_generation.system_prompt": "You are a helpful assistant",
"create_search_pipeline.retrieval_augmented_generation.user_instructions": "Generate a concise and informative answer in less than 100 words for the given question"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"template.name": "local-model-neural-sparse-search",
"template.description": "setting up neural sparse search with local model",
"template.description": "Setting up neural sparse search with pretrained local model",
"register_local_sparse_encoding_model.name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v1",
"register_local_sparse_encoding_model.description": "This is a neural sparse encoding model",
"register_local_sparse_encoding_model.model_format": "TORCH_SCRIPT",
Expand Down
6 changes: 3 additions & 3 deletions src/main/resources/defaults/multi-modal-search-defaults.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
"create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline",
"create_ingest_pipeline.description": "A text/image embedding pipeline",
"create_ingest_pipeline.model_id": "123",
"create_ingest_pipeline.embedding": "vector_embedding",
"text_image_embedding.embedding": "vector_embedding",
"text_image_embedding.field_map.text": "image_description",
"text_image_embedding.field_map.image": "image_binary",
"create_index.name": "my-multimodal-nlp-index",
"create_index.settings.number_of_shards": 2,
"text_image_embedding.field_map.output.dimension": 1024,
"create_index.settings.number_of_shards": "2",
"text_image_embedding.field_map.output.dimension": "1024",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.name": "hnsw"
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"register_remote_model.description": "bedrock-multi-modal-embedding-model",
"create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline",
"create_ingest_pipeline.description": "A text/image embedding pipeline",
"text_image_embedding.create_ingest_pipeline.embedding": "vector_embedding",
"text_image_embedding.embedding": "vector_embedding",
"text_image_embedding.field_map.text": "image_description",
"text_image_embedding.field_map.image": "image_binary",
"create_index.name": "my-multimodal-nlp-index",
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/defaults/openai-chat-defaults.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"template.name": "deploy-openai-chat-model",
"template.description": "deploying openAI chat model",
"template.description": "Deploying an OpenAI chat model",
"create_connector.name": "OpenAI Chat Connector",
"create_connector.description": "Connector to public OpenAI model",
"create_connector.protocol": "http",
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/defaults/openai-embedding-defaults.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"template.name": "deploy-openai-model",
"template.description": "deploying openAI embedding model",
"template.description": "Deploying an OpenAI embedding model",
"create_connector.name": "OpenAI-embedding-connector",
"create_connector.description": "Connector to public OpenAI model",
"create_connector.protocol": "http",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"input_docs_processed_step_size": "${{create_connector.input_docs_processed_step_size}}"
},
"credential": {
"access_ key": "${{create_connector.credential.access_key}}",
"access_key": "${{create_connector.credential.access_key}}",
"secret_key": "${{create_connector.credential.secret_key}}",
"session_token": "${{create_connector.credential.session_token}}"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"id": "register_model",
"type": "register_remote_model",
"previous_node_inputs": {
"create_connector_step_1": "parameters"
"create_connector": "parameters"
},
"user_inputs": {
"name": "${{register_remote_model.name}}",
Expand All @@ -56,7 +56,7 @@
"id": "deploy_model",
"type": "deploy_model",
"previous_node_inputs": {
"register_model_1": "model_id"
"register_model": "model_id"
}
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
"id": "register_model",
"type": "register_remote_model",
"previous_node_inputs": {
"create_connector_step_1": "parameters"
"create_connector": "parameters"
},
"user_inputs": {
"name": "${{register_remote_model.name}}",
Expand All @@ -58,7 +58,7 @@
"id": "deploy_model",
"type": "deploy_model",
"previous_node_inputs": {
"register_model_1": "model_id"
"register_model": "model_id"
}
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
{
"text_image_embedding": {
"model_id": "${{create_ingest_pipeline.model_id}}",
"embedding": "${{create_ingest_pipeline.embedding}}",
"embedding": "${{text_image_embedding.embedding}}",
"field_map": {
"text": "${{text_image_embedding.field_map.text}}",
"image": "${{text_image_embedding.field_map.image}}"
Expand Down Expand Up @@ -53,7 +53,7 @@
"id": {
"type": "text"
},
"${{text_embedding.field_map.output}}": {
"${{text_image_embedding.embedding}}": {
"type": "knn_vector",
"dimension": "${{text_image_embedding.field_map.output.dimension}}",
"method": {
Expand Down
Loading

0 comments on commit 6f8122f

Please sign in to comment.