Skip to content

Commit

Permalink
Adding additional default use cases (opensearch-project#731)
Browse files Browse the repository at this point in the history
* adding pretrained model templates

Signed-off-by: Amit Galitzky <[email protected]>

* adding reindex

Signed-off-by: Amit Galitzky <[email protected]>

* changing file structure for bwc

Signed-off-by: Amit Galitzky <[email protected]>

---------

Signed-off-by: Amit Galitzky <[email protected]>
  • Loading branch information
amitgalitz authored Jun 8, 2024
1 parent a941b68 commit 53126cb
Show file tree
Hide file tree
Showing 24 changed files with 630 additions and 46 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/)
### Enhancements
- Add Workflow Step for Reindex from source index to destination ([#718](https://github.com/opensearch-project/flow-framework/pull/718))
- Add param to delete workflow API to clear status even if resources exist ([#719](https://github.com/opensearch-project/flow-framework/pull/719))
- Add additional default use cases ([#731](https://github.com/opensearch-project/flow-framework/pull/731))

### Bug Fixes
- Add user mapping to Workflow State index ([#705](https://github.com/opensearch-project/flow-framework/pull/705))

Expand Down
26 changes: 25 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ dependencies {

// ZipArchive dependencies used for integration tests
zipArchive group: 'org.opensearch.plugin', name:'opensearch-ml-plugin', version: "${opensearch_build}"
zipArchive group: 'org.opensearch.plugin', name:'opensearch-knn', version: "${opensearch_build}"
zipArchive group: 'org.opensearch.plugin', name:'neural-search', version: "${opensearch_build}"
secureIntegTestPluginArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}"

configurations.all {
Expand Down Expand Up @@ -492,7 +494,29 @@ List<Provider<RegularFile>> plugins = [
return new RegularFile() {
@Override
File getAsFile() {
return configurations.zipArchive.asFileTree.getSingleFile()
return configurations.zipArchive.asFileTree.matching{include "**/opensearch-ml-plugin-${opensearch_build}.zip"}.getSingleFile()
}
}
}
}),
provider(new Callable<RegularFile>(){
@Override
RegularFile call() throws Exception {
return new RegularFile() {
@Override
File getAsFile() {
return configurations.zipArchive.asFileTree.matching{include "**/opensearch-knn-${opensearch_build}.zip"}.getSingleFile()
}
}
}
}),
provider(new Callable<RegularFile>(){
@Override
RegularFile call() throws Exception {
return new RegularFile() {
@Override
File getAsFile() {
return configurations.zipArchive.asFileTree.matching{include "**/neural-search-${opensearch_build}.zip"}.getSingleFile()
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,4 +225,6 @@ private CommonValue() {}
public static final String CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN = "create_connector.credential.session_token";
/** The field name for ingest pipeline model ID substitution */
public static final String CREATE_INGEST_PIPELINE_MODEL_ID = "create_ingest_pipeline.model_id";
/** The field name for reindex source index substitution */
public static final String REINDEX_SOURCE_INDEX = "reindex.source_index";
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SECRET_KEY;
import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN;
import static org.opensearch.flowframework.common.CommonValue.CREATE_INGEST_PIPELINE_MODEL_ID;
import static org.opensearch.flowframework.common.CommonValue.REINDEX_SOURCE_INDEX;

/**
* Enum encapsulating the different default use cases and templates we have stored
Expand Down Expand Up @@ -132,6 +133,28 @@ public enum DefaultUseCases {
"defaults/conversational-search-defaults.json",
"substitutionTemplates/conversational-search-with-cohere-model-template.json",
List.of(CREATE_CONNECTOR_CREDENTIAL_KEY)
),
/** defaults file and substitution ready template for semantic search with a local pretrained model*/
SEMANTIC_SEARCH_WITH_LOCAL_MODEL(
"semantic_search_with_local_model",
"defaults/semantic-search-with-local-model-defaults.json",
"substitutionTemplates/semantic-search-with-local-model-template.json",
Collections.emptyList()

),
/** defaults file and substitution ready template for hybrid search with a local pretrained model*/
HYBRID_SEARCH_WITH_LOCAL_MODEL(
"hybrid_search_with_local_model",
"defaults/hybrid-search-with-local-model-defaults.json",
"substitutionTemplates/hybrid-search-with-local-model-template.json",
Collections.emptyList()
),
/** defaults file and substitution ready template for semantic search with reindex command*/
SEMANTIC_SEARCH_WITH_REINDEX(
"semantic_search_with_reindex",
"defaults/semantic-search-with-reindex-defaults.json",
"substitutionTemplates/semantic-search-with-reindex-template.json",
List.of(CREATE_CONNECTOR_CREDENTIAL_KEY, REINDEX_SOURCE_INDEX)
);

private final String useCaseName;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,20 @@ public PlainActionFuture<WorkflowData> execute(
// Optional throttle: stays null when the caller did not supply REQUESTS_PER_SECOND.
Float requestsPerSecond = inputs.containsKey(REQUESTS_PER_SECOND)
? Float.parseFloat(inputs.get(REQUESTS_PER_SECOND).toString())
: null;
// Negative values are replaced with positive infinity. The null check is required:
// comparing a null Float with `<` auto-unboxes it and throws NullPointerException,
// which previously happened whenever the input was omitted.
if (requestsPerSecond != null && requestsPerSecond < 0) {
requestsPerSecond = Float.POSITIVE_INFINITY;
}
Boolean requireAlias = inputs.containsKey(REQUIRE_ALIAS) ? Booleans.parseBoolean(inputs.get(REQUIRE_ALIAS).toString()) : null;
Integer slices = (Integer) inputs.get(SLICES);
Integer maxDocs = (Integer) inputs.get(MAX_DOCS);

Integer slices;
Integer maxDocs;
if (inputs.get(SLICES) != null) {
slices = Integer.parseInt(String.valueOf(inputs.get(SLICES)));
} else {
slices = (Integer) inputs.get(SLICES);
}
if (inputs.get(MAX_DOCS) != null) {
maxDocs = Integer.parseInt(String.valueOf(inputs.get(MAX_DOCS)));
} else {
maxDocs = (Integer) inputs.get(MAX_DOCS);
}
ReindexRequest reindexRequest = new ReindexRequest().setSourceIndices(Strings.splitStringByCommaToArray(sourceIndices))
.setDestIndex(destinationIndex);

Expand Down
3 changes: 1 addition & 2 deletions src/main/resources/defaults/hybrid-search-defaults.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,5 @@
"text_embedding.field_map.output.dimension": "1024",
"create_search_pipeline.pipeline_id": "nlp-search-pipeline",
"normalization-processor.normalization.technique": "min_max",
"normalization-processor.combination.technique": "arithmetic_mean",
"normalization-processor.combination.parameters.weights": "[0.3, 0.7]"
"normalization-processor.combination.technique": "arithmetic_mean"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"template.name": "hybrid search with local pretrained model",
"template.description": "Setting up hybrid search, ingest pipeline and index, with a local pretrained embedding model",
"register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
"register_local_pretrained_model.description": "This is a sentence transformer model",
"register_local_pretrained_model.model_format": "TORCH_SCRIPT",
"register_local_pretrained_model.deploy": "true",
"register_local_pretrained_model.version": "1.0.1",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"create_ingest_pipeline.model_id": "123",
"text_embedding.field_map.input": "passage_text",
"text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index",
"create_index.settings.number_of_shards": "2",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.space_type": "l2",
"create_index.mappings.method.name": "hnsw",
"text_embedding.field_map.output.dimension": "768",
"create_search_pipeline.pipeline_id": "nlp-search-pipeline",
"normalization-processor.normalization.technique": "min_max",
"normalization-processor.combination.technique": "arithmetic_mean"
}
4 changes: 3 additions & 1 deletion src/main/resources/defaults/multi-modal-search-defaults.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,7 @@
"create_index.settings.number_of_shards": "2",
"text_image_embedding.field_map.output.dimension": "1024",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.name": "hnsw"
"create_index.mappings.method.name": "hnsw",
"text_image_embedding.field_map.image.type": "text",
"text_image_embedding.field_map.text.type": "text"
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,7 @@
"create_index.settings.number_of_shards": "2",
"text_image_embedding.field_map.output.dimension": "1024",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.name": "hnsw"
"create_index.mappings.method.name": "hnsw",
"text_image_embedding.field_map.image.type": "text",
"text_image_embedding.field_map.text.type": "text"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"template.name": "semantic search with local pretrained model",
"template.description": "Setting up semantic search, with a local pretrained embedding model",
"register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
"register_local_pretrained_model.description": "This is a sentence transformer model",
"register_local_pretrained_model.model_format": "TORCH_SCRIPT",
"register_local_pretrained_model.deploy": "true",
"register_local_pretrained_model.version": "1.0.1",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"text_embedding.field_map.input": "passage_text",
"text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index",
"create_index.settings.number_of_shards": "2",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.space_type": "l2",
"create_index.mappings.method.name": "hnsw",
"text_embedding.field_map.output.dimension": "768",
"create_search_pipeline.pipeline_id": "default_model_pipeline"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"template.name": "semantic search with cohere embedding and reindex",
"template.description": "Setting up semantic search, with a Cohere embedding model, and reindexing documents from a source index",
"create_connector.name": "cohere-embedding-connector",
"create_connector.description": "The connector to Cohere's public embed API",
"create_connector.protocol": "http",
"create_connector.model": "embed-english-v3.0",
"create_connector.input_type": "search_document",
"create_connector.truncate": "end",
"create_connector.credential.key": "123",
"create_connector.actions.url": "https://api.cohere.ai/v1/embed",
"create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
"create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding",
"create_connector.actions.post_process_function": "connector.post_process.cohere.embedding",
"register_remote_model.name": "Cohere english embed model",
"register_remote_model.description": "cohere-embedding-model",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"text_embedding.field_map.input": "passage_text",
"text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index",
"create_index.settings.number_of_shards": "2",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.space_type": "l2",
"create_index.mappings.method.name": "hnsw",
"text_embedding.field_map.output.dimension": "1024",
"create_search_pipeline.pipeline_id": "default_model_pipeline",
"reindex.source_index": "",
"reindex.requests_per_second": "-1",
"reindex.slices": "1"
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,6 @@
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "text"
},
"${{text_embedding.field_map.output}}": {
"type": "knn_vector",
"dimension": "${{text_embedding.field_map.output.dimension}}",
Expand Down Expand Up @@ -86,10 +83,7 @@
"technique": "${{normalization-processor.normalization.technique}}"
},
"combination": {
"technique": "${{normalization-processor.combination.technique}}",
"parameters": {
"weights": "${{normalization-processor.combination.parameters.weights}}"
}
"technique": "${{normalization-processor.combination.technique}}"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"name": "${{template.name}}",
"description": "${{template.description}}",
"use_case": "HYBRID_SEARCH",
"version": {
"template": "1.0.0",
"compatibility": [
"2.12.0",
"3.0.0"
]
},
"workflows": {
"provision": {
"nodes": [
{
"id": "register_local_pretrained_model",
"type": "register_local_pretrained_model",
"user_inputs": {
"name": "${{register_local_pretrained_model.name}}",
"version": "${{register_local_pretrained_model.version}}",
"description": "${{register_local_pretrained_model.description}}",
"model_format": "${{register_local_pretrained_model.model_format}}",
"deploy": true
}
},
{
"id": "create_ingest_pipeline",
"type": "create_ingest_pipeline",
"previous_node_inputs": {
"register_local_pretrained_model": "model_id"
},
"user_inputs": {
"pipeline_id": "${{create_ingest_pipeline.pipeline_id}}",
"configurations": {
"description": "${{create_ingest_pipeline.description}}",
"processors": [
{
"text_embedding": {
"model_id": "${{register_local_pretrained_model.model_id}}",
"field_map": {
"${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}"
}
}
}
]
}
}
},
{
"id": "create_index",
"type": "create_index",
"previous_node_inputs": {
"create_ingest_pipeline": "pipeline_id"
},
"user_inputs": {
"index_name": "${{create_index.name}}",
"configurations": {
"settings": {
"index.knn": true,
"default_pipeline": "${{create_ingest_pipeline.pipeline_id}}",
"number_of_shards": "${{create_index.settings.number_of_shards}}",
"index.search.default_pipeline": "${{create_search_pipeline.pipeline_id}}"
},
"mappings": {
"properties": {
"${{text_embedding.field_map.output}}": {
"type": "knn_vector",
"dimension": "${{text_embedding.field_map.output.dimension}}",
"method": {
"engine": "${{create_index.mappings.method.engine}}",
"space_type": "${{create_index.mappings.method.space_type}}",
"name": "${{create_index.mappings.method.name}}",
"parameters": {}
}
},
"${{text_embedding.field_map.input}}": {
"type": "text"
}
}
}
}
}
},
{
"id": "create_search_pipeline",
"type": "create_search_pipeline",
"user_inputs": {
"pipeline_id": "${{create_search_pipeline.pipeline_id}}",
"configurations": {
"description": "Post processor for hybrid search",
"phase_results_processors": [
{
"normalization-processor": {
"normalization": {
"technique": "${{normalization-processor.normalization.technique}}"
},
"combination": {
"technique": "${{normalization-processor.combination.technique}}"
}
}
}
]
}
}
}
]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,6 @@
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "text"
},
"${{text_image_embedding.embedding}}": {
"type": "knn_vector",
"dimension": "${{text_image_embedding.field_map.output.dimension}}",
Expand All @@ -64,10 +61,10 @@
}
},
"${{text_image_embedding.field_map.text}}": {
"type": "text"
"type": "${{text_image_embedding.field_map.text.type}}"
},
"${{text_image_embedding.field_map.image}}": {
"type": "binary"
"type": "${{text_image_embedding.field_map.image.type}}"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,6 @@
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "text"
},
"${{text_image_embedding.embedding}}": {
"type": "knn_vector",
"dimension": "${{text_image_embedding.field_map.output.dimension}}",
Expand All @@ -114,10 +111,10 @@
}
},
"${{text_image_embedding.field_map.text}}": {
"type": "text"
"type": "${{text_image_embedding.field_map.text.type}}"
},
"${{text_image_embedding.field_map.image}}": {
"type": "binary"
"type": "${{text_image_embedding.field_map.image.type}}"
}
}
}
Expand Down
Loading

0 comments on commit 53126cb

Please sign in to comment.