From 8004a2de44c9376940cd11c90910bf78e65484c2 Mon Sep 17 00:00:00 2001 From: Tyler Ohlsen Date: Tue, 11 Jun 2024 15:34:35 -0700 Subject: [PATCH] Onboard ML inference ingest processor (#172) Signed-off-by: Tyler Ohlsen (cherry picked from commit f077c820b166c81388539f82661bd703f93bbcbb) --- common/constants.ts | 2 +- common/interfaces.ts | 23 ++- .../transformer/ml_transformer.ts | 8 +- public/configs/base_config.ts | 2 + .../base_ingest_processor.ts | 17 -- public/configs/ingest_processors/index.ts | 1 + .../ingest_processors/ml_ingest_processor.ts | 18 ++ .../ingest_processors/model_processor.ts | 17 -- .../text_embedding_processor.ts | 6 +- public/configs/ml_processor.ts | 42 ++++ .../utils/workflow_to_template_utils.ts | 193 +++++------------- .../workflow_inputs/config_field_list.tsx | 15 +- .../ingest_inputs/processors_list.tsx | 14 +- .../workflow_inputs/input_fields/index.ts | 1 + .../input_fields/map_field.tsx | 164 +++++++++++++++ .../input_fields/model_field.tsx | 9 +- public/pages/workflows/new_workflow/utils.ts | 11 +- public/utils/utils.ts | 48 +++-- server/routes/helpers.ts | 4 +- 19 files changed, 361 insertions(+), 234 deletions(-) delete mode 100644 public/configs/ingest_processors/base_ingest_processor.ts create mode 100644 public/configs/ingest_processors/ml_ingest_processor.ts delete mode 100644 public/configs/ingest_processors/model_processor.ts create mode 100644 public/configs/ml_processor.ts create mode 100644 public/pages/workflow_detail/workflow_inputs/input_fields/map_field.tsx diff --git a/common/constants.ts b/common/constants.ts index 13b4abb9..093d411b 100644 --- a/common/constants.ts +++ b/common/constants.ts @@ -124,7 +124,7 @@ export const NEURAL_SPARSE_TOKENIZER_TRANSFORMER = { */ export enum PROCESSOR_TYPE { - MODEL = 'model_processor', + ML = 'ml_processor', } export enum MODEL_TYPE { diff --git a/common/interfaces.ts b/common/interfaces.ts index 494639be..b4f35f2b 100644 --- a/common/interfaces.ts +++ b/common/interfaces.ts @@ -6,7 +6,7 @@ import { Node, Edge } from 'reactflow'; import { FormikValues } from 'formik'; import { ObjectSchema } from 'yup'; -import { COMPONENT_CLASS, PROCESSOR_TYPE, MODEL_TYPE } from './constants'; +import { COMPONENT_CLASS, PROCESSOR_TYPE } from './constants'; export type Index = { name: string; @@ -18,7 +18,7 @@ export type Index = { TODO: over time these can become less generic as the form inputs & UX becomes finalized */ -export type ConfigFieldType = 'string' | 'json' | 'select' | 'model'; +export type ConfigFieldType = 'string' | 'json' | 'select' | 'model' | 'map'; export type ConfigSelectType = 'model'; export type ConfigFieldValue = string | {}; export interface IConfigField { @@ -46,10 +46,6 @@ export interface IProcessorConfig extends IConfig { type: PROCESSOR_TYPE; } -export interface IModelProcessorConfig extends IProcessorConfig { - modelType: MODEL_TYPE; -} - export type EnrichConfig = { processors: IProcessorConfig[]; }; @@ -75,6 +71,13 @@ export type WorkflowConfig = { search: SearchConfig; }; +export type MapEntry = { + key: string; + value: string; +}; + +export type MapFormValue = MapEntry[]; + export type WorkflowFormValues = { ingest: FormikValues; search: FormikValues; @@ -181,6 +184,14 @@ export type SearchRequestProcessor = SearchProcessor & {}; export type SearchResponseProcessor = SearchProcessor & {}; export type SearchPhaseResultsProcessor = SearchProcessor & {}; +export type MLInferenceProcessor = IngestProcessor & { + ml_inference: { + model_id: string; + input_map?: {}; + output_map?: {}; + }; +}; + export type TextEmbeddingProcessor = IngestProcessor & { text_embedding: { model_id: string; diff --git a/public/component_types/transformer/ml_transformer.ts b/public/component_types/transformer/ml_transformer.ts index 68803425..d3ff638f 100644 --- a/public/component_types/transformer/ml_transformer.ts +++ b/public/component_types/transformer/ml_transformer.ts @@ -7,13 +7,15 @@ import { COMPONENT_CLASS } from '../../../common'; import { BaseTransformer } from './base_transformer'; /** - * A generic ML transformer UI component + * A generic ML inference transformer. Can be used across ingest, search request, and search response. + * Under the hood, using the implemented ML inference processors. + * Ref (ingest): https://opensearch.org/docs/latest/ingest-pipelines/processors/ml-inference/ */ export class MLTransformer extends BaseTransformer { constructor() { super(); this.type = COMPONENT_CLASS.ML_TRANSFORMER; - this.label = 'ML Transformer'; - this.description = 'A general ML transformer'; + this.label = 'ML Processor'; + this.description = 'A general ML processor'; } } diff --git a/public/configs/base_config.ts b/public/configs/base_config.ts index 86bf2301..ba887789 100644 --- a/public/configs/base_config.ts +++ b/public/configs/base_config.ts @@ -10,11 +10,13 @@ import { IConfig, IConfigField } from '../../common'; */ export abstract class BaseConfig implements IConfig { id: string; + name: string; fields: IConfigField[]; // No-op constructor. If there are general / defaults for field values, add in here. constructor() { this.id = ''; + this.name = ''; this.fields = []; } diff --git a/public/configs/ingest_processors/base_ingest_processor.ts b/public/configs/ingest_processors/base_ingest_processor.ts deleted file mode 100644 index 6120ab9c..00000000 --- a/public/configs/ingest_processors/base_ingest_processor.ts +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -import { BaseConfig } from '../base_config'; - -/** - * A base ingest processor config - */ -export abstract class BaseIngestProcessor extends BaseConfig { - name: string; - constructor() { - super(); - this.name = ''; - } -} diff --git a/public/configs/ingest_processors/index.ts b/public/configs/ingest_processors/index.ts index 364e51bc..39bcd3d2 100644 --- a/public/configs/ingest_processors/index.ts +++ b/public/configs/ingest_processors/index.ts @@ -4,3 +4,4 @@ */ export * from './text_embedding_processor'; +export * from './ml_ingest_processor'; diff --git a/public/configs/ingest_processors/ml_ingest_processor.ts b/public/configs/ingest_processors/ml_ingest_processor.ts new file mode 100644 index 00000000..63158f63 --- /dev/null +++ b/public/configs/ingest_processors/ml_ingest_processor.ts @@ -0,0 +1,18 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +import { generateId } from '../../utils'; +import { MLProcessor } from '../ml_processor'; + +/** + * The ML processor in the context of ingest + */ +export class MLIngestProcessor extends MLProcessor { + constructor() { + super(); + this.id = generateId('ml_ingest_processor'); + this.name = 'ML ingest processor'; + } +} diff --git a/public/configs/ingest_processors/model_processor.ts b/public/configs/ingest_processors/model_processor.ts deleted file mode 100644 index b550f41e..00000000 --- a/public/configs/ingest_processors/model_processor.ts +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -import { MODEL_TYPE } from '../../../common'; -import { BaseIngestProcessor } from './base_ingest_processor'; - -/** - * A base model processor config - */ -export abstract class ModelProcessor extends BaseIngestProcessor { - type: MODEL_TYPE; - constructor() { - super(); - } -} diff --git a/public/configs/ingest_processors/text_embedding_processor.ts b/public/configs/ingest_processors/text_embedding_processor.ts index 3da301f6..05a76d53 100644 --- a/public/configs/ingest_processors/text_embedding_processor.ts +++ b/public/configs/ingest_processors/text_embedding_processor.ts @@ -3,19 +3,17 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { MODEL_TYPE } from '../../../common'; import { generateId } from '../../utils'; -import { ModelProcessor } from './model_processor'; +import { BaseConfig } from '../base_config'; /** * A specialized text embedding processor config */ -export class TextEmbeddingProcessor extends ModelProcessor { +export class TextEmbeddingProcessor extends BaseConfig { constructor() { super(); this.id = generateId('text_embedding_processor'); this.name = 'Text embedding processor'; - this.type = MODEL_TYPE.TEXT_EMBEDDING; this.fields = [ { label: 'Text Embedding Model', diff --git a/public/configs/ml_processor.ts b/public/configs/ml_processor.ts new file mode 100644 index 00000000..75d222aa --- /dev/null +++ b/public/configs/ml_processor.ts @@ -0,0 +1,42 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +import { BaseConfig } from './base_config'; + +/** + * A generic ML processor config. Used in ingest and search flows. + * The interfaces are identical across ingest / search request / search response processors. + */ +export abstract class MLProcessor extends BaseConfig { + constructor() { + super(); + this.fields = [ + { + label: 'Model', + id: 'model', + type: 'model', + helpText: 'The model ID.', + helpLink: + 'https://opensearch.org/docs/latest/ml-commons-plugin/integrating-ml-models/#choosing-a-model', + }, + { + label: 'Input Map', + id: 'inputMap', + type: 'map', + helpText: `An array specifying how to map fields from the ingested document to the model’s input.`, + helpLink: + 'https://opensearch.org/docs/latest/ingest-pipelines/processors/ml-inference/#configuration-parameters', + }, + { + label: 'Output Map', + id: 'outputMap', + type: 'map', + helpText: `An array specifying how to map the model’s output to new fields.`, + helpLink: + 'https://opensearch.org/docs/latest/ingest-pipelines/processors/ml-inference/#configuration-parameters', + }, + ]; + } +} diff --git a/public/pages/workflow_detail/utils/workflow_to_template_utils.ts b/public/pages/workflow_detail/utils/workflow_to_template_utils.ts index 96548545..24c0b25b 100644 --- a/public/pages/workflow_detail/utils/workflow_to_template_utils.ts +++ b/public/pages/workflow_detail/utils/workflow_to_template_utils.ts @@ -7,28 +7,18 @@ import { TemplateFlows, TemplateNode, CreateIngestPipelineNode, - TextEmbeddingProcessor, CreateIndexNode, TemplateFlow, TemplateEdge, ModelFormValue, - MODEL_CATEGORY, - RegisterPretrainedModelNode, - PretrainedSentenceTransformer, - ROBERTA_SENTENCE_TRANSFORMER, - MPNET_SENTENCE_TRANSFORMER, - BERT_SENTENCE_TRANSFORMER, - NEURAL_SPARSE_TRANSFORMER, - NEURAL_SPARSE_DOC_TRANSFORMER, - NEURAL_SPARSE_TOKENIZER_TRANSFORMER, - SparseEncodingProcessor, IndexMappings, WORKFLOW_STEP_TYPE, WorkflowConfig, PROCESSOR_TYPE, - IModelProcessorConfig, - MODEL_TYPE, IndexConfig, + IProcessorConfig, + MLInferenceProcessor, + MapFormValue, } from '../../../../common'; import { generateId, processorConfigToFormik } from '../../../utils'; @@ -50,14 +40,20 @@ function configToProvisionTemplateFlow(config: WorkflowConfig): TemplateFlow { const edges = [] as TemplateEdge[]; // TODO: few assumptions are made here, such as there will always be - // a single model-related processor. In the future make this more flexible and generic. - const modelProcessorConfig = config.ingest.enrich.processors.find( - (processorConfig) => processorConfig.type === PROCESSOR_TYPE.MODEL - ) as IModelProcessorConfig; + // a single ml-related processor. In the future make this more flexible and generic. + const mlProcessorConfig = config.ingest.enrich.processors.find( + (processorConfig) => processorConfig.type === PROCESSOR_TYPE.ML + ) as IProcessorConfig; - nodes.push(...modelProcessorConfigToTemplateNodes(modelProcessorConfig)); + nodes.push(...mlProcessorConfigToTemplateNodes(mlProcessorConfig)); nodes.push( - indexConfigToTemplateNode(modelProcessorConfig, config.ingest.index) + indexConfigToTemplateNode( + config.ingest.index, + nodes.find( + (node) => + node.type === WORKFLOW_STEP_TYPE.CREATE_INGEST_PIPELINE_STEP_TYPE + ) as CreateIngestPipelineNode + ) ); return { @@ -69,178 +65,87 @@ function configToProvisionTemplateFlow(config: WorkflowConfig): TemplateFlow { // General fn to process all ML processor configs. Convert into a final ingest pipeline. // Optionally prepend a register pretrained model step if the selected model // is a pretrained and currently undeployed one. -function modelProcessorConfigToTemplateNodes( - modelProcessorConfig: IModelProcessorConfig +function mlProcessorConfigToTemplateNodes( + mlProcessorConfig: IProcessorConfig ): TemplateNode[] { // TODO improvements to make here: // 1. Consideration of multiple ingest processors and how to collect them all, and finally create // a single ingest pipeline with all of them, in the same order as done on the UI - switch (modelProcessorConfig.modelType) { - case MODEL_TYPE.TEXT_EMBEDDING: - case MODEL_TYPE.SPARSE_ENCODER: + switch (mlProcessorConfig.type) { + case PROCESSOR_TYPE.ML: default: { - const { model, inputField, vectorField } = processorConfigToFormik( - modelProcessorConfig + const { model, inputMap, outputMap } = processorConfigToFormik( + mlProcessorConfig ) as { model: ModelFormValue; - inputField: string; - vectorField: string; + inputMap: MapFormValue; + outputMap: MapFormValue; }; - const modelId = model.id; const ingestPipelineName = generateId('ingest_pipeline'); - // register model workflow step type is different per use case - const registerModelStepType = - modelProcessorConfig.modelType === MODEL_TYPE.TEXT_EMBEDDING - ? WORKFLOW_STEP_TYPE.REGISTER_LOCAL_PRETRAINED_MODEL_STEP_TYPE - : WORKFLOW_STEP_TYPE.REGISTER_LOCAL_SPARSE_ENCODING_MODEL_STEP_TYPE; - - let registerModelStep = undefined as - | RegisterPretrainedModelNode - | undefined; - if (model.category === MODEL_CATEGORY.PRETRAINED) { - const pretrainedModel = [ - ROBERTA_SENTENCE_TRANSFORMER, - MPNET_SENTENCE_TRANSFORMER, - BERT_SENTENCE_TRANSFORMER, - NEURAL_SPARSE_TRANSFORMER, - NEURAL_SPARSE_DOC_TRANSFORMER, - NEURAL_SPARSE_TOKENIZER_TRANSFORMER, - ].find( - // the model ID in the form will be the unique name of the pretrained model - (model) => model.name === modelId - ) as PretrainedSentenceTransformer; - - registerModelStep = { - id: registerModelStepType, - type: registerModelStepType, - user_inputs: { - name: pretrainedModel.name, - description: pretrainedModel.description, - model_format: pretrainedModel.format, - version: pretrainedModel.version, - deploy: true, - }, - } as RegisterPretrainedModelNode; + let finalProcessor = { + ml_inference: { + model_id: model.id, + }, + } as MLInferenceProcessor; + if (inputMap?.length > 0) { + finalProcessor.ml_inference.input_map = inputMap.map((mapEntry) => ({ + [mapEntry.key]: mapEntry.value, + })); + } + if (outputMap?.length > 0) { + finalProcessor.ml_inference.output_map = outputMap.map((mapEntry) => ({ + [mapEntry.key]: mapEntry.value, + })); } - // The model ID depends on if we are consuming it from a previous pretrained model step, - // or directly from the user - const finalModelId = - registerModelStep !== undefined - ? `\${{${registerModelStepType}.model_id}}` - : modelId; - - // processor is different per use case - const finalProcessor = - modelProcessorConfig.modelType === MODEL_TYPE.TEXT_EMBEDDING - ? ({ - text_embedding: { - model_id: finalModelId, - field_map: { - [inputField]: vectorField, - }, - }, - } as TextEmbeddingProcessor) - : ({ - sparse_encoding: { - model_id: finalModelId, - field_map: { - [inputField]: vectorField, - }, - }, - } as SparseEncodingProcessor); - - // ingest pipeline is different per use case const finalIngestPipelineDescription = - modelProcessorConfig.modelType === MODEL_TYPE.TEXT_EMBEDDING - ? 'An ingest pipeline with a text embedding processor' - : 'An ingest pieline with a neural sparse encoding processor'; + 'An ingest pipeline with an ML inference processor.'; const createIngestPipelineStep = { - id: modelProcessorConfig.id, + id: ingestPipelineName, type: WORKFLOW_STEP_TYPE.CREATE_INGEST_PIPELINE_STEP_TYPE, user_inputs: { pipeline_id: ingestPipelineName, - model_id: finalModelId, - input_field: inputField, - output_field: vectorField, + model_id: model.id, configurations: { description: finalIngestPipelineDescription, processors: [finalProcessor], }, }, } as CreateIngestPipelineNode; - if (registerModelStep !== undefined) { - createIngestPipelineStep.previous_node_inputs = { - ...createIngestPipelineStep.previous_node_inputs, - [registerModelStepType]: 'model_id', - }; - } - return registerModelStep !== undefined - ? [registerModelStep, createIngestPipelineStep] - : [createIngestPipelineStep]; + return [createIngestPipelineStep]; } } } // General fn to convert an index config to a final CreateIndexNode template node. -// Requires the processor configs +// Requires any ingest/pipeline node details to set any defaults function indexConfigToTemplateNode( - modelProcessorConfig: IModelProcessorConfig, - indexConfig: IndexConfig + indexConfig: IndexConfig, + ingestPipelineNode: CreateIngestPipelineNode ): CreateIndexNode { const indexName = indexConfig.name.value as string; - const { inputField, vectorField } = processorConfigToFormik( - modelProcessorConfig - ) as { - inputField: string; - vectorField: string; - }; + + // TODO: extract model details to determine the mappings // index mappings are different per use case const finalIndexMappings = { - properties: - modelProcessorConfig.modelType === MODEL_TYPE.TEXT_EMBEDDING - ? { - [vectorField]: { - type: 'knn_vector', - // TODO: remove hardcoding, fetch from the selected model - // (existing or from pretrained configuration) - dimension: 768, - method: { - engine: 'lucene', - space_type: 'l2', - name: 'hnsw', - parameters: {}, - }, - }, - [inputField]: { - type: 'text', - }, - } - : { - [vectorField]: { - type: 'rank_features', - }, - [inputField]: { - type: 'text', - }, - }, + properties: {}, } as IndexMappings; return { id: 'create_index', type: WORKFLOW_STEP_TYPE.CREATE_INDEX_STEP_TYPE, previous_node_inputs: { - [modelProcessorConfig.id]: 'pipeline_id', + [ingestPipelineNode.id]: 'pipeline_id', }, user_inputs: { index_name: indexName, configurations: { settings: { - default_pipeline: `\${{${modelProcessorConfig.id}.pipeline_id}}`, + default_pipeline: `\${{${ingestPipelineNode.id}.pipeline_id}}`, }, mappings: finalIndexMappings, }, diff --git a/public/pages/workflow_detail/workflow_inputs/config_field_list.tsx b/public/pages/workflow_detail/workflow_inputs/config_field_list.tsx index 632f4bd6..d7d80811 100644 --- a/public/pages/workflow_detail/workflow_inputs/config_field_list.tsx +++ b/public/pages/workflow_detail/workflow_inputs/config_field_list.tsx @@ -5,7 +5,7 @@ import React from 'react'; import { EuiFlexItem, EuiSpacer } from '@elastic/eui'; -import { TextField, JsonField, SelectField, ModelField } from './input_fields'; +import { TextField, ModelField, MapField } from './input_fields'; import { IConfig } from '../../../../common'; /** @@ -68,6 +68,19 @@ export function ConfigFieldList(props: ConfigFieldListProps) { ); break; } + case 'map': { + el = ( + + + + + ); + break; + } // case 'json': { // el = ( // diff --git a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/processors_list.tsx b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/processors_list.tsx index 9f70c111..0c89d45b 100644 --- a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/processors_list.tsx +++ b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/processors_list.tsx @@ -17,8 +17,7 @@ import { cloneDeep } from 'lodash'; import { useFormikContext } from 'formik'; import { IConfig, - IModelProcessorConfig, - MODEL_TYPE, + IProcessorConfig, PROCESSOR_TYPE, WorkflowConfig, WorkflowFormValues, @@ -49,13 +48,13 @@ export function ProcessorsList(props: ProcessorsListProps) { newConfig.ingest.enrich.processors = [ ...newConfig.ingest.enrich.processors, { - type: PROCESSOR_TYPE.MODEL, - modelType: MODEL_TYPE.TEXT_EMBEDDING, + type: PROCESSOR_TYPE.ML, id: processorIdToAdd, fields: [], - } as IModelProcessorConfig, + } as IProcessorConfig, ]; props.setUiConfig(newConfig); + props.onFormChange(); } // Deleting a processor from the config. Fetch the existing one @@ -68,6 +67,7 @@ export function ProcessorsList(props: ProcessorsListProps) { (processorConfig) => processorConfig.id !== processorIdToDelete ); props.setUiConfig(newConfig); + props.onFormChange(); } return ( @@ -112,7 +112,9 @@ export function ProcessorsList(props: ProcessorsListProps) { addProcessor(generateId('test-processor')); }} > - Add another processor + {props.uiConfig?.ingest.enrich.processors.length > 0 + ? 'Add another processor' + : 'Add processor'} diff --git a/public/pages/workflow_detail/workflow_inputs/input_fields/index.ts b/public/pages/workflow_detail/workflow_inputs/input_fields/index.ts index 7d0561f5..b211fd3d 100644 --- a/public/pages/workflow_detail/workflow_inputs/input_fields/index.ts +++ b/public/pages/workflow_detail/workflow_inputs/input_fields/index.ts @@ -7,3 +7,4 @@ export { TextField } from './text_field'; export { JsonField } from './json_field'; export { SelectField } from './select_field'; export { ModelField } from './model_field'; +export { MapField } from './map_field'; diff --git a/public/pages/workflow_detail/workflow_inputs/input_fields/map_field.tsx b/public/pages/workflow_detail/workflow_inputs/input_fields/map_field.tsx new file mode 100644 index 00000000..23714314 --- /dev/null +++ b/public/pages/workflow_detail/workflow_inputs/input_fields/map_field.tsx @@ -0,0 +1,164 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +import React from 'react'; +import { + EuiButton, + EuiButtonIcon, + EuiFlexGroup, + EuiFlexItem, + EuiFormControlLayoutDelimited, + EuiFormRow, + EuiLink, + EuiText, +} from '@elastic/eui'; +import { Field, FieldProps, getIn, useFormikContext } from 'formik'; +import { + IConfigField, + MapEntry, + MapFormValue, + WorkflowFormValues, +} from '../../../../../common'; + +interface MapFieldProps { + field: IConfigField; + fieldPath: string; // the full path in string-form to the field (e.g., 'ingest.enrich.processors.text_embedding_processor.inputField') + onFormChange: () => void; +} + +/** + * Input component for configuring field mappings + */ +export function MapField(props: MapFieldProps) { + const { setFieldValue, errors, touched } = useFormikContext< + WorkflowFormValues + >(); + + // Adding a map entry to the end of the existing arr + function addMapEntry(curEntries: MapFormValue): void { + const updatedEntries = [...curEntries, { key: '', value: '' } as MapEntry]; + setFieldValue(props.fieldPath, updatedEntries); + props.onFormChange(); + } + + // Deleting a map entry + function deleteMapEntry( + curEntries: MapFormValue, + entryIndexToDelete: number + ): void { + const updatedEntries = [...curEntries]; + updatedEntries.splice(entryIndexToDelete, 1); + setFieldValue(props.fieldPath, updatedEntries); + props.onFormChange(); + } + + return ( + + {({ field, form }: FieldProps) => { + return ( + + + Learn more + + + ) : undefined + } + helpText={props.field.helpText || undefined} + error={ + getIn(errors, field.name) !== undefined && + getIn(errors, field.name).length > 0 + ? 'Invalid or missing mapping values' + : false + } + isInvalid={ + getIn(errors, field.name) !== undefined && + getIn(errors, field.name).length > 0 && + getIn(touched, field.name) !== undefined && + getIn(touched, field.name).length > 0 + } + > + + {field.value?.map((mapping: MapEntry, idx: number) => { + return ( + + + + { + form.setFieldValue( + `${props.fieldPath}.${idx}.key`, + e.target.value + ); + props.onFormChange(); + }} + /> + } + endControl={ + { + form.setFieldValue( + `${props.fieldPath}.${idx}.value`, + e.target.value + ); + props.onFormChange(); + }} + /> + } + /> + + + { + deleteMapEntry(field.value, idx); + }} + /> + + + + ); + })} + +
+ { + addMapEntry(field.value); + }} + > + {field.value?.length > 0 + ? 'Add another field mapping' + : 'Add field mapping'} + +
+
+
+
+ ); + }} +
+ ); +} diff --git a/public/pages/workflow_detail/workflow_inputs/input_fields/model_field.tsx b/public/pages/workflow_detail/workflow_inputs/input_fields/model_field.tsx index 37ccefe2..26d50698 100644 --- a/public/pages/workflow_detail/workflow_inputs/input_fields/model_field.tsx +++ b/public/pages/workflow_detail/workflow_inputs/input_fields/model_field.tsx @@ -66,10 +66,11 @@ export function ModelField(props: ModelFieldProps) { id: MODEL_CATEGORY.DEPLOYED, label: 'Existing deployed models', }, - { - id: MODEL_CATEGORY.PRETRAINED, - label: 'Pretrained models', - }, + // TODO: finalize if pretrained models will be supported or not + // { + // id: MODEL_CATEGORY.PRETRAINED, + // label: 'Pretrained models', + // }, ] as EuiRadioGroupOption[]; const [selectedRadioId, setSelectedRadioId] = useState< MODEL_CATEGORY | undefined diff --git a/public/pages/workflows/new_workflow/utils.ts b/public/pages/workflows/new_workflow/utils.ts index 9c540589..91b57d66 100644 --- a/public/pages/workflows/new_workflow/utils.ts +++ b/public/pages/workflows/new_workflow/utils.ts @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { TextEmbeddingProcessor } from '../../../configs'; +import { MLIngestProcessor } from '../../../configs'; import { USE_CASE, WorkflowTemplate, @@ -11,13 +11,13 @@ import { DEFAULT_NEW_WORKFLOW_NAME, UIState, PROCESSOR_TYPE, - IModelProcessorConfig, COMPONENT_CLASS, COMPONENT_CATEGORY, NODE_CATEGORY, ReactFlowComponent, ReactFlowEdge, WorkspaceFlowState, + IProcessorConfig, } from '../../../../common'; import { generateId, initComponentData } from '../../../utils'; import { MarkerType } from 'reactflow'; @@ -108,19 +108,18 @@ function fetchSemanticSearchMetadata(): UIState { // We can reuse the base state. Only need to override a few things, // such as preset ingest processors. let baseState = fetchEmptyMetadata(); - const processor = new TextEmbeddingProcessor(); + const processor = new MLIngestProcessor(); // @ts-ignore baseState.config.ingest.enrich.processors = [ { - type: PROCESSOR_TYPE.MODEL, - modelType: processor.type, + type: PROCESSOR_TYPE.ML, id: processor.id, fields: processor.fields, metadata: { label: processor.name, }, }, - ] as IModelProcessorConfig; + ] as IProcessorConfig; return baseState; } diff --git a/public/utils/utils.ts b/public/utils/utils.ts index 31f75371..345e9faf 100644 --- a/public/utils/utils.ts +++ b/public/utils/utils.ts @@ -8,6 +8,7 @@ import { EuiFilterSelectItem } from '@elastic/eui'; import { Schema, ObjectSchema } from 'yup'; import * as yup from 'yup'; import { cloneDeep } from 'lodash'; +import { MarkerType } from 'reactflow'; import { IComponent, IComponentData, @@ -34,9 +35,7 @@ import { COMPONENT_CATEGORY, NODE_CATEGORY, IConfig, - IModelProcessorConfig, PROCESSOR_TYPE, - MODEL_TYPE, } from '../../common'; import { Document, @@ -44,10 +43,7 @@ import { MLTransformer, NeuralQuery, Results, - SparseEncoderTransformer, - TextEmbeddingTransformer, } from '../component_types'; -import { MarkerType } from 'reactflow'; // Append 16 random characters export function generateId(prefix: string): string { @@ -263,6 +259,9 @@ export function getInitialValue(fieldType: ConfigFieldType): ConfigFieldValue { algorithm: undefined, } as ModelFormValue; } + case 'map': { + return []; + } case 'json': { return {}; } @@ -288,6 +287,19 @@ function getFieldSchema(field: IConfigField): Schema { }); break; } + case 'map': { + baseSchema = yup.array().of( + yup.object().shape({ + key: yup.string().min(1, 'Too short').max(70, 'Too long').required(), + value: yup + .string() + .min(1, 'Too short') + .max(70, 'Too long') + .required(), + }) + ); + break; + } case 'json': { baseSchema = yup.object().json(); break; @@ -439,26 +451,18 @@ function enrichConfigToWorkspaceFlow( let xPosition = NODE_WIDTH + NODE_SPACING * 2; // node padding + (width of doc node) + node padding let prevNodeId = undefined as string | undefined; - const modelProcessorConfigs = enrichConfig.processors.filter( - (processorConfig) => processorConfig.type === PROCESSOR_TYPE.MODEL - ) as IModelProcessorConfig[]; + const mlProcessorConfigs = enrichConfig.processors.filter( + (processorConfig) => processorConfig.type === PROCESSOR_TYPE.ML + ) as IProcessorConfig[]; - modelProcessorConfigs.forEach((modelProcessorConfig) => { + mlProcessorConfigs.forEach((mlProcessorConfig) => { let transformer = {} as MLTransformer; let transformerNodeId = ''; - switch (modelProcessorConfig.modelType) { - case MODEL_TYPE.TEXT_EMBEDDING: { - transformer = new TextEmbeddingTransformer(); - transformerNodeId = generateId( - COMPONENT_CLASS.TEXT_EMBEDDING_TRANSFORMER - ); - break; - } - case MODEL_TYPE.SPARSE_ENCODER: { - transformer = new SparseEncoderTransformer(); - transformerNodeId = generateId( - COMPONENT_CLASS.SPARSE_ENCODER_TRANSFORMER - ); + switch (mlProcessorConfig.type) { + case PROCESSOR_TYPE.ML: + default: { + transformer = new MLTransformer(); + transformerNodeId = generateId(COMPONENT_CLASS.ML_TRANSFORMER); break; } } diff --git a/server/routes/helpers.ts b/server/routes/helpers.ts index 6498b568..8833869d 100644 --- a/server/routes/helpers.ts +++ b/server/routes/helpers.ts @@ -61,9 +61,7 @@ export function getWorkflowsFromResponses( ): WorkflowDict { const workflowDict = {} as WorkflowDict; workflowHits.forEach((workflowHit: any) => { - // TODO: update schema parsing after hit schema has been updated. - // https://github.com/opensearch-project/flow-framework/issues/546 - const hitSource = workflowHit.fields.filter[0]; + const hitSource = workflowHit._source; workflowDict[workflowHit._id] = toWorkflowObj(hitSource, workflowHit._id); const workflowStateHit = workflowStateHits.find( (workflowStateHit) => workflowStateHit._id === workflowHit._id