From e801d4c9b5b9365b471e932a74e4f10ebf0f97af Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 13 Nov 2024 21:32:55 +0000 Subject: [PATCH] Onboard rerank by field processor (#476) Signed-off-by: Tyler Ohlsen (cherry picked from commit 40250028719fb61772c29cfbb09c0230b92e4476) Signed-off-by: github-actions[bot] --- common/constants.ts | 1 + .../search_response_processors/index.ts | 1 + .../rerank_processor.ts | 53 +++++++++++++++++++ .../workflow_inputs/processors_list.tsx | 8 +++ public/utils/config_to_template_utils.ts | 21 ++++++++ public/utils/config_to_workspace_utils.ts | 8 +++ 6 files changed, 92 insertions(+) create mode 100644 public/configs/search_response_processors/rerank_processor.ts diff --git a/common/constants.ts b/common/constants.ts index 4d50974c..74ec6cdf 100644 --- a/common/constants.ts +++ b/common/constants.ts @@ -127,6 +127,7 @@ export enum PROCESSOR_TYPE { TEXT_CHUNKING = 'text_chunking', NORMALIZATION = 'normalization-processor', COLLAPSE = 'collapse', + RERANK = 'rerank', } export enum MODEL_TYPE { diff --git a/public/configs/search_response_processors/index.ts b/public/configs/search_response_processors/index.ts index 2e5c5e40..1848d4b6 100644 --- a/public/configs/search_response_processors/index.ts +++ b/public/configs/search_response_processors/index.ts @@ -8,3 +8,4 @@ export * from './split_search_response_processor'; export * from './sort_search_response_processor'; export * from './normalization_processor'; export * from './collapse_processor'; +export * from './rerank_processor'; diff --git a/public/configs/search_response_processors/rerank_processor.ts b/public/configs/search_response_processors/rerank_processor.ts new file mode 100644 index 00000000..7a8bf7aa --- /dev/null +++ b/public/configs/search_response_processors/rerank_processor.ts @@ -0,0 +1,53 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +import { PROCESSOR_TYPE } from '../../../common'; +import { Processor } from '../processor'; +import { generateId } from '../../utils'; + +/** + * The rerank processor config. Used in search flows. + * For now, only supports the by_field type. For details, see + * https://opensearch.org/docs/latest/search-plugins/search-pipelines/rerank-processor/#the-by_field-rerank-type + */ +export class RerankProcessor extends Processor { + constructor() { + super(); + this.id = generateId('rerank_processor'); + this.type = PROCESSOR_TYPE.RERANK; + this.name = 'Rerank Processor'; + this.fields = [ + { + id: 'target_field', + type: 'string', + }, + ]; + this.optionalFields = [ + { + id: 'remove_target_field', + type: 'boolean', + value: false, + }, + { + id: 'keep_previous_score', + type: 'boolean', + value: false, + }, + { + id: 'tag', + type: 'string', + }, + { + id: 'description', + type: 'string', + }, + { + id: 'ignore_failure', + type: 'boolean', + value: false, + }, + ]; + } +} diff --git a/public/pages/workflow_detail/workflow_inputs/processors_list.tsx b/public/pages/workflow_detail/workflow_inputs/processors_list.tsx index ec774211..b85909a4 100644 --- a/public/pages/workflow_detail/workflow_inputs/processors_list.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processors_list.tsx @@ -30,6 +30,7 @@ import { MLSearchRequestProcessor, MLSearchResponseProcessor, NormalizationProcessor, + RerankProcessor, SortIngestProcessor, SortSearchResponseProcessor, SplitIngestProcessor, @@ -276,6 +277,13 @@ export function ProcessorsList(props: ProcessorsListProps) { ); }, }, + { + name: 'Rerank Processor', + onClick: () => { + closePopover(); + addProcessor(new RerankProcessor().toObj()); + }, + }, { name: 'Split Processor', onClick: () => { diff --git a/public/utils/config_to_template_utils.ts b/public/utils/config_to_template_utils.ts index 09873a26..3d073cb2 100644 --- a/public/utils/config_to_template_utils.ts +++ b/public/utils/config_to_template_utils.ts @@ -314,6 +314,27 @@ export function processorConfigsToTemplateProcessors( }); break; } + // Since we only support the by_field type of the rerank processor, + // we need to nest the form values within the parent "by_field" field. + case PROCESSOR_TYPE.RERANK: { + const formValues = processorConfigToFormik(processorConfig); + let finalFormValues = {} as FormikValues; + Object.keys(formValues).forEach((formKey: string) => { + const formValue = formValues[formKey]; + finalFormValues = optionallyAddToFinalForm( + finalFormValues, + formKey, + formValue + ); + }); + finalFormValues = { + by_field: finalFormValues, + }; + processorsList.push({ + [processorConfig.type]: finalFormValues, + }); + break; + } case PROCESSOR_TYPE.SPLIT: case PROCESSOR_TYPE.SORT: case PROCESSOR_TYPE.COLLAPSE: diff --git a/public/utils/config_to_workspace_utils.ts b/public/utils/config_to_workspace_utils.ts index a24a51e0..fe0c485c 100644 --- a/public/utils/config_to_workspace_utils.ts +++ b/public/utils/config_to_workspace_utils.ts @@ -348,6 +348,14 @@ function processorsConfigToWorkspaceFlow( ); break; } + case PROCESSOR_TYPE.RERANK: { + transformer = new BaseTransformer( + processorConfig.name, + 'Rerank results by a document field', + context + ); + break; + } default: { transformer = new BaseTransformer(processorConfig.name, '', context); break;