Skip to content

Commit

Permalink
add field map and algorithm map for text chunking processor
Browse files Browse the repository at this point in the history
Signed-off-by: yuye-aws <[email protected]>
  • Loading branch information
yuye-aws committed Apr 29, 2024
1 parent 5df923c commit 7769fac
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import org.opensearch.ingest.AbstractProcessor;
import org.opensearch.ingest.IngestDocument;

import java.util.Map;

/**
* This processor is used for text chunking.
* The text chunking results can be fed to a downstream embedding processor.
Expand All @@ -17,9 +19,23 @@
public final class TextChunkingProcessor extends AbstractProcessor {

/** Type identifier of this processor; the factory also passes it to {@code readMap} when reading configuration. */
public static final String TYPE = "text_chunking";
/** Configuration key under which the factory reads the field map. */
public static final String FIELD_MAP_FIELD = "field_map";
/** Configuration key under which the factory reads the algorithm map. */
public static final String ALGORITHM_FIELD = "algorithm";

// Map received through the constructor; presumably describes which fields to chunk (TODO confirm against usage).
private final Map<String, Object> fieldMap;
// Map received through the constructor; presumably holds chunking algorithm settings (TODO confirm against usage).
private final Map<String, Object> algorithmMap;



public TextChunkingProcessor(final String tag, final String description) {
/**
 * Creates a text chunking processor that keeps the supplied configuration maps.
 *
 * @param tag          processor tag, forwarded to the superclass constructor
 * @param description  processor description, forwarded to the superclass constructor
 * @param fieldMap     map stored as this processor's field map (kept by reference, not copied)
 * @param algorithmMap map stored as this processor's algorithm map (kept by reference, not copied)
 */
public TextChunkingProcessor(
    final String tag,
    final String description,
    final Map<String, Object> fieldMap,
    final Map<String, Object> algorithmMap
) {
    super(tag, description);
    // The two assignments are independent of each other; both simply retain the caller's maps.
    this.algorithmMap = algorithmMap;
    this.fieldMap = fieldMap;
}

public String getType() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

import org.opensearch.ingest.Processor;
import org.opensearch.neuralsearch.processor.TextChunkingProcessor;
import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.TYPE;
import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.FIELD_MAP_FIELD;
import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.ALGORITHM_FIELD;
import static org.opensearch.ingest.ConfigurationUtils.readMap;

/**
* Factory for the text chunking ingest processor used in ingestion pipelines.
Expand All @@ -26,6 +30,8 @@ public TextChunkingProcessor create(
String description,
Map<String, Object> config
) throws Exception {
return new TextChunkingProcessor(processorTag, description);
Map<String, Object> fieldMap = readMap(TYPE, processorTag, config, FIELD_MAP_FIELD);
Map<String, Object> algorithmMap = readMap(TYPE, processorTag, config, ALGORITHM_FIELD);
return new TextChunkingProcessor(processorTag, description, fieldMap, algorithmMap);
}
}

0 comments on commit 7769fac

Please sign in to comment.