diff --git a/src/main/java/org/opensearch/neuralsearch/processor/TextChunkingProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/TextChunkingProcessor.java
index 963135e68..c9bd5e46e 100644
--- a/src/main/java/org/opensearch/neuralsearch/processor/TextChunkingProcessor.java
+++ b/src/main/java/org/opensearch/neuralsearch/processor/TextChunkingProcessor.java
@@ -11,6 +11,7 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Objects;
+import java.util.Set;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -113,6 +114,18 @@ private void parseAlgorithmMap(final Map<String, Object> algorithmMap) {
             }
         }
 
+        // Validate the algorithm name here so that ChunkerFactory.create is only called with a supported type
+        Set<String> allChunkerAlgorithms = ChunkerFactory.allChunkerAlgorithms;
+        if (!allChunkerAlgorithms.contains(algorithmKey)) {
+            throw new IllegalArgumentException(
+                String.format(
+                    Locale.ROOT,
+                    "Chunking algorithm [%s] is not supported. Supported chunking algorithms are %s",
+                    algorithmKey,
+                    allChunkerAlgorithms
+                )
+            );
+        }
         Map<String, Object> chunkerParameters = (Map<String, Object>) algorithmValue;
         if (algorithmKey.equals(FixedTokenLengthChunker.ALGORITHM_NAME)) {
             // fixed token length algorithm needs analysis registry for tokenization
diff --git a/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java b/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java
index d66bc423e..a3f7346d5 100644
--- a/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java
+++ b/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java
@@ -5,9 +5,11 @@
 package org.opensearch.neuralsearch.processor.chunker;
 
 import com.google.common.collect.ImmutableMap;
+import lombok.Getter;
 
 import java.util.Map;
-import java.util.Locale;
+import java.util.Objects;
+import java.util.Set;
 import java.util.function.Function;
 
 /**
@@ -22,18 +24,18 @@ public class ChunkerFactory {
         DelimiterChunker::new
     );
 
+    /** Names of every chunking algorithm registered in {@code chunkers}; used by callers to validate a requested type. */
+    @Getter
+    public static final Set<String> allChunkerAlgorithms = chunkers.keySet();
+
     public static Chunker create(final String type, final Map<String, Object> parameters) {
         Function<Map<String, Object>, Chunker> chunkerConstructionFunction = chunkers.get(type);
-        if (chunkerConstructionFunction == null) {
-            throw new IllegalArgumentException(
-                String.format(
-                    Locale.ROOT,
-                    "Chunking algorithm [%s] is not supported. Supported chunking algorithms are %s",
-                    type,
-                    chunkers.keySet()
-                )
-            );
-        }
+        // The text chunking processor validates the type before calling create, so a missing entry is a
+        // programming error; still fail with a message that names the offending type instead of a bare NPE.
+        Objects.requireNonNull(
+            chunkerConstructionFunction,
+            () -> "Chunking algorithm [" + type + "] is not supported. Supported chunking algorithms are " + chunkers.keySet()
+        );
         return chunkerConstructionFunction.apply(parameters);
     }
 }
diff --git a/src/test/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactoryTests.java b/src/test/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactoryTests.java
index 2b06ca10a..21859c24e 100644
--- a/src/test/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactoryTests.java
+++ b/src/test/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactoryTests.java
@@ -9,7 +9,6 @@
 import org.opensearch.test.OpenSearchTestCase;
 
 import java.util.HashMap;
-import java.util.Locale;
 import java.util.Map;
 
 import static org.opensearch.neuralsearch.processor.chunker.FixedTokenLengthChunker.ANALYSIS_REGISTRY_FIELD;
@@ -33,12 +32,7 @@ public void testCreate_Delimiter() {
 
     public void testCreate_Invalid() {
         String invalidChunkerName = "Invalid Chunker Algorithm";
-        IllegalArgumentException illegalArgumentException = assertThrows(
-            IllegalArgumentException.class,
-            () -> ChunkerFactory.create(invalidChunkerName, createChunkParameters())
-        );
-        assert (illegalArgumentException.getMessage()
-            .contains(String.format(Locale.ROOT, "Chunking algorithm [%s] is not supported.", invalidChunkerName)));
+        assertThrows(NullPointerException.class, () -> ChunkerFactory.create(invalidChunkerName, createChunkParameters()));
     }
 
     private Map<String, Object> createChunkParameters() {