Skip to content

Commit

Permalink
validate supported chunker algorithm in text chunking processor
Browse files Browse the repository at this point in the history
Signed-off-by: yuye-aws <[email protected]>
  • Loading branch information
yuye-aws committed Mar 15, 2024
1 parent 89c465c commit e7dffe0
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Set;

import com.google.common.annotations.VisibleForTesting;

Expand Down Expand Up @@ -113,6 +114,17 @@ private void parseAlgorithmMap(final Map<String, Object> algorithmMap) {
}
}

Set<String> allChunkerAlgorithms = ChunkerFactory.allChunkerAlgorithms;
if (!allChunkerAlgorithms.contains(algorithmKey)) {
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"Chunking algorithm [%s] is not supported. Supported chunking algorithms are %s",
algorithmKey,
allChunkerAlgorithms
)
);
}
Map<String, Object> chunkerParameters = (Map<String, Object>) algorithmValue;
if (algorithmKey.equals(FixedTokenLengthChunker.ALGORITHM_NAME)) {
// fixed token length algorithm needs analysis registry for tokenization
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
package org.opensearch.neuralsearch.processor.chunker;

import com.google.common.collect.ImmutableMap;
import lombok.Getter;

import java.util.Map;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;

/**
Expand All @@ -22,18 +24,13 @@ public class ChunkerFactory {
DelimiterChunker::new
);

/**
 * Names of all chunking algorithms registered in {@code chunkers}.
 * This is the registry's own key set rather than a copy; it is declared {@code final} so the
 * shared reference cannot be reassigned by other classes.
 */
@Getter
public static final Set<String> allChunkerAlgorithms = chunkers.keySet();

/**
 * Creates the chunker registered under the given algorithm name.
 *
 * @param type name of the chunking algorithm, used as the lookup key in {@code chunkers}
 * @param parameters settings passed unchanged to the constructor function registered for {@code type}
 * @return the chunker produced by the registered constructor function
 * @throws IllegalArgumentException if no constructor function is registered for {@code type}
 */
public static Chunker create(final String type, final Map<String, Object> parameters) {
    Function<Map<String, Object>, Chunker> chunkerConstructionFunction = chunkers.get(type);
    if (chunkerConstructionFunction == null) {
        // Callers may already validate the algorithm name, but the factory still reports an
        // unknown name with a descriptive message instead of failing later with a bare null dereference.
        throw new IllegalArgumentException(
            String.format(
                Locale.ROOT,
                "Chunking algorithm [%s] is not supported. Supported chunking algorithms are %s",
                type,
                chunkers.keySet()
            )
        );
    }
    // No further null check is needed: the guard above is the only path on which the lookup can be null.
    return chunkerConstructionFunction.apply(parameters);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import org.opensearch.test.OpenSearchTestCase;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import static org.opensearch.neuralsearch.processor.chunker.FixedTokenLengthChunker.ANALYSIS_REGISTRY_FIELD;
Expand All @@ -33,12 +32,7 @@ public void testCreate_Delimiter() {

/**
 * Verifies that {@code ChunkerFactory.create} rejects an unregistered algorithm name with an
 * {@link IllegalArgumentException} whose message names the offending algorithm.
 */
public void testCreate_Invalid() {
    String invalidChunkerName = "Invalid Chunker Algorithm";
    // A single call can only fail one way; expect the descriptive IllegalArgumentException
    // rather than additionally expecting a NullPointerException from the same call.
    IllegalArgumentException illegalArgumentException = assertThrows(
        IllegalArgumentException.class,
        () -> ChunkerFactory.create(invalidChunkerName, createChunkParameters())
    );
    String expectedMessageFragment = String.format(Locale.ROOT, "Chunking algorithm [%s] is not supported.", invalidChunkerName);
    assert (illegalArgumentException.getMessage().contains(expectedMessageFragment));
}

private Map<String, Object> createChunkParameters() {
Expand Down

0 comments on commit e7dffe0

Please sign in to comment.