Skip to content

Commit

Permalink
bug fix in chunker factory
Browse files Browse the repository at this point in the history
Signed-off-by: yuye-aws <[email protected]>
  • Loading branch information
yuye-aws committed Mar 14, 2024
1 parent 0db6313 commit 59b6e31
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,12 @@ private int chunkString(final String content, List<String> result, final Map<Str
return chunkCount;
}

private int chunkList(final List<String> contentList, List<String> result, final Map<String, Object> runTimeParameters, int chunkCount) {
private int chunkList(
final List<String> contentList,
List<String> result,
final Map<String, Object> runTimeParameters,
int chunkCount
) {
// flatten original output format from List<List<String>> to List<String>
for (String content : contentList) {
chunkCount = chunkString(content, result, runTimeParameters, chunkCount);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package org.opensearch.neuralsearch.processor.chunker;

import com.google.common.collect.ImmutableMap;
import lombok.Getter;

import java.util.Map;
import java.util.Set;
Expand All @@ -20,20 +21,24 @@ public class ChunkerFactory {
FixedTokenLengthChunker.ALGORITHM_NAME,
FixedTokenLengthChunker::new,
DelimiterChunker.ALGORITHM_NAME,
FixedTokenLengthChunker::new
DelimiterChunker::new
);

@Getter
private static final Set<String> allChunkers = chunkers.keySet();

/**
 * Creates a chunker for the given chunking algorithm name.
 *
 * Looks up {@code type} in the {@code chunkers} map and applies the construction function
 * found there to {@code parameters}.
 *
 * @param type name of the chunking algorithm, used as the lookup key in {@code chunkers}
 * @param parameters passed unchanged to the chunker construction function
 * @return the chunker produced by the construction function stored under {@code type}
 * @throws IllegalArgumentException if {@code chunkers} has no entry for {@code type}
 */
public static Chunker create(final String type, final Map<String, Object> parameters) {
// NOTE(review): the next two lines are the same declaration and differ only in the spacing before '=';
// this looks like the removed and added sides of the diff shown together - confirm against the real file.
Function<Map<String, Object>, Chunker> chunkerConstructionFunction= chunkers.get(type);
Function<Map<String, Object>, Chunker> chunkerConstructionFunction = chunkers.get(type);
// a null lookup result means the requested name is not a key of the chunkers map
if (chunkerConstructionFunction == null) {
throw new IllegalArgumentException(
// NOTE(review): the single-line and the multi-line String.format calls below carry the same arguments;
// presumably the before/after formatting of one call. The message lists the key set of the chunkers map.
String.format(Locale.ROOT, "chunking algorithm [%s] is not supported. Supported chunking algorithms are %s", type, chunkers.keySet())
String.format(
Locale.ROOT,
"chunking algorithm [%s] is not supported. Supported chunking algorithms are %s",
type,
chunkers.keySet()
)
);
}
return chunkerConstructionFunction.apply(parameters);
}

/**
 * Returns the names of the chunking algorithms known to this factory.
 *
 * NOTE(review): a {@code @Getter}-annotated static field named {@code allChunkers} appears earlier in
 * this view; presumably it supersedes this hand-written accessor - confirm against the real file.
 *
 * @return the key set of the {@code chunkers} map
 */
public static Set<String> getAllChunkers() {
return chunkers.keySet();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ public static Number validateNumberParameter(final Map<String, Object> parameter
/**
* Validate and parse the parameter for positive integer parameters
*/
public static int validatePositiveIntegerParameter(final Map<String, Object> parameters, final String fieldName, final int defaultValue) {
public static int validatePositiveIntegerParameter(
final Map<String, Object> parameters,
final String fieldName,
final int defaultValue
) {
Number fieldValueNumber = validateNumberParameter(parameters, fieldName, defaultValue);
int fieldValueInt = fieldValueNumber.intValue();
// sometimes parameter has negative default value, indicating that this parameter is not effective
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@ public void testGetAllChunkers() {
/**
 * Checks that creating a chunker with the fixed token length algorithm name yields a non-null
 * {@code FixedTokenLengthChunker}. Parameters come from {@code createChunkParameters()}.
 */
public void testCreate_FixedTokenLength() {
Chunker chunker = ChunkerFactory.create(FixedTokenLengthChunker.ALGORITHM_NAME, createChunkParameters());
assertNotNull(chunker);
// NOTE(review): the two lines below perform the same type check. The plain Java `assert` statement is
// only evaluated when the JVM runs with assertions enabled, whereas assertTrue is an ordinary method
// call - confirm the test runner enables assertions before relying on the `assert` form.
assertTrue(chunker instanceof FixedTokenLengthChunker);
assert (chunker instanceof FixedTokenLengthChunker);
}

/**
 * Checks that creating a chunker with the delimiter algorithm name yields a non-null
 * {@code DelimiterChunker}. Parameters come from {@code createChunkParameters()}.
 */
public void testCreate_Delimiter() {
Chunker chunker = ChunkerFactory.create(DelimiterChunker.ALGORITHM_NAME, createChunkParameters());
assertNotNull(chunker);
// NOTE(review): the two lines below perform the same type check. The plain Java `assert` statement is
// only evaluated when the JVM runs with assertions enabled, whereas assertTrue is an ordinary method
// call - confirm the test runner enables assertions before relying on the `assert` form.
assertTrue(chunker instanceof DelimiterChunker);
assert (chunker instanceof DelimiterChunker);
}

public void testCreate_Invalid() {
Expand Down

0 comments on commit 59b6e31

Please sign in to comment.