Skip to content

Commit

Permalink
Fix potential NPE issue in chunking processor; add changee log
Browse files Browse the repository at this point in the history
Signed-off-by: zane-neo <[email protected]>
  • Loading branch information
zane-neo committed Apr 12, 2024
1 parent 5e68be4 commit 2e81222
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670))
### Bug Fixes
- Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663))
- Fix may type validation issue in multiple pipeline processors ([#661](https://github.com/opensearch-project/neural-search/pull/661))
### Infrastructure
### Documentation
### Maintenance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,12 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
public void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {
try {
validateEmbeddingFieldsValue(ingestDocument);
Map<String, Object> ProcessMap = buildMapWithTargetKeyAndOriginalValue(ingestDocument);
List<String> inferenceList = createInferenceList(ProcessMap);
Map<String, Object> processMap = buildMapWithTargetKeyAndOriginalValue(ingestDocument);
List<String> inferenceList = createInferenceList(processMap);
if (inferenceList.size() == 0) {
handler.accept(ingestDocument, null);
} else {
doExecute(ingestDocument, ProcessMap, inferenceList, handler);
doExecute(ingestDocument, processMap, inferenceList, handler);
}
} catch (Exception e) {
handler.accept(null, e);
Expand Down Expand Up @@ -213,7 +213,8 @@ private void validateEmbeddingFieldsValue(IngestDocument ingestDocument) {
sourceAndMetadataMap,
fieldMap,
1,
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment)
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment),
true
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import java.util.List;
import java.util.Objects;

import org.apache.commons.lang.StringUtils;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexService;
Expand Down Expand Up @@ -171,7 +172,8 @@ public IngestDocument execute(final IngestDocument ingestDocument) {
sourceAndMetadataMap,
fieldMap,
1,
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment)
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment),
false
);
// fixed token length algorithm needs runtime parameter max_token_count for tokenization
Map<String, Object> runtimeParameters = new HashMap<>();
Expand Down Expand Up @@ -253,7 +255,13 @@ private List<String> chunkLeafType(final Object value, final Map<String, Object>
// leaf type means null, String or List<String>
// the result should be an empty list when the input is null
List<String> result = new ArrayList<>();
if (value == null) {
return result;
}
if (value instanceof String) {
if (StringUtils.isBlank(String.valueOf(value))) {
return result;
}
result = chunkString(value.toString(), runTimeParameters);
} else if (isListOfString(value)) {
result = chunkList((List<String>) value, runTimeParameters);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,8 @@ private void validateEmbeddingFieldsValue(final IngestDocument ingestDocument) {
sourceAndMetadataMap,
fieldMap,
1,
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment)
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment),
true
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ public static void validateMapTypeValue(
final Map<String, Object> sourceValue,
final Object fieldMap,
final int depth,
final long maxDepth
final long maxDepth,
final boolean allowEmpty
) {
if (sourceValue == null) return; // allow map type value to be null.
validateDepth(sourceKey, depth, maxDepth);
Expand All @@ -55,37 +56,51 @@ public static void validateMapTypeValue(
Object nextSourceValue = sourceValue.get(key);
if (nextSourceValue != null) {
if (nextSourceValue instanceof List) {
validateListTypeValue(key, (List) nextSourceValue, fieldMap, depth + 1, maxDepth);
validateListTypeValue(key, (List) nextSourceValue, fieldMap, depth + 1, maxDepth, allowEmpty);
} else if (nextSourceValue instanceof Map) {
validateMapTypeValue(key, (Map<String, Object>) nextSourceValue, nextFieldMap, depth + 1, maxDepth);
validateMapTypeValue(key, (Map<String, Object>) nextSourceValue, nextFieldMap, depth + 1, maxDepth, allowEmpty);
} else if (!(nextSourceValue instanceof String)) {
throw new IllegalArgumentException("map type field [" + key + "] is neither string nor nested type, cannot process it");
} else if (StringUtils.isBlank((String) nextSourceValue)) {
} else if (!allowEmpty && StringUtils.isBlank((String) nextSourceValue)) {
throw new IllegalArgumentException("map type field [" + key + "] has empty string value, cannot process it");
}
}
});
}

@SuppressWarnings({ "rawtypes", "unchecked" })
private static void validateListTypeValue(String sourceKey, List sourceValue, Object fieldMap, int depth, long maxDepth) {
private static void validateListTypeValue(
String sourceKey,
List sourceValue,
Object fieldMap,
int depth,
long maxDepth,
boolean allowEmpty
) {
validateDepth(sourceKey, depth, maxDepth);
if (sourceValue == null || sourceValue.isEmpty()) return;
Object firstNonNullElement = sourceValue.stream().filter(Objects::nonNull).findFirst().orElse(null);
if (firstNonNullElement == null) return;
for (Object element : sourceValue) {
if (firstNonNullElement instanceof List) { // nested list case.
validateListTypeValue(sourceKey, (List) element, fieldMap, depth + 1, maxDepth);
validateListTypeValue(sourceKey, (List) element, fieldMap, depth + 1, maxDepth, allowEmpty);
} else if (firstNonNullElement instanceof Map) {
validateMapTypeValue(sourceKey, (Map<String, Object>) element, ((Map) fieldMap).get(sourceKey), depth + 1, maxDepth);
validateMapTypeValue(
sourceKey,
(Map<String, Object>) element,
((Map) fieldMap).get(sourceKey),
depth + 1,
maxDepth,
allowEmpty
);
} else if (!(firstNonNullElement instanceof String)) {
throw new IllegalArgumentException("list type field [" + sourceKey + "] has non string value, cannot process it");
} else {
if (element == null) {
throw new IllegalArgumentException("list type field [" + sourceKey + "] has null, cannot process it");
} else if (!(element instanceof String)) {
throw new IllegalArgumentException("list type field [" + sourceKey + "] has non string value, cannot process it");
} else if (StringUtils.isBlank(element.toString())) {
} else if (!allowEmpty && StringUtils.isBlank(element.toString())) {
throw new IllegalArgumentException("list type field [" + sourceKey + "] has empty string, cannot process it");
}
}
Expand Down

0 comments on commit 2e81222

Please sign in to comment.