
update integration test for cascade processor
Signed-off-by: yuye-aws <[email protected]>
yuye-aws committed Feb 29, 2024
1 parent 628c5ce commit a909ada
Showing 1 changed file with 16 additions and 6 deletions.
@@ -28,6 +28,8 @@ public class DocumentChunkingProcessorIT extends BaseNeuralSearchIT {
 
     private static final String OUTPUT_FIELD = "body_chunk";
 
+    private static final String INTERMEDIATE_FIELD = "body_chunk_intermediate";
+
     private static final String FIXED_TOKEN_LENGTH_PIPELINE_NAME = "pipeline-document-chunking-fixed-token-length";
 
     private static final String DELIMITER_PIPELINE_NAME = "pipeline-document-chunking-delimiter";
@@ -63,7 +65,7 @@ public void testDocumentChunkingProcessor_withFixedTokenLength_successful() throws Exception {
             expectedPassages.add("This is an example document to be chunked The document");
             expectedPassages.add("The document contains a single paragraph two sentences and 24");
             expectedPassages.add("and 24 tokens by standard tokenizer in OpenSearch");
-            validateIndexIngestResults(INDEX_NAME, expectedPassages);
+            validateIndexIngestResults(INDEX_NAME, OUTPUT_FIELD, expectedPassages);
         } finally {
             wipeOfTestResources(INDEX_NAME, FIXED_TOKEN_LENGTH_PIPELINE_NAME, null, null);
         }
@@ -94,7 +96,7 @@ public void testDocumentChunkingProcessor_withDelimiter_successful() throws Exception {
             expectedPassages.add(
                 " The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch."
             );
-            validateIndexIngestResults(INDEX_NAME, expectedPassages);
+            validateIndexIngestResults(INDEX_NAME, OUTPUT_FIELD, expectedPassages);
         } finally {
             wipeOfTestResources(INDEX_NAME, DELIMITER_PIPELINE_NAME, null, null);
         }
@@ -111,13 +113,21 @@ public void testDocumentChunkingProcessor_withCascade_successful() throws Exception {
             expectedPassages.add("This is an example document to be chunked");
             expectedPassages.add("The document contains a single paragraph two sentences and 24");
             expectedPassages.add("and 24 tokens by standard tokenizer in OpenSearch");
-            validateIndexIngestResults(INDEX_NAME, expectedPassages);
+            validateIndexIngestResults(INDEX_NAME, OUTPUT_FIELD, expectedPassages);
+
+            expectedPassages.clear();
+            expectedPassages.add("This is an example document to be chunked.");
+            expectedPassages.add(
+                " The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch."
+            );
+            validateIndexIngestResults(INDEX_NAME, INTERMEDIATE_FIELD, expectedPassages);
+
         } finally {
             wipeOfTestResources(INDEX_NAME, CASCADE_PIPELINE_NAME, null, null);
         }
     }
 
-    private void validateIndexIngestResults(String indexName, Object expected) {
+    private void validateIndexIngestResults(String indexName, String fieldName, Object expected) {
         assertEquals(1, getDocCount(indexName));
         MatchAllQueryBuilder query = new MatchAllQueryBuilder();
         Map<String, Object> searchResults = search(indexName, query, 10);
@@ -128,8 +138,8 @@ private void validateIndexIngestResults(String indexName, Object expected) {
         assert (documentSource instanceof Map);
         @SuppressWarnings("unchecked")
         Map<String, Object> documentSourceMap = (Map<String, Object>) documentSource;
-        assert (documentSourceMap).containsKey(OUTPUT_FIELD);
-        Object ingestOutputs = documentSourceMap.get(OUTPUT_FIELD);
+        assert (documentSourceMap).containsKey(fieldName);
+        Object ingestOutputs = documentSourceMap.get(fieldName);
         assertEquals(expected, ingestOutputs);
     }
