From a909adade2b2c9bb29808dcba4b845c78e93a316 Mon Sep 17 00:00:00 2001
From: yuye-aws
Date: Thu, 29 Feb 2024 12:15:00 +0800
Subject: [PATCH] update integration test for cascade processor

Signed-off-by: yuye-aws
---
 .../DocumentChunkingProcessorIT.java | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorIT.java b/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorIT.java
index 320d7ac6f..d8caa64da 100644
--- a/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorIT.java
+++ b/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorIT.java
@@ -28,6 +28,8 @@ public class DocumentChunkingProcessorIT extends BaseNeuralSearchIT {
 
     private static final String OUTPUT_FIELD = "body_chunk";
 
+    private static final String INTERMEDIATE_FIELD = "body_chunk_intermediate";
+
     private static final String FIXED_TOKEN_LENGTH_PIPELINE_NAME = "pipeline-document-chunking-fixed-token-length";
 
     private static final String DELIMITER_PIPELINE_NAME = "pipeline-document-chunking-delimiter";
@@ -63,7 +65,7 @@ public void testDocumentChunkingProcessor_withFixedTokenLength_successful() thro
             expectedPassages.add("This is an example document to be chunked The document");
             expectedPassages.add("The document contains a single paragraph two sentences and 24");
             expectedPassages.add("and 24 tokens by standard tokenizer in OpenSearch");
-            validateIndexIngestResults(INDEX_NAME, expectedPassages);
+            validateIndexIngestResults(INDEX_NAME, OUTPUT_FIELD, expectedPassages);
         } finally {
             wipeOfTestResources(INDEX_NAME, FIXED_TOKEN_LENGTH_PIPELINE_NAME, null, null);
         }
     }
@@ -94,7 +96,7 @@ public void testDocumentChunkingProcessor_withDelimiter_successful() throws Exce
             expectedPassages.add(
                 " The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch."
             );
-            validateIndexIngestResults(INDEX_NAME, expectedPassages);
+            validateIndexIngestResults(INDEX_NAME, OUTPUT_FIELD, expectedPassages);
         } finally {
             wipeOfTestResources(INDEX_NAME, DELIMITER_PIPELINE_NAME, null, null);
         }
     }
@@ -111,13 +113,21 @@ public void testDocumentChunkingProcessor_withCascade_successful() throws Except
             expectedPassages.add("This is an example document to be chunked");
             expectedPassages.add("The document contains a single paragraph two sentences and 24");
             expectedPassages.add("and 24 tokens by standard tokenizer in OpenSearch");
-            validateIndexIngestResults(INDEX_NAME, expectedPassages);
+            validateIndexIngestResults(INDEX_NAME, OUTPUT_FIELD, expectedPassages);
+
+            expectedPassages.clear();
+            expectedPassages.add("This is an example document to be chunked.");
+            expectedPassages.add(
+                " The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch."
+            );
+            validateIndexIngestResults(INDEX_NAME, INTERMEDIATE_FIELD, expectedPassages);
+
         } finally {
             wipeOfTestResources(INDEX_NAME, CASCADE_PIPELINE_NAME, null, null);
         }
     }
 
-    private void validateIndexIngestResults(String indexName, Object expected) {
+    private void validateIndexIngestResults(String indexName, String fieldName, Object expected) {
         assertEquals(1, getDocCount(indexName));
         MatchAllQueryBuilder query = new MatchAllQueryBuilder();
         Map<String, Object> searchResults = search(indexName, query, 10);
@@ -128,8 +138,8 @@ private void validateIndexIngestResults(String indexName, Object expected) {
         assert (documentSource instanceof Map);
         @SuppressWarnings("unchecked")
         Map<String, Object> documentSourceMap = (Map<String, Object>) documentSource;
-        assert (documentSourceMap).containsKey(OUTPUT_FIELD);
-        Object ingestOutputs = documentSourceMap.get(OUTPUT_FIELD);
+        assert (documentSourceMap).containsKey(fieldName);
+        Object ingestOutputs = documentSourceMap.get(fieldName);
         assertEquals(expected, ingestOutputs);
     }
 