Skip to content

Commit

Permalink
Ignore metadata fields when removing fields by remove ingest processor
Browse files Browse the repository at this point in the history
Signed-off-by: Gao Binlong <[email protected]>
  • Loading branch information
gaobinlong committed Oct 24, 2023
1 parent 5bd413c commit 283fe28
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [BUG] Fix java.lang.SecurityException in repository-gcs plugin ([#10642](https://github.com/opensearch-project/OpenSearch/pull/10642))
- Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395))
- Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273))
- Ignore metadata fields when removing fields by remove ingest processor

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
Expand All @@ -67,6 +68,11 @@ public List<TemplateScript.Factory> getFields() {

@Override
public IngestDocument execute(IngestDocument document) {
final Set<String> metadataFields = document.getMetadata()
.keySet()
.stream()
.map(IngestDocument.Metadata::getFieldName)
.collect(Collectors.toSet());
fields.forEach(field -> {
String path = document.renderTemplate(field);
final boolean fieldPathIsNullOrEmpty = Strings.isNullOrEmpty(path);
Expand All @@ -79,7 +85,10 @@ public IngestDocument execute(IngestDocument document) {
throw new IllegalArgumentException("field [" + path + "] doesn't exist");
}
}
document.removeField(path);
// ignore metadata fields such as _index, _id, etc.
if (!metadataFields.contains(path)) {
document.removeField(path);
}
});
return document;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,17 @@

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.equalTo;

public class RemoveProcessorTests extends OpenSearchTestCase {

public void testRemoveFields() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
String field = RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument);
String field = RandomDocumentPicks.addRandomField(random(), ingestDocument, randomAlphaOfLength(10));
Processor processor = new RemoveProcessor(
randomAlphaOfLength(10),
null,
Expand Down Expand Up @@ -124,4 +126,31 @@ public void testIgnoreMissing() throws Exception {
processor = new RemoveProcessor.Factory(TestTemplateService.instance()).create(null, processorTag, null, configWithEmptyField);
processor.execute(ingestDocument);
}

public void testRemoveMetadataField() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
// ignore _if_seq_no
List<String> metadataFields = ingestDocument.getMetadata()
.keySet()
.stream()
.map(IngestDocument.Metadata::getFieldName)
.collect(Collectors.toList());
String metadataFieldName = metadataFields.get(randomIntBetween(0, metadataFields.size() - 1));
Map<String, Object> config = new HashMap<>();
config.put("field", metadataFieldName);
String processorTag = randomAlphaOfLength(10);
Processor processor = new RemoveProcessor.Factory(TestTemplateService.instance()).create(null, processorTag, null, config);
// _if_seq_no and _if_primary_term do not exist in the enriched document, removing them will throw IllegalArgumentException
if (metadataFieldName.equals(IngestDocument.Metadata.IF_SEQ_NO.getFieldName())
|| metadataFieldName.equals(IngestDocument.Metadata.IF_PRIMARY_TERM.getFieldName())) {
assertThrows(
"field: [" + metadataFieldName + "] doesn't exist",
IllegalArgumentException.class,
() -> processor.execute(ingestDocument)
);
} else {
// for other metadata fields such as _index, id, ignore the removing operation
assertThat(ingestDocument.hasField(metadataFieldName), equalTo(true));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,93 @@ teardown:
index: test
id: 1
- match: { _source.message: "foo bar baz" }

---
"Test remove metadata field":
- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"remove" : {
"field" : "{{foo}}"
}
}
]
}
- match: { acknowledged: true }

- do:
index:
index: test
id: 1
pipeline: "my_pipeline"
body: {
foo: "_index"
}
- do:
get:
index: test
id: 1
- match: { _source: { foo: "_index" } }

- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"remove" : {
"field" : "_id"
}
}
]
}
- match: { acknowledged: true }

- do:
index:
index: test
id: 1
pipeline: "my_pipeline"
body: { message: "foo bar baz" }
- do:
get:
index: test
id: 1
- match: { _source: { message: "foo bar baz" } }

- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"remove" : {
"field" : "_version"
}
}
]
}
- match: { acknowledged: true }

- do:
index:
index: test
id: "test_id_10000"
pipeline: "my_pipeline"
version: 1
version_type: "external"
body: { message: "foo bar baz" }
- do:
get:
index: test
id: 1
- match: { _source: { message: "foo bar baz" } }

0 comments on commit 283fe28

Please sign in to comment.