Skip to content

Commit

Permalink
Add index mapping parameter for counted_keyword
Browse files Browse the repository at this point in the history
With this commit we add a new mapping parameter `index` to the
`counted_keyword` mapping type. This allows reducing disk usage for use
cases where an indexed field is not required.

Relates elastic#101826
  • Loading branch information
danielmitterdorfer committed Dec 21, 2023
1 parent 48dcb85 commit 7dc70f2
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,24 @@ public class CountedKeywordFieldMapper extends FieldMapper {
public static final String CONTENT_TYPE = "counted_keyword";
public static final String COUNT_FIELD_NAME_SUFFIX = "_count";

public static final FieldType FIELD_TYPE;
private static final FieldType FIELD_TYPE_INDEXED;
private static final FieldType FIELD_TYPE_NOT_INDEXED;

// Builds the shared Lucene FieldType constants used by this mapper.
// NOTE(review): this view appears to show removed and added diff lines side by side;
// the `ft` / FIELD_TYPE lines look like the pre-change version of the
// `indexed` / `notIndexed` lines below — confirm against the actual file.
static {
FieldType ft = new FieldType();
ft.setDocValuesType(DocValuesType.SORTED_SET);
ft.setTokenized(false);
ft.setOmitNorms(true);
ft.setIndexOptions(IndexOptions.DOCS);
ft.freeze();
FIELD_TYPE = freezeAndDeduplicateFieldType(ft);
// Variant selected when the field is indexed: SORTED_SET doc values, untokenized,
// norms omitted, and IndexOptions.DOCS.
FieldType indexed = new FieldType();
indexed.setDocValuesType(DocValuesType.SORTED_SET);
indexed.setTokenized(false);
indexed.setOmitNorms(true);
indexed.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE_INDEXED = freezeAndDeduplicateFieldType(indexed);

// Variant selected when the field is not indexed: identical to the indexed variant
// except for IndexOptions.NONE; doc values are still SORTED_SET.
FieldType notIndexed = new FieldType();
notIndexed.setDocValuesType(DocValuesType.SORTED_SET);
notIndexed.setTokenized(false);
notIndexed.setOmitNorms(true);
notIndexed.setIndexOptions(IndexOptions.NONE);
FIELD_TYPE_NOT_INDEXED = freezeAndDeduplicateFieldType(notIndexed);

}

private static class CountedKeywordFieldType extends StringFieldType {
Expand Down Expand Up @@ -261,7 +269,12 @@ public TermsEnum termsEnum() throws IOException {
}
}

/**
 * Casts a generic {@link FieldMapper} to {@link CountedKeywordFieldMapper}.
 *
 * @param in the mapper to cast
 * @return the same instance, typed as {@code CountedKeywordFieldMapper}
 * @throws ClassCastException if {@code in} is not a {@code CountedKeywordFieldMapper}
 */
private static CountedKeywordFieldMapper toType(FieldMapper in) {
return (CountedKeywordFieldMapper) in;
}

public static class Builder extends FieldMapper.Builder {
private final Parameter<Boolean> indexed = Parameter.indexParam(m -> toType(m).mappedFieldType.isIndexed(), true);
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

protected Builder(String name) {
Expand All @@ -270,22 +283,24 @@ protected Builder(String name) {

// Returns the mapping parameters exposed by this builder.
// NOTE(review): the two return statements look like the old and new side of a diff
// shown together (the second adds `indexed`) — confirm which one is in the real file.
@Override
protected Parameter<?>[] getParameters() {
return new Parameter<?>[] { meta };
return new Parameter<?>[] { meta, indexed };
}

@Override
public FieldMapper build(MapperBuilderContext context) {

BinaryFieldMapper countFieldMapper = new BinaryFieldMapper.Builder(name + COUNT_FIELD_NAME_SUFFIX, true).build(context);
boolean isIndexed = indexed.getValue();
FieldType ft = isIndexed ? FIELD_TYPE_INDEXED : FIELD_TYPE_NOT_INDEXED;
return new CountedKeywordFieldMapper(
name,
FIELD_TYPE,
ft,
new CountedKeywordFieldType(
context.buildFullName(name),
true,
isIndexed,
false,
true,
new TextSearchInfo(FIELD_TYPE, null, KEYWORD_ANALYZER, KEYWORD_ANALYZER),
new TextSearchInfo(ft, null, KEYWORD_ANALYZER, KEYWORD_ANALYZER),
meta.getValue(),
countFieldMapper.fieldType()
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,17 @@ setup:
type: counted_keyword


- do:
indices.create:
index: test-events-no-index
body:
mappings:
properties:
events:
type: counted_keyword
index: false


- do:
index:
index: test-events
Expand All @@ -26,12 +37,18 @@ setup:
id: "2"
body: { "events": [ "a", "b", "b", "b", "c" ] }

- do:
index:
index: test-events-no-index
id: "1"
body: { "events": [ "a", "a", "b", "c" ] }


- do:
indices.refresh: { }

---
"Counted Terms agg":

- do:
search:
index: test-events
Expand All @@ -49,3 +66,22 @@ setup:
- match: { aggregations.event_terms.buckets.2.key: "c" }
- match: { aggregations.event_terms.buckets.2.doc_count: 2 }
- length: { aggregations.event_terms.buckets: 3 }

# although the field is not indexed, the counted_terms agg should still work
- do:
search:
index: test-events-no-index
body:
size: 0
aggs:
event_terms:
counted_terms:
field: events

- match: { aggregations.event_terms.buckets.0.key: "b" }
- match: { aggregations.event_terms.buckets.0.doc_count: 4 }
- match: { aggregations.event_terms.buckets.1.key: "a" }
- match: { aggregations.event_terms.buckets.1.doc_count: 3 }
- match: { aggregations.event_terms.buckets.2.key: "c" }
- match: { aggregations.event_terms.buckets.2.doc_count: 2 }
- length: { aggregations.event_terms.buckets: 3 }

0 comments on commit 7dc70f2

Please sign in to comment.