diff --git a/server/src/main/java/org/elasticsearch/index/codec/DeduplicatingFieldInfosFormat.java b/server/src/main/java/org/elasticsearch/index/codec/DeduplicatingFieldInfosFormat.java
index 75ec265a68391..67d98bf30d6ec 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/DeduplicatingFieldInfosFormat.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/DeduplicatingFieldInfosFormat.java
@@ -65,7 +65,7 @@ public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segm
                 fi.isParentField()
             );
         }
-        return new FieldInfos(deduplicated);
+        return new FieldInfosWithUsages(deduplicated);
     }
 
     private static Map<String, String> internStringStringMap(Map<String, String> m) {
diff --git a/server/src/main/java/org/elasticsearch/index/codec/FieldInfosWithUsages.java b/server/src/main/java/org/elasticsearch/index/codec/FieldInfosWithUsages.java
new file mode 100644
index 0000000000000..e0d4a94ee28a1
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/codec/FieldInfosWithUsages.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.codec;
+
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexOptions;
+
+public class FieldInfosWithUsages extends FieldInfos {
+    private final int totalUsages;
+
+    public FieldInfosWithUsages(FieldInfo[] infos) {
+        super(infos);
+        this.totalUsages = computeUsages(infos);
+    }
+
+    public static int computeUsages(FieldInfo[] infos) {
+        int usages = 0;
+        for (FieldInfo fi : infos) {
+            if (fi.getIndexOptions() != IndexOptions.NONE) {
+                usages++;
+            }
+            if (fi.hasNorms()) {
+                usages++;
+            }
+            if (fi.getDocValuesType() != DocValuesType.NONE) {
+                usages++;
+            }
+            if (fi.getPointDimensionCount() > 0) {
+                usages++;
+            }
+            if (fi.getVectorDimension() > 0) {
+                usages++;
+            }
+        }
+        return usages;
+    }
+
+    public int getTotalUsages() {
+        return totalUsages;
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java
index 8f1ae42a7475c..b618ca49a3a0b 100644
--- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java
+++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java
@@ -83,6 +83,7 @@
 import org.elasticsearch.index.cache.query.TrivialQueryCachingPolicy;
 import org.elasticsearch.index.cache.request.ShardRequestCache;
 import org.elasticsearch.index.codec.CodecService;
+import org.elasticsearch.index.codec.FieldInfosWithUsages;
 import org.elasticsearch.index.engine.CommitStats;
 import org.elasticsearch.index.engine.Engine;
 import org.elasticsearch.index.engine.Engine.GetResult;
@@ -4093,11 +4094,20 @@ public void afterRefresh(boolean didRefresh) {
             try (var searcher = getEngine().acquireSearcher("shard_field_stats", Engine.SearcherScope.INTERNAL)) {
                 int numSegments = 0;
                 int totalFields = 0;
+                long usages = 0;
                 for (LeafReaderContext leaf : searcher.getLeafContexts()) {
                     numSegments++;
-                    totalFields += leaf.reader().getFieldInfos().size();
+                    var fieldInfos = leaf.reader().getFieldInfos();
+                    totalFields += fieldInfos.size();
+                    if (fieldInfos instanceof FieldInfosWithUsages ft) {
+                        if (usages != -1) {
+                            usages += ft.getTotalUsages();
+                        }
+                    } else {
+                        usages = -1;
+                    }
                 }
-                shardFieldStats = new ShardFieldStats(numSegments, totalFields);
+                shardFieldStats = new ShardFieldStats(numSegments, totalFields, usages);
             } catch (AlreadyClosedException ignored) {
 
             }
diff --git a/server/src/main/java/org/elasticsearch/index/shard/ShardFieldStats.java b/server/src/main/java/org/elasticsearch/index/shard/ShardFieldStats.java
index 9c53abb1e95e5..c2122b9f4b0aa 100644
--- a/server/src/main/java/org/elasticsearch/index/shard/ShardFieldStats.java
+++ b/server/src/main/java/org/elasticsearch/index/shard/ShardFieldStats.java
@@ -14,7 +14,9 @@
  *
  * @param numSegments the number of segments
  * @param totalFields the total number of fields across the segments
+ * @param fieldUsages the number of usages for segment-level fields (e.g., doc_values, postings, norms, points)
+ *                    -1 if unavailable
  */
-public record ShardFieldStats(int numSegments, int totalFields) {
+public record ShardFieldStats(int numSegments, int totalFields, long fieldUsages) {
 
 }
diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java
index 28497dc4a8a6b..63bd1fd6cdeff 100644
--- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java
+++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java
@@ -1793,6 +1793,7 @@ public void testShardFieldStats() throws IOException {
         assertNotNull(stats);
         assertThat(stats.numSegments(), equalTo(0));
         assertThat(stats.totalFields(), equalTo(0));
+        assertThat(stats.fieldUsages(), equalTo(0L));
         // index some documents
         int numDocs = between(1, 10);
         for (int i = 0; i < numDocs; i++) {
@@ -1809,6 +1810,9 @@ public void testShardFieldStats() throws IOException {
         assertThat(stats.numSegments(), equalTo(1));
         // _id, _source, _version, _primary_term, _seq_no, f1, f1.keyword, f2, f2.keyword,
         assertThat(stats.totalFields(), equalTo(9));
+        // _id(term), _source(0), _version(dv), _primary_term(dv), _seq_no(point,dv), f1(postings,norms),
+        // f1.keyword(term,dv), f2(postings,norms), f2.keyword(term,dv),
+        assertThat(stats.fieldUsages(), equalTo(13L));
         // don't re-compute on refresh without change
         if (randomBoolean()) {
             shard.refresh("test");
@@ -1838,10 +1842,15 @@ public void testShardFieldStats() throws IOException {
         assertThat(stats.numSegments(), equalTo(2));
         // 9 + _id, _source, _version, _primary_term, _seq_no, f1, f1.keyword, f2, f2.keyword, f3, f3.keyword
         assertThat(stats.totalFields(), equalTo(21));
+        // first segment: 13, second segment: 13 + f3(postings,norms) + f3.keyword(term,dv), and __soft_deletes to previous segment
+        assertThat(stats.fieldUsages(), equalTo(31L));
         shard.forceMerge(new ForceMergeRequest().maxNumSegments(1).flush(true));
         stats = shard.getShardFieldStats();
         assertThat(stats.numSegments(), equalTo(1));
         assertThat(stats.totalFields(), equalTo(12));
+        // _id(term), _source(0), _version(dv), _primary_term(dv), _seq_no(point,dv), f1(postings,norms),
+        // f1.keyword(term,dv), f2(postings,norms), f2.keyword(term,dv), f3(postings,norms), f3.keyword(term,dv), __soft_deletes
+        assertThat(stats.fieldUsages(), equalTo(18L));
         closeShards(shard);
     }
 
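A minimal, self-contained sketch (not part of the change above) that re-derives the expected `fieldUsages` value of 13 for the first test segment, applying the same one-usage-per-index-structure rule as `FieldInfosWithUsages.computeUsages`: postings, norms, doc values, points, and vectors each count once per field. The class name and the per-field structure map below are illustrative, taken from the comments in `testShardFieldStats`.

```java
import java.util.List;
import java.util.Map;

// Illustrative sketch only; mirrors FieldInfosWithUsages.computeUsages by counting
// one usage per index structure that a field carries in a segment.
public class FieldUsagesSketch {
    public static void main(String[] args) {
        // Per-field structures for the first segment, as listed in the test comments.
        Map<String, List<String>> structuresPerField = Map.of(
            "_id", List.of("postings"),
            "_source", List.of(),                            // stored only, contributes 0 usages
            "_version", List.of("doc_values"),
            "_primary_term", List.of("doc_values"),
            "_seq_no", List.of("points", "doc_values"),
            "f1", List.of("postings", "norms"),
            "f1.keyword", List.of("postings", "doc_values"),
            "f2", List.of("postings", "norms"),
            "f2.keyword", List.of("postings", "doc_values")
        );
        long usages = structuresPerField.values().stream().mapToLong(List::size).sum();
        System.out.println("fieldUsages = " + usages); // prints 13, matching the test's expectation
    }
}
```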