Skip to content

Commit

Permalink
Estimate segment field usages
Browse files Browse the repository at this point in the history
  • Loading branch information
dnhatn committed Sep 11, 2024
1 parent 4ce661c commit e3e5128
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segm
fi.isParentField()
);
}
return new FieldInfos(deduplicated);
return new FieldInfosWithUsages(deduplicated);
}

private static Map<String, String> internStringStringMap(Map<String, String> m) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.index.codec;

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexOptions;

/**
 * A {@link FieldInfos} that additionally carries an aggregate count of per-field
 * feature "usages" across its fields (postings, norms, term vectors, doc values,
 * points, and dense vectors each contribute one usage per field that has them).
 */
public class FieldInfosWithUsages extends FieldInfos {
    private final int totalUsages;

    public FieldInfosWithUsages(FieldInfo[] infos) {
        super(infos);
        this.totalUsages = computeUsages(infos);
    }

    /**
     * Sums the feature usages over all given field infos.
     *
     * @param infos the per-field metadata to inspect
     * @return the total number of feature usages across all fields
     */
    public static int computeUsages(FieldInfo[] infos) {
        int total = 0;
        for (FieldInfo fi : infos) {
            total += usagesOf(fi);
        }
        return total;
    }

    // Counts how many index features a single field uses; each feature adds one.
    private static int usagesOf(FieldInfo fi) {
        int count = 0;
        if (fi.getIndexOptions() != IndexOptions.NONE) {
            count++;
        }
        if (fi.hasNorms()) {
            count++;
        }
        if (fi.hasVectors()) {
            count++;
        }
        if (fi.getDocValuesType() != DocValuesType.NONE) {
            count++;
        }
        if (fi.getPointDimensionCount() > 0) {
            count++;
        }
        if (fi.getVectorDimension() > 0) {
            count++;
        }
        return count;
    }

    /**
     * @return the total feature usages computed at construction time
     */
    public int getTotalUsages() {
        return totalUsages;
    }
}
15 changes: 13 additions & 2 deletions server/src/main/java/org/elasticsearch/index/shard/IndexShard.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
import org.elasticsearch.index.cache.query.TrivialQueryCachingPolicy;
import org.elasticsearch.index.cache.request.ShardRequestCache;
import org.elasticsearch.index.codec.CodecService;
import org.elasticsearch.index.codec.FieldInfosWithUsages;
import org.elasticsearch.index.engine.CommitStats;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.engine.Engine.GetResult;
Expand Down Expand Up @@ -4093,11 +4094,21 @@ public void afterRefresh(boolean didRefresh) {
try (var searcher = getEngine().acquireSearcher("shard_field_stats", Engine.SearcherScope.INTERNAL)) {
int numSegments = 0;
int totalFields = 0;
long usages = 0;
for (LeafReaderContext leaf : searcher.getLeafContexts()) {
numSegments++;
totalFields += leaf.reader().getFieldInfos().size();
var fieldInfos = leaf.reader().getFieldInfos();
totalFields += fieldInfos.size();
if (fieldInfos instanceof FieldInfosWithUsages ft) {
if (usages != -1) {
usages += ft.getTotalUsages();
}
} else {
assert false : "Elasticsearch Codec should have wrapped FieldInfos with field usages: " + leaf;
usages = -1;
}
}
shardFieldStats = new ShardFieldStats(numSegments, totalFields);
shardFieldStats = new ShardFieldStats(numSegments, totalFields, usages);
} catch (AlreadyClosedException ignored) {

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
*
* @param numSegments the number of segments
* @param totalFields the total number of fields across the segments
* @param fieldUsages the number of usages for segment-level fields (e.g., doc_values, postings, norms, points)
* -1 if unavailable
*/
public record ShardFieldStats(int numSegments, int totalFields) {
public record ShardFieldStats(int numSegments, int totalFields, long fieldUsages) {

}
Original file line number Diff line number Diff line change
Expand Up @@ -1793,6 +1793,7 @@ public void testShardFieldStats() throws IOException {
assertNotNull(stats);
assertThat(stats.numSegments(), equalTo(0));
assertThat(stats.totalFields(), equalTo(0));
assertThat(stats.fieldUsages(), equalTo(0L));
// index some documents
int numDocs = between(1, 10);
for (int i = 0; i < numDocs; i++) {
Expand All @@ -1809,6 +1810,9 @@ public void testShardFieldStats() throws IOException {
assertThat(stats.numSegments(), equalTo(1));
// _id, _source, _version, _primary_term, _seq_no, f1, f1.keyword, f2, f2.keyword,
assertThat(stats.totalFields(), equalTo(9));
// _id(term), _source(0), _version(dv), _primary_term(dv), _seq_no(point,dv), f1(postings,norms),
// f1.keyword(term,dv), f2(postings,norms), f2.keyword(term,dv),
assertThat(stats.fieldUsages(), equalTo(13L));
// don't re-compute on refresh without change
if (randomBoolean()) {
shard.refresh("test");
Expand Down Expand Up @@ -1838,10 +1842,15 @@ public void testShardFieldStats() throws IOException {
assertThat(stats.numSegments(), equalTo(2));
// 9 + _id, _source, _version, _primary_term, _seq_no, f1, f1.keyword, f2, f2.keyword, f3, f3.keyword
assertThat(stats.totalFields(), equalTo(21));
// first segment: 13; second segment: 13 + f3(postings,norms) + f3.keyword(term,dv); plus __soft_deletes added to the previous segment
assertThat(stats.fieldUsages(), equalTo(31L));
shard.forceMerge(new ForceMergeRequest().maxNumSegments(1).flush(true));
stats = shard.getShardFieldStats();
assertThat(stats.numSegments(), equalTo(1));
assertThat(stats.totalFields(), equalTo(12));
// _id(term), _source(0), _version(dv), _primary_term(dv), _seq_no(point,dv), f1(postings,norms),
// f1.keyword(term,dv), f2(postings,norms), f2.keyword(term,dv), f3(postings,norms), f3.keyword(term,dv), __soft_deletes
assertThat(stats.fieldUsages(), equalTo(18L));
closeShards(shard);
}

Expand Down

0 comments on commit e3e5128

Please sign in to comment.