Skip to content

Commit

Permalink
Estimate segment field usages (#112760)
Browse files Browse the repository at this point in the history
We have introduced a new memory estimation method in serverless, based 
on the number of segments and the fields within them. This new approach
works well overall, but it still falls short in cases where most fields
are used more than once - for example, in both doc_values and postings,
or doc_values and points. This change exposes the total usage of fields
in segments, allowing us to adjust the memory estimate for these cases.
  • Loading branch information
dnhatn authored and davidkyle committed Sep 12, 2024
1 parent 7b17077 commit 18a48c7
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segm
fi.isParentField()
);
}
return new FieldInfos(deduplicated);
return new FieldInfosWithUsages(deduplicated);
}

private static Map<String, String> internStringStringMap(Map<String, String> m) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.index.codec;

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexOptions;

public class FieldInfosWithUsages extends FieldInfos {
private final int totalUsages;

public FieldInfosWithUsages(FieldInfo[] infos) {
super(infos);
this.totalUsages = computeUsages(infos);
}

public static int computeUsages(FieldInfo[] infos) {
int usages = 0;
for (FieldInfo fi : infos) {
if (fi.getIndexOptions() != IndexOptions.NONE) {
usages++;
}
if (fi.hasNorms()) {
usages++;
}
if (fi.getDocValuesType() != DocValuesType.NONE) {
usages++;
}
if (fi.getPointDimensionCount() > 0) {
usages++;
}
if (fi.getVectorDimension() > 0) {
usages++;
}
}
return usages;
}

public int getTotalUsages() {
return totalUsages;
}
}
14 changes: 12 additions & 2 deletions server/src/main/java/org/elasticsearch/index/shard/IndexShard.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
import org.elasticsearch.index.cache.query.TrivialQueryCachingPolicy;
import org.elasticsearch.index.cache.request.ShardRequestCache;
import org.elasticsearch.index.codec.CodecService;
import org.elasticsearch.index.codec.FieldInfosWithUsages;
import org.elasticsearch.index.engine.CommitStats;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.engine.Engine.GetResult;
Expand Down Expand Up @@ -4093,11 +4094,20 @@ public void afterRefresh(boolean didRefresh) {
try (var searcher = getEngine().acquireSearcher("shard_field_stats", Engine.SearcherScope.INTERNAL)) {
int numSegments = 0;
int totalFields = 0;
long usages = 0;
for (LeafReaderContext leaf : searcher.getLeafContexts()) {
numSegments++;
totalFields += leaf.reader().getFieldInfos().size();
var fieldInfos = leaf.reader().getFieldInfos();
totalFields += fieldInfos.size();
if (fieldInfos instanceof FieldInfosWithUsages ft) {
if (usages != -1) {
usages += ft.getTotalUsages();
}
} else {
usages = -1;
}
}
shardFieldStats = new ShardFieldStats(numSegments, totalFields);
shardFieldStats = new ShardFieldStats(numSegments, totalFields, usages);
} catch (AlreadyClosedException ignored) {

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
*
* @param numSegments the number of segments
* @param totalFields the total number of fields across the segments
* @param fieldUsages the number of usages for segment-level fields (e.g., doc_values, postings, norms, points)
* -1 if unavailable
*/
public record ShardFieldStats(int numSegments, int totalFields) {
public record ShardFieldStats(int numSegments, int totalFields, long fieldUsages) {

}
Original file line number Diff line number Diff line change
Expand Up @@ -1793,6 +1793,7 @@ public void testShardFieldStats() throws IOException {
assertNotNull(stats);
assertThat(stats.numSegments(), equalTo(0));
assertThat(stats.totalFields(), equalTo(0));
assertThat(stats.fieldUsages(), equalTo(0L));
// index some documents
int numDocs = between(1, 10);
for (int i = 0; i < numDocs; i++) {
Expand All @@ -1809,6 +1810,9 @@ public void testShardFieldStats() throws IOException {
assertThat(stats.numSegments(), equalTo(1));
// _id, _source, _version, _primary_term, _seq_no, f1, f1.keyword, f2, f2.keyword,
assertThat(stats.totalFields(), equalTo(9));
// _id(term), _source(0), _version(dv), _primary_term(dv), _seq_no(point,dv), f1(postings,norms),
// f1.keyword(term,dv), f2(postings,norms), f2.keyword(term,dv),
assertThat(stats.fieldUsages(), equalTo(13L));
// don't re-compute on refresh without change
if (randomBoolean()) {
shard.refresh("test");
Expand Down Expand Up @@ -1838,10 +1842,15 @@ public void testShardFieldStats() throws IOException {
assertThat(stats.numSegments(), equalTo(2));
// 9 + _id, _source, _version, _primary_term, _seq_no, f1, f1.keyword, f2, f2.keyword, f3, f3.keyword
assertThat(stats.totalFields(), equalTo(21));
// first segment: 13, second segment: 13 + f3(postings,norms) + f3.keyword(term,dv), and __soft_deletes to previous segment
assertThat(stats.fieldUsages(), equalTo(31L));
shard.forceMerge(new ForceMergeRequest().maxNumSegments(1).flush(true));
stats = shard.getShardFieldStats();
assertThat(stats.numSegments(), equalTo(1));
assertThat(stats.totalFields(), equalTo(12));
// _id(term), _source(0), _version(dv), _primary_term(dv), _seq_no(point,dv), f1(postings,norms),
// f1.keyword(term,dv), f2(postings,norms), f2.keyword(term,dv), f3(postings,norms), f3.keyword(term,dv), __soft_deletes
assertThat(stats.fieldUsages(), equalTo(18L));
closeShards(shard);
}

Expand Down

0 comments on commit 18a48c7

Please sign in to comment.