Skip to content

Commit

Permalink
Avoid needless index metadata builders during reroute (#88506)
Browse files Browse the repository at this point in the history
This set of changes makes `org.elasticsearch.cluster.routing.allocation.IndexMetadataUpdater#applyChanges` essentially free even in clusters of O(100k) indices. Previously it consumed a disproportionately increasing amount of CPU as the cluster grew (about 1% of CPU time while bootstrapping many shards in the 25k-indices benchmarks, and increasing from there).
It also appears to have additional benefits end-to-end in those benchmarks, likely as a result of making diffing metadata cheaper by retaining more instance equality across the board.

relates #77466
  • Loading branch information
original-brownbear authored Jul 14, 2022
1 parent 4efc09c commit 7dc02c5
Show file tree
Hide file tree
Showing 4 changed files with 303 additions and 91 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,8 @@ private IndexMetadata(
final boolean isPartialSearchableSnapshot,
@Nullable final IndexMode indexMode,
@Nullable final Instant timeSeriesStart,
@Nullable final Instant timeSeriesEnd
@Nullable final Instant timeSeriesEnd,
final Version indexCompatibilityVersion
) {
this.index = index;
this.version = version;
Expand Down Expand Up @@ -654,7 +655,8 @@ private IndexMetadata(
this.autoExpandReplicas = autoExpandReplicas;
this.isSearchableSnapshot = isSearchableSnapshot;
this.isPartialSearchableSnapshot = isPartialSearchableSnapshot;
this.indexCompatibilityVersion = SETTING_INDEX_VERSION_COMPATIBILITY.get(settings);
this.indexCompatibilityVersion = indexCompatibilityVersion;
assert indexCompatibilityVersion.equals(SETTING_INDEX_VERSION_COMPATIBILITY.get(settings));
this.indexMode = indexMode;
this.timeSeriesStart = timeSeriesStart;
this.timeSeriesEnd = timeSeriesEnd;
Expand Down Expand Up @@ -705,28 +707,227 @@ IndexMetadata withMappingMetadata(MappingMetadata mapping) {
this.isPartialSearchableSnapshot,
this.indexMode,
this.timeSeriesStart,
this.timeSeriesEnd
this.timeSeriesEnd,
this.indexCompatibilityVersion
);
}

public Index getIndex() {
return index;
/**
 * Returns an {@link IndexMetadata} whose in-sync allocation ids for the given shard are set to {@code inSyncSet}.
 * <p>
 * If {@code inSyncSet} is already equal to the set currently stored for {@code shardId}, this very instance is returned and
 * nothing is allocated. Otherwise a new instance is created through the private constructor; every field other than the
 * in-sync allocation ids map is handed over from this instance unchanged (in particular {@code version} is not incremented here).
 *
 * @param shardId shard id to set in-sync allocation ids for
 * @param inSyncSet new in-sync allocation ids; an unmodifiable copy of it is stored, so later changes to the argument
 *                  do not affect the returned instance
 * @return this instance if the in-sync set for the shard is unchanged, otherwise an updated copy
 */
public IndexMetadata withInSyncAllocationIds(int shardId, Set<String> inSyncSet) {
// No-op fast path: hand back the same object instead of building an equal copy.
if (inSyncSet.equals(inSyncAllocationIds.get(shardId))) {
return this;
}
return new IndexMetadata(
this.index,
this.version,
this.mappingVersion,
this.settingsVersion,
this.aliasesVersion,
this.primaryTerms,
this.state,
this.numberOfShards,
this.numberOfReplicas,
this.settings,
this.mapping,
this.aliases,
this.customData,
// The only argument that differs from this instance: the in-sync map with the entry for shardId added or replaced.
Maps.copyMapWithAddedOrReplacedEntry(this.inSyncAllocationIds, shardId, Set.copyOf(inSyncSet)),
this.requireFilters,
this.initialRecoveryFilters,
this.includeFilters,
this.excludeFilters,
this.indexCreatedVersion,
this.routingNumShards,
this.routingPartitionSize,
this.routingPaths,
this.waitForActiveShards,
this.rolloverInfos,
this.isSystem,
this.isHidden,
this.timestampRange,
this.priority,
this.creationDate,
this.ignoreDiskWatermarks,
this.tierPreference,
this.shardsPerNodeLimit,
this.lifecyclePolicyName,
this.lifecycleExecutionState,
this.autoExpandReplicas,
this.isSearchableSnapshot,
this.isPartialSearchableSnapshot,
this.indexMode,
this.timeSeriesStart,
this.timeSeriesEnd,
this.indexCompatibilityVersion
);
}

public String getIndexUUID() {
return index.getUUID();
/**
 * Creates a copy of this instance that has the primary term for the given shard id incremented by one.
 * <p>
 * Unlike {@code withInSyncAllocationIds}, this method has no "unchanged" fast path: it always returns a new instance.
 * All fields other than the primary terms array are handed over from this instance unchanged.
 *
 * @param shardId shard id to increment primary term for; used directly as an index into the primary terms array,
 *                no range check is performed here
 * @return updated instance with incremented primary term
 */
public IndexMetadata withIncrementedPrimaryTerm(int shardId) {
// Work on a clone so the array held by this instance is not modified.
final long[] incremented = this.primaryTerms.clone();
incremented[shardId]++;
return new IndexMetadata(
this.index,
this.version,
this.mappingVersion,
this.settingsVersion,
this.aliasesVersion,
// The only argument that differs from this instance.
incremented,
this.state,
this.numberOfShards,
this.numberOfReplicas,
this.settings,
this.mapping,
this.aliases,
this.customData,
this.inSyncAllocationIds,
this.requireFilters,
this.initialRecoveryFilters,
this.includeFilters,
this.excludeFilters,
this.indexCreatedVersion,
this.routingNumShards,
this.routingPartitionSize,
this.routingPaths,
this.waitForActiveShards,
this.rolloverInfos,
this.isSystem,
this.isHidden,
this.timestampRange,
this.priority,
this.creationDate,
this.ignoreDiskWatermarks,
this.tierPreference,
this.shardsPerNodeLimit,
this.lifecyclePolicyName,
this.lifecycleExecutionState,
this.autoExpandReplicas,
this.isSearchableSnapshot,
this.isPartialSearchableSnapshot,
this.indexMode,
this.timeSeriesStart,
this.timeSeriesEnd,
this.indexCompatibilityVersion
);
}

/**
* Test whether the current index UUID is the same as the given one. Returns true if either are _na_
* @param timestampRange new timestamp range
* @return copy of this instance with updated timestamp range
*/
public boolean isSameUUID(String otherUUID) {
assert otherUUID != null;
assert getIndexUUID() != null;
if (INDEX_UUID_NA_VALUE.equals(otherUUID) || INDEX_UUID_NA_VALUE.equals(getIndexUUID())) {
return true;
public IndexMetadata withTimestampRange(IndexLongFieldRange timestampRange) {
if (timestampRange.equals(this.timestampRange)) {
return this;
}
return otherUUID.equals(getIndexUUID());
return new IndexMetadata(
this.index,
this.version,
this.mappingVersion,
this.settingsVersion,
this.aliasesVersion,
this.primaryTerms,
this.state,
this.numberOfShards,
this.numberOfReplicas,
this.settings,
this.mapping,
this.aliases,
this.customData,
this.inSyncAllocationIds,
this.requireFilters,
this.initialRecoveryFilters,
this.includeFilters,
this.excludeFilters,
this.indexCreatedVersion,
this.routingNumShards,
this.routingPartitionSize,
this.routingPaths,
this.waitForActiveShards,
this.rolloverInfos,
this.isSystem,
this.isHidden,
timestampRange,
this.priority,
this.creationDate,
this.ignoreDiskWatermarks,
this.tierPreference,
this.shardsPerNodeLimit,
this.lifecyclePolicyName,
this.lifecycleExecutionState,
this.autoExpandReplicas,
this.isSearchableSnapshot,
this.isPartialSearchableSnapshot,
this.indexMode,
this.timeSeriesStart,
this.timeSeriesEnd,
this.indexCompatibilityVersion
);
}

/**
 * Creates a new instance that is identical to this one except that {@code version} is {@code this.version + 1}.
 * All other fields are handed over from this instance unchanged, without going through a builder.
 *
 * @return a copy of this instance that has its version incremented by one
 */
public IndexMetadata withIncrementedVersion() {
return new IndexMetadata(
this.index,
// The only argument that differs from this instance.
this.version + 1,
this.mappingVersion,
this.settingsVersion,
this.aliasesVersion,
this.primaryTerms,
this.state,
this.numberOfShards,
this.numberOfReplicas,
this.settings,
this.mapping,
this.aliases,
this.customData,
this.inSyncAllocationIds,
this.requireFilters,
this.initialRecoveryFilters,
this.includeFilters,
this.excludeFilters,
this.indexCreatedVersion,
this.routingNumShards,
this.routingPartitionSize,
this.routingPaths,
this.waitForActiveShards,
this.rolloverInfos,
this.isSystem,
this.isHidden,
this.timestampRange,
this.priority,
this.creationDate,
this.ignoreDiskWatermarks,
this.tierPreference,
this.shardsPerNodeLimit,
this.lifecyclePolicyName,
this.lifecycleExecutionState,
this.autoExpandReplicas,
this.isSearchableSnapshot,
this.isPartialSearchableSnapshot,
this.indexMode,
this.timeSeriesStart,
this.timeSeriesEnd,
this.indexCompatibilityVersion
);
}

/**
 * @return the {@link Index} held by this metadata instance
 */
public Index getIndex() {
    return this.index;
}

/**
 * @return the UUID reported by the {@link Index} held by this metadata instance
 */
public String getIndexUUID() {
    return this.index.getUUID();
}

public long getVersion() {
Expand Down Expand Up @@ -1581,10 +1782,6 @@ public Builder settingsVersion(final long settingsVersion) {
return this;
}

/**
 * @return the aliases version currently set on this builder
 */
public long aliasesVersion() {
    return this.aliasesVersion;
}

public Builder aliasesVersion(final long aliasesVersion) {
this.aliasesVersion = aliasesVersion;
return this;
Expand Down Expand Up @@ -1640,10 +1837,6 @@ public Builder timestampRange(IndexLongFieldRange timestampRange) {
return this;
}

/**
 * @return the timestamp range currently set on this builder
 */
public IndexLongFieldRange getTimestampRange() {
    return this.timestampRange;
}

public IndexMetadata build() {
/*
* We expect that the metadata has been properly built to set the number of shards and the number of replicas, and do not rely
Expand Down Expand Up @@ -1804,7 +1997,8 @@ public IndexMetadata build() {
isSearchableSnapshot && settings.getAsBoolean(SEARCHABLE_SNAPSHOT_PARTIAL_SETTING_KEY, false),
isTsdb ? IndexMode.TIME_SERIES : null,
isTsdb ? IndexSettings.TIME_SERIES_START_TIME.get(settings) : null,
isTsdb ? IndexSettings.TIME_SERIES_END_TIME.get(settings) : null
isTsdb ? IndexSettings.TIME_SERIES_END_TIME.get(settings) : null,
SETTING_INDEX_VERSION_COMPATIBILITY.get(settings)
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.elasticsearch.cluster.block.ClusterBlockLevel;
import org.elasticsearch.cluster.coordination.CoordinationMetadata;
import org.elasticsearch.cluster.metadata.IndexAbstraction.ConcreteIndex;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.collect.ImmutableOpenMap;
Expand Down Expand Up @@ -397,6 +398,47 @@ public Metadata withCoordinationMetadata(CoordinationMetadata coordinationMetada
);
}

/**
 * Creates a copy of this instance updated with the given {@link IndexMetadata} that must only contain changes to primary terms
 * and in-sync allocation ids relative to the existing entries. This method is only used by
 * {@link org.elasticsearch.cluster.routing.allocation.IndexMetadataUpdater#applyChanges(Metadata, RoutingTable)}.
 * <p>
 * Only the {@code indices} map is rebuilt. Every other field, including the shard counters, the alias and visibility
 * collections and the indices lookup, is passed to the new instance exactly as held by this one and is not recomputed.
 * NOTE(review): presumably this is why the updates must not touch anything those structures are derived from; the
 * restriction is not checked in this method.
 *
 * @param updates map of index name to {@link IndexMetadata}.
 * @return this instance if {@code updates} is empty, otherwise the updated metadata instance
 */
public Metadata withAllocationAndTermUpdatesOnly(Map<String, IndexMetadata> updates) {
// Nothing to apply: keep returning the same object rather than an equal copy.
if (updates.isEmpty()) {
return this;
}
// Start from the current index map and put the updated entries on top of it.
final var updatedIndicesBuilder = ImmutableOpenMap.builder(indices);
updatedIndicesBuilder.putAllFromMap(updates);
return new Metadata(
clusterUUID,
clusterUUIDCommitted,
version,
coordinationMetadata,
transientSettings,
persistentSettings,
settings,
hashesOfConsistentSettings,
totalNumberOfShards,
totalOpenIndexShards,
// The only argument that differs from this instance.
updatedIndicesBuilder.build(),
aliasedIndices,
templates,
customs,
allIndices,
visibleIndices,
allOpenIndices,
visibleOpenIndices,
allClosedIndices,
visibleClosedIndices,
indicesLookup,
mappingsByHash,
oldestIndexVersion,
reservedStateMetadata
);
}

/**
 * @return the version held by this metadata instance
 */
public long version() {
    return version;
}
Expand Down Expand Up @@ -1392,7 +1434,7 @@ public Builder put(IndexMetadata indexMetadata, boolean incrementVersion) {
return this;
}
// if we put a new index metadata, increment its version
indexMetadata = IndexMetadata.builder(indexMetadata).version(indexMetadata.getVersion() + 1).build();
indexMetadata = indexMetadata.withIncrementedVersion();
previous = indices.put(name, indexMetadata);
} else {
previous = indices.put(name, indexMetadata);
Expand Down Expand Up @@ -1853,7 +1895,7 @@ public Metadata build() {
return build(false);
}

private Metadata build(boolean skipNameCollisionChecks) {
public Metadata build(boolean skipNameCollisionChecks) {
// TODO: We should move these datastructures to IndexNameExpressionResolver, this will give the following benefits:
// 1) The datastructures will be rebuilt only when needed. Now during serializing we rebuild these datastructures
// while these datastructures aren't even used.
Expand Down
Loading

0 comments on commit 7dc02c5

Please sign in to comment.