Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize log cluster health performance. #87723

Merged
merged 12 commits into from
Jul 1, 2022
5 changes: 5 additions & 0 deletions docs/changelog/87723.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 87723
summary: Optimize log cluster health performance.
area: Cluster Allocation
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
*/
package org.elasticsearch.cluster.health;

import org.apache.logging.log4j.Logger;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.routing.IndexRoutingTable;
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
Expand All @@ -22,6 +25,8 @@
import java.util.Map;
import java.util.Objects;

import static org.elasticsearch.cluster.health.ClusterShardHealth.getInactivePrimaryHealth;

public final class ClusterStateHealth implements Iterable<ClusterIndexHealth>, Writeable {

private final int numberOfNodes;
Expand Down Expand Up @@ -144,6 +149,40 @@ public ClusterStateHealth(
this.indices = indices;
}

public static ClusterHealthStatus getHealthStatus(
original-brownbear marked this conversation as resolved.
Show resolved Hide resolved
final ClusterState clusterState,
final Logger logger
) {
if (clusterState.blocks().hasGlobalBlockWithStatus(RestStatus.SERVICE_UNAVAILABLE)) {
return ClusterHealthStatus.RED;
}

ClusterHealthStatus computeStatus = ClusterHealthStatus.GREEN;
for (String index : clusterState.metadata().getConcreteAllIndices()) {
IndexRoutingTable indexRoutingTable = clusterState.routingTable().index(index);
if (indexRoutingTable.allShardsActive()) {
// GREEN index
continue;
}

for (int i = 0; i < indexRoutingTable.size(); i++) {
IndexShardRoutingTable indexShardRoutingTable = indexRoutingTable.shard(i);
ShardRouting primary = indexShardRoutingTable.primaryShard();
if (primary.active()) {
// index has inactive replicas
computeStatus = ClusterHealthStatus.YELLOW;
continue;
}
computeStatus = getInactivePrimaryHealth(primary);
if (computeStatus == ClusterHealthStatus.RED) {
logger.debug("One of inactive primary shard {} causes cluster state RED.", primary.shardId());
return ClusterHealthStatus.RED;
}
}
}
return computeStatus;
}

/**
 * Returns the active shard count stored in this health object.
 *
 * @return the value of the {@code activeShards} field
 */
public int getActiveShards() {
    return this.activeShards;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,21 @@ public class IndexRoutingTable implements SimpleDiffable<IndexRoutingTable> {
// note, we assume that when the index routing is created, ShardRoutings are created for all possible number of
// shards with state set to UNASSIGNED
private final IndexShardRoutingTable[] shards;

// Computed once in the constructor: true when the summed size of all shard routing tables equals the
// number of active shard routings collected from them.
private final boolean allShardsActive;
// Active shard routings gathered from every shard routing table in the constructor, stored through
// CollectionUtils.wrapUnmodifiableOrEmptySingleton.
private final List<ShardRouting> allActiveShards;

/**
 * Creates the routing table of one index from its per-shard routing tables.
 * <p>
 * While walking the shard tables once, the constructor collects every active shard routing and
 * sums the size of each shard table, so that the list of active shards and the
 * "all shards active" flag are available afterwards without another pass.
 *
 * @param index  the index this routing table belongs to
 * @param shards the routing tables of the individual shards of the index
 */
IndexRoutingTable(Index index, IndexShardRoutingTable[] shards) {
    this.index = index;
    this.shuffler = new RotationShardShuffler(Randomness.get().nextInt());
    this.shards = shards;

    final List<ShardRouting> activeRoutings = new ArrayList<>();
    int shardCopyCount = 0;
    for (int i = 0; i < shards.length; i++) {
        final IndexShardRoutingTable shardTable = shards[i];
        activeRoutings.addAll(shardTable.activeShards());
        shardCopyCount += shardTable.size();
    }

    this.allActiveShards = CollectionUtils.wrapUnmodifiableOrEmptySingleton(activeRoutings);
    // Every shard copy is active exactly when the two counts agree.
    this.allShardsActive = activeRoutings.size() == shardCopyCount;
}

/**
Expand Down Expand Up @@ -217,6 +220,10 @@ public boolean allPrimaryShardsActive() {
return primaryShardsActive() == shards.length;
}

/**
 * Returns the cached {@code allShardsActive} flag of this index routing table.
 *
 * @return the value of the {@code allShardsActive} field
 */
public boolean allShardsActive() {
    return allShardsActive;
}

/**
* Calculates the number of primary shards in active state in routing table
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.RestoreInProgress;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.cluster.health.ClusterStateHealth;
import org.elasticsearch.cluster.metadata.AutoExpandReplicas;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
Expand Down Expand Up @@ -55,6 +54,7 @@

import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static org.elasticsearch.cluster.health.ClusterStateHealth.getHealthStatus;
import static org.elasticsearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING;

/**
Expand Down Expand Up @@ -168,7 +168,7 @@ private static ClusterState buildResultAndLogHealthChange(ClusterState oldState,
}
final ClusterState newState = newStateBuilder.build();

logClusterHealthStateChange(new ClusterStateHealth(oldState), new ClusterStateHealth(newState), reason);
logClusterHealthStateChange(oldState, newState, reason);

return newState;
}
Expand Down Expand Up @@ -495,13 +495,10 @@ public ClusterState reroute(ClusterState clusterState, String reason) {
return buildResultAndLogHealthChange(clusterState, allocation, reason);
}

private static void logClusterHealthStateChange(
ClusterStateHealth previousStateHealth,
ClusterStateHealth newStateHealth,
String reason
) {
ClusterHealthStatus previousHealth = previousStateHealth.getStatus();
ClusterHealthStatus currentHealth = newStateHealth.getStatus();
private static void logClusterHealthStateChange(final ClusterState previousState, final ClusterState newState, String reason) {
ClusterHealthStatus previousHealth = getHealthStatus(previousState, logger);
ClusterHealthStatus currentHealth = getHealthStatus(newState, logger);

if (previousHealth.equals(currentHealth) == false) {
logger.info(
new ESLogMessage("Cluster health status changed from [{}] to [{}] (reason: [{}]).").argAndField(
Expand Down