Skip to content

Commit

Permalink
Clarify allocation explain if random shard chosen (#75670)
Browse files Browse the repository at this point in the history
Today we often encounter users that are confused by the behaviour of
calling `GET _cluster/allocation/explain` without a body: it _seems_ to
work, but it explains a random shard, and if this isn't the shard
they're thinking of then it's unclear how to proceed.

With this commit we add a note to the response when a shard was randomly
chosen indicating that it is possible, and possibly useful, to explain a
different shard. We also adjust the exception message in the case when
all shards are assigned to indicate why it's an invalid request and what
to do to make it valid.
DaveCTurner authored Aug 2, 2021
1 parent 2131c44 commit 95edc6d
Showing 6 changed files with 99 additions and 19 deletions.
2 changes: 2 additions & 0 deletions docs/reference/cluster/allocation-explain.asciidoc
Original file line number Diff line number Diff line change
@@ -25,6 +25,8 @@ GET _cluster/allocation/explain

`GET _cluster/allocation/explain`

`POST _cluster/allocation/explain`

[[cluster-allocation-explain-api-prereqs]]
==== {api-prereq-title}

Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@
}
},
"body":{
"description":"The index, shard, and primary flag to explain. Empty means 'explain the first unassigned shard'"
"description":"The index, shard, and primary flag to explain. Empty means 'explain a randomly-chosen unassigned shard'"
}
}
}
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@

package org.elasticsearch.action.admin.cluster.allocation;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterInfo;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.ShardRouting;
@@ -36,15 +37,27 @@
*/
public final class ClusterAllocationExplanation implements ToXContentObject, Writeable {

static final String NO_SHARD_SPECIFIED_MESSAGE = "No shard was specified in the explain API request, so this response " +
"explains a randomly chosen unassigned shard. There may be other unassigned shards in this cluster which cannot be assigned for " +
"different reasons. It may not be possible to assign this shard until one of the other shards is assigned correctly. To explain " +
"the allocation of other shards (whether assigned or unassigned) you must specify the target shard in the request to this API.";

private final boolean specificShard;
private final ShardRouting shardRouting;
private final DiscoveryNode currentNode;
private final DiscoveryNode relocationTargetNode;
private final ClusterInfo clusterInfo;
private final ShardAllocationDecision shardAllocationDecision;

public ClusterAllocationExplanation(ShardRouting shardRouting, @Nullable DiscoveryNode currentNode,
@Nullable DiscoveryNode relocationTargetNode, @Nullable ClusterInfo clusterInfo,
ShardAllocationDecision shardAllocationDecision) {
public ClusterAllocationExplanation(
boolean specificShard,
ShardRouting shardRouting,
@Nullable DiscoveryNode currentNode,
@Nullable DiscoveryNode relocationTargetNode,
@Nullable ClusterInfo clusterInfo,
ShardAllocationDecision shardAllocationDecision) {

this.specificShard = specificShard;
this.shardRouting = shardRouting;
this.currentNode = currentNode;
this.relocationTargetNode = relocationTargetNode;
@@ -53,6 +66,11 @@ public ClusterAllocationExplanation(ShardRouting shardRouting, @Nullable Discove
}

public ClusterAllocationExplanation(StreamInput in) throws IOException {
if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
this.specificShard = in.readBoolean();
} else {
this.specificShard = true; // suppress "this is a random shard" warning in BwC situations
}
this.shardRouting = new ShardRouting(in);
this.currentNode = in.readOptionalWriteable(DiscoveryNode::new);
this.relocationTargetNode = in.readOptionalWriteable(DiscoveryNode::new);
@@ -62,13 +80,20 @@ public ClusterAllocationExplanation(StreamInput in) throws IOException {

@Override
public void writeTo(StreamOutput out) throws IOException {
if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
out.writeBoolean(specificShard);
} // else suppress "this is a random shard" warning in BwC situations
shardRouting.writeTo(out);
out.writeOptionalWriteable(currentNode);
out.writeOptionalWriteable(relocationTargetNode);
out.writeOptionalWriteable(clusterInfo);
shardAllocationDecision.writeTo(out);
}

public boolean isSpecificShard() {
return specificShard;
}

/**
* Returns the shard that the explanation is about.
*/
@@ -131,6 +156,9 @@ public ShardAllocationDecision getShardAllocationDecision() {

public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(); {
if (isSpecificShard() == false) {
builder.field("note", NO_SHARD_SPECIFIED_MESSAGE);
}
builder.field("index", shardRouting.getIndexName());
builder.field("shard", shardRouting.getId());
builder.field("primary", shardRouting.primary());
Original file line number Diff line number Diff line change
@@ -81,15 +81,25 @@ protected void masterOperation(Task task, final ClusterAllocationExplainRequest
ShardRouting shardRouting = findShardToExplain(request, allocation);
logger.debug("explaining the allocation for [{}], found shard [{}]", request, shardRouting);

ClusterAllocationExplanation cae = explainShard(shardRouting, allocation,
request.includeDiskInfo() ? clusterInfo : null, request.includeYesDecisions(), allocationService);
ClusterAllocationExplanation cae = explainShard(
shardRouting,
allocation,
request.includeDiskInfo() ? clusterInfo : null,
request.includeYesDecisions(),
request.useAnyUnassignedShard() == false,
allocationService);
listener.onResponse(new ClusterAllocationExplainResponse(cae));
}

// public for testing
public static ClusterAllocationExplanation explainShard(ShardRouting shardRouting, RoutingAllocation allocation,
ClusterInfo clusterInfo, boolean includeYesDecisions,
AllocationService allocationService) {
public static ClusterAllocationExplanation explainShard(
ShardRouting shardRouting,
RoutingAllocation allocation,
ClusterInfo clusterInfo,
boolean includeYesDecisions,
boolean isSpecificShard,
AllocationService allocationService) {

allocation.setDebugMode(includeYesDecisions ? DebugMode.ON : DebugMode.EXCLUDE_YES_DECISIONS);

ShardAllocationDecision shardDecision;
@@ -99,10 +109,13 @@ public static ClusterAllocationExplanation explainShard(ShardRouting shardRoutin
shardDecision = allocationService.explainShardAllocation(shardRouting, allocation);
}

return new ClusterAllocationExplanation(shardRouting,
return new ClusterAllocationExplanation(
isSpecificShard,
shardRouting,
shardRouting.currentNodeId() != null ? allocation.nodes().get(shardRouting.currentNodeId()) : null,
shardRouting.relocatingNodeId() != null ? allocation.nodes().get(shardRouting.relocatingNodeId()) : null,
clusterInfo, shardDecision);
clusterInfo,
shardDecision);
}

// public for testing
@@ -115,7 +128,9 @@ public static ShardRouting findShardToExplain(ClusterAllocationExplainRequest re
foundShard = ui.next();
}
if (foundShard == null) {
throw new IllegalArgumentException("unable to find any unassigned shards to explain [" + request + "]");
throw new IllegalArgumentException("No shard was specified in the request which means the response should explain a " +
"randomly-chosen unassigned shard, but there are no unassigned shards in this cluster. To explain the allocation of " +
"an assigned shard you must specify the target shard in the request.");
}
} else {
String index = request.getIndex();
Original file line number Diff line number Diff line change
@@ -32,6 +32,8 @@
import java.util.Locale;

import static org.elasticsearch.action.admin.cluster.allocation.TransportClusterAllocationExplainAction.findShardToExplain;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.containsString;

/**
* Tests for the {@link TransportClusterAllocationExplainAction} class.
@@ -46,7 +48,12 @@ public void testInitializingOrRelocatingShardExplanation() throws Exception {
ShardRouting shard = clusterState.getRoutingTable().index("idx").shard(0).primaryShard();
RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Collections.emptyList()),
clusterState.getRoutingNodes(), clusterState, null, null, System.nanoTime());
ClusterAllocationExplanation cae = TransportClusterAllocationExplainAction.explainShard(shard, allocation, null, randomBoolean(),
ClusterAllocationExplanation cae = TransportClusterAllocationExplainAction.explainShard(
shard,
allocation,
null,
randomBoolean(),
true,
new AllocationService(null, new TestGatewayAllocator(), new ShardsAllocator() {
@Override
public void allocate(RoutingAllocation allocation) {
@@ -64,6 +71,7 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing
}, null, null));

assertEquals(shard.currentNodeId(), cae.getCurrentNode().getId());
assertTrue(cae.isSpecificShard());
assertFalse(cae.getShardAllocationDecision().isDecisionTaken());
assertFalse(cae.getShardAllocationDecision().getAllocateDecision().isDecisionTaken());
assertFalse(cae.getShardAllocationDecision().getMoveDecision().isDecisionTaken());
@@ -110,8 +118,13 @@ public void testFindAnyUnassignedShardToExplain() {
final ClusterState allStartedClusterState = ClusterStateCreationUtils.state("idx", randomBoolean(),
ShardRoutingState.STARTED, ShardRoutingState.STARTED);
final ClusterAllocationExplainRequest anyUnassignedShardsRequest = new ClusterAllocationExplainRequest();
expectThrows(IllegalArgumentException.class, () ->
findShardToExplain(anyUnassignedShardsRequest, routingAllocation(allStartedClusterState)));
assertThat(expectThrows(
IllegalArgumentException.class,
() -> findShardToExplain(anyUnassignedShardsRequest, routingAllocation(allStartedClusterState))).getMessage(),
allOf(
// no point in asserting the precise wording of the message into this test, but we care that it contains these bits:
containsString("No shard was specified in the request"),
containsString("specify the target shard in the request")));
}

public void testFindPrimaryShardToExplain() {
Original file line number Diff line number Diff line change
@@ -31,6 +31,9 @@

import static java.util.Collections.emptyMap;
import static java.util.Collections.emptySet;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;

/**
* Tests for the cluster allocation explanation
@@ -50,11 +53,12 @@ public void testDecisionEquality() {
}

public void testExplanationSerialization() throws Exception {
ClusterAllocationExplanation cae = randomClusterAllocationExplanation(randomBoolean());
ClusterAllocationExplanation cae = randomClusterAllocationExplanation(randomBoolean(), randomBoolean());
BytesStreamOutput out = new BytesStreamOutput();
cae.writeTo(out);
StreamInput in = out.bytes().streamInput();
ClusterAllocationExplanation cae2 = new ClusterAllocationExplanation(in);
assertEquals(cae.isSpecificShard(), cae2.isSpecificShard());
assertEquals(cae.getShard(), cae2.getShard());
assertEquals(cae.isPrimary(), cae2.isPrimary());
assertTrue(cae2.isPrimary());
@@ -73,7 +77,7 @@ public void testExplanationSerialization() throws Exception {
}

public void testExplanationToXContent() throws Exception {
ClusterAllocationExplanation cae = randomClusterAllocationExplanation(true);
ClusterAllocationExplanation cae = randomClusterAllocationExplanation(true, true);
XContentBuilder builder = XContentFactory.jsonBuilder();
cae.toXContent(builder, ToXContent.EMPTY_PARAMS);
assertEquals("{\"index\":\"idx\",\"shard\":0,\"primary\":true,\"current_state\":\"started\",\"current_node\":" +
@@ -83,7 +87,25 @@ public void testExplanationToXContent() throws Exception {
"that can both allocate this shard and improve the cluster balance\"}", Strings.toString(builder));
}

private static ClusterAllocationExplanation randomClusterAllocationExplanation(boolean assignedShard) {
public void testRandomShardExplanationToXContent() throws Exception {
ClusterAllocationExplanation cae = randomClusterAllocationExplanation(true, false);
XContentBuilder builder = XContentFactory.jsonBuilder();
cae.toXContent(builder, ToXContent.EMPTY_PARAMS);
final String actual = Strings.toString(builder);
assertThat(actual, allOf(
equalTo("{\"note\":\"" + ClusterAllocationExplanation.NO_SHARD_SPECIFIED_MESSAGE +
"\",\"index\":\"idx\",\"shard\":0,\"primary\":true,\"current_state\":\"started\",\"current_node\":" +
"{\"id\":\"node-0\",\"name\":\"\",\"transport_address\":\"" + cae.getCurrentNode().getAddress() +
"\",\"weight_ranking\":3},\"can_remain_on_current_node\":\"yes\",\"can_rebalance_cluster\":\"yes\"," +
"\"can_rebalance_to_other_node\":\"no\",\"rebalance_explanation\":\"cannot rebalance as no target node exists " +
"that can both allocate this shard and improve the cluster balance\"}"),
// no point in asserting the precise wording of the message into this test, but we care that the note contains these bits:
containsString("No shard was specified in the explain API request"),
containsString("specify the target shard in the request")
));
}

private static ClusterAllocationExplanation randomClusterAllocationExplanation(boolean assignedShard, boolean specificShard) {
ShardRouting shardRouting = TestShardRouting.newShardRouting(new ShardId(new Index("idx", "123"), 0),
assignedShard ? "node-0" : null, true, assignedShard ? ShardRoutingState.STARTED : ShardRoutingState.UNASSIGNED);
DiscoveryNode node = assignedShard ? new DiscoveryNode("node-0", buildNewFakeTransportAddress(), emptyMap(), emptySet(),
@@ -97,6 +119,6 @@ private static ClusterAllocationExplanation randomClusterAllocationExplanation(b
AllocateUnassignedDecision allocateDecision = AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.DECIDERS_NO, null);
shardAllocationDecision = new ShardAllocationDecision(allocateDecision, MoveDecision.NOT_TAKEN);
}
return new ClusterAllocationExplanation(shardRouting, node, null, null, shardAllocationDecision);
return new ClusterAllocationExplanation(specificShard, shardRouting, node, null, null, shardAllocationDecision);
}
}

0 comments on commit 95edc6d

Please sign in to comment.