Skip to content

Commit

Permalink
Clarify allocation explain if random shard chosen (#75670)
Browse files Browse the repository at this point in the history
Today we often encounter users that are confused by the behaviour of
calling `GET _cluster/allocation/explain` without a body: it _seems_ to
work, but it explains a random shard, and if this isn't the shard
they're thinking of then it's unclear how to proceed.

With this commit we add a note to the response when a shard was randomly
chosen indicating that it is possible, and possibly useful, to explain a
different shard. We also adjust the exception message in the case when
all shards are assigned to indicate why it's an invalid request and what
to do to make it valid.
  • Loading branch information
DaveCTurner committed Aug 2, 2021
1 parent 3f2c3e4 commit cee74ec
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 19 deletions.
2 changes: 2 additions & 0 deletions docs/reference/cluster/allocation-explain.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ GET _cluster/allocation/explain

`GET _cluster/allocation/explain`

`POST _cluster/allocation/explain`

[[cluster-allocation-explain-api-prereqs]]
==== {api-prereq-title}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
}
},
"body":{
"description":"The index, shard, and primary flag to explain. Empty means 'explain the first unassigned shard'"
"description":"The index, shard, and primary flag to explain. Empty means 'explain a randomly-chosen unassigned shard'"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.elasticsearch.action.admin.cluster.allocation;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterInfo;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.ShardRouting;
Expand Down Expand Up @@ -36,15 +37,27 @@
*/
public final class ClusterAllocationExplanation implements ToXContentObject, Writeable {

/**
 * Text of the {@code note} field that {@code toXContent} emits when {@code isSpecificShard()} is {@code false},
 * i.e. when the explained shard was not explicitly named in the request. It tells the reader that the shard was
 * chosen at random and how to ask about a different one. Package-private so that tests can refer to the exact text.
 */
static final String NO_SHARD_SPECIFIED_MESSAGE = "No shard was specified in the explain API request, so this response " +
"explains a randomly chosen unassigned shard. There may be other unassigned shards in this cluster which cannot be assigned for " +
"different reasons. It may not be possible to assign this shard until one of the other shards is assigned correctly. To explain " +
"the allocation of other shards (whether assigned or unassigned) you must specify the target shard in the request to this API.";

private final boolean specificShard;
private final ShardRouting shardRouting;
private final DiscoveryNode currentNode;
private final DiscoveryNode relocationTargetNode;
private final ClusterInfo clusterInfo;
private final ShardAllocationDecision shardAllocationDecision;

public ClusterAllocationExplanation(ShardRouting shardRouting, @Nullable DiscoveryNode currentNode,
@Nullable DiscoveryNode relocationTargetNode, @Nullable ClusterInfo clusterInfo,
ShardAllocationDecision shardAllocationDecision) {
public ClusterAllocationExplanation(
boolean specificShard,
ShardRouting shardRouting,
@Nullable DiscoveryNode currentNode,
@Nullable DiscoveryNode relocationTargetNode,
@Nullable ClusterInfo clusterInfo,
ShardAllocationDecision shardAllocationDecision) {

this.specificShard = specificShard;
this.shardRouting = shardRouting;
this.currentNode = currentNode;
this.relocationTargetNode = relocationTargetNode;
Expand All @@ -53,6 +66,11 @@ public ClusterAllocationExplanation(ShardRouting shardRouting, @Nullable Discove
}

public ClusterAllocationExplanation(StreamInput in) throws IOException {
if (in.getVersion().onOrAfter(Version.V_7_15_0)) {
this.specificShard = in.readBoolean();
} else {
this.specificShard = true; // suppress "this is a random shard" warning in BwC situations
}
this.shardRouting = new ShardRouting(in);
this.currentNode = in.readOptionalWriteable(DiscoveryNode::new);
this.relocationTargetNode = in.readOptionalWriteable(DiscoveryNode::new);
Expand All @@ -62,13 +80,20 @@ public ClusterAllocationExplanation(StreamInput in) throws IOException {

/**
 * Writes this explanation to the given stream.
 *
 * The {@code specificShard} flag is written first, and only when the stream's version is 7.15.0 or later.
 * For older stream versions the flag is omitted; the stream-reading constructor then defaults it to
 * {@code true}, which suppresses the "this is a random shard" note in BwC situations.
 *
 * @param out the stream to write to
 * @throws IOException if writing to the stream fails
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
    final boolean streamCarriesSpecificShardFlag = out.getVersion().onOrAfter(Version.V_7_15_0);
    if (streamCarriesSpecificShardFlag) {
        out.writeBoolean(specificShard);
    }

    // The remaining fields are written in the same order in which the stream-reading constructor consumes them.
    shardRouting.writeTo(out);
    out.writeOptionalWriteable(currentNode);
    out.writeOptionalWriteable(relocationTargetNode);
    out.writeOptionalWriteable(clusterInfo);
    shardAllocationDecision.writeTo(out);
}

/**
 * Returns whether the explained shard was explicitly named in the request. When this is {@code false},
 * {@code toXContent} adds a {@code note} field saying that the shard was chosen at random.
 */
public boolean isSpecificShard() {
    return this.specificShard;
}

/**
* Returns the shard that the explanation is about.
*/
Expand Down Expand Up @@ -131,6 +156,9 @@ public ShardAllocationDecision getShardAllocationDecision() {

public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(); {
if (isSpecificShard() == false) {
builder.field("note", NO_SHARD_SPECIFIED_MESSAGE);
}
builder.field("index", shardRouting.getIndexName());
builder.field("shard", shardRouting.getId());
builder.field("primary", shardRouting.primary());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,25 @@ protected void masterOperation(final ClusterAllocationExplainRequest request, fi
ShardRouting shardRouting = findShardToExplain(request, allocation);
logger.debug("explaining the allocation for [{}], found shard [{}]", request, shardRouting);

ClusterAllocationExplanation cae = explainShard(shardRouting, allocation,
request.includeDiskInfo() ? clusterInfo : null, request.includeYesDecisions(), allocationService);
ClusterAllocationExplanation cae = explainShard(
shardRouting,
allocation,
request.includeDiskInfo() ? clusterInfo : null,
request.includeYesDecisions(),
request.useAnyUnassignedShard() == false,
allocationService);
listener.onResponse(new ClusterAllocationExplainResponse(cae));
}

// public for testing
public static ClusterAllocationExplanation explainShard(ShardRouting shardRouting, RoutingAllocation allocation,
ClusterInfo clusterInfo, boolean includeYesDecisions,
AllocationService allocationService) {
public static ClusterAllocationExplanation explainShard(
ShardRouting shardRouting,
RoutingAllocation allocation,
ClusterInfo clusterInfo,
boolean includeYesDecisions,
boolean isSpecificShard,
AllocationService allocationService) {

allocation.setDebugMode(includeYesDecisions ? DebugMode.ON : DebugMode.EXCLUDE_YES_DECISIONS);

ShardAllocationDecision shardDecision;
Expand All @@ -98,10 +108,13 @@ public static ClusterAllocationExplanation explainShard(ShardRouting shardRoutin
shardDecision = allocationService.explainShardAllocation(shardRouting, allocation);
}

return new ClusterAllocationExplanation(shardRouting,
return new ClusterAllocationExplanation(
isSpecificShard,
shardRouting,
shardRouting.currentNodeId() != null ? allocation.nodes().get(shardRouting.currentNodeId()) : null,
shardRouting.relocatingNodeId() != null ? allocation.nodes().get(shardRouting.relocatingNodeId()) : null,
clusterInfo, shardDecision);
clusterInfo,
shardDecision);
}

// public for testing
Expand All @@ -114,7 +127,9 @@ public static ShardRouting findShardToExplain(ClusterAllocationExplainRequest re
foundShard = ui.next();
}
if (foundShard == null) {
throw new IllegalArgumentException("unable to find any unassigned shards to explain [" + request + "]");
throw new IllegalArgumentException("No shard was specified in the request which means the response should explain a " +
"randomly-chosen unassigned shard, but there are no unassigned shards in this cluster. To explain the allocation of " +
"an assigned shard you must specify the target shard in the request.");
}
} else {
String index = request.getIndex();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
import java.util.Locale;

import static org.elasticsearch.action.admin.cluster.allocation.TransportClusterAllocationExplainAction.findShardToExplain;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.containsString;

/**
* Tests for the {@link TransportClusterAllocationExplainAction} class.
Expand All @@ -46,7 +48,12 @@ public void testInitializingOrRelocatingShardExplanation() throws Exception {
ShardRouting shard = clusterState.getRoutingTable().index("idx").shard(0).primaryShard();
RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Collections.emptyList()),
clusterState.getRoutingNodes(), clusterState, null, null, System.nanoTime());
ClusterAllocationExplanation cae = TransportClusterAllocationExplainAction.explainShard(shard, allocation, null, randomBoolean(),
ClusterAllocationExplanation cae = TransportClusterAllocationExplainAction.explainShard(
shard,
allocation,
null,
randomBoolean(),
true,
new AllocationService(null, new TestGatewayAllocator(), new ShardsAllocator() {
@Override
public void allocate(RoutingAllocation allocation) {
Expand All @@ -64,6 +71,7 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing
}, null, null));

assertEquals(shard.currentNodeId(), cae.getCurrentNode().getId());
assertTrue(cae.isSpecificShard());
assertFalse(cae.getShardAllocationDecision().isDecisionTaken());
assertFalse(cae.getShardAllocationDecision().getAllocateDecision().isDecisionTaken());
assertFalse(cae.getShardAllocationDecision().getMoveDecision().isDecisionTaken());
Expand Down Expand Up @@ -110,8 +118,13 @@ public void testFindAnyUnassignedShardToExplain() {
final ClusterState allStartedClusterState = ClusterStateCreationUtils.state("idx", randomBoolean(),
ShardRoutingState.STARTED, ShardRoutingState.STARTED);
final ClusterAllocationExplainRequest anyUnassignedShardsRequest = new ClusterAllocationExplainRequest();
expectThrows(IllegalArgumentException.class, () ->
findShardToExplain(anyUnassignedShardsRequest, routingAllocation(allStartedClusterState)));
assertThat(expectThrows(
IllegalArgumentException.class,
() -> findShardToExplain(anyUnassignedShardsRequest, routingAllocation(allStartedClusterState))).getMessage(),
allOf(
// no point in asserting the precise wording of the message in this test, but we care that it contains these bits:
containsString("No shard was specified in the request"),
containsString("specify the target shard in the request")));
}

public void testFindPrimaryShardToExplain() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@

import static java.util.Collections.emptyMap;
import static java.util.Collections.emptySet;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;

/**
* Tests for the cluster allocation explanation
Expand All @@ -50,11 +53,12 @@ public void testDecisionEquality() {
}

public void testExplanationSerialization() throws Exception {
ClusterAllocationExplanation cae = randomClusterAllocationExplanation(randomBoolean());
ClusterAllocationExplanation cae = randomClusterAllocationExplanation(randomBoolean(), randomBoolean());
BytesStreamOutput out = new BytesStreamOutput();
cae.writeTo(out);
StreamInput in = out.bytes().streamInput();
ClusterAllocationExplanation cae2 = new ClusterAllocationExplanation(in);
assertEquals(cae.isSpecificShard(), cae2.isSpecificShard());
assertEquals(cae.getShard(), cae2.getShard());
assertEquals(cae.isPrimary(), cae2.isPrimary());
assertTrue(cae2.isPrimary());
Expand All @@ -73,7 +77,7 @@ public void testExplanationSerialization() throws Exception {
}

public void testExplanationToXContent() throws Exception {
ClusterAllocationExplanation cae = randomClusterAllocationExplanation(true);
ClusterAllocationExplanation cae = randomClusterAllocationExplanation(true, true);
XContentBuilder builder = XContentFactory.jsonBuilder();
cae.toXContent(builder, ToXContent.EMPTY_PARAMS);
assertEquals("{\"index\":\"idx\",\"shard\":0,\"primary\":true,\"current_state\":\"started\",\"current_node\":" +
Expand All @@ -83,7 +87,25 @@ public void testExplanationToXContent() throws Exception {
"that can both allocate this shard and improve the cluster balance\"}", Strings.toString(builder));
}

private static ClusterAllocationExplanation randomClusterAllocationExplanation(boolean assignedShard) {
/**
 * Renders an explanation built with {@code specificShard == false} for an assigned shard and checks that the
 * JSON output starts with the {@code note} field, followed by the usual explanation fields.
 */
public void testRandomShardExplanationToXContent() throws Exception {
    final ClusterAllocationExplanation explanation = randomClusterAllocationExplanation(true, false);

    final XContentBuilder jsonBuilder = XContentFactory.jsonBuilder();
    explanation.toXContent(jsonBuilder, ToXContent.EMPTY_PARAMS);
    final String rendered = Strings.toString(jsonBuilder);

    final String expectedJson = "{\"note\":\"" + ClusterAllocationExplanation.NO_SHARD_SPECIFIED_MESSAGE +
        "\",\"index\":\"idx\",\"shard\":0,\"primary\":true,\"current_state\":\"started\",\"current_node\":" +
        "{\"id\":\"node-0\",\"name\":\"\",\"transport_address\":\"" + explanation.getCurrentNode().getAddress() +
        "\",\"weight_ranking\":3},\"can_remain_on_current_node\":\"yes\",\"can_rebalance_cluster\":\"yes\"," +
        "\"can_rebalance_to_other_node\":\"no\",\"rebalance_explanation\":\"cannot rebalance as no target node exists " +
        "that can both allocate this shard and improve the cluster balance\"}";

    assertThat(rendered, allOf(
        equalTo(expectedJson),
        // no point in asserting the precise wording of the message in this test, but we care that the note contains these bits:
        containsString("No shard was specified in the explain API request"),
        containsString("specify the target shard in the request")
    ));
}

private static ClusterAllocationExplanation randomClusterAllocationExplanation(boolean assignedShard, boolean specificShard) {
ShardRouting shardRouting = TestShardRouting.newShardRouting(new ShardId(new Index("idx", "123"), 0),
assignedShard ? "node-0" : null, true, assignedShard ? ShardRoutingState.STARTED : ShardRoutingState.UNASSIGNED);
DiscoveryNode node = assignedShard ? new DiscoveryNode("node-0", buildNewFakeTransportAddress(), emptyMap(), emptySet(),
Expand All @@ -97,6 +119,6 @@ private static ClusterAllocationExplanation randomClusterAllocationExplanation(b
AllocateUnassignedDecision allocateDecision = AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.DECIDERS_NO, null);
shardAllocationDecision = new ShardAllocationDecision(allocateDecision, MoveDecision.NOT_TAKEN);
}
return new ClusterAllocationExplanation(shardRouting, node, null, null, shardAllocationDecision);
return new ClusterAllocationExplanation(specificShard, shardRouting, node, null, null, shardAllocationDecision);
}
}

0 comments on commit cee74ec

Please sign in to comment.