From 95edc6deb28f3284df435cac1e38b017b5df6193 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 2 Aug 2021 15:14:09 +0100 Subject: [PATCH] Clarify allocation explain if random shard chosen (#75670) Today we often encounter users that are confused by the behaviour of calling `GET _cluster/allocation/explain` without a body: it _seems_ to work, but it explains a random shard, and if this isn't the shard they're thinking of then it's unclear how to proceed. With this commit we add a note to the response when a shard was randomly chosen indicating that it is possible, and possibly useful, to explain a different shard. We also adjust the exception message in the case when all shards are assigned to indicate why it's an invalid request and what to do to make it valid. --- .../cluster/allocation-explain.asciidoc | 2 ++ .../api/cluster.allocation_explain.json | 2 +- .../ClusterAllocationExplanation.java | 34 +++++++++++++++++-- ...ansportClusterAllocationExplainAction.java | 31 ++++++++++++----- .../ClusterAllocationExplainActionTests.java | 19 +++++++++-- .../ClusterAllocationExplanationTests.java | 30 +++++++++++++--- 6 files changed, 99 insertions(+), 19 deletions(-) diff --git a/docs/reference/cluster/allocation-explain.asciidoc b/docs/reference/cluster/allocation-explain.asciidoc index 0a633039c4185..36ef95a0d79da 100644 --- a/docs/reference/cluster/allocation-explain.asciidoc +++ b/docs/reference/cluster/allocation-explain.asciidoc @@ -25,6 +25,8 @@ GET _cluster/allocation/explain `GET _cluster/allocation/explain` +`POST _cluster/allocation/explain` + [[cluster-allocation-explain-api-prereqs]] ==== {api-prereq-title} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.allocation_explain.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.allocation_explain.json index 36f341d761531..e828af8a569ed 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.allocation_explain.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.allocation_explain.json @@ -32,7 +32,7 @@ } }, "body":{ - "description":"The index, shard, and primary flag to explain. Empty means 'explain the first unassigned shard'" + "description":"The index, shard, and primary flag to explain. Empty means 'explain a randomly-chosen unassigned shard'" } } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanation.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanation.java index 9ecc361cafef8..e59004aa8f5e6 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanation.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanation.java @@ -8,6 +8,7 @@ package org.elasticsearch.action.admin.cluster.allocation; +import org.elasticsearch.Version; import org.elasticsearch.cluster.ClusterInfo; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.ShardRouting; @@ -36,15 +37,27 @@ */ public final class ClusterAllocationExplanation implements ToXContentObject, Writeable { + static final String NO_SHARD_SPECIFIED_MESSAGE = "No shard was specified in the explain API request, so this response " + + "explains a randomly chosen unassigned shard. There may be other unassigned shards in this cluster which cannot be assigned for " + + "different reasons. It may not be possible to assign this shard until one of the other shards is assigned correctly. To explain " + + "the allocation of other shards (whether assigned or unassigned) you must specify the target shard in the request to this API."; + + private final boolean specificShard; private final ShardRouting shardRouting; private final DiscoveryNode currentNode; private final DiscoveryNode relocationTargetNode; private final ClusterInfo clusterInfo; private final ShardAllocationDecision shardAllocationDecision; - public ClusterAllocationExplanation(ShardRouting shardRouting, @Nullable DiscoveryNode currentNode, - @Nullable DiscoveryNode relocationTargetNode, @Nullable ClusterInfo clusterInfo, - ShardAllocationDecision shardAllocationDecision) { + public ClusterAllocationExplanation( + boolean specificShard, + ShardRouting shardRouting, + @Nullable DiscoveryNode currentNode, + @Nullable DiscoveryNode relocationTargetNode, + @Nullable ClusterInfo clusterInfo, + ShardAllocationDecision shardAllocationDecision) { + + this.specificShard = specificShard; this.shardRouting = shardRouting; this.currentNode = currentNode; this.relocationTargetNode = relocationTargetNode; @@ -53,6 +66,11 @@ public ClusterAllocationExplanation(ShardRouting shardRouting, @Nullable Discove } public ClusterAllocationExplanation(StreamInput in) throws IOException { + if (in.getVersion().onOrAfter(Version.V_8_0_0)) { + this.specificShard = in.readBoolean(); + } else { + this.specificShard = true; // suppress "this is a random shard" warning in BwC situations + } this.shardRouting = new ShardRouting(in); this.currentNode = in.readOptionalWriteable(DiscoveryNode::new); this.relocationTargetNode = in.readOptionalWriteable(DiscoveryNode::new); @@ -62,6 +80,9 @@ public ClusterAllocationExplanation(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { + if (out.getVersion().onOrAfter(Version.V_8_0_0)) { + out.writeBoolean(specificShard); + } // else suppress "this is a random shard" warning in BwC situations shardRouting.writeTo(out); out.writeOptionalWriteable(currentNode); out.writeOptionalWriteable(relocationTargetNode); @@ -69,6 +90,10 @@ public void writeTo(StreamOutput out) throws IOException { shardAllocationDecision.writeTo(out); } + public boolean isSpecificShard() { + return specificShard; + } + /** * Returns the shard that the explanation is about. */ @@ -131,6 +156,9 @@ public ShardAllocationDecision getShardAllocationDecision() { public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); { + if (isSpecificShard() == false) { + builder.field("note", NO_SHARD_SPECIFIED_MESSAGE); + } builder.field("index", shardRouting.getIndexName()); builder.field("shard", shardRouting.getId()); builder.field("primary", shardRouting.primary()); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java index a795818ac2a63..852d55f866702 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java @@ -81,15 +81,25 @@ protected void masterOperation(Task task, final ClusterAllocationExplainRequest ShardRouting shardRouting = findShardToExplain(request, allocation); logger.debug("explaining the allocation for [{}], found shard [{}]", request, shardRouting); - ClusterAllocationExplanation cae = explainShard(shardRouting, allocation, - request.includeDiskInfo() ? clusterInfo : null, request.includeYesDecisions(), allocationService); + ClusterAllocationExplanation cae = explainShard( + shardRouting, + allocation, + request.includeDiskInfo() ? clusterInfo : null, + request.includeYesDecisions(), + request.useAnyUnassignedShard() == false, + allocationService); listener.onResponse(new ClusterAllocationExplainResponse(cae)); } // public for testing - public static ClusterAllocationExplanation explainShard(ShardRouting shardRouting, RoutingAllocation allocation, - ClusterInfo clusterInfo, boolean includeYesDecisions, - AllocationService allocationService) { + public static ClusterAllocationExplanation explainShard( + ShardRouting shardRouting, + RoutingAllocation allocation, + ClusterInfo clusterInfo, + boolean includeYesDecisions, + boolean isSpecificShard, + AllocationService allocationService) { + allocation.setDebugMode(includeYesDecisions ? DebugMode.ON : DebugMode.EXCLUDE_YES_DECISIONS); ShardAllocationDecision shardDecision; @@ -99,10 +109,13 @@ public static ClusterAllocationExplanation explainShard(ShardRouting shardRoutin shardDecision = allocationService.explainShardAllocation(shardRouting, allocation); } - return new ClusterAllocationExplanation(shardRouting, + return new ClusterAllocationExplanation( + isSpecificShard, + shardRouting, shardRouting.currentNodeId() != null ? allocation.nodes().get(shardRouting.currentNodeId()) : null, shardRouting.relocatingNodeId() != null ? allocation.nodes().get(shardRouting.relocatingNodeId()) : null, - clusterInfo, shardDecision); + clusterInfo, + shardDecision); } // public for testing @@ -115,7 +128,9 @@ public static ShardRouting findShardToExplain(ClusterAllocationExplainRequest re foundShard = ui.next(); } if (foundShard == null) { - throw new IllegalArgumentException("unable to find any unassigned shards to explain [" + request + "]"); + throw new IllegalArgumentException("No shard was specified in the request which means the response should explain a " + + "randomly-chosen unassigned shard, but there are no unassigned shards in this cluster. To explain the allocation of " + + "an assigned shard you must specify the target shard in the request."); } } else { String index = request.getIndex(); diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainActionTests.java index a49f0df8278a3..cc2386b50b1f6 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainActionTests.java @@ -32,6 +32,8 @@ import java.util.Locale; import static org.elasticsearch.action.admin.cluster.allocation.TransportClusterAllocationExplainAction.findShardToExplain; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.containsString; /** * Tests for the {@link TransportClusterAllocationExplainAction} class. @@ -46,7 +48,12 @@ public void testInitializingOrRelocatingShardExplanation() throws Exception { ShardRouting shard = clusterState.getRoutingTable().index("idx").shard(0).primaryShard(); RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Collections.emptyList()), clusterState.getRoutingNodes(), clusterState, null, null, System.nanoTime()); - ClusterAllocationExplanation cae = TransportClusterAllocationExplainAction.explainShard(shard, allocation, null, randomBoolean(), + ClusterAllocationExplanation cae = TransportClusterAllocationExplainAction.explainShard( + shard, + allocation, + null, + randomBoolean(), + true, new AllocationService(null, new TestGatewayAllocator(), new ShardsAllocator() { @Override public void allocate(RoutingAllocation allocation) { @@ -64,6 +71,7 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing }, null, null)); assertEquals(shard.currentNodeId(), cae.getCurrentNode().getId()); + assertTrue(cae.isSpecificShard()); assertFalse(cae.getShardAllocationDecision().isDecisionTaken()); assertFalse(cae.getShardAllocationDecision().getAllocateDecision().isDecisionTaken()); assertFalse(cae.getShardAllocationDecision().getMoveDecision().isDecisionTaken()); @@ -110,8 +118,13 @@ public void testFindAnyUnassignedShardToExplain() { final ClusterState allStartedClusterState = ClusterStateCreationUtils.state("idx", randomBoolean(), ShardRoutingState.STARTED, ShardRoutingState.STARTED); final ClusterAllocationExplainRequest anyUnassignedShardsRequest = new ClusterAllocationExplainRequest(); - expectThrows(IllegalArgumentException.class, () -> - findShardToExplain(anyUnassignedShardsRequest, routingAllocation(allStartedClusterState))); + assertThat(expectThrows( + IllegalArgumentException.class, + () -> findShardToExplain(anyUnassignedShardsRequest, routingAllocation(allStartedClusterState))).getMessage(), + allOf( + // no point in asserting the precise wording of the message into this test, but we care that it contains these bits: + containsString("No shard was specified in the request"), + containsString("specify the target shard in the request"))); } public void testFindPrimaryShardToExplain() { diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanationTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanationTests.java index 73322267d69ba..2917adc4df256 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanationTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanationTests.java @@ -31,6 +31,9 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; /** * Tests for the cluster allocation explanation @@ -50,11 +53,12 @@ public void testDecisionEquality() { } public void testExplanationSerialization() throws Exception { - ClusterAllocationExplanation cae = randomClusterAllocationExplanation(randomBoolean()); + ClusterAllocationExplanation cae = randomClusterAllocationExplanation(randomBoolean(), randomBoolean()); BytesStreamOutput out = new BytesStreamOutput(); cae.writeTo(out); StreamInput in = out.bytes().streamInput(); ClusterAllocationExplanation cae2 = new ClusterAllocationExplanation(in); + assertEquals(cae.isSpecificShard(), cae2.isSpecificShard()); assertEquals(cae.getShard(), cae2.getShard()); assertEquals(cae.isPrimary(), cae2.isPrimary()); assertTrue(cae2.isPrimary()); @@ -73,7 +77,7 @@ public void testExplanationSerialization() throws Exception { } public void testExplanationToXContent() throws Exception { - ClusterAllocationExplanation cae = randomClusterAllocationExplanation(true); + ClusterAllocationExplanation cae = randomClusterAllocationExplanation(true, true); XContentBuilder builder = XContentFactory.jsonBuilder(); cae.toXContent(builder, ToXContent.EMPTY_PARAMS); assertEquals("{\"index\":\"idx\",\"shard\":0,\"primary\":true,\"current_state\":\"started\",\"current_node\":" + @@ -83,7 +87,25 @@ public void testExplanationToXContent() throws Exception { "that can both allocate this shard and improve the cluster balance\"}", Strings.toString(builder)); } - private static ClusterAllocationExplanation randomClusterAllocationExplanation(boolean assignedShard) { + public void testRandomShardExplanationToXContent() throws Exception { + ClusterAllocationExplanation cae = randomClusterAllocationExplanation(true, false); + XContentBuilder builder = XContentFactory.jsonBuilder(); + cae.toXContent(builder, ToXContent.EMPTY_PARAMS); + final String actual = Strings.toString(builder); + assertThat(actual, allOf( + equalTo("{\"note\":\"" + ClusterAllocationExplanation.NO_SHARD_SPECIFIED_MESSAGE + + "\",\"index\":\"idx\",\"shard\":0,\"primary\":true,\"current_state\":\"started\",\"current_node\":" + + "{\"id\":\"node-0\",\"name\":\"\",\"transport_address\":\"" + cae.getCurrentNode().getAddress() + + "\",\"weight_ranking\":3},\"can_remain_on_current_node\":\"yes\",\"can_rebalance_cluster\":\"yes\"," + + "\"can_rebalance_to_other_node\":\"no\",\"rebalance_explanation\":\"cannot rebalance as no target node exists " + + "that can both allocate this shard and improve the cluster balance\"}"), + // no point in asserting the precise wording of the message into this test, but we care that the note contains these bits: + containsString("No shard was specified in the explain API request"), + containsString("specify the target shard in the request") + )); + } + + private static ClusterAllocationExplanation randomClusterAllocationExplanation(boolean assignedShard, boolean specificShard) { ShardRouting shardRouting = TestShardRouting.newShardRouting(new ShardId(new Index("idx", "123"), 0), assignedShard ? "node-0" : null, true, assignedShard ? ShardRoutingState.STARTED : ShardRoutingState.UNASSIGNED); DiscoveryNode node = assignedShard ? new DiscoveryNode("node-0", buildNewFakeTransportAddress(), emptyMap(), emptySet(), @@ -97,6 +119,6 @@ private static ClusterAllocationExplanation randomClusterAllocationExplanation(b AllocateUnassignedDecision allocateDecision = AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.DECIDERS_NO, null); shardAllocationDecision = new ShardAllocationDecision(allocateDecision, MoveDecision.NOT_TAKEN); } - return new ClusterAllocationExplanation(shardRouting, node, null, null, shardAllocationDecision); + return new ClusterAllocationExplanation(specificShard, shardRouting, node, null, null, shardAllocationDecision); } }