From 33029bbd3a98eacab56054f8aeae6cc6b184bc2d Mon Sep 17 00:00:00 2001 From: Gaurav614 Date: Mon, 17 Jun 2019 20:11:19 +0530 Subject: [PATCH] Fix issue #41073: report RED cluster health when an index is created while the cluster has no data nodes. Adds a test case that reproduces the scenario in which there are no data nodes in the cluster and a user creates an index. Setting the last allocation status of unassigned primary shards that have had no allocation attempt to AllocationStatus.DECIDERS_NO when there are no data nodes fixes the issue. --- .../allocator/BalancedShardsAllocator.java | 33 +++- .../health/NoDataNodesHealthTests.java | 153 ++++++++++++++++++ 2 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 server/src/test/java/org/elasticsearch/cluster/health/NoDataNodesHealthTests.java diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index 6af6e6696e033..17d110a2f6478 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -29,6 +29,7 @@ import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRoutingState; +import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.cluster.routing.UnassignedInfo.AllocationStatus; import org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.elasticsearch.cluster.routing.allocation.AllocationDecision; @@ -115,7 +116,8 @@ private void setThreshold(float threshold) { @Override public void allocate(RoutingAllocation allocation) { if (allocation.routingNodes().size() == 0) { - /* with no nodes this is pointless */ + // If no data node then set AllocationStatus to DECIDERS_NO + setAllocationStatus(allocation); return; } final 
Balancer balancer = new Balancer(logger, allocation, weightFunction, threshold); @@ -141,6 +143,35 @@ public ShardAllocationDecision decideShardAllocation(final ShardRouting shard, f return new ShardAllocationDecision(allocateUnassignedDecision, moveDecision); } + /** + * This method is called when there are no data nodes in the cluster. + * + * Newly created unassigned primary shards, with no prior allocation attempts + * are classified as yellow instead of red by cluster health. + * This function explicitly sets their allocation status to DECIDERS_NO, to + * indicate red indices with unassigned shards. + */ + private void setAllocationStatus(RoutingAllocation allocation){ + RoutingNodes routingNodes = allocation.routingNodes(); + RoutingNodes.UnassignedShards unassignedShards = routingNodes.unassigned(); + if (unassignedShards.isEmpty()) { + return; + } + RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = unassignedShards.iterator(); + while (unassignedIterator.hasNext()) { + ShardRouting shard = unassignedIterator.next(); + UnassignedInfo shardInfo = shard.unassignedInfo(); + if (shard.primary() && shardInfo.getLastAllocationStatus() == AllocationStatus.NO_ATTEMPT) { + UnassignedInfo newInfo = new UnassignedInfo(shardInfo.getReason(), shardInfo.getMessage(), shardInfo.getFailure(), + shardInfo.getNumFailedAllocations(), shardInfo.getUnassignedTimeInNanos(), + shardInfo.getUnassignedTimeInMillis(), shardInfo.isDelayed(), + AllocationStatus.DECIDERS_NO); + unassignedIterator.updateUnassigned(newInfo, shard.recoverySource(), allocation.changes()); + } + } + } + + /** * Returns the currently configured delta threshold */ diff --git a/server/src/test/java/org/elasticsearch/cluster/health/NoDataNodesHealthTests.java b/server/src/test/java/org/elasticsearch/cluster/health/NoDataNodesHealthTests.java new file mode 100644 index 0000000000000..cffaa21c4a5f3 --- /dev/null +++ 
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.cluster.health;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ESAllocationTestCase;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.RoutingNodes;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.common.util.set.Sets;

import java.util.Collections;

import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING;
import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED;

/**
 * Covers the scenario in which the cluster has no data nodes and only a master node is
 * active. When an index is created in that situation, the cluster health status should
 * change to RED.
 */
public class NoDataNodesHealthTests extends ESAllocationTestCase {

    /** Index that exists before the data nodes are removed (and the only index in the simple scenario). */
    private static final String EXISTING_INDEX = "TestIndex";

    /** Index that is created after all data nodes have been removed. */
    private static final String NEW_INDEX = "NewTestIndex";

    /**
     * Builds a cluster state that contains a single master node, no data nodes and one
     * newly created index, then runs a reroute on it.
     *
     * @return the cluster state produced by the reroute
     */
    private ClusterState setUpClusterWithNoDataNodes() {
        DiscoveryNodes nodes = DiscoveryNodes.builder()
            .add(newNode("node_m", Collections.singleton(DiscoveryNode.Role.MASTER)))
            .build();
        MetaData metaData = MetaData.builder()
            .put(IndexMetaData.builder(EXISTING_INDEX)
                .settings(settings(Version.CURRENT))
                .numberOfShards(randomIntBetween(1, 3))
                .numberOfReplicas(randomIntBetween(0, 2)))
            .build();
        RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index(EXISTING_INDEX)).build();
        ClusterState state = ClusterState.builder(new ClusterName("test_cluster"))
            .nodes(nodes)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();
        MockAllocationService service = createAllocationService();
        return service.reroute(state, "reroute");
    }

    /**
     * A newly created index in a cluster without data nodes must result in RED cluster health.
     */
    public void testClusterHealthWithNoDataNodes() {
        ClusterState state = setUpClusterWithNoDataNodes();
        // expected value first, with a message, so that a failure explains itself
        assertEquals("the cluster must not contain any data node", 0, state.nodes().getDataNodes().size());
        assertTrue("the cluster must contain at least one master node", state.nodes().getMasterNodes().size() > 0);
        assertEquals(ClusterHealthStatus.RED, new ClusterStateHealth(state).getStatus());
    }

    /**
     * Starts with a cluster that has data nodes and an allocated index, removes every data
     * node and then creates a second index. Both indices and the cluster health should be
     * RED, but the last allocation status of the new index's shards must be DECIDERS_NO
     * while the old index's shards must carry a different status.
     */
    public void testAllocationStatusForTerminatedNodes() {
        // one master and two data nodes
        DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder()
            .add(newNode("node_m", Collections.singleton(DiscoveryNode.Role.MASTER)))
            .add(newNode("node_d1", Collections.singleton(DiscoveryNode.Role.DATA)))
            .add(newNode("node_d2", Collections.singleton(DiscoveryNode.Role.DATA)));
        MetaData metaData = MetaData.builder()
            .put(IndexMetaData.builder(EXISTING_INDEX)
                .settings(settings(Version.CURRENT))
                .numberOfShards(2)
                .numberOfReplicas(0))
            .build();
        RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index(EXISTING_INDEX)).build();
        ClusterState state = ClusterState.builder(new ClusterName("test_cluster"))
            .nodes(nodeBuilder.build())
            .metaData(metaData)
            .routingTable(routingTable)
            .build();
        MockAllocationService allocationService = createAllocationService();

        // allocate and start the shards of the existing index
        state = allocationService.reroute(state, "Test_allocation");
        state = allocationService.applyStartedShards(state, state.getRoutingNodes().shardsWithState(INITIALIZING));
        // record the started copies as in-sync allocation ids in the index metadata
        IndexMetaData.Builder idxMetaBuilder = IndexMetaData.builder(state.metaData().index(EXISTING_INDEX));
        for (final ShardRouting shard : state.getRoutingTable().index(EXISTING_INDEX).shardsWithState(STARTED)) {
            idxMetaBuilder.putInSyncAllocationIds(shard.getId(), Sets.newHashSet(shard.allocationId().getId()));
        }
        state = ClusterState.builder(state).metaData(MetaData.builder(state.metaData()).put(idxMetaBuilder)).build();
        // the cluster is green once the existing index is fully allocated
        assertEquals(ClusterHealthStatus.GREEN, new ClusterStateHealth(state).getStatus());

        // terminate both data nodes
        state = ClusterState.builder(state)
            .nodes(DiscoveryNodes.builder(state.getNodes())
                .remove("node_d1").remove("node_d2").build())
            .build();
        // remove the dead nodes from the routing table, with a reroute
        state = allocationService.deassociateDeadNodes(state, true, "Test_allocation");
        // the cluster goes red once the data nodes are gone
        assertEquals(ClusterHealthStatus.RED, new ClusterStateHealth(state).getStatus());

        // create the metadata of the new index
        metaData = MetaData.builder(state.metaData())
            .put(IndexMetaData.builder(NEW_INDEX)
                .settings(settings(Version.CURRENT))
                .numberOfShards(2)
                .numberOfReplicas(0))
            .build();
        state = ClusterState.builder(state)
            .metaData(metaData)
            .routingTable(RoutingTable.builder(state.getRoutingTable()).addAsNew(metaData.index(NEW_INDEX)).build())
            .build();
        // allocation round after the new index has been created
        state = allocationService.reroute(state, "no data nodes");
        assertEquals(ClusterHealthStatus.RED, new ClusterStateHealth(state).getStatus());

        RoutingNodes routingNodes = state.getRoutingNodes();
        RoutingNodes.UnassignedShards unassignedShards = routingNodes.unassigned();
        assertFalse(unassignedShards.isEmpty());

        // count the shards seen per index so that the status checks below cannot pass vacuously
        int existingIndexShards = 0;
        int newIndexShards = 0;
        RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = unassignedShards.iterator();
        while (unassignedIterator.hasNext()) {
            ShardRouting shard = unassignedIterator.next();
            UnassignedInfo.AllocationStatus lastStatus = shard.unassignedInfo().getLastAllocationStatus();
            if (EXISTING_INDEX.equals(shard.getIndexName())) {
                existingIndexShards++;
                assertNotEquals(UnassignedInfo.AllocationStatus.DECIDERS_NO, lastStatus);
            } else if (NEW_INDEX.equals(shard.getIndexName())) {
                newIndexShards++;
                assertEquals(UnassignedInfo.AllocationStatus.DECIDERS_NO, lastStatus);
            } else {
                fail("unexpected unassigned shard: " + shard);
            }
        }
        assertTrue("expected unassigned shards of [" + EXISTING_INDEX + "]", existingIndexShards > 0);
        assertTrue("expected unassigned shards of [" + NEW_INDEX + "]", newIndexShards > 0);
    }
}