From d45e2535c40b96250ab51f5e24f08cb5b5609f30 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Thu, 17 Nov 2022 16:43:18 +0100 Subject: [PATCH 1/8] Avoid NPE when disassociateDeadNodes --- .../allocator/DesiredBalanceReconciler.java | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java index 4ad4a4a61e543..a4d2e6a3370e1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java @@ -415,13 +415,17 @@ private DiscoveryNode findRelocationTarget( ) { for (final var nodeId : desiredNodeIds) { // TODO consider ignored nodes here too? - if (nodeId.equals(shardRouting.currentNodeId()) == false) { - final var currentNode = routingNodes.node(nodeId); - final var decision = canAllocateDecider.apply(shardRouting, currentNode); - logger.trace("relocate {} to {}: {}", shardRouting, nodeId, decision); - if (decision.type() == Decision.Type.YES) { - return currentNode.node(); - } + if (nodeId.equals(shardRouting.currentNodeId())) { + continue; + } + final var node = routingNodes.node(nodeId); + if (node == null) { // node theft the cluster while reconciliation is still in progress + continue; + } + final var decision = canAllocateDecider.apply(shardRouting, node); + logger.trace("relocate {} to {}: {}", shardRouting, nodeId, decision); + if (decision.type() == Decision.Type.YES) { + return node.node(); } } From 21d3771c5232f5162a84a143db8f9c1d359c7a5b Mon Sep 17 00:00:00 2001 From: Ievgen Degtiarenko Date: Thu, 17 Nov 2022 16:49:05 +0100 Subject: [PATCH 2/8] Update docs/changelog/91659.yaml --- docs/changelog/91659.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/91659.yaml diff --git a/docs/changelog/91659.yaml b/docs/changelog/91659.yaml new file mode 100644 index 0000000000000..54b7bcd90c8c8 --- /dev/null +++ b/docs/changelog/91659.yaml @@ -0,0 +1,5 @@ +pr: 91659 +summary: Avoid NPE when `disassociateDeadNodes` +area: Allocation +type: bug +issues: [] From c9e01fff8c1acf2ca02ce944287e0e0af424dcb5 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Thu, 17 Nov 2022 16:51:04 +0100 Subject: [PATCH 3/8] update issue yaml --- docs/changelog/91659.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/changelog/91659.yaml b/docs/changelog/91659.yaml index 54b7bcd90c8c8..f79057891d2e8 100644 --- a/docs/changelog/91659.yaml +++ b/docs/changelog/91659.yaml @@ -1,5 +1,6 @@ pr: 91659 -summary: Avoid NPE when `disassociateDeadNodes` +summary: Avoid NPE when disassociateDeadNodes is executed for a node present in the desired balance area: Allocation type: bug -issues: [] +issues: + - 91517 From 3b22560ded0b127ef74222f89630215e243b4383 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 18 Nov 2022 09:15:54 +0100 Subject: [PATCH 4/8] add unit test --- .../DesiredBalanceReconcilerTests.java | 68 ++++++++++++++++--- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java index 72204fb823263..3d0c8cbac7c4f 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.IndexShardRoutingTable; import org.elasticsearch.cluster.routing.RecoverySource; import org.elasticsearch.cluster.routing.RoutingChangesObserver; @@ -78,25 +79,22 @@ import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_INDEX_VERSION_CREATED; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; +import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED; +import static org.elasticsearch.cluster.routing.TestShardRouting.newShardRouting; import static org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING; import static org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_OUTGOING_RECOVERIES_SETTING; import static org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; import static org.hamcrest.Matchers.oneOf; public class DesiredBalanceReconcilerTests extends ESTestCase { public void testNoChangesOnEmptyDesiredBalance() { final var clusterState = DesiredBalanceComputerTests.createInitialClusterState(3); - final var routingAllocation = new RoutingAllocation( - new AllocationDeciders(List.of()), - clusterState.mutableRoutingNodes(), - clusterState, - ClusterInfo.EMPTY, - SnapshotShardSizeInfo.EMPTY, - 0L - ); + final var routingAllocation = createRoutingAllocationFrom(clusterState); reconcile(routingAllocation, new DesiredBalance(1, Map.of())); assertFalse(routingAllocation.routingNodesChanged()); @@ -1021,6 +1019,49 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocat assertThat(reroutedState.getRoutingNodes().node("node-1").shardsWithState(ShardRoutingState.RELOCATING), hasSize(1)); } + public void testDoNotRebalanceToTheNodeThatNoLongerExists() { + + var indexMetadata = IndexMetadata.builder("index-1") + .settings( + Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT) + ) + .system(randomBoolean()) + .build(); + final var index = indexMetadata.getIndex(); + final var shardId = new ShardId(index, 0); + + final var clusterState = ClusterState.builder(ClusterName.DEFAULT) + .nodes( + DiscoveryNodes.builder() + // data-node-1 left the cluster + .localNodeId("data-node-2") + .masterNodeId("data-node-2") + .add(new DiscoveryNode("data-node-2", buildNewFakeTransportAddress(), Version.CURRENT)) + ) + .metadata(Metadata.builder().put(indexMetadata, true)) + .routingTable( + RoutingTable.builder() + .add(IndexRoutingTable.builder(index).addShard(newShardRouting(shardId, "data-node-2", true, STARTED))) + ) + .build(); + + final var allocation = createRoutingAllocationFrom(clusterState); + final var balance = new DesiredBalance( + 1, + Map.of(shardId, new ShardAssignment(Set.of("data-node-1"), 1, 0, 0)) // shard is assigned to the node that has left + ); + + reconcile(allocation, balance); + + assertThat(allocation.routingNodes().node("data-node-1"), nullValue()); + assertThat(allocation.routingNodes().node("data-node-2"), notNullValue()); + // shard is kept wherever until balance is recalculated + assertThat(allocation.routingNodes().node("data-node-2").getByShardId(shardId), notNullValue()); + } + private static void reconcile(RoutingAllocation routingAllocation, DesiredBalance desiredBalance) { new DesiredBalanceReconciler(desiredBalance, routingAllocation, new NodeAllocationOrdering()).run(); } @@ -1037,6 +1078,17 @@ private static AllocationService createTestAllocationService( ); } + private static RoutingAllocation createRoutingAllocationFrom(ClusterState clusterState) { + return new RoutingAllocation( + new AllocationDeciders(List.of()), + clusterState.mutableRoutingNodes(), + clusterState, + ClusterInfo.EMPTY, + SnapshotShardSizeInfo.EMPTY, + 0L + ); + } + private static AllocationService createTestAllocationService( Consumer allocationConsumer, ClusterInfoService clusterInfoService, From a8b335fcfba7eebc7c59c0fabb8b63b4e2090c4c Mon Sep 17 00:00:00 2001 From: Ievgen Degtiarenko Date: Fri, 18 Nov 2022 09:16:25 +0100 Subject: [PATCH 5/8] Update docs/changelog/91659.yaml --- docs/changelog/91659.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/changelog/91659.yaml b/docs/changelog/91659.yaml index f79057891d2e8..80cc3f5bb5d73 100644 --- a/docs/changelog/91659.yaml +++ b/docs/changelog/91659.yaml @@ -1,6 +1,6 @@ pr: 91659 -summary: Avoid NPE when disassociateDeadNodes is executed for a node present in the desired balance +summary: Avoid NPE when disassociateDeadNodes is executed for a node present in the + desired balance area: Allocation type: bug -issues: - - 91517 +issues: [] From ddd5f66c35e3c3cbf6c188889f441f5507f95985 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 18 Nov 2022 09:27:11 +0100 Subject: [PATCH 6/8] cleanup --- .../allocator/DesiredBalanceReconcilerTests.java | 15 +++------------ .../DesiredBalanceShardsAllocatorTests.java | 7 +------ 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java index 3d0c8cbac7c4f..3b437926690dd 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java @@ -317,10 +317,6 @@ public void testUnassignedShardsInterleaving() { final var stateWithInitializingPrimaries = startInitializingShardsAndReroute(allocationService, clusterState); for (final var indexRoutingTable : stateWithInitializingPrimaries.routingTable()) { - for (int i = 0; i < indexRoutingTable.size(); i++) { - final var indexShardRoutingTable = indexRoutingTable.shard(i); - } - for (int i = 0; i < indexRoutingTable.size(); i++) { final var indexShardRoutingTable = indexRoutingTable.shard(i); assertTrue(indexShardRoutingTable.primaryShard().initializing()); @@ -1168,19 +1164,14 @@ private static DesiredBalance desiredBalance(ClusterState clusterState, BiPredic private static DiscoveryNodes discoveryNodes(int nodeCount) { final var discoveryNodes = DiscoveryNodes.builder(); for (var i = 0; i < nodeCount; i++) { - final var transportAddress = buildNewFakeTransportAddress(); - final var discoveryNode = new DiscoveryNode( + discoveryNodes.add(new DiscoveryNode( "node-" + i, "node-" + i, - UUIDs.randomBase64UUID(random()), - transportAddress.address().getHostString(), - transportAddress.getAddress(), - transportAddress, + buildNewFakeTransportAddress(), Map.of(), Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE), Version.CURRENT - ); - discoveryNodes.add(discoveryNode); + )); } discoveryNodes.masterNodeId("node-0").localNodeId("node-0"); return discoveryNodes.build(); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java index a4ed26dc7b74b..e41878f7107a3 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java @@ -33,7 +33,6 @@ import org.elasticsearch.cluster.service.ClusterApplierService; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.cluster.service.FakeThreadPoolMasterService; -import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.DeterministicTaskQueue; @@ -377,14 +376,10 @@ public void onFailure(Exception e) { } private static DiscoveryNode createDiscoveryNode(String nodeId) { - var transportAddress = buildNewFakeTransportAddress(); return new DiscoveryNode( nodeId, nodeId, - UUIDs.randomBase64UUID(random()), - transportAddress.address().getHostString(), - transportAddress.getAddress(), - transportAddress, + buildNewFakeTransportAddress(), Map.of(), Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE), Version.CURRENT From f37372dfa375de4649a5a93105df5b59d4f7e8ec Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 18 Nov 2022 09:40:15 +0100 Subject: [PATCH 7/8] fmt --- .../DesiredBalanceReconcilerTests.java | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java index 3b437926690dd..13df8b5e6af6f 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java @@ -1164,14 +1164,16 @@ private static DesiredBalance desiredBalance(ClusterState clusterState, BiPredic private static DiscoveryNodes discoveryNodes(int nodeCount) { final var discoveryNodes = DiscoveryNodes.builder(); for (var i = 0; i < nodeCount; i++) { - discoveryNodes.add(new DiscoveryNode( - "node-" + i, - "node-" + i, - buildNewFakeTransportAddress(), - Map.of(), - Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE), - Version.CURRENT - )); + discoveryNodes.add( + new DiscoveryNode( + "node-" + i, + "node-" + i, + buildNewFakeTransportAddress(), + Map.of(), + Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE), + Version.CURRENT + ) + ); } discoveryNodes.masterNodeId("node-0").localNodeId("node-0"); return discoveryNodes.build(); From 3917ec7e077a1e01f062c11127e2534912aea1fb Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 18 Nov 2022 10:20:55 +0100 Subject: [PATCH 8/8] fix comments --- .../allocation/allocator/DesiredBalanceReconciler.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java index a4d2e6a3370e1..6bf0ed8eeba05 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java @@ -419,7 +419,7 @@ private DiscoveryNode findRelocationTarget( continue; } final var node = routingNodes.node(nodeId); - if (node == null) { // node theft the cluster while reconciliation is still in progress + if (node == null) { // node left the cluster while reconciliation is still in progress continue; } final var decision = canAllocateDecider.apply(shardRouting, node); @@ -433,10 +433,12 @@ private DiscoveryNode findRelocationTarget( } private Decision decideCanAllocate(ShardRouting shardRouting, RoutingNode target) { + assert target != null : "Target node is not found"; return allocation.deciders().canAllocate(shardRouting, target, allocation); } private Decision decideCanForceAllocateForVacate(ShardRouting shardRouting, RoutingNode target) { + assert target != null : "Target node is not found"; return allocation.deciders().canForceAllocateDuringReplace(shardRouting, target, allocation); } }