From d1b8218d3cfe8f74a775e49c13b8d42f7c9372a3 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Wed, 28 Jul 2021 16:02:53 -0600 Subject: [PATCH 01/33] WIP, basic implementation --- .../elasticsearch/cluster/ClusterModule.java | 4 +- .../metadata/SingleNodeShutdownMetadata.java | 3 +- .../allocator/BalancedShardsAllocator.java | 31 ++- .../allocation/decider/AllocationDecider.java | 18 ++ .../decider/AwarenessAllocationDecider.java | 5 + .../ClusterRebalanceAllocationDecider.java | 4 +- .../decider/DiskThresholdDecider.java | 6 + .../decider/EnableAllocationDecider.java | 4 +- .../decider/MaxRetryAllocationDecider.java | 5 + .../NodeReplacementAllocationDecider.java | 172 ++++++++++++ .../decider/NodeVersionAllocationDecider.java | 5 + ...caAfterPrimaryActiveAllocationDecider.java | 5 + .../decider/ResizeAllocationDecider.java | 5 + .../RestoreInProgressAllocationDecider.java | 5 + .../decider/SameShardAllocationDecider.java | 5 + .../SnapshotInProgressAllocationDecider.java | 5 + .../decider/ThrottlingAllocationDecider.java | 5 + .../cluster/ClusterModuleTests.java | 2 + ...NodeReplacementAllocationDeciderTests.java | 248 ++++++++++++++++++ .../cluster/ESAllocationTestCase.java | 4 + .../core/ilm/SetSingleNodeAllocateStep.java | 4 +- .../xpack/shutdown/NodeShutdownShardsIT.java | 110 ++++++++ ...TransportGetShutdownStatusActionTests.java | 61 +++-- 23 files changed, 676 insertions(+), 40 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java create mode 100644 server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index a9678d2657e1b..3e068709f17df 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -38,6 +38,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.NodeReplacementAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeShutdownAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.RebalanceOnlyWhenActiveAllocationDecider; @@ -49,7 +50,6 @@ import org.elasticsearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.xcontent.ParseField; import org.elasticsearch.common.inject.AbstractModule; import org.elasticsearch.common.io.stream.NamedWriteable; import org.elasticsearch.common.io.stream.NamedWriteableRegistry.Entry; @@ -60,6 +60,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.ParseField; import org.elasticsearch.gateway.GatewayAllocator; import org.elasticsearch.indices.SystemIndices; import org.elasticsearch.ingest.IngestMetadata; @@ -202,6 +203,7 @@ public static Collection createAllocationDeciders(Settings se addAllocationDecider(deciders, new SnapshotInProgressAllocationDecider()); addAllocationDecider(deciders, new RestoreInProgressAllocationDecider()); addAllocationDecider(deciders, new NodeShutdownAllocationDecider()); + addAllocationDecider(deciders, new NodeReplacementAllocationDecider()); addAllocationDecider(deciders, new FilterAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new SameShardAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new DiskThresholdDecider(settings, clusterSettings)); diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/SingleNodeShutdownMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/SingleNodeShutdownMetadata.java index d2cf25f219303..ddd8e1bb2ad34 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/SingleNodeShutdownMetadata.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/SingleNodeShutdownMetadata.java @@ -12,6 +12,7 @@ import org.elasticsearch.Version; import org.elasticsearch.cluster.AbstractDiffable; import org.elasticsearch.cluster.Diffable; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ConstructingObjectParser; @@ -114,7 +115,7 @@ private SingleNodeShutdownMetadata( if (targetNodeName != null && type != Type.REPLACE) { throw new IllegalArgumentException(new ParameterizedMessage("target node name is only valid for REPLACE type shutdowns, " + "but was given type [{}] and target node name [{}]", type, targetNodeName).getFormattedMessage()); - } else if (targetNodeName == null && type == Type.REPLACE) { + } else if (Strings.hasText(targetNodeName) == false && type == Type.REPLACE) { throw new IllegalArgumentException("target node name is required for REPLACE type shutdowns"); } this.targetNodeName = targetNodeName; diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index 0b223d0e69c3d..650d7d101109e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -14,6 +14,7 @@ import org.apache.lucene.util.IntroSorter; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; import org.elasticsearch.cluster.routing.RoutingNode; import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.ShardRouting; @@ -47,6 +48,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.BiFunction; import java.util.stream.StreamSupport; import static org.elasticsearch.cluster.routing.ShardRoutingState.RELOCATING; @@ -671,7 +673,6 @@ public MoveDecision decideMove(final ShardRouting shardRouting) { return MoveDecision.NOT_TAKEN; } - final boolean explain = allocation.debugDecision(); final ModelNode sourceNode = nodes.get(shardRouting.currentNodeId()); assert sourceNode != null && sourceNode.containsShard(shardRouting); RoutingNode routingNode = sourceNode.getRoutingNode(); @@ -687,6 +688,21 @@ public MoveDecision decideMove(final ShardRouting shardRouting) { * This is not guaranteed to be balanced after this operation we still try best effort to * allocate on the minimal eligible node. */ + // don't use canRebalance as we want hard filtering rules to apply. See #17698 + MoveDecision moveDecision = decideMove(shardRouting, sourceNode, canRemain, this::decideCanAllocate); + if (moveDecision.forceMove() == false && + allocation.metadata().nodeShutdowns().values().stream() + .filter(s -> s.getType() == SingleNodeShutdownMetadata.Type.REPLACE) + .map(SingleNodeShutdownMetadata::getNodeId) + .anyMatch(shardRouting.currentNodeId()::equals)) { + return decideMove(shardRouting, sourceNode, canRemain, this::decideCanForceAllocateForVacate); + } + return moveDecision; + } + + private MoveDecision decideMove(ShardRouting shardRouting, ModelNode sourceNode, Decision remainDecision, + BiFunction decider) { + final boolean explain = allocation.debugDecision(); Type bestDecision = Type.NO; RoutingNode targetNode = null; final List nodeExplanationMap = explain ? new ArrayList<>() : null; @@ -694,8 +710,7 @@ public MoveDecision decideMove(final ShardRouting shardRouting) { for (ModelNode currentNode : sorter.modelNodes) { if (currentNode != sourceNode) { RoutingNode target = currentNode.getRoutingNode(); - // don't use canRebalance as we want hard filtering rules to apply. See #17698 - Decision allocationDecision = allocation.deciders().canAllocate(shardRouting, target, allocation); + Decision allocationDecision = decider.apply(shardRouting, target); if (explain) { nodeExplanationMap.add(new NodeAllocationResult( currentNode.getRoutingNode().node(), allocationDecision, ++weightRanking)); @@ -715,10 +730,18 @@ public MoveDecision decideMove(final ShardRouting shardRouting) { } } - return MoveDecision.cannotRemain(canRemain, AllocationDecision.fromDecisionType(bestDecision), + return MoveDecision.cannotRemain(remainDecision, AllocationDecision.fromDecisionType(bestDecision), targetNode != null ? targetNode.node() : null, nodeExplanationMap); } + private Decision decideCanAllocate(ShardRouting shardRouting, RoutingNode target) { + return allocation.deciders().canAllocate(shardRouting, target, allocation); + } + + private Decision decideCanForceAllocateForVacate(ShardRouting shardRouting, RoutingNode target) { + return allocation.deciders().canForceDuringVacate(shardRouting, target, allocation); + } + /** * Builds the internal model from all shards in the given * {@link Iterable}. All shards in the {@link Iterable} must be assigned diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecider.java index af2a67b7d9b8d..2c3daf0794156 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecider.java @@ -104,4 +104,22 @@ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode n return decision; } } + + /** + * Returns a {@link Decision} whether the given shard can be forced to the + * given node in the event that the shard's source node is being vacated. + * This allows nodes using a vacate-type node shutdown (replace/vacate) to + * override certain deciders in the interest of moving the shard away from + * a node that *must* be removed. + * + * It defaults to returning "YES" and must be overridden by deciders that + * opt-out to having their other NO decisions *not* overridden while vacating. + * + * The caller is responsible for first checking: + * - that a replacement/vacate is ongoing + * - the shard routing's current node is the source of the replacement + */ + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return Decision.YES; + } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java index d59e215161bb7..6dc5a01c987b0 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java @@ -120,6 +120,11 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return underCapacity(shardRouting, node, allocation, true); } + @Override + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, node, allocation); + } + @Override public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return underCapacity(shardRouting, node, allocation, false); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java index 92d850109930c..fc11bd4a31506 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java @@ -8,8 +8,6 @@ package org.elasticsearch.cluster.routing.allocation.decider; -import java.util.Locale; - import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.cluster.routing.RoutingNodes; @@ -20,6 +18,8 @@ import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; +import java.util.Locale; + /** * This {@link AllocationDecider} controls re-balancing operations based on the * cluster wide active shard state. This decided can not be configured in diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java index 468c3a670c6b7..d3a92224fb27e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java @@ -318,6 +318,12 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing new ByteSizeValue(freeBytesAfterShard)); } + @Override + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + // TODO: check whether we would reach 100% disk as a safety effort + return super.canForceDuringVacate(shardRouting, node, allocation); + } + private static final Decision YES_NOT_MOST_UTILIZED_DISK = Decision.single(Decision.Type.YES, NAME, "this shard is not allocated on the most utilized disk and can remain"); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/EnableAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/EnableAllocationDecider.java index 329775afaf0b7..455e26a1f6096 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/EnableAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/EnableAllocationDecider.java @@ -8,8 +8,6 @@ package org.elasticsearch.cluster.routing.allocation.decider; -import java.util.Locale; - import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.routing.RecoverySource; import org.elasticsearch.cluster.routing.RoutingNode; @@ -20,6 +18,8 @@ import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; +import java.util.Locale; + /** * This allocation decider allows shard allocations / rebalancing via the cluster wide settings * {@link #CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING} / {@link #CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING} and the per index setting diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java index ff76a1f3f1bdf..dbae5f5502f57 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java @@ -73,4 +73,9 @@ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode n // if so, we don't want to force the primary allocation here return canAllocate(shardRouting, node, allocation); } + + @Override + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, node, allocation); + } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java new file mode 100644 index 0000000000000..1cfe2851db1cb --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -0,0 +1,172 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; + +public class NodeReplacementAllocationDecider extends AllocationDecider { + + public static final String NAME = "node_replacement"; + + static final Decision NO_REPLACEMENTS = Decision.single(Decision.Type.YES, NAME, + "no node replacements are currently ongoing, allocation is allowed"); + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + final Metadata metadata = allocation.metadata(); + if (replacementOngoing(metadata) == false) { + return NO_REPLACEMENTS; + } else if (replacementFromSourceToTarget(metadata, shardRouting.currentNodeId(), node.nodeId())) { + return Decision.single(Decision.Type.YES, NAME, + "node [%s] is replacing node [%s], and may receive shards from it", shardRouting.currentNodeId(), node.nodeId()); + } else if (isReplacementSource(metadata, shardRouting.currentNodeId())) { + return Decision.single(Decision.Type.NO, NAME, + "node [%s] is being replaced, and its shards may only be allocated to the replacement target", + shardRouting.currentNodeId()); + } else if (isReplacementSource(metadata, node.nodeId())) { + return Decision.single(Decision.Type.NO, NAME, + "node [%s] is being removed, so no data from other nodes may be allocated to it", node.nodeId()); + } else if (isReplacementTargetName(metadata, node.node().getName())) { + return Decision.single(Decision.Type.NO, NAME, + "node [%s] is replacing a vacating node, so no data from other nodes " + + "may be allocated to it until the replacement is complete", node.nodeId()); + } else { + return Decision.single(Decision.Type.YES, NAME, + "neither the source nor target node are part of an ongoing node replacement"); + } + } + + @Override + public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + if (replacementOngoing(allocation.metadata()) == false) { + return NO_REPLACEMENTS; + } else if (isReplacementSource(allocation.metadata(), node.nodeId())) { + return Decision.single(Decision.Type.NO, NAME, + "node [%s] is being replaced by node [%s], so no data may remain on it", node.nodeId(), + getReplacementName(allocation.metadata(), node.nodeId())); + } else { + return Decision.single(Decision.Type.YES, NAME, "node [%s] is not being replaced", node.nodeId()); + } + } + + /** + * See the comment in the else branch of {@link #canAllocate(ShardRouting, RoutingNode, RoutingAllocation)} + * for a reason why we allow allocation that may potentially allocate to the source of a node + * replacement shutdown. + */ + @Override + public Decision canAllocate(IndexMetadata indexMetadata, RoutingNode node, RoutingAllocation allocation) { + if (replacementOngoing(allocation.metadata()) == false) { + return NO_REPLACEMENTS; + } else if (isReplacementTargetName(allocation.metadata(), node.node().getName())) { + return Decision.single(Decision.Type.NO, NAME, + "node [%s] is replacing a vacating node, so no data from other nodes " + + "may be allocated to it until the replacement is complete", node.nodeId()); + } else { + // The node in question is not a replacement target, so allow allocation. + return Decision.single(Decision.Type.YES, NAME, + "node is not a replacement target, so allocation is allowed"); + } + } + + @Override + public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNode node, RoutingAllocation allocation) { + if (replacementOngoing(allocation.metadata()) == false) { + return NO_REPLACEMENTS; + } else if (isReplacementTargetName(allocation.metadata(), node.getName())) { + return Decision.single(Decision.Type.NO, NAME, + "node [%s] is a node replacement target, shards cannot auto expand to be on it until the replacement is complete", + node.getId(), "source"); + } else if (isReplacementSource(allocation.metadata(), node.getId())) { + return Decision.single(Decision.Type.NO, NAME, + "node [%s] is being replaced, shards cannot auto expand to be on it", node.getId()); + } else { + return Decision.single(Decision.Type.YES, NAME, + "node is not part of a node replacement, so shards may be auto expanded onto it"); + } + } + + @Override + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + if (replacementFromSourceToTarget(allocation.metadata(), shardRouting.currentNodeId(), node.nodeId())) { + return Decision.single(Decision.Type.YES, NAME, + "node [%s] is being replaced by node [%s], and can be force vacated to the target", + shardRouting.currentNodeId(), node.nodeId()); + } else { + return Decision.single(Decision.Type.NO, NAME, + "shard is not on the source of a node replacement relocated to the replacement target"); + } + } + + /** + * Returns true if there are any node replacements ongoing in the cluster + */ + private static boolean replacementOngoing(Metadata metadata) { + return metadata.nodeShutdowns().values().stream() + .anyMatch(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)); + } + + /** + * Returns true if there is a replacement currently ongoing from the source to the target node id + */ + private static boolean replacementFromSourceToTarget(Metadata metadata, String sourceNodeId, String targetNodeId) { + if (replacementOngoing(metadata) == false) { + return false; + } + if (sourceNodeId == null || targetNodeId == null) { + return false; + } + return metadata.nodeShutdowns().values().stream() + .filter(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)) + .filter(shutdown -> shutdown.getNodeId().equals(sourceNodeId)) + .anyMatch(shutdown -> shutdown.getTargetNodeName().equals(targetNodeId)); + } + + /** + * Returns true if the given node id is the source (the replaced node) of an ongoing node replacement + */ + private static boolean isReplacementSource(Metadata metadata, String nodeId) { + if (nodeId == null || replacementOngoing(metadata) == false) { + return false; + } + return metadata.nodeShutdowns().values().stream() + .filter(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)) + .anyMatch(shutdown -> shutdown.getNodeId().equals(nodeId)); + } + + /** + * Returns true if the given node name (not the id!) is the target (the replacing node) of an ongoing node replacement + */ + private static boolean isReplacementTargetName(Metadata metadata, String nodeName) { + if (nodeName == null || replacementOngoing(metadata) == false) { + return false; + } + return metadata.nodeShutdowns().values().stream() + .filter(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)) + .anyMatch(shutdown -> shutdown.getTargetNodeName().equals(nodeName)); + } + + private static String getReplacementName(Metadata metadata, String nodeBeingReplaced) { + if (nodeBeingReplaced == null || replacementOngoing(metadata) == false) { + return null; + } + return metadata.nodeShutdowns().values().stream() + .filter(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)) + .filter(shutdown -> shutdown.getNodeId().equals(nodeBeingReplaced)) + .findFirst() + .map(SingleNodeShutdownMetadata::getTargetNodeName) + .orElse(null); + } +} diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeVersionAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeVersionAllocationDecider.java index 9259361ee3c3b..b50d5256bdcff 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeVersionAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeVersionAllocationDecider.java @@ -53,6 +53,11 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } } + @Override + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, node, allocation); + } + private Decision isVersionCompatibleRelocatePrimary(final RoutingNodes routingNodes, final String sourceNodeId, final RoutingNode target, final RoutingAllocation allocation) { final RoutingNode source = routingNodes.node(sourceNodeId); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ReplicaAfterPrimaryActiveAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ReplicaAfterPrimaryActiveAllocationDecider.java index 4020f46ec0f8e..b385f2e34cca4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ReplicaAfterPrimaryActiveAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ReplicaAfterPrimaryActiveAllocationDecider.java @@ -35,4 +35,9 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocat } return allocation.decision(Decision.YES, NAME, "primary shard for this replica is already active"); } + + @Override + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, node, allocation); + } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ResizeAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ResizeAllocationDecider.java index 292ed5c118928..00ec82050733e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ResizeAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ResizeAllocationDecider.java @@ -71,4 +71,9 @@ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode n assert shardRouting.primary() : "must not call canForceAllocatePrimary on a non-primary shard " + shardRouting; return canAllocate(shardRouting, node, allocation); } + + @Override + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, node, allocation); + } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RestoreInProgressAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RestoreInProgressAllocationDecider.java index 209e25fa20486..d5ab85cabe5ed 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RestoreInProgressAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RestoreInProgressAllocationDecider.java @@ -68,4 +68,9 @@ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode n assert shardRouting.primary() : "must not call canForceAllocatePrimary on a non-primary shard " + shardRouting; return canAllocate(shardRouting, node, allocation); } + + @Override + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, node, allocation); + } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java index 1f3f995deaf0d..9c8f2c274deeb 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java @@ -105,6 +105,11 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return YES_NONE_HOLD_COPY; } + @Override + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, node, allocation); + } + private static Decision debugNoAlreadyAllocatedToHost(RoutingNode node, RoutingAllocation allocation, boolean checkNodeOnSameHostAddress) { String hostType = checkNodeOnSameHostAddress ? "address" : "name"; diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java index 9561f7cb8ed19..48ccbb4de7e41 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java @@ -46,6 +46,11 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return canMove(shardRouting, allocation); } + @Override + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, node, allocation); + } + private Decision canMove(ShardRouting shardRouting, RoutingAllocation allocation) { if (shardRouting.primary()) { // Only primary shards are snapshotted diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java index 8c7bc6de7d6a1..0c4b115d4a351 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java @@ -166,6 +166,11 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } } + @Override + public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, node, allocation); + } + /** * The shard routing passed to {@link #canAllocate(ShardRouting, RoutingNode, RoutingAllocation)} is not the initializing shard to this * node but: diff --git a/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java b/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java index b966d428ce0c6..5eb4479767f6d 100644 --- a/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.NodeReplacementAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeShutdownAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.RebalanceOnlyWhenActiveAllocationDecider; @@ -195,6 +196,7 @@ public void testAllocationDeciderOrder() { SnapshotInProgressAllocationDecider.class, RestoreInProgressAllocationDecider.class, NodeShutdownAllocationDecider.class, + NodeReplacementAllocationDecider.class, FilterAllocationDecider.class, SameShardAllocationDecider.class, DiskThresholdDecider.class, diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java new file mode 100644 index 0000000000000..41484108e86c4 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java @@ -0,0 +1,248 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ESAllocationTestCase; +import org.elasticsearch.cluster.EmptyClusterInfoService; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.metadata.NodesShutdownMetadata; +import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeRole; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.RecoverySource; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.UnassignedInfo; +import org.elasticsearch.cluster.routing.allocation.AllocationService; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.snapshots.EmptySnapshotsInfoService; +import org.elasticsearch.test.gateway.TestGatewayAllocator; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + +public class NodeReplacementAllocationDeciderTests extends ESAllocationTestCase { + private static final DiscoveryNode NODE_A = newNode("node-a", "node-a", Collections.singleton(DiscoveryNodeRole.DATA_ROLE)); + private static final DiscoveryNode NODE_B = newNode("node-b", "node-b", Collections.singleton(DiscoveryNodeRole.DATA_ROLE)); + private static final DiscoveryNode NODE_C = newNode("node-c", "node-c", Collections.singleton(DiscoveryNodeRole.DATA_ROLE)); + private final ShardRouting shard = ShardRouting.newUnassigned( + new ShardId("myindex", "myindex", 0), + true, + RecoverySource.EmptyStoreRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "index created") + ); + private final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + private NodeReplacementAllocationDecider decider = new NodeReplacementAllocationDecider(); + private final AllocationDeciders allocationDeciders = new AllocationDeciders( + Arrays.asList( + decider, + new SameShardAllocationDecider(Settings.EMPTY, clusterSettings), + new ReplicaAfterPrimaryActiveAllocationDecider(), + new NodeShutdownAllocationDecider() + ) + ); + private final AllocationService service = new AllocationService( + allocationDeciders, + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + EmptyClusterInfoService.INSTANCE, + EmptySnapshotsInfoService.INSTANCE + ); + + private final String idxName = "test-idx"; + private final String idxUuid = "test-idx-uuid"; + private final IndexMetadata indexMetadata = IndexMetadata.builder(idxName) + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, idxUuid) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build() + ) + .build(); + + public void testNoReplacements() { + ClusterState state = ClusterState.builder(ClusterState.EMPTY_STATE) + .nodes(DiscoveryNodes.builder() + .add(NODE_A) + .add(NODE_B) + .add(NODE_C) + .build()) + .build(); + + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + DiscoveryNode node = randomFrom(NODE_A, NODE_B, NODE_C); + RoutingNode routingNode = new RoutingNode(node.getId(), node, shard); + allocation.debugDecision(true); + + Decision decision = decider.canAllocate(shard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat( + decision.getExplanation(), + equalTo(NodeReplacementAllocationDecider.NO_REPLACEMENTS.getExplanation()) + ); + + decision = decider.canRemain(shard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat( + decision.getExplanation(), + equalTo(NodeReplacementAllocationDecider.NO_REPLACEMENTS.getExplanation()) + ); + } + + public void testCanForceAllocate() { + ClusterState state = prepareState(service.reroute(ClusterState.EMPTY_STATE, "initial state"), NODE_A.getId(), NODE_B.getName()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + RoutingNode routingNode = new RoutingNode(NODE_A.getId(), NODE_A, shard); + allocation.debugDecision(true); + + ShardRouting assignedShard = ShardRouting.newUnassigned( + new ShardId("myindex", "myindex", 0), + true, + RecoverySource.EmptyStoreRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "index created") + ); + assignedShard = assignedShard.initialize(NODE_A.getId(), null, 1); + assignedShard = assignedShard.moveToStarted(); + + Decision decision = decider.canForceDuringVacate(assignedShard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.NO)); + assertThat( + decision.getExplanation(), + equalTo("shard is not on the source of a node replacement relocated to the replacement target") + ); + + routingNode = new RoutingNode(NODE_B.getId(), NODE_B, assignedShard); + + decision = decider.canForceDuringVacate(assignedShard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat(decision.getExplanation(), + equalTo("node [" + NODE_A.getId() + "] is being replaced by node [" + NODE_B.getId() + + "], and can be force vacated to the target")); + + routingNode = new RoutingNode(NODE_C.getId(), NODE_C, assignedShard); + + decision = decider.canForceDuringVacate(assignedShard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.NO)); + assertThat(decision.getExplanation(), + equalTo("shard is not on the source of a node replacement relocated to the replacement target")); + } + + public void testCanAllocateFromTheVoid() { + ClusterState state = prepareState(service.reroute(ClusterState.EMPTY_STATE, "initial state"), NODE_A.getId(), NODE_B.getName()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + RoutingNode routingNode = new RoutingNode(NODE_A.getId(), NODE_A, shard); + allocation.debugDecision(true); + + Decision decision = decider.canAllocate(indexMetadata, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat(decision.getExplanation(), equalTo("node is not a replacement target, so allocation is allowed")); + + routingNode = new RoutingNode(NODE_B.getId(), NODE_B, shard); + + decision = decider.canAllocate(indexMetadata, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.NO)); + assertThat(decision.getExplanation(), equalTo("node [" + NODE_B.getId() + + "] is replacing a vacating node, so no data from other nodes may be allocated to it until the replacement is complete")); + + routingNode = new RoutingNode(NODE_C.getId(), NODE_C, shard); + + decision = decider.canAllocate(indexMetadata, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat(decision.getExplanation(), equalTo("node is not a replacement target, so allocation is allowed")); + } + + public void testCannotRemainOnReplacedNode() { + ClusterState state = prepareState(service.reroute(ClusterState.EMPTY_STATE, "initial state"), NODE_A.getId(), NODE_B.getName()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + RoutingNode routingNode = new RoutingNode(NODE_A.getId(), NODE_A, shard); + allocation.debugDecision(true); + + Decision decision = decider.canRemain(shard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.NO)); + assertThat( + decision.getExplanation(), + equalTo("node [" + NODE_A.getId() + "] is being replaced by node [" + NODE_B.getId() + "], so no data may remain on it") + ); + + routingNode = new RoutingNode(NODE_B.getId(), NODE_B, shard); + + decision = decider.canRemain(shard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat(decision.getExplanation(), equalTo("node [" + NODE_B.getId() + "] is not being replaced")); + } + + public void testCanAllocateToNeitherSourceNorTarget() { + ClusterState state = prepareState(service.reroute(ClusterState.EMPTY_STATE, "initial state"), NODE_A.getId(), NODE_B.getName()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + RoutingNode routingNode = new RoutingNode(NODE_A.getId(), NODE_A, shard); + allocation.debugDecision(true); + + Decision decision = decider.canAllocate(shard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.NO)); + assertThat( + decision.getExplanation(), + equalTo("node [" + NODE_A.getId() + "] is being removed, so no data from other nodes may be allocated to it") + ); + + routingNode = new RoutingNode(NODE_B.getId(), NODE_B, shard); + + decision = decider.canAllocate(shard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.NO)); + assertThat( + decision.getExplanation(), + equalTo("node [" + NODE_B.getId() + + "] is replacing a vacating node, so no data from other nodes may be allocated to it until the replacement is complete") + ); + + routingNode = new RoutingNode(NODE_C.getId(), NODE_C, shard); + + decision = decider.canAllocate(shard, routingNode, allocation); + assertThat(decision.getExplanation(), decision.type(), equalTo(Decision.Type.YES)); + assertThat( + decision.getExplanation(), + containsString("neither the source nor target node are part of an ongoing node replacement") + ); + } + + private ClusterState prepareState(ClusterState initialState, String sourceNodeId, String targetNodeName) { + final SingleNodeShutdownMetadata nodeShutdownMetadata = SingleNodeShutdownMetadata.builder() + .setNodeId(sourceNodeId) + .setTargetNodeName(targetNodeName) + .setType(SingleNodeShutdownMetadata.Type.REPLACE) + .setReason(this.getTestName()) + .setStartedAtMillis(1L) + .build(); + NodesShutdownMetadata nodesShutdownMetadata = new NodesShutdownMetadata(new HashMap<>()).putSingleNodeMetadata( + nodeShutdownMetadata + ); + return ClusterState.builder(initialState) + .nodes(DiscoveryNodes.builder() + .add(NODE_A) + .add(NODE_B) + .add(NODE_C) + .build()) + .metadata(Metadata.builder().put(IndexMetadata.builder(indexMetadata)) + .putCustom(NodesShutdownMetadata.TYPE, nodesShutdownMetadata)) + .build(); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java b/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java index 502e03a8c8e7d..1d6bc2b7b1dc6 100644 --- a/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java @@ -128,6 +128,10 @@ protected static DiscoveryNode newNode(String nodeId, Set rol return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), emptyMap(), roles, Version.CURRENT); } + protected static DiscoveryNode newNode(String nodeName, String nodeId, Set roles) { + return new DiscoveryNode(nodeName, nodeId, buildNewFakeTransportAddress(), emptyMap(), roles, Version.CURRENT); + } + protected static DiscoveryNode newNode(String nodeId, Version version) { return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), emptyMap(), MASTER_DATA_ROLES, version); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java index 934c345be9c4d..decf1f6a598b7 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java @@ -22,6 +22,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; import org.elasticsearch.cluster.routing.allocation.decider.Decision; import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.NodeReplacementAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeShutdownAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; @@ -68,7 +69,8 @@ public void performAction(IndexMetadata indexMetadata, ClusterState clusterState new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), new DataTierAllocationDecider(), new NodeVersionAllocationDecider(), - new NodeShutdownAllocationDecider() + new NodeShutdownAllocationDecider(), + new NodeReplacementAllocationDecider() )); final RoutingNodes routingNodes = clusterState.getRoutingNodes(); RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, null, diff --git a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java index 098e028d4118d..ae7ab1fc2c5cd 100644 --- a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java +++ b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java @@ -8,11 +8,15 @@ package org.elasticsearch.xpack.shutdown; import org.elasticsearch.Build; +import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse; import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse; import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.allocation.decider.Decision; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; @@ -20,8 +24,10 @@ import java.util.Arrays; import java.util.Collection; +import java.util.stream.Collectors; import static org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata.Status.COMPLETE; +import static org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata.Status.STALLED; import static org.hamcrest.Matchers.equalTo; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, numClientNodes = 0) @@ -137,6 +143,110 @@ public void testShardStatusIsCompleteOnNonDataNodes() throws Exception { assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE)); } + public void testNodeReplacementOverridesFilters() throws Exception { + String nodeA = internalCluster().startNode(Settings.builder().put("node.name", "node-a")); + // Create an index and pin it to nodeA, when we replace it with nodeB, + // it'll move the data, overridding the `_name` allocation filter + createIndex( + "myindex", + Settings.builder() + .put("index.routing.allocation.require._name", nodeA) + .put("index.number_of_shards", 3) + .put("index.number_of_replicas", 0) + .build() + ); + final String nodeAId = getNodeId(nodeA); + final String nodeB = "node_t1"; // TODO: fix this to so it's actually overrideable + + // Mark the nodeA as being replaced + PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request( + nodeAId, + SingleNodeShutdownMetadata.Type.REPLACE, + this.getTestName(), + null, + nodeB + ); + AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get(); + assertTrue(putShutdownResponse.isAcknowledged()); + + GetShutdownStatusAction.Response getResp = client().execute( + GetShutdownStatusAction.INSTANCE, + new GetShutdownStatusAction.Request(nodeAId) + ).get(); + + assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(STALLED)); + + internalCluster().startNode(Settings.builder().put("node.name", nodeB)); + final String nodeBId = getNodeId(nodeB); + + logger.info("--> NodeA: {} -- {}", nodeA, nodeAId); + logger.info("--> NodeB: {} -- {}", nodeB, nodeBId); + + assertBusy(() -> { + ClusterState state = client().admin().cluster().prepareState().clear().setRoutingTable(true).get().getState(); + for (ShardRouting sr : state.routingTable().allShards("myindex")) { + assertThat( + "expected shard on nodeB (" + nodeBId + ") but it was on a different node", + sr.currentNodeId(), + equalTo(nodeBId) + ); + } + }); + + assertBusy(() -> { + GetShutdownStatusAction.Response shutdownStatus = client().execute( + GetShutdownStatusAction.INSTANCE, + new GetShutdownStatusAction.Request(nodeAId) + ).get(); + assertThat(shutdownStatus.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE)); + }); + + final String nodeC = internalCluster().startNode(); + + createIndex("other", Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 1).build()); + + ensureYellow("other"); + + // Explain the replica for the "other" index + ClusterAllocationExplainResponse explainResponse = client().admin() + .cluster() + .prepareAllocationExplain() + .setIndex("other") + .setShard(0) + .setPrimary(false) + .get(); + + // Validate that the replica cannot be allocated to nodeB because it's the target of a node replacement + explainResponse.getExplanation() + .getShardAllocationDecision() + .getAllocateDecision() + .getNodeDecisions() + .stream() + .filter(nodeDecision -> nodeDecision.getNode().getId().equals(nodeBId)) + .findFirst() + .ifPresentOrElse(nodeAllocationResult -> { + assertThat(nodeAllocationResult.getCanAllocateDecision().type(), equalTo(Decision.Type.NO)); + assertTrue( + "expected decisions to mention node replacement: " + + nodeAllocationResult.getCanAllocateDecision() + .getDecisions() + .stream() + .map(Decision::getExplanation) + .collect(Collectors.joining(",")), + nodeAllocationResult.getCanAllocateDecision() + .getDecisions() + .stream() + .anyMatch( + decision -> decision.getExplanation() + .contains( + "is replacing a vacating node, so no data from " + + "other nodes may be allocated to it until the replacement is complete" + ) + ) + ); + }, () -> fail("expected a 'NO' decision for nodeB but there was no explanation for that node")); + } + private String getNodeId(String nodeName) throws Exception { NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get(); return nodes.getNodes() diff --git a/x-pack/plugin/shutdown/src/test/java/org/elasticsearch/xpack/shutdown/TransportGetShutdownStatusActionTests.java b/x-pack/plugin/shutdown/src/test/java/org/elasticsearch/xpack/shutdown/TransportGetShutdownStatusActionTests.java index 1aaa13e79b746..f9a9ad55a2b25 100644 --- a/x-pack/plugin/shutdown/src/test/java/org/elasticsearch/xpack/shutdown/TransportGetShutdownStatusActionTests.java +++ b/x-pack/plugin/shutdown/src/test/java/org/elasticsearch/xpack/shutdown/TransportGetShutdownStatusActionTests.java @@ -30,6 +30,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; import org.elasticsearch.cluster.routing.allocation.decider.Decision; +import org.elasticsearch.cluster.routing.allocation.decider.NodeReplacementAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeShutdownAllocationDecider; import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.settings.Settings; @@ -78,35 +79,37 @@ private void setup() { canRemain.set((r, n, a) -> { throw new UnsupportedOperationException("canRemain not initiated in this test"); }); clusterInfoService = EmptyClusterInfoService.INSTANCE; - allocationDeciders = new AllocationDeciders(List.of(new NodeShutdownAllocationDecider(), new AllocationDecider() { - @Override - public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - return canAllocate.get().test(shardRouting, node, allocation); - } - - @Override - public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation allocation) { - // No behavior should change based on rebalance decisions - return Decision.NO; - } - - @Override - public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - return canRemain.get().test(shardRouting, node, allocation); - } - - @Override - public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNode node, RoutingAllocation allocation) { - // No behavior relevant to these tests should change based on auto expansion decisions - throw new UnsupportedOperationException(); - } - - @Override - public Decision canRebalance(RoutingAllocation allocation) { - // No behavior should change based on rebalance decisions - return Decision.NO; - } - })); + allocationDeciders = new AllocationDeciders( + List.of(new NodeShutdownAllocationDecider(), new NodeReplacementAllocationDecider(), new AllocationDecider() { + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate.get().test(shardRouting, node, allocation); + } + + @Override + public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation allocation) { + // No behavior should change based on rebalance decisions + return Decision.NO; + } + + @Override + public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canRemain.get().test(shardRouting, node, allocation); + } + + @Override + public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNode node, RoutingAllocation allocation) { + // No behavior relevant to these tests should change based on auto expansion decisions + throw new UnsupportedOperationException(); + } + + @Override + public Decision canRebalance(RoutingAllocation allocation) { + // No behavior should change based on rebalance decisions + return Decision.NO; + } + }) + ); snapshotsInfoService = () -> new SnapshotShardSizeInfo( new ImmutableOpenMap.Builder().build() ); From fd0c5973345c6bd7e360150089e9ccadb086377f Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Mon, 4 Oct 2021 13:19:47 -0600 Subject: [PATCH 02/33] Pull `if` branch into a variable --- .../allocation/allocator/BalancedShardsAllocator.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index 650d7d101109e..7d69f3fe28075 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -690,11 +690,11 @@ public MoveDecision decideMove(final ShardRouting shardRouting) { */ // don't use canRebalance as we want hard filtering rules to apply. See #17698 MoveDecision moveDecision = decideMove(shardRouting, sourceNode, canRemain, this::decideCanAllocate); - if (moveDecision.forceMove() == false && - allocation.metadata().nodeShutdowns().values().stream() - .filter(s -> s.getType() == SingleNodeShutdownMetadata.Type.REPLACE) - .map(SingleNodeShutdownMetadata::getNodeId) - .anyMatch(shardRouting.currentNodeId()::equals)) { + final boolean shardsOnReplacedNodes = allocation.metadata().nodeShutdowns().values().stream() + .filter(s -> s.getType() == SingleNodeShutdownMetadata.Type.REPLACE) + .map(SingleNodeShutdownMetadata::getNodeId) + .anyMatch(shardRouting.currentNodeId()::equals); + if (moveDecision.forceMove() == false && shardsOnReplacedNodes) { return decideMove(shardRouting, sourceNode, canRemain, this::decideCanForceAllocateForVacate); } return moveDecision; From 41c6238c05ddf0b5b5bec974dbdb72875df32113 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Mon, 4 Oct 2021 13:20:29 -0600 Subject: [PATCH 03/33] Remove outdated javadoc --- .../allocation/decider/NodeReplacementAllocationDecider.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index 1cfe2851db1cb..3080b5e10d78d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -61,11 +61,6 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl } } - /** - * See the comment in the else branch of {@link #canAllocate(ShardRouting, RoutingNode, RoutingAllocation)} - * for a reason why we allow allocation that may potentially allocate to the source of a node - * replacement shutdown. - */ @Override public Decision canAllocate(IndexMetadata indexMetadata, RoutingNode node, RoutingAllocation allocation) { if (replacementOngoing(allocation.metadata()) == false) { From a96e5964f83b339696a98c0522abe9c06e932f16 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Mon, 4 Oct 2021 13:23:40 -0600 Subject: [PATCH 04/33] Remove map iteration, use target name instead of id (whoops) --- .../NodeReplacementAllocationDecider.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index 3080b5e10d78d..5f05abe706f84 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -28,7 +28,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing final Metadata metadata = allocation.metadata(); if (replacementOngoing(metadata) == false) { return NO_REPLACEMENTS; - } else if (replacementFromSourceToTarget(metadata, shardRouting.currentNodeId(), node.nodeId())) { + } else if (replacementFromSourceToTarget(metadata, shardRouting.currentNodeId(), node.node().getName())) { return Decision.single(Decision.Type.YES, NAME, "node [%s] is replacing node [%s], and may receive shards from it", shardRouting.currentNodeId(), node.nodeId()); } else if (isReplacementSource(metadata, shardRouting.currentNodeId())) { @@ -95,7 +95,7 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod @Override public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - if (replacementFromSourceToTarget(allocation.metadata(), shardRouting.currentNodeId(), node.nodeId())) { + if (replacementFromSourceToTarget(allocation.metadata(), shardRouting.currentNodeId(), node.node().getName())) { return Decision.single(Decision.Type.YES, NAME, "node [%s] is being replaced by node [%s], and can be force vacated to the target", shardRouting.currentNodeId(), node.nodeId()); @@ -116,17 +116,18 @@ private static boolean replacementOngoing(Metadata metadata) { /** * Returns true if there is a replacement currently ongoing from the source to the target node id */ - private static boolean replacementFromSourceToTarget(Metadata metadata, String sourceNodeId, String targetNodeId) { + private static boolean replacementFromSourceToTarget(Metadata metadata, String sourceNodeId, String targetNodeName) { if (replacementOngoing(metadata) == false) { return false; } - if (sourceNodeId == null || targetNodeId == null) { + if (sourceNodeId == null || targetNodeName == null) { return false; } - return metadata.nodeShutdowns().values().stream() - .filter(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)) - .filter(shutdown -> shutdown.getNodeId().equals(sourceNodeId)) - .anyMatch(shutdown -> shutdown.getTargetNodeName().equals(targetNodeId)); + final SingleNodeShutdownMetadata shutdown = metadata.nodeShutdowns().get(sourceNodeId); + return shutdown != null && + shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE) && + shutdown.getNodeId().equals(sourceNodeId) && + shutdown.getTargetNodeName().equals(targetNodeName); } /** From 238112a36c506ba0a133c677e8151ed0ac03e2e7 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Mon, 4 Oct 2021 13:25:51 -0600 Subject: [PATCH 05/33] Remove streaming from isReplacementSource --- .../allocation/decider/NodeReplacementAllocationDecider.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index 5f05abe706f84..8bb18f3ea960d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -137,9 +137,8 @@ private static boolean isReplacementSource(Metadata metadata, String nodeId) { if (nodeId == null || replacementOngoing(metadata) == false) { return false; } - return metadata.nodeShutdowns().values().stream() - .filter(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)) - .anyMatch(shutdown -> shutdown.getNodeId().equals(nodeId)); + return metadata.nodeShutdowns().containsKey(nodeId) && + metadata.nodeShutdowns().get(nodeId).getType().equals(SingleNodeShutdownMetadata.Type.REPLACE); } /** From 2402803624a56d4fec911125bba8906bf5451a46 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Mon, 4 Oct 2021 13:28:19 -0600 Subject: [PATCH 06/33] Simplify getReplacementName --- .../decider/NodeReplacementAllocationDecider.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index 8bb18f3ea960d..620c26c267c62 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -16,6 +16,8 @@ import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import java.util.Optional; + public class NodeReplacementAllocationDecider extends AllocationDecider { public static final String NAME = "node_replacement"; @@ -153,14 +155,12 @@ private static boolean isReplacementTargetName(Metadata metadata, String nodeNam .anyMatch(shutdown -> shutdown.getTargetNodeName().equals(nodeName)); } - private static String getReplacementName(Metadata metadata, String nodeBeingReplaced) { - if (nodeBeingReplaced == null || replacementOngoing(metadata) == false) { + private static String getReplacementName(Metadata metadata, String nodeIdBeingReplaced) { + if (nodeIdBeingReplaced == null || replacementOngoing(metadata) == false) { return null; } - return metadata.nodeShutdowns().values().stream() + return Optional.ofNullable(metadata.nodeShutdowns().get(nodeIdBeingReplaced)) .filter(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)) - .filter(shutdown -> shutdown.getNodeId().equals(nodeBeingReplaced)) - .findFirst() .map(SingleNodeShutdownMetadata::getTargetNodeName) .orElse(null); } From ff30a2a7663895ec542e10c31e751e62d0da2e49 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 13:30:57 -0600 Subject: [PATCH 07/33] Only calculate node shutdowns if canRemain==false and forceMove==false --- .../allocator/BalancedShardsAllocator.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index 7d69f3fe28075..2e3fb079295a9 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -690,12 +690,14 @@ public MoveDecision decideMove(final ShardRouting shardRouting) { */ // don't use canRebalance as we want hard filtering rules to apply. See #17698 MoveDecision moveDecision = decideMove(shardRouting, sourceNode, canRemain, this::decideCanAllocate); - final boolean shardsOnReplacedNodes = allocation.metadata().nodeShutdowns().values().stream() - .filter(s -> s.getType() == SingleNodeShutdownMetadata.Type.REPLACE) - .map(SingleNodeShutdownMetadata::getNodeId) - .anyMatch(shardRouting.currentNodeId()::equals); - if (moveDecision.forceMove() == false && shardsOnReplacedNodes) { - return decideMove(shardRouting, sourceNode, canRemain, this::decideCanForceAllocateForVacate); + if (moveDecision.canRemain() == false && moveDecision.forceMove() == false) { + final boolean shardsOnReplacedNodes = allocation.metadata().nodeShutdowns().values().stream() + .filter(s -> s.getType() == SingleNodeShutdownMetadata.Type.REPLACE) + .map(SingleNodeShutdownMetadata::getNodeId) + .anyMatch(shardRouting.currentNodeId()::equals); + if (shardsOnReplacedNodes) { + return decideMove(shardRouting, sourceNode, canRemain, this::decideCanForceAllocateForVacate); + } } return moveDecision; } From 47500c7c691708f29258ee0b7e6180ac87aac9dc Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 13:31:17 -0600 Subject: [PATCH 08/33] Move canRebalance comment in BalancedShardsAllocator --- .../routing/allocation/allocator/BalancedShardsAllocator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index 2e3fb079295a9..fc3a6377e32f9 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -688,7 +688,6 @@ public MoveDecision decideMove(final ShardRouting shardRouting) { * This is not guaranteed to be balanced after this operation we still try best effort to * allocate on the minimal eligible node. */ - // don't use canRebalance as we want hard filtering rules to apply. See #17698 MoveDecision moveDecision = decideMove(shardRouting, sourceNode, canRemain, this::decideCanAllocate); if (moveDecision.canRemain() == false && moveDecision.forceMove() == false) { final boolean shardsOnReplacedNodes = allocation.metadata().nodeShutdowns().values().stream() @@ -737,6 +736,7 @@ private MoveDecision decideMove(ShardRouting shardRouting, ModelNode sourceNode, } private Decision decideCanAllocate(ShardRouting shardRouting, RoutingNode target) { + // don't use canRebalance as we want hard filtering rules to apply. See #17698 return allocation.deciders().canAllocate(shardRouting, target, allocation); } From 9ded9f35950806cf226555b3bfd02c9db070998d Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 13:32:57 -0600 Subject: [PATCH 09/33] Rename canForceDuringVacate -> canForceAllocateDuringReplace --- .../allocation/allocator/BalancedShardsAllocator.java | 2 +- .../routing/allocation/decider/AllocationDecider.java | 8 ++++---- .../allocation/decider/AwarenessAllocationDecider.java | 2 +- .../routing/allocation/decider/DiskThresholdDecider.java | 4 ++-- .../allocation/decider/MaxRetryAllocationDecider.java | 2 +- .../decider/NodeReplacementAllocationDecider.java | 2 +- .../allocation/decider/NodeVersionAllocationDecider.java | 2 +- .../ReplicaAfterPrimaryActiveAllocationDecider.java | 2 +- .../allocation/decider/ResizeAllocationDecider.java | 2 +- .../decider/RestoreInProgressAllocationDecider.java | 2 +- .../allocation/decider/SameShardAllocationDecider.java | 2 +- .../decider/SnapshotInProgressAllocationDecider.java | 2 +- .../allocation/decider/ThrottlingAllocationDecider.java | 2 +- .../decider/NodeReplacementAllocationDeciderTests.java | 6 +++--- 14 files changed, 20 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index fc3a6377e32f9..01dcf48f35ab7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -741,7 +741,7 @@ private Decision decideCanAllocate(ShardRouting shardRouting, RoutingNode target } private Decision decideCanForceAllocateForVacate(ShardRouting shardRouting, RoutingNode target) { - return allocation.deciders().canForceDuringVacate(shardRouting, target, allocation); + return allocation.deciders().canForceAllocateDuringReplace(shardRouting, target, allocation); } /** diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecider.java index 2c3daf0794156..5631fd2db78e0 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecider.java @@ -107,8 +107,8 @@ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode n /** * Returns a {@link Decision} whether the given shard can be forced to the - * given node in the event that the shard's source node is being vacated. - * This allows nodes using a vacate-type node shutdown (replace/vacate) to + * given node in the event that the shard's source node is being replaced. + * This allows nodes using a replace-type node shutdown to * override certain deciders in the interest of moving the shard away from * a node that *must* be removed. * @@ -116,10 +116,10 @@ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode n * opt-out to having their other NO decisions *not* overridden while vacating. * * The caller is responsible for first checking: - * - that a replacement/vacate is ongoing + * - that a replacement is ongoing * - the shard routing's current node is the source of the replacement */ - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return Decision.YES; } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java index 6dc5a01c987b0..8f0e9cb8e1c0b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java @@ -121,7 +121,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } @Override - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return canAllocate(shardRouting, node, allocation); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java index d3a92224fb27e..50f8a41205187 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java @@ -319,9 +319,9 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } @Override - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { // TODO: check whether we would reach 100% disk as a safety effort - return super.canForceDuringVacate(shardRouting, node, allocation); + return super.canForceAllocateDuringReplace(shardRouting, node, allocation); } private static final Decision YES_NOT_MOST_UTILIZED_DISK = Decision.single(Decision.Type.YES, NAME, diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java index dbae5f5502f57..46c7b80f70536 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java @@ -75,7 +75,7 @@ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode n } @Override - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return canAllocate(shardRouting, node, allocation); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index 620c26c267c62..82b127e458f55 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -96,7 +96,7 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod } @Override - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { if (replacementFromSourceToTarget(allocation.metadata(), shardRouting.currentNodeId(), node.node().getName())) { return Decision.single(Decision.Type.YES, NAME, "node [%s] is being replaced by node [%s], and can be force vacated to the target", diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeVersionAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeVersionAllocationDecider.java index b50d5256bdcff..f1fcb230de8f7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeVersionAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeVersionAllocationDecider.java @@ -54,7 +54,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } @Override - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return canAllocate(shardRouting, node, allocation); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ReplicaAfterPrimaryActiveAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ReplicaAfterPrimaryActiveAllocationDecider.java index b385f2e34cca4..d395cd9020b1e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ReplicaAfterPrimaryActiveAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ReplicaAfterPrimaryActiveAllocationDecider.java @@ -37,7 +37,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocat } @Override - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return canAllocate(shardRouting, node, allocation); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ResizeAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ResizeAllocationDecider.java index 00ec82050733e..b99b759efc1f4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ResizeAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ResizeAllocationDecider.java @@ -73,7 +73,7 @@ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode n } @Override - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return canAllocate(shardRouting, node, allocation); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RestoreInProgressAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RestoreInProgressAllocationDecider.java index d5ab85cabe5ed..360340a419768 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RestoreInProgressAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RestoreInProgressAllocationDecider.java @@ -70,7 +70,7 @@ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode n } @Override - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return canAllocate(shardRouting, node, allocation); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java index 9c8f2c274deeb..83bcdcd30130c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java @@ -106,7 +106,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } @Override - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return canAllocate(shardRouting, node, allocation); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java index 48ccbb4de7e41..34372238031d1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java @@ -47,7 +47,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } @Override - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return canAllocate(shardRouting, node, allocation); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java index 0c4b115d4a351..f93494fde8d82 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java @@ -167,7 +167,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } @Override - public Decision canForceDuringVacate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return canAllocate(shardRouting, node, allocation); } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java index 41484108e86c4..5c76dc38248f0 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java @@ -124,7 +124,7 @@ public void testCanForceAllocate() { assignedShard = assignedShard.initialize(NODE_A.getId(), null, 1); assignedShard = assignedShard.moveToStarted(); - Decision decision = decider.canForceDuringVacate(assignedShard, routingNode, allocation); + Decision decision = decider.canForceAllocateDuringReplace(assignedShard, routingNode, allocation); assertThat(decision.type(), equalTo(Decision.Type.NO)); assertThat( decision.getExplanation(), @@ -133,7 +133,7 @@ public void testCanForceAllocate() { routingNode = new RoutingNode(NODE_B.getId(), NODE_B, assignedShard); - decision = decider.canForceDuringVacate(assignedShard, routingNode, allocation); + decision = decider.canForceAllocateDuringReplace(assignedShard, routingNode, allocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); assertThat(decision.getExplanation(), equalTo("node [" + NODE_A.getId() + "] is being replaced by node [" + NODE_B.getId() + @@ -141,7 +141,7 @@ public void testCanForceAllocate() { routingNode = new RoutingNode(NODE_C.getId(), NODE_C, assignedShard); - decision = decider.canForceDuringVacate(assignedShard, routingNode, allocation); + decision = decider.canForceAllocateDuringReplace(assignedShard, routingNode, allocation); assertThat(decision.type(), equalTo(Decision.Type.NO)); assertThat(decision.getExplanation(), equalTo("shard is not on the source of a node replacement relocated to the replacement target")); From f3613bcdc81173610be807b500269fbea70284cf Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 13:35:30 -0600 Subject: [PATCH 10/33] Add comment to AwarenessAllocationDecider.canForceAllocateDuringReplace --- .../routing/allocation/decider/AwarenessAllocationDecider.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java index 8f0e9cb8e1c0b..41093ec8da0b7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java @@ -122,6 +122,9 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing @Override public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + // We need to meet the criteria for shard awareness even during a replacement so that all + // copies of a shard do not get allocated to the same host/rack/AZ, so this explicitly + // checks the awareness 'canAllocate' to ensure we don't violate that constraint. return canAllocate(shardRouting, node, allocation); } From 7b4c51813950317106c11fcdf9dd7c8b7b493e2e Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 13:37:13 -0600 Subject: [PATCH 11/33] Revert changes to ClusterRebalanceAllocationDecider --- .../allocation/decider/ClusterRebalanceAllocationDecider.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java index fc11bd4a31506..92d850109930c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java @@ -8,6 +8,8 @@ package org.elasticsearch.cluster.routing.allocation.decider; +import java.util.Locale; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.cluster.routing.RoutingNodes; @@ -18,8 +20,6 @@ import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; -import java.util.Locale; - /** * This {@link AllocationDecider} controls re-balancing operations based on the * cluster wide active shard state. This decided can not be configured in From 4ce00ef1a24f0f324452eda198bee000d0db1f6b Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 13:40:06 -0600 Subject: [PATCH 12/33] Change "no replacement" decision message in NodeReplacementAllocationDecider --- .../allocation/decider/NodeReplacementAllocationDecider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index 82b127e458f55..8c972db2e9edc 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -23,7 +23,7 @@ public class NodeReplacementAllocationDecider extends AllocationDecider { public static final String NAME = "node_replacement"; static final Decision NO_REPLACEMENTS = Decision.single(Decision.Type.YES, NAME, - "no node replacements are currently ongoing, allocation is allowed"); + "neither the source nor target node are part of an ongoing node replacement (no replacements)"); @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { From 27ebeebab5658cc1ca0beb88dc8cd256b2e17ba4 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 14:32:08 -0600 Subject: [PATCH 13/33] Only construct shutdown map once in isReplacementSource --- .../allocation/decider/NodeReplacementAllocationDecider.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index 8c972db2e9edc..db1329c1e5381 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -16,6 +16,7 @@ import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import java.util.Map; import java.util.Optional; public class NodeReplacementAllocationDecider extends AllocationDecider { @@ -139,8 +140,8 @@ private static boolean isReplacementSource(Metadata metadata, String nodeId) { if (nodeId == null || replacementOngoing(metadata) == false) { return false; } - return metadata.nodeShutdowns().containsKey(nodeId) && - metadata.nodeShutdowns().get(nodeId).getType().equals(SingleNodeShutdownMetadata.Type.REPLACE); + final Map nodeShutdowns = metadata.nodeShutdowns(); + return nodeShutdowns.containsKey(nodeId) && nodeShutdowns.get(nodeId).getType().equals(SingleNodeShutdownMetadata.Type.REPLACE); } /** From cb1c0c4df22bd0bd8a821d11862bc1cddeeb8d76 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 14:42:17 -0600 Subject: [PATCH 14/33] Make node shutdowns and target shutdowns available within RoutingAllocation --- .../routing/allocation/RoutingAllocation.java | 27 +++++++++ .../NodeReplacementAllocationDecider.java | 60 +++++++++---------- 2 files changed, 55 insertions(+), 32 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java index 11200891051db..54b4d9d1da193 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java @@ -12,6 +12,7 @@ import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.RestoreInProgress; import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.routing.RoutingChangesObserver; import org.elasticsearch.cluster.routing.RoutingNodes; @@ -24,6 +25,7 @@ import org.elasticsearch.snapshots.RestoreService.RestoreInProgressUpdater; import org.elasticsearch.snapshots.SnapshotShardSizeInfo; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -71,6 +73,9 @@ public class RoutingAllocation { nodesChangedObserver, indexMetadataUpdater, restoreInProgressUpdater ); + private final Map nodeShutdowns; + private final Map nodeReplacementTargets; + /** * Creates a new {@link RoutingAllocation} @@ -90,6 +95,14 @@ public RoutingAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, this.clusterInfo = clusterInfo; this.shardSizeInfo = shardSizeInfo; this.currentNanoTime = currentNanoTime; + this.nodeShutdowns = metadata.nodeShutdowns(); + Map targetNameToShutdown = new HashMap<>(); + for (SingleNodeShutdownMetadata shutdown : this.nodeShutdowns.values()) { + if (shutdown.getType() == SingleNodeShutdownMetadata.Type.REPLACE) { + targetNameToShutdown.put(shutdown.getTargetNodeName(), shutdown); + } + } + this.nodeReplacementTargets = Collections.unmodifiableMap(targetNameToShutdown); } /** returns the nano time captured at the beginning of the allocation. used to make sure all time based decisions are aligned */ @@ -145,6 +158,20 @@ public SnapshotShardSizeInfo snapshotShardSizeInfo() { return shardSizeInfo; } + /** + * Returns the map of node id to shutdown metadata currently in the cluster + */ + public Map nodeShutdowns() { + return this.nodeShutdowns; + } + + /** + * Returns a map of target node name to replacement shutdown + */ + public Map replacementTargetShutdowns() { + return this.nodeReplacementTargets; + } + @SuppressWarnings("unchecked") public T custom(String key) { return (T) customs.get(key); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index db1329c1e5381..83f8505aed5ed 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -9,7 +9,6 @@ package org.elasticsearch.cluster.routing.allocation.decider; import org.elasticsearch.cluster.metadata.IndexMetadata; -import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.RoutingNode; @@ -28,20 +27,19 @@ public class NodeReplacementAllocationDecider extends AllocationDecider { @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - final Metadata metadata = allocation.metadata(); - if (replacementOngoing(metadata) == false) { + if (replacementOngoing(allocation) == false) { return NO_REPLACEMENTS; - } else if (replacementFromSourceToTarget(metadata, shardRouting.currentNodeId(), node.node().getName())) { + } else if (replacementFromSourceToTarget(allocation, shardRouting.currentNodeId(), node.node().getName())) { return Decision.single(Decision.Type.YES, NAME, "node [%s] is replacing node [%s], and may receive shards from it", shardRouting.currentNodeId(), node.nodeId()); - } else if (isReplacementSource(metadata, shardRouting.currentNodeId())) { + } else if (isReplacementSource(allocation, shardRouting.currentNodeId())) { return Decision.single(Decision.Type.NO, NAME, "node [%s] is being replaced, and its shards may only be allocated to the replacement target", shardRouting.currentNodeId()); - } else if (isReplacementSource(metadata, node.nodeId())) { + } else if (isReplacementSource(allocation, node.nodeId())) { return Decision.single(Decision.Type.NO, NAME, "node [%s] is being removed, so no data from other nodes may be allocated to it", node.nodeId()); - } else if (isReplacementTargetName(metadata, node.node().getName())) { + } else if (isReplacementTargetName(allocation, node.node().getName())) { return Decision.single(Decision.Type.NO, NAME, "node [%s] is replacing a vacating node, so no data from other nodes " + "may be allocated to it until the replacement is complete", node.nodeId()); @@ -53,12 +51,12 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing @Override public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - if (replacementOngoing(allocation.metadata()) == false) { + if (replacementOngoing(allocation) == false) { return NO_REPLACEMENTS; - } else if (isReplacementSource(allocation.metadata(), node.nodeId())) { + } else if (isReplacementSource(allocation, node.nodeId())) { return Decision.single(Decision.Type.NO, NAME, "node [%s] is being replaced by node [%s], so no data may remain on it", node.nodeId(), - getReplacementName(allocation.metadata(), node.nodeId())); + getReplacementName(allocation, node.nodeId())); } else { return Decision.single(Decision.Type.YES, NAME, "node [%s] is not being replaced", node.nodeId()); } @@ -66,9 +64,9 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl @Override public Decision canAllocate(IndexMetadata indexMetadata, RoutingNode node, RoutingAllocation allocation) { - if (replacementOngoing(allocation.metadata()) == false) { + if (replacementOngoing(allocation) == false) { return NO_REPLACEMENTS; - } else if (isReplacementTargetName(allocation.metadata(), node.node().getName())) { + } else if (isReplacementTargetName(allocation, node.node().getName())) { return Decision.single(Decision.Type.NO, NAME, "node [%s] is replacing a vacating node, so no data from other nodes " + "may be allocated to it until the replacement is complete", node.nodeId()); @@ -81,13 +79,13 @@ public Decision canAllocate(IndexMetadata indexMetadata, RoutingNode node, Routi @Override public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNode node, RoutingAllocation allocation) { - if (replacementOngoing(allocation.metadata()) == false) { + if (replacementOngoing(allocation) == false) { return NO_REPLACEMENTS; - } else if (isReplacementTargetName(allocation.metadata(), node.getName())) { + } else if (isReplacementTargetName(allocation, node.getName())) { return Decision.single(Decision.Type.NO, NAME, "node [%s] is a node replacement target, shards cannot auto expand to be on it until the replacement is complete", node.getId(), "source"); - } else if (isReplacementSource(allocation.metadata(), node.getId())) { + } else if (isReplacementSource(allocation, node.getId())) { return Decision.single(Decision.Type.NO, NAME, "node [%s] is being replaced, shards cannot auto expand to be on it", node.getId()); } else { @@ -98,7 +96,7 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod @Override public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - if (replacementFromSourceToTarget(allocation.metadata(), shardRouting.currentNodeId(), node.node().getName())) { + if (replacementFromSourceToTarget(allocation, shardRouting.currentNodeId(), node.node().getName())) { return Decision.single(Decision.Type.YES, NAME, "node [%s] is being replaced by node [%s], and can be force vacated to the target", shardRouting.currentNodeId(), node.nodeId()); @@ -111,22 +109,22 @@ public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, Routing /** * Returns true if there are any node replacements ongoing in the cluster */ - private static boolean replacementOngoing(Metadata metadata) { - return metadata.nodeShutdowns().values().stream() + private static boolean replacementOngoing(RoutingAllocation allocation) { + return allocation.nodeShutdowns().values().stream() .anyMatch(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)); } /** * Returns true if there is a replacement currently ongoing from the source to the target node id */ - private static boolean replacementFromSourceToTarget(Metadata metadata, String sourceNodeId, String targetNodeName) { - if (replacementOngoing(metadata) == false) { + private static boolean replacementFromSourceToTarget(RoutingAllocation allocation, String sourceNodeId, String targetNodeName) { + if (replacementOngoing(allocation) == false) { return false; } if (sourceNodeId == null || targetNodeName == null) { return false; } - final SingleNodeShutdownMetadata shutdown = metadata.nodeShutdowns().get(sourceNodeId); + final SingleNodeShutdownMetadata shutdown = allocation.nodeShutdowns().get(sourceNodeId); return shutdown != null && shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE) && shutdown.getNodeId().equals(sourceNodeId) && @@ -136,31 +134,29 @@ private static boolean replacementFromSourceToTarget(Metadata metadata, String s /** * Returns true if the given node id is the source (the replaced node) of an ongoing node replacement */ - private static boolean isReplacementSource(Metadata metadata, String nodeId) { - if (nodeId == null || replacementOngoing(metadata) == false) { + private static boolean isReplacementSource(RoutingAllocation allocation, String nodeId) { + if (nodeId == null || replacementOngoing(allocation) == false) { return false; } - final Map nodeShutdowns = metadata.nodeShutdowns(); + final Map nodeShutdowns = allocation.nodeShutdowns(); return nodeShutdowns.containsKey(nodeId) && nodeShutdowns.get(nodeId).getType().equals(SingleNodeShutdownMetadata.Type.REPLACE); } /** * Returns true if the given node name (not the id!) is the target (the replacing node) of an ongoing node replacement */ - private static boolean isReplacementTargetName(Metadata metadata, String nodeName) { - if (nodeName == null || replacementOngoing(metadata) == false) { + private static boolean isReplacementTargetName(RoutingAllocation allocation, String nodeName) { + if (nodeName == null || replacementOngoing(allocation) == false) { return false; } - return metadata.nodeShutdowns().values().stream() - .filter(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)) - .anyMatch(shutdown -> shutdown.getTargetNodeName().equals(nodeName)); + return allocation.replacementTargetShutdowns().get(nodeName) != null; } - private static String getReplacementName(Metadata metadata, String nodeIdBeingReplaced) { - if (nodeIdBeingReplaced == null || replacementOngoing(metadata) == false) { + private static String getReplacementName(RoutingAllocation allocation, String nodeIdBeingReplaced) { + if (nodeIdBeingReplaced == null || replacementOngoing(allocation) == false) { return null; } - return Optional.ofNullable(metadata.nodeShutdowns().get(nodeIdBeingReplaced)) + return Optional.ofNullable(allocation.nodeShutdowns().get(nodeIdBeingReplaced)) .filter(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)) .map(SingleNodeShutdownMetadata::getTargetNodeName) .orElse(null); From c2571ee54bc72f64b9d0f18cc9c9e94f07c32039 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 14:50:40 -0600 Subject: [PATCH 15/33] Add randomization for adding the filter that is overridden in test --- .../xpack/shutdown/NodeShutdownShardsIT.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java index ae7ab1fc2c5cd..9431d565b7775 100644 --- a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java +++ b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java @@ -147,14 +147,14 @@ public void testNodeReplacementOverridesFilters() throws Exception { String nodeA = internalCluster().startNode(Settings.builder().put("node.name", "node-a")); // Create an index and pin it to nodeA, when we replace it with nodeB, // it'll move the data, overridding the `_name` allocation filter - createIndex( - "myindex", - Settings.builder() - .put("index.routing.allocation.require._name", nodeA) - .put("index.number_of_shards", 3) - .put("index.number_of_replicas", 0) - .build() - ); + Settings.Builder nodeASettings = Settings.builder() + .put("index.number_of_shards", 3) + .put("index.number_of_replicas", 0); + // Randomly add a filter that will be overridden by the node shutdown-replace + if (randomBoolean()) { + nodeASettings.put("index.routing.allocation.require._name", nodeA); + } + createIndex("myindex", nodeASettings.build()); final String nodeAId = getNodeId(nodeA); final String nodeB = "node_t1"; // TODO: fix this to so it's actually overrideable From fe1889c3dfb2cccbdc779d8202bd053ec4cbd7ab Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 14:55:47 -0600 Subject: [PATCH 16/33] Add integration test with replicas: 1 --- .../xpack/shutdown/NodeShutdownShardsIT.java | 108 +++++++++++++++++- 1 file changed, 104 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java index 9431d565b7775..7aa62882b71a0 100644 --- a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java +++ b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java @@ -143,17 +143,117 @@ public void testShardStatusIsCompleteOnNonDataNodes() throws Exception { assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE)); } + public void testNodeReplacementOnlyAllowsShardsFromReplacedNode() throws Exception { + String nodeA = internalCluster().startNode(Settings.builder().put("node.name", "node-a")); + Settings.Builder nodeASettings = Settings.builder() + .put("index.number_of_shards", 3) + .put("index.number_of_replicas", 1); + createIndex("myindex", nodeASettings.build()); + final String nodeAId = getNodeId(nodeA); + final String nodeB = "node_t1"; // TODO: fix this to so it's actually overrideable + + // Mark the nodeA as being replaced + PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request( + nodeAId, + SingleNodeShutdownMetadata.Type.REPLACE, + this.getTestName(), + null, + nodeB + ); + AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get(); + assertTrue(putShutdownResponse.isAcknowledged()); + + GetShutdownStatusAction.Response getResp = client().execute( + GetShutdownStatusAction.INSTANCE, + new GetShutdownStatusAction.Request(nodeAId) + ).get(); + + assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(STALLED)); + + internalCluster().startNode(Settings.builder().put("node.name", nodeB)); + final String nodeBId = getNodeId(nodeB); + + logger.info("--> NodeA: {} -- {}", nodeA, nodeAId); + logger.info("--> NodeB: {} -- {}", nodeB, nodeBId); + + assertBusy(() -> { + ClusterState state = client().admin().cluster().prepareState().clear().setRoutingTable(true).get().getState(); + int active = 0; + for (ShardRouting sr : state.routingTable().allShards("myindex")) { + if (sr.active()) { + active++; + assertThat( + "expected shard on nodeB (" + nodeBId + ") but it was on a different node", + sr.currentNodeId(), + equalTo(nodeBId) + ); + } + } + assertThat("expected all 3 of the primary shards to be allocated", active, equalTo(3)); + }); + + assertBusy(() -> { + GetShutdownStatusAction.Response shutdownStatus = client().execute( + GetShutdownStatusAction.INSTANCE, + new GetShutdownStatusAction.Request(nodeAId) + ).get(); + assertThat(shutdownStatus.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE)); + }); + + final String nodeC = internalCluster().startNode(); + + createIndex("other", Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 1).build()); + + ensureYellow("other"); + + // Explain the replica for the "other" index + ClusterAllocationExplainResponse explainResponse = client().admin() + .cluster() + .prepareAllocationExplain() + .setIndex("other") + .setShard(0) + .setPrimary(false) + .get(); + + // Validate that the replica cannot be allocated to nodeB because it's the target of a node replacement + explainResponse.getExplanation() + .getShardAllocationDecision() + .getAllocateDecision() + .getNodeDecisions() + .stream() + .filter(nodeDecision -> nodeDecision.getNode().getId().equals(nodeBId)) + .findFirst() + .ifPresentOrElse(nodeAllocationResult -> { + assertThat(nodeAllocationResult.getCanAllocateDecision().type(), equalTo(Decision.Type.NO)); + assertTrue( + "expected decisions to mention node replacement: " + + nodeAllocationResult.getCanAllocateDecision() + .getDecisions() + .stream() + .map(Decision::getExplanation) + .collect(Collectors.joining(",")), + nodeAllocationResult.getCanAllocateDecision() + .getDecisions() + .stream() + .anyMatch( + decision -> decision.getExplanation() + .contains( + "is replacing a vacating node, so no data from " + + "other nodes may be allocated to it until the replacement is complete" + ) + ) + ); + }, () -> fail("expected a 'NO' decision for nodeB but there was no explanation for that node")); + } + public void testNodeReplacementOverridesFilters() throws Exception { String nodeA = internalCluster().startNode(Settings.builder().put("node.name", "node-a")); // Create an index and pin it to nodeA, when we replace it with nodeB, // it'll move the data, overridding the `_name` allocation filter Settings.Builder nodeASettings = Settings.builder() + .put("index.routing.allocation.require._name", nodeA) .put("index.number_of_shards", 3) .put("index.number_of_replicas", 0); - // Randomly add a filter that will be overridden by the node shutdown-replace - if (randomBoolean()) { - nodeASettings.put("index.routing.allocation.require._name", nodeA); - } createIndex("myindex", nodeASettings.build()); final String nodeAId = getNodeId(nodeA); final String nodeB = "node_t1"; // TODO: fix this to so it's actually overrideable From 4f0da9c437c0af468d5cc096e36981dbc9305ac7 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 15:07:19 -0600 Subject: [PATCH 17/33] Go nuts with the verbosity of allocation decisions --- .../NodeReplacementAllocationDecider.java | 28 ++++++++++++------- ...NodeReplacementAllocationDeciderTests.java | 9 ++++-- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index 83f8505aed5ed..19a93217c3c76 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -34,15 +34,18 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing "node [%s] is replacing node [%s], and may receive shards from it", shardRouting.currentNodeId(), node.nodeId()); } else if (isReplacementSource(allocation, shardRouting.currentNodeId())) { return Decision.single(Decision.Type.NO, NAME, - "node [%s] is being replaced, and its shards may only be allocated to the replacement target", - shardRouting.currentNodeId()); + "node [%s] is being replaced, and its shards may only be allocated to the replacement target [%s]", + shardRouting.currentNodeId(), getReplacementName(allocation, shardRouting.currentNodeId())); } else if (isReplacementSource(allocation, node.nodeId())) { return Decision.single(Decision.Type.NO, NAME, - "node [%s] is being removed, so no data from other nodes may be allocated to it", node.nodeId()); + "node [%s] is being replaced by [%s], so no data from other node [%s] may be allocated to it", + node.nodeId(), getReplacementName(allocation, node.nodeId()), shardRouting.currentNodeId()); } else if (isReplacementTargetName(allocation, node.node().getName())) { + final SingleNodeShutdownMetadata shutdown = allocation.replacementTargetShutdowns().get(node.node().getName()); return Decision.single(Decision.Type.NO, NAME, - "node [%s] is replacing a vacating node, so no data from other nodes " + - "may be allocated to it until the replacement is complete", node.nodeId()); + "node [%s] is replacing the vacating node [%s], so no data from other node [%s] " + + "may be allocated to it until the replacement is complete", + node.nodeId(), shutdown == null ? null : shutdown.getNodeId(), shardRouting.currentNodeId()); } else { return Decision.single(Decision.Type.YES, NAME, "neither the source nor target node are part of an ongoing node replacement"); @@ -67,9 +70,11 @@ public Decision canAllocate(IndexMetadata indexMetadata, RoutingNode node, Routi if (replacementOngoing(allocation) == false) { return NO_REPLACEMENTS; } else if (isReplacementTargetName(allocation, node.node().getName())) { + final SingleNodeShutdownMetadata shutdown = allocation.replacementTargetShutdowns().get(node.node().getName()); return Decision.single(Decision.Type.NO, NAME, - "node [%s] is replacing a vacating node, so no data from other nodes " + - "may be allocated to it until the replacement is complete", node.nodeId()); + "node [%s] is replacing a vacating node [%s], so no data from other nodes " + + "may be allocated to it until the replacement is complete", + node.nodeId(), shutdown == null ? null : shutdown.getNodeId()); } else { // The node in question is not a replacement target, so allow allocation. return Decision.single(Decision.Type.YES, NAME, @@ -82,12 +87,15 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod if (replacementOngoing(allocation) == false) { return NO_REPLACEMENTS; } else if (isReplacementTargetName(allocation, node.getName())) { + final SingleNodeShutdownMetadata shutdown = allocation.replacementTargetShutdowns().get(node.getName()); return Decision.single(Decision.Type.NO, NAME, - "node [%s] is a node replacement target, shards cannot auto expand to be on it until the replacement is complete", - node.getId(), "source"); + "node [%s] is a node replacement target for node [%s], " + + "shards cannot auto expand to be on it until the replacement is complete", + node.getId(), shutdown == null ? null : shutdown.getNodeId()); } else if (isReplacementSource(allocation, node.getId())) { return Decision.single(Decision.Type.NO, NAME, - "node [%s] is being replaced, shards cannot auto expand to be on it", node.getId()); + "node [%s] is being replaced by [%s], shards cannot auto expand to be on it", + node.getId(), getReplacementName(allocation, node.getId())); } else { return Decision.single(Decision.Type.YES, NAME, "node is not part of a node replacement, so shards may be auto expanded onto it"); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java index 5c76dc38248f0..19036f6aac22a 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java @@ -162,7 +162,8 @@ public void testCanAllocateFromTheVoid() { decision = decider.canAllocate(indexMetadata, routingNode, allocation); assertThat(decision.type(), equalTo(Decision.Type.NO)); assertThat(decision.getExplanation(), equalTo("node [" + NODE_B.getId() + - "] is replacing a vacating node, so no data from other nodes may be allocated to it until the replacement is complete")); + "] is replacing a vacating node [" + NODE_A.getId() + + "], so no data from other nodes may be allocated to it until the replacement is complete")); routingNode = new RoutingNode(NODE_C.getId(), NODE_C, shard); @@ -201,7 +202,8 @@ public void testCanAllocateToNeitherSourceNorTarget() { assertThat(decision.type(), equalTo(Decision.Type.NO)); assertThat( decision.getExplanation(), - equalTo("node [" + NODE_A.getId() + "] is being removed, so no data from other nodes may be allocated to it") + equalTo("node [" + NODE_A.getId() + "] is being replaced by [" + NODE_B.getName() + + "], so no data from other node [null] may be allocated to it") ); routingNode = new RoutingNode(NODE_B.getId(), NODE_B, shard); @@ -211,7 +213,8 @@ public void testCanAllocateToNeitherSourceNorTarget() { assertThat( decision.getExplanation(), equalTo("node [" + NODE_B.getId() + - "] is replacing a vacating node, so no data from other nodes may be allocated to it until the replacement is complete") + "] is replacing the vacating node [" + NODE_A.getId() + + "], so no data from other node [null] may be allocated to it until the replacement is complete") ); routingNode = new RoutingNode(NODE_C.getId(), NODE_C, shard); From d659b116564738bbbf66de437c6e0fb46a0d5887 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 15:09:09 -0600 Subject: [PATCH 18/33] Also check NODE_C in unit test --- .../decider/NodeReplacementAllocationDeciderTests.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java index 19036f6aac22a..a11ffa16e8c82 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java @@ -190,6 +190,12 @@ public void testCannotRemainOnReplacedNode() { decision = decider.canRemain(shard, routingNode, allocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); assertThat(decision.getExplanation(), equalTo("node [" + NODE_B.getId() + "] is not being replaced")); + + routingNode = new RoutingNode(NODE_C.getId(), NODE_C, shard); + + decision = decider.canRemain(shard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat(decision.getExplanation(), equalTo("node [" + NODE_C.getId() + "] is not being replaced")); } public void testCanAllocateToNeitherSourceNorTarget() { From 448968ba73f015279217cddebd4a6e9135dccb87 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 15:13:37 -0600 Subject: [PATCH 19/33] Test with randomly assigned shard --- ...NodeReplacementAllocationDeciderTests.java | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java index a11ffa16e8c82..3ff6704960c00 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java @@ -204,28 +204,33 @@ public void testCanAllocateToNeitherSourceNorTarget() { RoutingNode routingNode = new RoutingNode(NODE_A.getId(), NODE_A, shard); allocation.debugDecision(true); - Decision decision = decider.canAllocate(shard, routingNode, allocation); + ShardRouting testShard = this.shard; + if (randomBoolean()) { + testShard = shard.initialize(NODE_C.getId(), null, 1); + testShard = testShard.moveToStarted(); + } + Decision decision = decider.canAllocate(testShard, routingNode, allocation); assertThat(decision.type(), equalTo(Decision.Type.NO)); assertThat( decision.getExplanation(), equalTo("node [" + NODE_A.getId() + "] is being replaced by [" + NODE_B.getName() + - "], so no data from other node [null] may be allocated to it") + "], so no data from other node [" + testShard.currentNodeId() + "] may be allocated to it") ); - routingNode = new RoutingNode(NODE_B.getId(), NODE_B, shard); + routingNode = new RoutingNode(NODE_B.getId(), NODE_B, testShard); - decision = decider.canAllocate(shard, routingNode, allocation); + decision = decider.canAllocate(testShard, routingNode, allocation); assertThat(decision.type(), equalTo(Decision.Type.NO)); assertThat( decision.getExplanation(), equalTo("node [" + NODE_B.getId() + - "] is replacing the vacating node [" + NODE_A.getId() + - "], so no data from other node [null] may be allocated to it until the replacement is complete") + "] is replacing the vacating node [" + NODE_A.getId() + "], so no data from other node [" + + testShard.currentNodeId() + "] may be allocated to it until the replacement is complete") ); - routingNode = new RoutingNode(NODE_C.getId(), NODE_C, shard); + routingNode = new RoutingNode(NODE_C.getId(), NODE_C, testShard); - decision = decider.canAllocate(shard, routingNode, allocation); + decision = decider.canAllocate(testShard, routingNode, allocation); assertThat(decision.getExplanation(), decision.type(), equalTo(Decision.Type.YES)); assertThat( decision.getExplanation(), From e83b630aaacd8c1d348167e1e38d477d94609f20 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 15:24:55 -0600 Subject: [PATCH 20/33] Fix test for extra verbose decision messages --- .../xpack/shutdown/NodeShutdownShardsIT.java | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java index 7aa62882b71a0..deed7c6fd8bb5 100644 --- a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java +++ b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java @@ -236,12 +236,8 @@ public void testNodeReplacementOnlyAllowsShardsFromReplacedNode() throws Excepti .getDecisions() .stream() .anyMatch( - decision -> decision.getExplanation() - .contains( - "is replacing a vacating node, so no data from " - + "other nodes may be allocated to it until the replacement is complete" - ) - ) + decision -> decision.getExplanation().contains("is replacing the vacating node") && + decision.getExplanation().contains("may be allocated to it until the replacement is complete")) ); }, () -> fail("expected a 'NO' decision for nodeB but there was no explanation for that node")); } @@ -337,12 +333,8 @@ public void testNodeReplacementOverridesFilters() throws Exception { .getDecisions() .stream() .anyMatch( - decision -> decision.getExplanation() - .contains( - "is replacing a vacating node, so no data from " - + "other nodes may be allocated to it until the replacement is complete" - ) - ) + decision -> decision.getExplanation().contains("is replacing the vacating node") && + decision.getExplanation().contains("may be allocated to it until the replacement is complete")) ); }, () -> fail("expected a 'NO' decision for nodeB but there was no explanation for that node")); } From df91ec1bcdf7d7520f978a202744dd7339369f4d Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 15:25:09 -0600 Subject: [PATCH 21/33] Remove canAllocate(IndexMetadat, RoutingNode, RoutingAllocation) overriding --- .../NodeReplacementAllocationDecider.java | 17 ------------- ...NodeReplacementAllocationDeciderTests.java | 25 ------------------- 2 files changed, 42 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index 19a93217c3c76..ec02aa1ad9a0d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -65,23 +65,6 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl } } - @Override - public Decision canAllocate(IndexMetadata indexMetadata, RoutingNode node, RoutingAllocation allocation) { - if (replacementOngoing(allocation) == false) { - return NO_REPLACEMENTS; - } else if (isReplacementTargetName(allocation, node.node().getName())) { - final SingleNodeShutdownMetadata shutdown = allocation.replacementTargetShutdowns().get(node.node().getName()); - return Decision.single(Decision.Type.NO, NAME, - "node [%s] is replacing a vacating node [%s], so no data from other nodes " + - "may be allocated to it until the replacement is complete", - node.nodeId(), shutdown == null ? null : shutdown.getNodeId()); - } else { - // The node in question is not a replacement target, so allow allocation. - return Decision.single(Decision.Type.YES, NAME, - "node is not a replacement target, so allocation is allowed"); - } - } - @Override public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNode node, RoutingAllocation allocation) { if (replacementOngoing(allocation) == false) { diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java index 3ff6704960c00..300a6bf6e798b 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java @@ -147,31 +147,6 @@ public void testCanForceAllocate() { equalTo("shard is not on the source of a node replacement relocated to the replacement target")); } - public void testCanAllocateFromTheVoid() { - ClusterState state = prepareState(service.reroute(ClusterState.EMPTY_STATE, "initial state"), NODE_A.getId(), NODE_B.getName()); - RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); - RoutingNode routingNode = new RoutingNode(NODE_A.getId(), NODE_A, shard); - allocation.debugDecision(true); - - Decision decision = decider.canAllocate(indexMetadata, routingNode, allocation); - assertThat(decision.type(), equalTo(Decision.Type.YES)); - assertThat(decision.getExplanation(), equalTo("node is not a replacement target, so allocation is allowed")); - - routingNode = new RoutingNode(NODE_B.getId(), NODE_B, shard); - - decision = decider.canAllocate(indexMetadata, routingNode, allocation); - assertThat(decision.type(), equalTo(Decision.Type.NO)); - assertThat(decision.getExplanation(), equalTo("node [" + NODE_B.getId() + - "] is replacing a vacating node [" + NODE_A.getId() + - "], so no data from other nodes may be allocated to it until the replacement is complete")); - - routingNode = new RoutingNode(NODE_C.getId(), NODE_C, shard); - - decision = decider.canAllocate(indexMetadata, routingNode, allocation); - assertThat(decision.type(), equalTo(Decision.Type.YES)); - assertThat(decision.getExplanation(), equalTo("node is not a replacement target, so allocation is allowed")); - } - public void testCannotRemainOnReplacedNode() { ClusterState state = prepareState(service.reroute(ClusterState.EMPTY_STATE, "initial state"), NODE_A.getId(), NODE_B.getName()); RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); From c3f6c23c0163a3d07a22c0ad8f6a225ceb52ed45 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 15:42:29 -0600 Subject: [PATCH 22/33] Spotless :| --- .../xpack/shutdown/NodeShutdownShardsIT.java | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java index 03c5b3750c3c9..8d70dbdf25cd3 100644 --- a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java +++ b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java @@ -188,9 +188,7 @@ public void testNotStalledIfAllShardsHaveACopyOnAnotherNode() throws Exception { public void testNodeReplacementOnlyAllowsShardsFromReplacedNode() throws Exception { String nodeA = internalCluster().startNode(Settings.builder().put("node.name", "node-a")); - Settings.Builder nodeASettings = Settings.builder() - .put("index.number_of_shards", 3) - .put("index.number_of_replicas", 1); + Settings.Builder nodeASettings = Settings.builder().put("index.number_of_shards", 3).put("index.number_of_replicas", 1); createIndex("myindex", nodeASettings.build()); final String nodeAId = getNodeId(nodeA); final String nodeB = "node_t1"; // TODO: fix this to so it's actually overrideable @@ -271,16 +269,17 @@ public void testNodeReplacementOnlyAllowsShardsFromReplacedNode() throws Excepti assertTrue( "expected decisions to mention node replacement: " + nodeAllocationResult.getCanAllocateDecision() - .getDecisions() - .stream() - .map(Decision::getExplanation) - .collect(Collectors.joining(",")), + .getDecisions() + .stream() + .map(Decision::getExplanation) + .collect(Collectors.joining(",")), nodeAllocationResult.getCanAllocateDecision() .getDecisions() .stream() .anyMatch( - decision -> decision.getExplanation().contains("is replacing the vacating node") && - decision.getExplanation().contains("may be allocated to it until the replacement is complete")) + decision -> decision.getExplanation().contains("is replacing the vacating node") + && decision.getExplanation().contains("may be allocated to it until the replacement is complete") + ) ); }, () -> fail("expected a 'NO' decision for nodeB but there was no explanation for that node")); } @@ -368,16 +367,17 @@ public void testNodeReplacementOverridesFilters() throws Exception { assertTrue( "expected decisions to mention node replacement: " + nodeAllocationResult.getCanAllocateDecision() - .getDecisions() - .stream() - .map(Decision::getExplanation) - .collect(Collectors.joining(",")), + .getDecisions() + .stream() + .map(Decision::getExplanation) + .collect(Collectors.joining(",")), nodeAllocationResult.getCanAllocateDecision() .getDecisions() .stream() .anyMatch( - decision -> decision.getExplanation().contains("is replacing the vacating node") && - decision.getExplanation().contains("may be allocated to it until the replacement is complete")) + decision -> decision.getExplanation().contains("is replacing the vacating node") + && decision.getExplanation().contains("may be allocated to it until the replacement is complete") + ) ); }, () -> fail("expected a 'NO' decision for nodeB but there was no explanation for that node")); } From bc4c78bf0c35a995f986edaf3f29e9c8acc03f34 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 15:54:52 -0600 Subject: [PATCH 23/33] Implement 100% disk usage check during force-replace-allocate --- .../decider/DiskThresholdDecider.java | 25 ++++++++- .../DiskThresholdDeciderUnitTests.java | 51 +++++++++++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java index 02ee7ecf7660d..403b6533b9442 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java @@ -319,8 +319,29 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing @Override public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - // TODO: check whether we would reach 100% disk as a safety effort - return super.canForceAllocateDuringReplace(shardRouting, node, allocation); + ImmutableOpenMap usages = allocation.clusterInfo().getNodeMostAvailableDiskUsages(); + final Decision decision = earlyTerminate(allocation, usages); + if (decision != null) { + return decision; + } + + if (allocation.metadata().index(shardRouting.index()).ignoreDiskWatermarks()) { + return YES_DISK_WATERMARKS_IGNORED; + } + + final DiskUsageWithRelocations usage = getDiskUsage(node, allocation, usages, false); + final long shardSize = getExpectedShardSize(shardRouting, 0L, + allocation.clusterInfo(), allocation.snapshotShardSizeInfo(), allocation.metadata(), allocation.routingTable()); + assert shardSize >= 0 : shardSize; + double freeSpaceAfterShard = freeDiskPercentageAfterShardAssigned(usage, shardSize); + if (freeSpaceAfterShard < 0) { + return Decision.single(Decision.Type.NO, NAME, + "unable to force allocate shard to [%s] during replacement, " + + "as allocating to this node would cause disk usage to exceed 100%% ([%s] free)", + node.nodeId(), Strings.format1Decimals(freeSpaceAfterShard, "%")); + } else { + return super.canForceAllocateDuringReplace(shardRouting, node, allocation); + } } private static final Decision YES_NOT_MOST_UTILIZED_DISK = Decision.single(Decision.Type.YES, NAME, diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java index 5830a4b952e96..cab4c52a976aa 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java @@ -524,4 +524,55 @@ public void testDecidesYesIfWatermarksIgnored() { assertThat(decision.getExplanation(), containsString("disk watermarks are ignored on this index")); } + public void testCannotForceAllocateOver100PercentUsage() { + ClusterSettings nss = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + DiskThresholdDecider decider = new DiskThresholdDecider(Settings.EMPTY, nss); + + Metadata metadata = Metadata.builder() + .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)) + .build(); + + final Index index = metadata.index("test").getIndex(); + + ShardRouting test_0 = ShardRouting.newUnassigned(new ShardId(index, 0), true, EmptyStoreRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + DiscoveryNode node_0 = new DiscoveryNode("node_0", buildNewFakeTransportAddress(), Collections.emptyMap(), + new HashSet<>(DiscoveryNodeRole.roles()), Version.CURRENT); + DiscoveryNode node_1 = new DiscoveryNode("node_1", buildNewFakeTransportAddress(), Collections.emptyMap(), + new HashSet<>(DiscoveryNodeRole.roles()), Version.CURRENT); + + RoutingTable routingTable = RoutingTable.builder() + .addAsNew(metadata.index("test")) + .build(); + + ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata).routingTable(routingTable).build(); + + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder() + .add(node_0) + .add(node_1) + ).build(); + + // actual test -- after all that bloat :) + ImmutableOpenMap.Builder leastAvailableUsages = ImmutableOpenMap.builder(); + leastAvailableUsages.put("node_0", new DiskUsage("node_0", "node_0", "_na_", 100, 0)); // all full + ImmutableOpenMap.Builder mostAvailableUsage = ImmutableOpenMap.builder(); + mostAvailableUsage.put("node_0", new DiskUsage("node_0", "node_0", "_na_", 100, 0)); // all full + + ImmutableOpenMap.Builder shardSizes = ImmutableOpenMap.builder(); + // bigger than available space + final long shardSize = randomIntBetween(1, 10); + shardSizes.put("[test][0][p]", shardSize); + ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), + shardSizes.build(), null, ImmutableOpenMap.of(), ImmutableOpenMap.of()); + RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Collections.singleton(decider)), + clusterState.getRoutingNodes(), clusterState, clusterInfo, null, System.nanoTime()); + allocation.debugDecision(true); + Decision decision = decider.canForceAllocateDuringReplace(test_0, new RoutingNode("node_0", node_0), allocation); + assertEquals(Decision.Type.NO, decision.type()); + + assertThat(decision.getExplanation(), containsString( + "unable to force allocate shard to [node_0] during replacement, " + + "as allocating to this node would cause disk usage to exceed 100% ([-" + shardSize + "%] free)")); + } } From 21e9f9ed12464f6a08767c4e424f3acf7611cb6c Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 5 Oct 2021 16:01:59 -0600 Subject: [PATCH 24/33] Add rudimentary documentation for "replace" shutdown type --- docs/reference/shutdown/apis/shutdown-put.asciidoc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/reference/shutdown/apis/shutdown-put.asciidoc b/docs/reference/shutdown/apis/shutdown-put.asciidoc index e4a5a3af73233..96c77da8f5ea2 100644 --- a/docs/reference/shutdown/apis/shutdown-put.asciidoc +++ b/docs/reference/shutdown/apis/shutdown-put.asciidoc @@ -26,7 +26,7 @@ Migrates ongoing tasks and index shards to other nodes as needed to prepare a node to be restarted or shut down and removed from the cluster. This ensures that {es} can be stopped safely with minimal disruption to the cluster. -You must specify the type of shutdown: `restart` or `remove`. +You must specify the type of shutdown: `restart`, `remove`, or `replace`. If a node is already being prepared for shutdown, you can use this API to change the shutdown type. @@ -58,12 +58,15 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] `type`:: (Required, string) -Valid values are `restart` and `remove`. +Valid values are `restart`, `remove`, or `replace`. Use `restart` when you need to temporarily shut down a node to perform an upgrade, make configuration changes, or perform other maintenance. Because the node is expected to rejoin the cluster, data is not migrated off of the node. Use `remove` when you need to permanently remove a node from the cluster. The node is not marked ready for shutdown until data is migrated off of the node +Use `replace` to do a 1:1 replacement of a node with another node. Certain allocation decisions will +be ignored (such as disk watermarks) in the interest of true replacement of the source node with the +target node. `reason`:: (Required, string) @@ -76,6 +79,13 @@ it does not affect the shut down process. Only valid if `type` is `restart`. Controls how long {es} will wait for the node to restart and join the cluster before reassigning its shards to other nodes. This works the same as <> with the `index.unassigned.node_left.delayed_timeout` setting. If you specify both a restart allocation delay and an index-level allocation delay, the longer of the two is used. +`target_node_name`:: +(Optional, string) +Only valid if `type` is `replace`. Specifies the name of the node that is replacing the node being +shut down. Shards from the shut down node are only allowed to be allocated to the target node, and +no other data will be allocated to the target node. During relocation of data certain allocation +rules are ignored, such as disk watermarks or user attribute filtering rules. + [[put-shutdown-api-example]] ==== {api-examples-title} From f7c407f874dcf09b34aa860d14b5da166924d49d Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Wed, 6 Oct 2021 10:13:02 -0600 Subject: [PATCH 25/33] Use RoutingAllocation shutdown map in BalancedShardsAllocator --- .../allocation/allocator/BalancedShardsAllocator.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index 01dcf48f35ab7..83366156a147e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -31,12 +31,12 @@ import org.elasticsearch.cluster.routing.allocation.decider.Decision; import org.elasticsearch.cluster.routing.allocation.decider.Decision.Type; import org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDecider; -import org.elasticsearch.core.Tuple; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Tuple; import org.elasticsearch.gateway.PriorityComparator; import java.util.ArrayList; @@ -690,11 +690,10 @@ public MoveDecision decideMove(final ShardRouting shardRouting) { */ MoveDecision moveDecision = decideMove(shardRouting, sourceNode, canRemain, this::decideCanAllocate); if (moveDecision.canRemain() == false && moveDecision.forceMove() == false) { - final boolean shardsOnReplacedNodes = allocation.metadata().nodeShutdowns().values().stream() - .filter(s -> s.getType() == SingleNodeShutdownMetadata.Type.REPLACE) - .map(SingleNodeShutdownMetadata::getNodeId) - .anyMatch(shardRouting.currentNodeId()::equals); - if (shardsOnReplacedNodes) { + final SingleNodeShutdownMetadata shutdown = allocation.nodeShutdowns().get(shardRouting.currentNodeId()); + final boolean shardsOnReplacedNode = shutdown != null && + shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE); + if (shardsOnReplacedNode) { return decideMove(shardRouting, sourceNode, canRemain, this::decideCanForceAllocateForVacate); } } From 06320e3e1bf0938d92f23bcdc0adca0d3cbab111 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Wed, 6 Oct 2021 10:22:54 -0600 Subject: [PATCH 26/33] Add canForceAllocateDuringReplace to AllocationDeciders & add test --- .../decider/AllocationDeciders.java | 19 ++++++ .../xpack/shutdown/NodeShutdownShardsIT.java | 68 +++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java index 7ea06bb119047..5f5f361ec5c19 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java @@ -212,6 +212,25 @@ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode n return ret; } + @Override + public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + Decision.Multi ret = new Decision.Multi(); + for (AllocationDecider allocationDecider : allocations) { + Decision decision = allocationDecider.canForceAllocateDuringReplace(shardRouting, node, allocation); + // short track if a NO is returned. + if (decision.type() == Decision.Type.NO) { + if (allocation.debugDecision() == false) { + return Decision.NO; + } else { + ret.add(decision); + } + } else { + addDecision(ret, decision, allocation); + } + } + return ret; + } + private void addDecision(Decision.Multi ret, Decision decision, RoutingAllocation allocation) { // We never add ALWAYS decisions and only add YES decisions when requested by debug mode (since Multi default is YES). if (decision != Decision.ALWAYS diff --git a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java index 8d70dbdf25cd3..7a8508abb3434 100644 --- a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java +++ b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java @@ -12,6 +12,7 @@ import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse; import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse; +import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.cluster.ClusterState; @@ -382,6 +383,73 @@ public void testNodeReplacementOverridesFilters() throws Exception { }, () -> fail("expected a 'NO' decision for nodeB but there was no explanation for that node")); } + public void testNodeReplacementOnlyToTarget() throws Exception { + String nodeA = internalCluster().startNode(Settings.builder().put("node.name", "node-a")); + // Create an index and pin it to nodeA, when we replace it with nodeB, + // it'll move the data, overridding the `_name` allocation filter + Settings.Builder nodeASettings = Settings.builder() + .put("index.number_of_shards", 4) + .put("index.number_of_replicas", 0); + createIndex("myindex", nodeASettings.build()); + final String nodeAId = getNodeId(nodeA); + final String nodeB = "node_t1"; // TODO: fix this to so it's actually overrideable + final String nodeC = "node_t2"; // TODO: fix this to so it's actually overrideable + + // Mark the nodeA as being replaced + PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request( + nodeAId, + SingleNodeShutdownMetadata.Type.REPLACE, + this.getTestName(), + null, + nodeB + ); + AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get(); + assertTrue(putShutdownResponse.isAcknowledged()); + + GetShutdownStatusAction.Response getResp = client().execute( + GetShutdownStatusAction.INSTANCE, + new GetShutdownStatusAction.Request(nodeAId) + ).get(); + + assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(STALLED)); + + internalCluster().startNode(Settings.builder().put("node.name", nodeB)); + internalCluster().startNode(Settings.builder().put("node.name", nodeC)); + final String nodeBId = getNodeId(nodeB); + final String nodeCId = getNodeId(nodeC); + + logger.info("--> NodeA: {} -- {}", nodeA, nodeAId); + logger.info("--> NodeB: {} -- {}", nodeB, nodeBId); + logger.info("--> NodeC: {} -- {}", nodeC, nodeCId); + + assertBusy(() -> { + ClusterState state = client().admin().cluster().prepareState().clear().setRoutingTable(true).get().getState(); + for (ShardRouting sr : state.routingTable().allShards("myindex")) { + assertThat( + "expected shard on nodeB (" + nodeBId + ") but it was on a different node", + sr.currentNodeId(), + equalTo(nodeBId) + ); + } + }); + + assertBusy(() -> { + GetShutdownStatusAction.Response shutdownStatus = client().execute( + GetShutdownStatusAction.INSTANCE, + new GetShutdownStatusAction.Request(nodeAId) + ).get(); + assertThat(shutdownStatus.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE)); + }); + + ClusterStateResponse stateResponse = client().admin().cluster().prepareState().get(); + + assertTrue("expected all shards for index to be on node B: " + + stateResponse.getState().routingTable().allShards("myindex").stream() + .map(ShardRouting::currentNodeId).collect(Collectors.joining(",")), + stateResponse.getState().routingTable().allShards("myindex").stream() + .allMatch(sr -> sr.currentNodeId().equals(nodeBId))); + } + private void indexRandomData() throws Exception { int numDocs = scaledRandomIntBetween(100, 1000); IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs]; From 4ab4496ff4513ae8dc681e7ae0440dc6e4cd146c Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Wed, 6 Oct 2021 10:28:02 -0600 Subject: [PATCH 27/33] Switch from percentage to bytes in DiskThresholdDecider force check --- .../routing/allocation/decider/DiskThresholdDecider.java | 8 ++++---- .../allocation/decider/DiskThresholdDeciderUnitTests.java | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java index 403b6533b9442..97b7d3a24d21f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java @@ -333,12 +333,12 @@ public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, Routing final long shardSize = getExpectedShardSize(shardRouting, 0L, allocation.clusterInfo(), allocation.snapshotShardSizeInfo(), allocation.metadata(), allocation.routingTable()); assert shardSize >= 0 : shardSize; - double freeSpaceAfterShard = freeDiskPercentageAfterShardAssigned(usage, shardSize); - if (freeSpaceAfterShard < 0) { + final long freeBytesAfterShard = usage.getFreeBytes() - shardSize; + if (freeBytesAfterShard < 0) { return Decision.single(Decision.Type.NO, NAME, "unable to force allocate shard to [%s] during replacement, " + - "as allocating to this node would cause disk usage to exceed 100%% ([%s] free)", - node.nodeId(), Strings.format1Decimals(freeSpaceAfterShard, "%")); + "as allocating to this node would cause disk usage to exceed 100%% ([%s] bytes above available disk space)", + node.nodeId(), -freeBytesAfterShard); } else { return super.canForceAllocateDuringReplace(shardRouting, node, allocation); } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java index cab4c52a976aa..298cd7aef8a66 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java @@ -573,6 +573,6 @@ public void testCannotForceAllocateOver100PercentUsage() { assertThat(decision.getExplanation(), containsString( "unable to force allocate shard to [node_0] during replacement, " + - "as allocating to this node would cause disk usage to exceed 100% ([-" + shardSize + "%] free)")); + "as allocating to this node would cause disk usage to exceed 100% ([" + shardSize + "] bytes above available disk space)")); } } From e8773b7abfad6597e368e5e3ec4e71bab03daf0e Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Wed, 6 Oct 2021 10:29:23 -0600 Subject: [PATCH 28/33] Enhance docs with note about rollover, creation, & shrink --- docs/reference/shutdown/apis/shutdown-put.asciidoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/reference/shutdown/apis/shutdown-put.asciidoc b/docs/reference/shutdown/apis/shutdown-put.asciidoc index 96c77da8f5ea2..6d6c09fe93826 100644 --- a/docs/reference/shutdown/apis/shutdown-put.asciidoc +++ b/docs/reference/shutdown/apis/shutdown-put.asciidoc @@ -66,7 +66,8 @@ Use `remove` when you need to permanently remove a node from the cluster. The node is not marked ready for shutdown until data is migrated off of the node Use `replace` to do a 1:1 replacement of a node with another node. Certain allocation decisions will be ignored (such as disk watermarks) in the interest of true replacement of the source node with the -target node. +target node. During a replace-type shutdown, rollover and index creation may result in unassigned +shards, and shrink may fail until the replacement is complete. `reason`:: (Required, string) From 658a721edbe011b6a7ffe7b48454f5905d34e25b Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Wed, 6 Oct 2021 11:47:04 -0600 Subject: [PATCH 29/33] Clarify decision messages, add test for target-only allocation --- .../NodeReplacementAllocationDecider.java | 4 ++-- ...NodeReplacementAllocationDeciderTests.java | 6 +++--- .../xpack/shutdown/NodeShutdownShardsIT.java | 20 +++++-------------- 3 files changed, 10 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index ec02aa1ad9a0d..f0dbb2a7d7489 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -38,12 +38,12 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing shardRouting.currentNodeId(), getReplacementName(allocation, shardRouting.currentNodeId())); } else if (isReplacementSource(allocation, node.nodeId())) { return Decision.single(Decision.Type.NO, NAME, - "node [%s] is being replaced by [%s], so no data from other node [%s] may be allocated to it", + "node [%s] is being replaced by [%s], so no data may be allocated to it", node.nodeId(), getReplacementName(allocation, node.nodeId()), shardRouting.currentNodeId()); } else if (isReplacementTargetName(allocation, node.node().getName())) { final SingleNodeShutdownMetadata shutdown = allocation.replacementTargetShutdowns().get(node.node().getName()); return Decision.single(Decision.Type.NO, NAME, - "node [%s] is replacing the vacating node [%s], so no data from other node [%s] " + + "node [%s] is replacing the vacating node [%s], only data currently allocated to the source node " + "may be allocated to it until the replacement is complete", node.nodeId(), shutdown == null ? null : shutdown.getNodeId(), shardRouting.currentNodeId()); } else { diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java index 300a6bf6e798b..b31e40e4dfaf6 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDeciderTests.java @@ -189,7 +189,7 @@ public void testCanAllocateToNeitherSourceNorTarget() { assertThat( decision.getExplanation(), equalTo("node [" + NODE_A.getId() + "] is being replaced by [" + NODE_B.getName() + - "], so no data from other node [" + testShard.currentNodeId() + "] may be allocated to it") + "], so no data may be allocated to it") ); routingNode = new RoutingNode(NODE_B.getId(), NODE_B, testShard); @@ -199,8 +199,8 @@ public void testCanAllocateToNeitherSourceNorTarget() { assertThat( decision.getExplanation(), equalTo("node [" + NODE_B.getId() + - "] is replacing the vacating node [" + NODE_A.getId() + "], so no data from other node [" + - testShard.currentNodeId() + "] may be allocated to it until the replacement is complete") + "] is replacing the vacating node [" + NODE_A.getId() + "], only data currently allocated " + + "to the source node may be allocated to it until the replacement is complete") ); routingNode = new RoutingNode(NODE_C.getId(), NODE_C, testShard); diff --git a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java index 7a8508abb3434..13d286eb21dab 100644 --- a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java +++ b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java @@ -12,7 +12,6 @@ import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse; import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse; -import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.cluster.ClusterState; @@ -384,7 +383,8 @@ public void testNodeReplacementOverridesFilters() throws Exception { } public void testNodeReplacementOnlyToTarget() throws Exception { - String nodeA = internalCluster().startNode(Settings.builder().put("node.name", "node-a")); + String nodeA = internalCluster().startNode(Settings.builder().put("node.name", "node-a") + .put("cluster.routing.rebalance.enable", "none")); // Create an index and pin it to nodeA, when we replace it with nodeB, // it'll move the data, overridding the `_name` allocation filter Settings.Builder nodeASettings = Settings.builder() @@ -425,11 +425,9 @@ public void testNodeReplacementOnlyToTarget() throws Exception { assertBusy(() -> { ClusterState state = client().admin().cluster().prepareState().clear().setRoutingTable(true).get().getState(); for (ShardRouting sr : state.routingTable().allShards("myindex")) { - assertThat( - "expected shard on nodeB (" + nodeBId + ") but it was on a different node", - sr.currentNodeId(), - equalTo(nodeBId) - ); + assertThat("expected all shards for index to be on node B (" + nodeBId + ") but " + + sr.toString() + " is on " + sr.currentNodeId(), + sr.currentNodeId(), equalTo(nodeBId)); } }); @@ -440,14 +438,6 @@ public void testNodeReplacementOnlyToTarget() throws Exception { ).get(); assertThat(shutdownStatus.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE)); }); - - ClusterStateResponse stateResponse = client().admin().cluster().prepareState().get(); - - assertTrue("expected all shards for index to be on node B: " + - stateResponse.getState().routingTable().allShards("myindex").stream() - .map(ShardRouting::currentNodeId).collect(Collectors.joining(",")), - stateResponse.getState().routingTable().allShards("myindex").stream() - .allMatch(sr -> sr.currentNodeId().equals(nodeBId))); } private void indexRandomData() throws Exception { From d4c865020ad06da902c5f81707310d6c617d51db Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Wed, 6 Oct 2021 11:48:13 -0600 Subject: [PATCH 30/33] Simplify NodeReplacementAllocationDecider.replacementOngoing --- .../allocation/decider/NodeReplacementAllocationDecider.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java index f0dbb2a7d7489..623838bd64b81 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeReplacementAllocationDecider.java @@ -101,8 +101,7 @@ public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, Routing * Returns true if there are any node replacements ongoing in the cluster */ private static boolean replacementOngoing(RoutingAllocation allocation) { - return allocation.nodeShutdowns().values().stream() - .anyMatch(shutdown -> shutdown.getType().equals(SingleNodeShutdownMetadata.Type.REPLACE)); + return allocation.replacementTargetShutdowns().isEmpty() == false; } /** From fa926989b471d3ae560e332657cda257dc939d40 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Wed, 6 Oct 2021 11:50:53 -0600 Subject: [PATCH 31/33] Start nodeC before nodeB in integration test --- .../elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java index 13d286eb21dab..45760c9fa8702 100644 --- a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java +++ b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java @@ -294,7 +294,7 @@ public void testNodeReplacementOverridesFilters() throws Exception { .put("index.number_of_replicas", 0); createIndex("myindex", nodeASettings.build()); final String nodeAId = getNodeId(nodeA); - final String nodeB = "node_t1"; // TODO: fix this to so it's actually overrideable + final String nodeB = "node_t2"; // TODO: fix this to so it's actually overrideable // Mark the nodeA as being replaced PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request( @@ -314,6 +314,7 @@ public void testNodeReplacementOverridesFilters() throws Exception { assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(STALLED)); + final String nodeC = internalCluster().startNode(); internalCluster().startNode(Settings.builder().put("node.name", nodeB)); final String nodeBId = getNodeId(nodeB); @@ -339,8 +340,6 @@ public void testNodeReplacementOverridesFilters() throws Exception { assertThat(shutdownStatus.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE)); }); - final String nodeC = internalCluster().startNode(); - createIndex("other", Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 1).build()); ensureYellow("other"); From c237f95d05cb7d4758c9ad077aabd6bdbf1245b4 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Wed, 6 Oct 2021 11:52:19 -0600 Subject: [PATCH 32/33] Spotleeeessssssss! You get me every time! --- .../xpack/shutdown/NodeShutdownShardsIT.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java index 45760c9fa8702..4122a46ff253b 100644 --- a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java +++ b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java @@ -382,13 +382,12 @@ public void testNodeReplacementOverridesFilters() throws Exception { } public void testNodeReplacementOnlyToTarget() throws Exception { - String nodeA = internalCluster().startNode(Settings.builder().put("node.name", "node-a") - .put("cluster.routing.rebalance.enable", "none")); + String nodeA = internalCluster().startNode( + Settings.builder().put("node.name", "node-a").put("cluster.routing.rebalance.enable", "none") + ); // Create an index and pin it to nodeA, when we replace it with nodeB, // it'll move the data, overridding the `_name` allocation filter - Settings.Builder nodeASettings = Settings.builder() - .put("index.number_of_shards", 4) - .put("index.number_of_replicas", 0); + Settings.Builder nodeASettings = Settings.builder().put("index.number_of_shards", 4).put("index.number_of_replicas", 0); createIndex("myindex", nodeASettings.build()); final String nodeAId = getNodeId(nodeA); final String nodeB = "node_t1"; // TODO: fix this to so it's actually overrideable @@ -424,9 +423,11 @@ public void testNodeReplacementOnlyToTarget() throws Exception { assertBusy(() -> { ClusterState state = client().admin().cluster().prepareState().clear().setRoutingTable(true).get().getState(); for (ShardRouting sr : state.routingTable().allShards("myindex")) { - assertThat("expected all shards for index to be on node B (" + nodeBId + ") but " + - sr.toString() + " is on " + sr.currentNodeId(), - sr.currentNodeId(), equalTo(nodeBId)); + assertThat( + "expected all shards for index to be on node B (" + nodeBId + ") but " + sr.toString() + " is on " + sr.currentNodeId(), + sr.currentNodeId(), + equalTo(nodeBId) + ); } }); From f692e633e6e1d3545e7c8e561db6dadfe8ff8af0 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Thu, 7 Oct 2021 09:01:26 -0600 Subject: [PATCH 33/33] Remove outdated comment --- .../org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java index 4122a46ff253b..e0880c8ede6ee 100644 --- a/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java +++ b/x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java @@ -385,8 +385,6 @@ public void testNodeReplacementOnlyToTarget() throws Exception { String nodeA = internalCluster().startNode( Settings.builder().put("node.name", "node-a").put("cluster.routing.rebalance.enable", "none") ); - // Create an index and pin it to nodeA, when we replace it with nodeB, - // it'll move the data, overridding the `_name` allocation filter Settings.Builder nodeASettings = Settings.builder().put("index.number_of_shards", 4).put("index.number_of_replicas", 0); createIndex("myindex", nodeASettings.build()); final String nodeAId = getNodeId(nodeA);