From 8f24e8ecd15ea9dd7c036e634d03877968882ef8 Mon Sep 17 00:00:00 2001 From: Pooya Salehi Date: Wed, 6 Nov 2024 10:58:54 +0100 Subject: [PATCH 01/12] Long balance computation should not delay new index primary assignment (#115511) A long desired balance computation could delay a newly created index shard from being assigned since first the computation has to finish for the assignments to be published and the shards getting assigned. With this change we add a new setting which allows setting a maximum time for a computation in case there are unassigned primary shards. Note that this is similar to how a new cluster state causes early publishing of the desired balance. Closes ES-9616 --- .../allocator/ContinuousComputation.java | 12 +- .../allocation/allocator/DesiredBalance.java | 11 +- .../allocator/DesiredBalanceComputer.java | 66 +++++- .../DesiredBalanceShardsAllocator.java | 11 +- .../common/settings/ClusterSettings.java | 1 + .../allocator/ContinuousComputationTests.java | 63 ++++++ .../DesiredBalanceComputerTests.java | 7 +- .../DesiredBalanceShardsAllocatorTests.java | 202 ++++++++++++++++-- 8 files changed, 345 insertions(+), 28 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/ContinuousComputation.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/ContinuousComputation.java index d82dcbac17c21..3846f7f9e5740 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/ContinuousComputation.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/ContinuousComputation.java @@ -49,6 +49,16 @@ public void onNewInput(T input) { } } + /** + * enqueues {@code input} if {@code expectedLatestKnownInput} is the latest known input. + * Neither of the parameters can be null. + */ + protected boolean compareAndEnqueue(T expectedLatestKnownInput, T input) { + assert expectedLatestKnownInput != null; + assert input != null; + return enqueuedInput.compareAndSet(Objects.requireNonNull(expectedLatestKnownInput), Objects.requireNonNull(input)); + } + /** * @return {@code false} iff there are no active/enqueued computations */ @@ -67,7 +77,7 @@ protected boolean isFresh(T input) { /** * Process the given input. * - * @param input the value that was last received by {@link #onNewInput} before invocation. + * @param input the value that was last received by {@link #onNewInput} or {@link #compareAndEnqueue} before invocation. */ protected abstract void processInput(T input); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java index aeedbb56b9df2..9de95804b49b2 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java @@ -26,11 +26,18 @@ public record DesiredBalance( long lastConvergedIndex, Map assignments, - Map weightsPerNode + Map weightsPerNode, + ComputationFinishReason finishReason ) { + enum ComputationFinishReason { + CONVERGED, + YIELD_TO_NEW_INPUT, + STOP_EARLY + } + public DesiredBalance(long lastConvergedIndex, Map assignments) { - this(lastConvergedIndex, assignments, Map.of()); + this(lastConvergedIndex, assignments, Map.of(), ComputationFinishReason.CONVERGED); } public static final DesiredBalance INITIAL = new DesiredBalance(-1, Map.of()); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputer.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputer.java index 56c48492a2051..44794b70a41b9 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputer.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputer.java @@ -38,6 +38,7 @@ import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; +import java.util.function.LongSupplier; import java.util.function.Predicate; import static java.util.stream.Collectors.toUnmodifiableSet; @@ -49,8 +50,8 @@ public class DesiredBalanceComputer { private static final Logger logger = LogManager.getLogger(DesiredBalanceComputer.class); - private final ThreadPool threadPool; private final ShardsAllocator delegateAllocator; + private final LongSupplier timeSupplierMillis; // stats protected final MeanMetric iterations = new MeanMetric(); @@ -63,12 +64,28 @@ public class DesiredBalanceComputer { Setting.Property.NodeScope ); + public static final Setting MAX_BALANCE_COMPUTATION_TIME_DURING_INDEX_CREATION_SETTING = Setting.timeSetting( + "cluster.routing.allocation.desired_balance.max_balance_computation_time_during_index_creation", + TimeValue.timeValueSeconds(1), + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + private TimeValue progressLogInterval; + private long maxBalanceComputationTimeDuringIndexCreationMillis; public DesiredBalanceComputer(ClusterSettings clusterSettings, ThreadPool threadPool, ShardsAllocator delegateAllocator) { - this.threadPool = threadPool; + this(clusterSettings, delegateAllocator, threadPool::relativeTimeInMillis); + } + + DesiredBalanceComputer(ClusterSettings clusterSettings, ShardsAllocator delegateAllocator, LongSupplier timeSupplierMillis) { this.delegateAllocator = delegateAllocator; + this.timeSupplierMillis = timeSupplierMillis; clusterSettings.initializeAndWatch(PROGRESS_LOG_INTERVAL_SETTING, value -> this.progressLogInterval = value); + clusterSettings.initializeAndWatch( + MAX_BALANCE_COMPUTATION_TIME_DURING_INDEX_CREATION_SETTING, + value -> this.maxBalanceComputationTimeDuringIndexCreationMillis = value.millis() + ); } public DesiredBalance compute( @@ -77,7 +94,6 @@ public DesiredBalance compute( Queue> pendingDesiredBalanceMoves, Predicate isFresh ) { - if (logger.isTraceEnabled()) { logger.trace( "Recomputing desired balance for [{}]: {}, {}, {}, {}", @@ -97,9 +113,10 @@ public DesiredBalance compute( final var changes = routingAllocation.changes(); final var ignoredShards = getIgnoredShardsWithDiscardedAllocationStatus(desiredBalanceInput.ignoredShards()); final var clusterInfoSimulator = new ClusterInfoSimulator(routingAllocation); + DesiredBalance.ComputationFinishReason finishReason = DesiredBalance.ComputationFinishReason.CONVERGED; if (routingNodes.size() == 0) { - return new DesiredBalance(desiredBalanceInput.index(), Map.of()); + return new DesiredBalance(desiredBalanceInput.index(), Map.of(), Map.of(), finishReason); } // we assume that all ongoing recoveries will complete @@ -263,11 +280,12 @@ public DesiredBalance compute( final int iterationCountReportInterval = computeIterationCountReportInterval(routingAllocation); final long timeWarningInterval = progressLogInterval.millis(); - final long computationStartedTime = threadPool.relativeTimeInMillis(); + final long computationStartedTime = timeSupplierMillis.getAsLong(); long nextReportTime = computationStartedTime + timeWarningInterval; int i = 0; boolean hasChanges = false; + boolean assignedNewlyCreatedPrimaryShards = false; while (true) { if (hasChanges) { // Not the first iteration, so every remaining unassigned shard has been ignored, perhaps due to throttling. We must bring @@ -293,6 +311,15 @@ public DesiredBalance compute( for (final var shardRouting : routingNode) { if (shardRouting.initializing()) { hasChanges = true; + if (shardRouting.primary() + && shardRouting.unassignedInfo() != null + && shardRouting.unassignedInfo().reason() == UnassignedInfo.Reason.INDEX_CREATED) { + // TODO: we could include more cases that would cause early publishing of desired balance in case of a long + // computation. e.g.: + // - unassigned search replicas in case the shard has no assigned shard replicas + // - other reasons for an unassigned shard such as NEW_INDEX_RESTORED + assignedNewlyCreatedPrimaryShards = true; + } clusterInfoSimulator.simulateShardStarted(shardRouting); routingNodes.startShard(shardRouting, changes, 0L); } @@ -301,14 +328,14 @@ public DesiredBalance compute( i++; final int iterations = i; - final long currentTime = threadPool.relativeTimeInMillis(); + final long currentTime = timeSupplierMillis.getAsLong(); final boolean reportByTime = nextReportTime <= currentTime; final boolean reportByIterationCount = i % iterationCountReportInterval == 0; if (reportByTime || reportByIterationCount) { nextReportTime = currentTime + timeWarningInterval; } - if (hasChanges == false) { + if (hasComputationConverged(hasChanges, i)) { logger.debug( "Desired balance computation for [{}] converged after [{}] and [{}] iterations", desiredBalanceInput.index(), @@ -324,9 +351,25 @@ public DesiredBalance compute( "Desired balance computation for [{}] interrupted after [{}] and [{}] iterations as newer cluster state received. " + "Publishing intermediate desired balance and restarting computation", desiredBalanceInput.index(), + TimeValue.timeValueMillis(currentTime - computationStartedTime).toString(), + i + ); + finishReason = DesiredBalance.ComputationFinishReason.YIELD_TO_NEW_INPUT; + break; + } + + if (assignedNewlyCreatedPrimaryShards + && currentTime - computationStartedTime >= maxBalanceComputationTimeDuringIndexCreationMillis) { + logger.info( + "Desired balance computation for [{}] interrupted after [{}] and [{}] iterations " + + "in order to not delay assignment of newly created index shards for more than [{}]. " + + "Publishing intermediate desired balance and restarting computation", + desiredBalanceInput.index(), + TimeValue.timeValueMillis(currentTime - computationStartedTime).toString(), i, - TimeValue.timeValueMillis(currentTime - computationStartedTime).toString() + TimeValue.timeValueMillis(maxBalanceComputationTimeDuringIndexCreationMillis).toString() ); + finishReason = DesiredBalance.ComputationFinishReason.STOP_EARLY; break; } @@ -368,7 +411,12 @@ public DesiredBalance compute( } long lastConvergedIndex = hasChanges ? previousDesiredBalance.lastConvergedIndex() : desiredBalanceInput.index(); - return new DesiredBalance(lastConvergedIndex, assignments, routingNodes.getBalanceWeightStatsPerNode()); + return new DesiredBalance(lastConvergedIndex, assignments, routingNodes.getBalanceWeightStatsPerNode(), finishReason); + } + + // visible for testing + boolean hasComputationConverged(boolean hasRoutingChanges, int currentIteration) { + return hasRoutingChanges == false; } private static Map collectShardAssignments(RoutingNodes routingNodes) { diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java index 4171100191211..0cfb3af87f012 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java @@ -136,7 +136,16 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { ) ); computationsExecuted.inc(); - if (isFresh(desiredBalanceInput)) { + + if (currentDesiredBalance.finishReason() == DesiredBalance.ComputationFinishReason.STOP_EARLY) { + logger.debug( + "Desired balance computation for [{}] terminated early with partial result, scheduling reconciliation", + index + ); + submitReconcileTask(currentDesiredBalance); + var newInput = DesiredBalanceInput.create(indexGenerator.incrementAndGet(), desiredBalanceInput.routingAllocation()); + desiredBalanceComputation.compareAndEnqueue(desiredBalanceInput, newInput); + } else if (isFresh(desiredBalanceInput)) { logger.debug("Desired balance computation for [{}] is completed, scheduling reconciliation", index); computationsConverged.inc(); submitReconcileTask(currentDesiredBalance); diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index 7bb78eabc8727..456602d952e9f 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -221,6 +221,7 @@ public void apply(Settings value, Settings current, Settings previous) { DataStreamAutoShardingService.CLUSTER_AUTO_SHARDING_MAX_WRITE_THREADS, DataStreamAutoShardingService.CLUSTER_AUTO_SHARDING_MIN_WRITE_THREADS, DesiredBalanceComputer.PROGRESS_LOG_INTERVAL_SETTING, + DesiredBalanceComputer.MAX_BALANCE_COMPUTATION_TIME_DURING_INDEX_CREATION_SETTING, DesiredBalanceReconciler.UNDESIRED_ALLOCATIONS_LOG_INTERVAL_SETTING, DesiredBalanceReconciler.UNDESIRED_ALLOCATIONS_LOG_THRESHOLD_SETTING, BreakerSettings.CIRCUIT_BREAKER_LIMIT_SETTING, diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/ContinuousComputationTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/ContinuousComputationTests.java index c15ca6d8205de..5ee57ebaa2c3c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/ContinuousComputationTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/ContinuousComputationTests.java @@ -21,6 +21,7 @@ import java.util.concurrent.CyclicBarrier; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -73,6 +74,68 @@ protected void processInput(Integer input) { assertTrue(Arrays.toString(valuePerThread) + " vs " + result.get(), Arrays.stream(valuePerThread).anyMatch(i -> i == result.get())); } + public void testCompareAndEnqueue() throws Exception { + final var initialInput = new Object(); + final var compareAndEnqueueCount = between(1, 10); + final var remaining = new AtomicInteger(compareAndEnqueueCount); + final var computationsExecuted = new AtomicInteger(); + final var result = new AtomicReference<>(); + final var computation = new ContinuousComputation<>(threadPool.generic()) { + @Override + protected void processInput(Object input) { + result.set(input); + if (remaining.decrementAndGet() >= 0) { + compareAndEnqueue(input, new Object()); + } + computationsExecuted.incrementAndGet(); + } + }; + computation.onNewInput(initialInput); + assertBusy(() -> assertFalse(computation.isActive())); + assertNotEquals(result.get(), initialInput); + assertEquals(computationsExecuted.get(), 1 + compareAndEnqueueCount); + } + + public void testCompareAndEnqueueSkipped() throws Exception { + final var barrier = new CyclicBarrier(2); + final var computationsExecuted = new AtomicInteger(); + final var initialInput = new Object(); + final var conditionalInput = new Object(); + final var newInput = new Object(); + final var submitConditional = new AtomicBoolean(true); + final var result = new AtomicReference<>(); + + final var computation = new ContinuousComputation<>(threadPool.generic()) { + @Override + protected void processInput(Object input) { + assertNotEquals(input, conditionalInput); + safeAwait(barrier); // start + safeAwait(barrier); // continue + if (submitConditional.getAndSet(false)) { + compareAndEnqueue(input, conditionalInput); + } + result.set(input); + safeAwait(barrier); // finished + computationsExecuted.incrementAndGet(); + } + }; + computation.onNewInput(initialInput); + + safeAwait(barrier); // start + computation.onNewInput(newInput); + safeAwait(barrier); // continue + safeAwait(barrier); // finished + assertEquals(result.get(), initialInput); + + safeAwait(barrier); // start + safeAwait(barrier); // continue + safeAwait(barrier); // finished + + assertBusy(() -> assertFalse(computation.isActive())); + assertEquals(result.get(), newInput); + assertEquals(computationsExecuted.get(), 2); + } + public void testSkipsObsoleteValues() throws Exception { final var barrier = new CyclicBarrier(2); final Runnable await = () -> safeAwait(barrier); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java index 7198170ab0c7c..56a687646b364 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java @@ -1210,7 +1210,12 @@ private void checkIterationLogging(int iterations, long eachIterationDuration, M var currentTime = new AtomicLong(0L); when(mockThreadPool.relativeTimeInMillis()).thenAnswer(invocation -> currentTime.addAndGet(eachIterationDuration)); - var desiredBalanceComputer = new DesiredBalanceComputer(createBuiltInClusterSettings(), mockThreadPool, new ShardsAllocator() { + // Some runs of this test try to simulate a long desired balance computation. Setting a high value on the following setting + // prevents interrupting a long computation. + var clusterSettings = createBuiltInClusterSettings( + Settings.builder().put(DesiredBalanceComputer.MAX_BALANCE_COMPUTATION_TIME_DURING_INDEX_CREATION_SETTING.getKey(), "2m").build() + ); + var desiredBalanceComputer = new DesiredBalanceComputer(clusterSettings, mockThreadPool, new ShardsAllocator() { @Override public void allocate(RoutingAllocation allocation) { final var unassignedIterator = allocation.routingNodes().unassigned().iterator(); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java index 739f81ed6d110..cd0d1e8a180dd 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.cluster.routing.allocation.allocator; +import org.apache.logging.log4j.Level; import org.apache.lucene.util.SetOnce; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.ActionTestUtils; @@ -52,6 +53,7 @@ import org.elasticsearch.snapshots.SnapshotShardSizeInfo; import org.elasticsearch.telemetry.TelemetryProvider; import org.elasticsearch.test.ClusterServiceUtils; +import org.elasticsearch.test.MockLog; import org.elasticsearch.threadpool.TestThreadPool; import java.util.List; @@ -59,11 +61,12 @@ import java.util.Queue; import java.util.Set; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.CyclicBarrier; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; -import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Predicate; @@ -85,14 +88,19 @@ public class DesiredBalanceShardsAllocatorTests extends ESAllocationTestCase { public void testGatewayAllocatorPreemptsAllocation() { final var nodeId = randomFrom(LOCAL_NODE_ID, OTHER_NODE_ID); testAllocate( - (allocation, unassignedAllocationHandler) -> unassignedAllocationHandler.initialize(nodeId, null, 0L, allocation.changes()), + (shardRouting, allocation, unassignedAllocationHandler) -> unassignedAllocationHandler.initialize( + nodeId, + null, + 0L, + allocation.changes() + ), routingTable -> assertEquals(nodeId, routingTable.index("test-index").shard(0).primaryShard().currentNodeId()) ); } public void testGatewayAllocatorStillFetching() { testAllocate( - (allocation, unassignedAllocationHandler) -> unassignedAllocationHandler.removeAndIgnore( + (shardRouting, allocation, unassignedAllocationHandler) -> unassignedAllocationHandler.removeAndIgnore( UnassignedInfo.AllocationStatus.FETCHING_SHARD_DATA, allocation.changes() ), @@ -108,17 +116,14 @@ public void testGatewayAllocatorStillFetching() { } public void testGatewayAllocatorDoesNothing() { - testAllocate((allocation, unassignedAllocationHandler) -> {}, routingTable -> { + testAllocate((shardRouting, allocation, unassignedAllocationHandler) -> {}, routingTable -> { var shardRouting = routingTable.shardRoutingTable("test-index", 0).primaryShard(); assertTrue(shardRouting.assignedToNode());// assigned by a followup reconciliation assertThat(shardRouting.unassignedInfo().lastAllocationStatus(), equalTo(UnassignedInfo.AllocationStatus.NO_ATTEMPT)); }); } - public void testAllocate( - BiConsumer allocateUnassigned, - Consumer verifier - ) { + public void testAllocate(AllocateUnassignedHandler allocateUnassigned, Consumer verifier) { var deterministicTaskQueue = new DeterministicTaskQueue(); var threadPool = deterministicTaskQueue.getThreadPool(); @@ -295,7 +300,7 @@ public ClusterState apply(ClusterState clusterState, RerouteStrategy routingAllo var allocationService = new AllocationService( new AllocationDeciders(List.of()), createGatewayAllocator( - (allocation, unassignedAllocationHandler) -> unassignedAllocationHandler.removeAndIgnore( + (shardRouting, allocation, unassignedAllocationHandler) -> unassignedAllocationHandler.removeAndIgnore( UnassignedInfo.AllocationStatus.NO_ATTEMPT, allocation.changes() ) @@ -336,6 +341,158 @@ protected long currentNanoTime() { } } + public void testIndexCreationInterruptsLongDesiredBalanceComputation() throws Exception { + var discoveryNode = newNode("node-0"); + var initialState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(discoveryNode).localNodeId(discoveryNode.getId()).masterNodeId(discoveryNode.getId())) + .build(); + final var ignoredIndexName = "index-ignored"; + + var threadPool = new TestThreadPool(getTestName()); + var time = new AtomicLong(threadPool.relativeTimeInMillis()); + var clusterService = ClusterServiceUtils.createClusterService(initialState, threadPool); + var allocationServiceRef = new SetOnce(); + var reconcileAction = new DesiredBalanceReconcilerAction() { + @Override + public ClusterState apply(ClusterState clusterState, RerouteStrategy routingAllocationAction) { + return allocationServiceRef.get().executeWithRoutingAllocation(clusterState, "reconcile", routingAllocationAction); + } + }; + + var gatewayAllocator = createGatewayAllocator((shardRouting, allocation, unassignedAllocationHandler) -> { + if (shardRouting.getIndexName().equals(ignoredIndexName)) { + unassignedAllocationHandler.removeAndIgnore(UnassignedInfo.AllocationStatus.NO_ATTEMPT, allocation.changes()); + } + }); + var shardsAllocator = new ShardsAllocator() { + @Override + public void allocate(RoutingAllocation allocation) { + // simulate long computation + time.addAndGet(1_000); + var dataNodeId = allocation.nodes().getDataNodes().values().iterator().next().getId(); + var unassignedIterator = allocation.routingNodes().unassigned().iterator(); + while (unassignedIterator.hasNext()) { + unassignedIterator.next(); + unassignedIterator.initialize(dataNodeId, null, 0L, allocation.changes()); + } + allocation.routingNodes().setBalanceWeightStatsPerNode(Map.of()); + } + + @Override + public ShardAllocationDecision decideShardAllocation(ShardRouting shard, RoutingAllocation allocation) { + throw new AssertionError("only used for allocation explain"); + } + }; + + // Make sure the computation takes at least a few iterations, where each iteration takes 1s (see {@code #shardsAllocator.allocate}). + // By setting the following setting we ensure the desired balance computation will be interrupted early to not delay assigning + // newly created primary shards. This ensures that we hit a desired balance computation (3s) which is longer than the configured + // setting below. + var clusterSettings = createBuiltInClusterSettings( + Settings.builder().put(DesiredBalanceComputer.MAX_BALANCE_COMPUTATION_TIME_DURING_INDEX_CREATION_SETTING.getKey(), "2s").build() + ); + final int minIterations = between(3, 10); + var desiredBalanceShardsAllocator = new DesiredBalanceShardsAllocator( + shardsAllocator, + threadPool, + clusterService, + new DesiredBalanceComputer(clusterSettings, shardsAllocator, time::get) { + @Override + public DesiredBalance compute( + DesiredBalance previousDesiredBalance, + DesiredBalanceInput desiredBalanceInput, + Queue> pendingDesiredBalanceMoves, + Predicate isFresh + ) { + return super.compute(previousDesiredBalance, desiredBalanceInput, pendingDesiredBalanceMoves, isFresh); + } + + @Override + boolean hasComputationConverged(boolean hasRoutingChanges, int currentIteration) { + return super.hasComputationConverged(hasRoutingChanges, currentIteration) && currentIteration >= minIterations; + } + }, + reconcileAction, + TelemetryProvider.NOOP + ); + var allocationService = createAllocationService(desiredBalanceShardsAllocator, gatewayAllocator); + allocationServiceRef.set(allocationService); + + var rerouteFinished = new CyclicBarrier(2); + // A mock cluster state update task for creating an index + class CreateIndexTask extends ClusterStateUpdateTask { + private final String indexName; + + private CreateIndexTask(String indexName) { + this.indexName = indexName; + } + + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + var indexMetadata = createIndex(indexName); + var newState = ClusterState.builder(currentState) + .metadata(Metadata.builder(currentState.metadata()).put(indexMetadata, true)) + .routingTable( + RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY, currentState.routingTable()) + .addAsNew(indexMetadata) + ) + .build(); + return allocationService.reroute( + newState, + "test", + ActionTestUtils.assertNoFailureListener(response -> safeAwait(rerouteFinished)) + ); + } + + @Override + public void onFailure(Exception e) { + throw new AssertionError(e); + } + } + + final var computationInterruptedMessage = + "Desired balance computation for * interrupted * in order to not delay assignment of newly created index shards *"; + try { + // Create a new index which is not ignored and therefore must be considered when a desired balance + // computation takes longer than 2s. + assertThat(desiredBalanceShardsAllocator.getStats().computationExecuted(), equalTo(0L)); + MockLog.assertThatLogger(() -> { + clusterService.submitUnbatchedStateUpdateTask("test", new CreateIndexTask("index-1")); + safeAwait(rerouteFinished); + assertThat(clusterService.state().getRoutingTable().index("index-1").primaryShardsUnassigned(), equalTo(0)); + }, + DesiredBalanceComputer.class, + new MockLog.SeenEventExpectation( + "Should log interrupted computation", + DesiredBalanceComputer.class.getCanonicalName(), + Level.INFO, + computationInterruptedMessage + ) + ); + assertBusy(() -> assertFalse(desiredBalanceShardsAllocator.getStats().computationActive())); + assertThat(desiredBalanceShardsAllocator.getStats().computationExecuted(), equalTo(2L)); + // The computation should not get interrupted when the newly created index shard stays unassigned. + MockLog.assertThatLogger(() -> { + clusterService.submitUnbatchedStateUpdateTask("test", new CreateIndexTask(ignoredIndexName)); + safeAwait(rerouteFinished); + assertThat(clusterService.state().getRoutingTable().index(ignoredIndexName).primaryShardsUnassigned(), equalTo(1)); + }, + DesiredBalanceComputer.class, + new MockLog.UnseenEventExpectation( + "Should log interrupted computation", + DesiredBalanceComputer.class.getCanonicalName(), + Level.INFO, + computationInterruptedMessage + ) + ); + assertBusy(() -> assertFalse(desiredBalanceShardsAllocator.getStats().computationActive())); + assertThat(desiredBalanceShardsAllocator.getStats().computationExecuted(), equalTo(3L)); + } finally { + clusterService.close(); + terminate(threadPool); + } + } + public void testCallListenersOnlyAfterProducingFreshInput() throws InterruptedException { var reconciliations = new AtomicInteger(0); @@ -772,13 +929,30 @@ private static GatewayAllocator createGatewayAllocator() { return createGatewayAllocator(DesiredBalanceShardsAllocatorTests::initialize); } - private static void initialize(RoutingAllocation allocation, ExistingShardsAllocator.UnassignedAllocationHandler handler) { + private static void initialize( + ShardRouting shardRouting, + RoutingAllocation allocation, + ExistingShardsAllocator.UnassignedAllocationHandler handler + ) { handler.initialize(allocation.nodes().getLocalNodeId(), null, 0L, allocation.changes()); } - private static GatewayAllocator createGatewayAllocator( - BiConsumer allocateUnassigned - ) { + /** + * A helper interface to simplify creating a GatewayAllocator in the tests by only requiring + * an implementation for {@link org.elasticsearch.cluster.routing.allocation.ExistingShardsAllocator#allocateUnassigned}. + */ + interface AllocateUnassignedHandler { + void handle( + ShardRouting shardRouting, + RoutingAllocation allocation, + ExistingShardsAllocator.UnassignedAllocationHandler unassignedAllocationHandler + ); + } + + /** + * Creates an implementation of GatewayAllocator that delegates its logic for allocating unassigned shards to the provided handler. + */ + private static GatewayAllocator createGatewayAllocator(AllocateUnassignedHandler allocateUnassigned) { return new GatewayAllocator() { @Override @@ -790,7 +964,7 @@ public void allocateUnassigned( RoutingAllocation allocation, UnassignedAllocationHandler unassignedAllocationHandler ) { - allocateUnassigned.accept(allocation, unassignedAllocationHandler); + allocateUnassigned.handle(shardRouting, allocation, unassignedAllocationHandler); } @Override From a902878e961dd1158abeafe80a3aefdad99efad6 Mon Sep 17 00:00:00 2001 From: Rene Groeschke Date: Wed, 6 Nov 2024 11:36:57 +0100 Subject: [PATCH 02/12] Mute ArchiveTests on windows see https://github.com/elastic/elasticsearch/issues/116299 --- .../elasticsearch/packaging/test/ArchiveTests.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveTests.java index 58472e7ba000a..34dd3976dfa52 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveTests.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveTests.java @@ -19,6 +19,7 @@ import org.elasticsearch.packaging.util.ServerUtils; import org.elasticsearch.packaging.util.Shell; import org.elasticsearch.packaging.util.Shell.Result; +import org.junit.Assume; import org.junit.BeforeClass; import java.nio.file.Files; @@ -112,6 +113,10 @@ public void test32SpecialCharactersInJdkPath() throws Exception { } public void test40AutoconfigurationNotTriggeredWhenNodeIsMeantToJoinExistingCluster() throws Exception { + Assume.assumeFalse( + "https://github.com/elastic/elasticsearch/issues/116299", + distribution.platform == Distribution.Platform.WINDOWS + ); // auto-config requires that the archive owner and the process user be the same, Platforms.onWindows(() -> sh.chown(installation.config, installation.getOwner())); FileUtils.assertPathsDoNotExist(installation.data); @@ -124,8 +129,11 @@ public void test40AutoconfigurationNotTriggeredWhenNodeIsMeantToJoinExistingClus FileUtils.rm(installation.data); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/116299") public void test41AutoconfigurationNotTriggeredWhenNodeCannotContainData() throws Exception { + Assume.assumeFalse( + "https://github.com/elastic/elasticsearch/issues/116299", + distribution.platform == Distribution.Platform.WINDOWS + ); // auto-config requires that the archive owner and the process user be the same Platforms.onWindows(() -> sh.chown(installation.config, installation.getOwner())); ServerUtils.addSettingToExistingConfiguration(installation, "node.roles", "[\"voting_only\", \"master\"]"); @@ -138,6 +146,10 @@ public void test41AutoconfigurationNotTriggeredWhenNodeCannotContainData() throw } public void test42AutoconfigurationNotTriggeredWhenNodeCannotBecomeMaster() throws Exception { + Assume.assumeFalse( + "https://github.com/elastic/elasticsearch/issues/116299", + distribution.platform == Distribution.Platform.WINDOWS + ); // auto-config requires that the archive owner and the process user be the same Platforms.onWindows(() -> sh.chown(installation.config, installation.getOwner())); ServerUtils.addSettingToExistingConfiguration(installation, "node.roles", "[\"ingest\"]"); From 009c0a56cc94f0ade36deaaa9ae99c6a47cfe94f Mon Sep 17 00:00:00 2001 From: Rene Groeschke Date: Wed, 6 Nov 2024 12:23:27 +0100 Subject: [PATCH 03/12] Mute ArchiveTests on windows (#116314) see https://github.com/elastic/elasticsearch/issues/116299 From 604722c964369449bb183776c8a9b1a19485f100 Mon Sep 17 00:00:00 2001 From: Rene Groeschke Date: Wed, 6 Nov 2024 13:23:27 +0100 Subject: [PATCH 04/12] Mute ArchiveTests for now until see https://github.com/elastic/elasticsearch/issues/116299 --- .../elasticsearch/packaging/test/ArchiveTests.java | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveTests.java index 34dd3976dfa52..4e7577be8e5fc 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveTests.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveTests.java @@ -19,7 +19,6 @@ import org.elasticsearch.packaging.util.ServerUtils; import org.elasticsearch.packaging.util.Shell; import org.elasticsearch.packaging.util.Shell.Result; -import org.junit.Assume; import org.junit.BeforeClass; import java.nio.file.Files; @@ -49,6 +48,7 @@ import static org.junit.Assume.assumeThat; import static org.junit.Assume.assumeTrue; +@PackagingTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/116299") public class ArchiveTests extends PackagingTestCase { @BeforeClass @@ -113,10 +113,6 @@ public void test32SpecialCharactersInJdkPath() throws Exception { } public void test40AutoconfigurationNotTriggeredWhenNodeIsMeantToJoinExistingCluster() throws Exception { - Assume.assumeFalse( - "https://github.com/elastic/elasticsearch/issues/116299", - distribution.platform == Distribution.Platform.WINDOWS - ); // auto-config requires that the archive owner and the process user be the same, Platforms.onWindows(() -> sh.chown(installation.config, installation.getOwner())); FileUtils.assertPathsDoNotExist(installation.data); @@ -130,10 +126,6 @@ public void test40AutoconfigurationNotTriggeredWhenNodeIsMeantToJoinExistingClus } public void test41AutoconfigurationNotTriggeredWhenNodeCannotContainData() throws Exception { - Assume.assumeFalse( - "https://github.com/elastic/elasticsearch/issues/116299", - distribution.platform == Distribution.Platform.WINDOWS - ); // auto-config requires that the archive owner and the process user be the same Platforms.onWindows(() -> sh.chown(installation.config, installation.getOwner())); ServerUtils.addSettingToExistingConfiguration(installation, "node.roles", "[\"voting_only\", \"master\"]"); @@ -146,10 +138,6 @@ public void test41AutoconfigurationNotTriggeredWhenNodeCannotContainData() throw } public void test42AutoconfigurationNotTriggeredWhenNodeCannotBecomeMaster() throws Exception { - Assume.assumeFalse( - "https://github.com/elastic/elasticsearch/issues/116299", - distribution.platform == Distribution.Platform.WINDOWS - ); // auto-config requires that the archive owner and the process user be the same Platforms.onWindows(() -> sh.chown(installation.config, installation.getOwner())); ServerUtils.addSettingToExistingConfiguration(installation, "node.roles", "[\"ingest\"]"); From b8fc7d69e974909818bd308520020c6e7bf30b4c Mon Sep 17 00:00:00 2001 From: Rene Groeschke Date: Wed, 6 Nov 2024 13:34:09 +0100 Subject: [PATCH 05/12] Mute ArchiveTests for now until (#116322) see https://github.com/elastic/elasticsearch/issues/116299 From c00feba653633b6d9f3e120206d527a64896af78 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 7 Nov 2024 00:17:52 +1100 Subject: [PATCH 06/12] Mute org.elasticsearch.packaging.test.ArchiveTests test43AutoconfigurationNotTriggeredWhenTlsAlreadyConfigured #116317 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 4fe943df3925b..86d8834450354 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -285,6 +285,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/114723 - class: org.elasticsearch.xpack.search.AsyncSearchSecurityIT issue: https://github.com/elastic/elasticsearch/issues/116293 +- class: org.elasticsearch.packaging.test.ArchiveTests + method: test43AutoconfigurationNotTriggeredWhenTlsAlreadyConfigured + issue: https://github.com/elastic/elasticsearch/issues/116317 # Examples: # From 755400b4ae334fa69540aff91052619bc20f3ffc Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 7 Nov 2024 00:25:27 +1100 Subject: [PATCH 07/12] Mute org.elasticsearch.xpack.downsample.DownsampleRestIT org.elasticsearch.xpack.downsample.DownsampleRestIT #116326 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 86d8834450354..b6e54f9f8e147 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -288,6 +288,8 @@ tests: - class: org.elasticsearch.packaging.test.ArchiveTests method: test43AutoconfigurationNotTriggeredWhenTlsAlreadyConfigured issue: https://github.com/elastic/elasticsearch/issues/116317 +- class: org.elasticsearch.xpack.downsample.DownsampleRestIT + issue: https://github.com/elastic/elasticsearch/issues/116326 # Examples: # From b8db38de06e9a427e718184cd13e75653df89dbc Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 7 Nov 2024 00:26:02 +1100 Subject: [PATCH 08/12] Mute org.elasticsearch.xpack.downsample.DownsampleWithBasicRestIT org.elasticsearch.xpack.downsample.DownsampleWithBasicRestIT #116327 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index b6e54f9f8e147..4f092fba9fa19 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -290,6 +290,8 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/116317 - class: org.elasticsearch.xpack.downsample.DownsampleRestIT issue: https://github.com/elastic/elasticsearch/issues/116326 +- class: org.elasticsearch.xpack.downsample.DownsampleWithBasicRestIT + issue: https://github.com/elastic/elasticsearch/issues/116327 # Examples: # From f7eac4719004071db6d8032860ad8ebee132425c Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 7 Nov 2024 00:26:30 +1100 Subject: [PATCH 09/12] Mute org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT #116328 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 4f092fba9fa19..0bba9068094d1 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -292,6 +292,8 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/116326 - class: org.elasticsearch.xpack.downsample.DownsampleWithBasicRestIT issue: https://github.com/elastic/elasticsearch/issues/116327 +- class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT + issue: https://github.com/elastic/elasticsearch/issues/116328 # Examples: # From d72d55a140053d1881dff530354516c7a8865ef1 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 7 Nov 2024 00:37:55 +1100 Subject: [PATCH 10/12] Mute org.elasticsearch.action.search.SearchQueryThenFetchAsyncActionTests testBottomFieldSort #116249 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 0bba9068094d1..75cbf96c9d074 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -294,6 +294,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/116327 - class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/116328 +- class: org.elasticsearch.action.search.SearchQueryThenFetchAsyncActionTests + method: testBottomFieldSort + issue: https://github.com/elastic/elasticsearch/issues/116249 # Examples: # From 1ae719f8e970a4ea074337cb2d07c2ea12c9642c Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Wed, 6 Nov 2024 14:59:10 +0100 Subject: [PATCH 11/12] Exclude "aggregations/percentiles_hdr_metric/Negative values test" from mixed cluster tests (#116311) --- muted-tests.yml | 3 --- qa/mixed-cluster/build.gradle | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 75cbf96c9d074..8de2960aa7d15 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -264,9 +264,6 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=ml/filter_crud/Test update filter} issue: https://github.com/elastic/elasticsearch/issues/116271 -- class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT - method: test {p0=aggregations/percentiles_hdr_metric/Negative values test} - issue: https://github.com/elastic/elasticsearch/issues/116276 - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=ml/get_datafeeds/Test explicit get all datafeeds} issue: https://github.com/elastic/elasticsearch/issues/116284 diff --git a/qa/mixed-cluster/build.gradle b/qa/mixed-cluster/build.gradle index 23d7af7603d56..f3fd57f3fc8ae 100644 --- a/qa/mixed-cluster/build.gradle +++ b/qa/mixed-cluster/build.gradle @@ -61,6 +61,9 @@ excludeList.add('cluster.desired_nodes/20_dry_run/Test validation works for dry // Excluded because they create dot-prefixed indices on older versions excludeList.add('indices.resolve_index/20_resolve_system_index/*') +// Excluded because the error has changed +excludeList.add('aggregations/percentiles_hdr_metric/Negative values test') + BuildParams.bwcVersions.withWireCompatible { bwcVersion, baseName -> if (bwcVersion != VersionProperties.getElasticsearchVersion()) { From d1c5efe5c3633f5894b1557027687cb10d3bcc04 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 7 Nov 2024 01:36:55 +1100 Subject: [PATCH 12/12] Mute org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT test {p0=synonyms/90_synonyms_reloading_for_synset/Reload analyzers for specific synonym set} #116332 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 8de2960aa7d15..86239120196a7 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -294,6 +294,9 @@ tests: - class: org.elasticsearch.action.search.SearchQueryThenFetchAsyncActionTests method: testBottomFieldSort issue: https://github.com/elastic/elasticsearch/issues/116249 +- class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT + method: test {p0=synonyms/90_synonyms_reloading_for_synset/Reload analyzers for specific synonym set} + issue: https://github.com/elastic/elasticsearch/issues/116332 # Examples: #