From 8332ad44e34171adf54f676d0e0ca57a3bd10267 Mon Sep 17 00:00:00 2001 From: Henning Andersen <33268011+henningandersen@users.noreply.github.com> Date: Mon, 3 Feb 2020 13:57:18 +0100 Subject: [PATCH] Increase disruption test publish timeout to 5s (#51803) With the new mechanism for storing cluster state in Lucene, we store index metadata in multiple data paths too. This causes cluster state publishing to time out too frequently with a 1s timeout, so this change increases it to 5s. It also increases the follower check timeout to 5s, since that check sometimes has an fsync in its timeout path, and increases the leader check timeout to 5s for symmetry. Closes #51329 --- .../elasticsearch/discovery/AbstractDisruptionTestCase.java | 6 +++--- .../org/elasticsearch/discovery/ClusterDisruptionIT.java | 5 ++--- .../versioning/ConcurrentSeqNoVersioningIT.java | 3 --- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java index bd89ceb64e6df..a411b7a4fcfbc 100644 --- a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java +++ b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java @@ -123,12 +123,12 @@ List startCluster(int numberOfNodes) { } static final Settings DEFAULT_SETTINGS = Settings.builder() - .put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly + .put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly .put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly - .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly + .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly 
.put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly .put(JoinHelper.JOIN_TIMEOUT_SETTING.getKey(), "10s") // still long to induce failures but to long so test won't time out - .put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly + .put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "5s") // <-- for hitting simulated network failures quickly .put(TransportSettings.CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this // value and the time of disruption and does not recover immediately // when disruption is stop. We should make sure we recover faster diff --git a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java index d404aceb9e4b3..be87a9a456cc9 100644 --- a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java @@ -108,9 +108,8 @@ static ConflictMode randomMode() { @TestIssueLogging(value = "_root:DEBUG,org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.get:TRACE," + "org.elasticsearch.discovery:TRACE,org.elasticsearch.action.support.replication:TRACE," + "org.elasticsearch.cluster.service:TRACE,org.elasticsearch.indices.recovery:TRACE," + - "org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE," + - "org.elasticsearch.gateway.PersistedClusterStateService:TRACE", - issueUrl = "https://github.com/elastic/elasticsearch/issues/41068,https://github.com/elastic/elasticsearch/issues/51329") + "org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE", + issueUrl = "https://github.com/elastic/elasticsearch/issues/41068") public void testAckedIndexing() throws Exception { final int seconds = !(TEST_NIGHTLY && rarely()) ? 
1 : 5; diff --git a/server/src/test/java/org/elasticsearch/versioning/ConcurrentSeqNoVersioningIT.java b/server/src/test/java/org/elasticsearch/versioning/ConcurrentSeqNoVersioningIT.java index ca3c2ce80f6a6..f488bf5eca802 100644 --- a/server/src/test/java/org/elasticsearch/versioning/ConcurrentSeqNoVersioningIT.java +++ b/server/src/test/java/org/elasticsearch/versioning/ConcurrentSeqNoVersioningIT.java @@ -38,7 +38,6 @@ import org.elasticsearch.index.engine.VersionConflictEngineException; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.disruption.ServiceDisruptionScheme; -import org.elasticsearch.test.junit.annotations.TestIssueLogging; import org.elasticsearch.threadpool.Scheduler; import org.elasticsearch.threadpool.ThreadPool; @@ -131,8 +130,6 @@ public class ConcurrentSeqNoVersioningIT extends AbstractDisruptionTestCase { // multiple threads doing CAS updates. // Wait up to 1 minute (+10s in thread to ensure it does not time out) for threads to complete previous round before initiating next // round. - @TestIssueLogging(value = "org.elasticsearch.gateway.PersistedClusterStateService:TRACE", - issueUrl = "https://github.com/elastic/elasticsearch/issues/51329") public void testSeqNoCASLinearizability() { final int disruptTimeSeconds = scaledRandomIntBetween(1, 8);