Skip to content

Commit

Permalink
Increase disruption test publish timeout to 5s (#51803)
Browse files Browse the repository at this point in the history
With the new mechanism for storing cluster state in Lucene, we store
index metadata in multiple data paths too. This causes cluster state
publication to time out too frequently with a 1s timeout, so this change
increases it to 5s. The follower check timeout is also increased to 5s, since
it too sometimes has an fsync in its timeout path, and the leader check
timeout is increased to 5s as well for symmetry.

Closes #51329
  • Loading branch information
henningandersen committed Feb 3, 2020
1 parent 8138805 commit 918dfaf
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,12 @@ List<String> startCluster(int numberOfNodes) {
}

static final Settings DEFAULT_SETTINGS = Settings.builder()
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly
.put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
.put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
.put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly
.put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
.put(JoinHelper.JOIN_TIMEOUT_SETTING.getKey(), "10s") // still long to induce failures but not too long so test won't time out
.put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly
.put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "5s") // <-- for hitting simulated network failures quickly
.put(TransportSettings.CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this
// value and the time of disruption and does not recover immediately
// when disruption is stopped. We should make sure we recover faster
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,8 @@ static ConflictMode randomMode() {
@TestIssueLogging(value = "_root:DEBUG,org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.get:TRACE," +
"org.elasticsearch.discovery:TRACE,org.elasticsearch.action.support.replication:TRACE," +
"org.elasticsearch.cluster.service:TRACE,org.elasticsearch.indices.recovery:TRACE," +
"org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE," +
"org.elasticsearch.gateway.PersistedClusterStateService:TRACE",
issueUrl = "https://github.com/elastic/elasticsearch/issues/41068,https://github.com/elastic/elasticsearch/issues/51329")
"org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE",
issueUrl = "https://github.com/elastic/elasticsearch/issues/41068")
public void testAckedIndexing() throws Exception {

final int seconds = !(TEST_NIGHTLY && rarely()) ? 1 : 5;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import org.elasticsearch.index.engine.VersionConflictEngineException;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
import org.elasticsearch.test.junit.annotations.TestIssueLogging;
import org.elasticsearch.threadpool.Scheduler;
import org.elasticsearch.threadpool.ThreadPool;

Expand Down Expand Up @@ -132,8 +131,6 @@ public class ConcurrentSeqNoVersioningIT extends AbstractDisruptionTestCase {
// multiple threads doing CAS updates.
// Wait up to 1 minute (+10s in thread to ensure it does not time out) for threads to complete previous round before initiating next
// round.
@TestIssueLogging(value = "org.elasticsearch.gateway.PersistedClusterStateService:TRACE",
issueUrl = "https://github.com/elastic/elasticsearch/issues/51329")
public void testSeqNoCASLinearizability() {
final int disruptTimeSeconds = scaledRandomIntBetween(1, 8);

Expand Down

0 comments on commit 918dfaf

Please sign in to comment.