diff --git a/docs/changelog/88013.yaml b/docs/changelog/88013.yaml new file mode 100644 index 0000000000000..3a4533728db70 --- /dev/null +++ b/docs/changelog/88013.yaml @@ -0,0 +1,6 @@ +pr: 88013 +summary: Periodic warning for 1-node cluster w/ seed hosts +area: Cluster Coordination +type: enhancement +issues: + - 85222 diff --git a/docs/reference/modules/discovery/discovery-settings.asciidoc b/docs/reference/modules/discovery/discovery-settings.asciidoc index e875fe61ee42d..c3410900896e6 100644 --- a/docs/reference/modules/discovery/discovery-settings.asciidoc +++ b/docs/reference/modules/discovery/discovery-settings.asciidoc @@ -201,6 +201,11 @@ Sets how long the master node waits for each cluster state update to be completely published to all nodes, unless `discovery.type` is set to `single-node`. The default value is `30s`. See <>. +`cluster.discovery_configuration_check.interval`:: +(<<static-cluster-setting,Static>>) +Sets the interval between periodic checks that log warnings about an +incorrect discovery configuration. The default value is `30s`. 
+
 `cluster.join_validation.cache_timeout`::
 (<>)
 When a node requests to join the cluster, the elected master node sends it a
diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java
index 1ab8227a14eb9..9cd1416280937 100644
--- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java
+++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java
@@ -92,6 +92,7 @@
 
 import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ID;
 import static org.elasticsearch.core.Strings.format;
+import static org.elasticsearch.discovery.SettingsBasedSeedHostsProvider.DISCOVERY_SEED_HOSTS_SETTING;
 import static org.elasticsearch.gateway.ClusterStateUpdaters.hideStateIfNotRecovered;
 import static org.elasticsearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK;
 import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY;
@@ -116,6 +117,17 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
         Setting.Property.NodeScope
     );
 
+    /**
+     * Interval between the periodic checks that warn when a single-node cluster is configured with a non-empty seed
+     * hosts list. Node-scoped; the default is 30s and the smallest accepted value is 1ms.
+     */
+    public static final Setting<TimeValue> SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING = Setting.timeSetting(
+        "cluster.discovery_configuration_check.interval",
+        TimeValue.timeValueMillis(30000),
+        TimeValue.timeValueMillis(1),
+        Setting.Property.NodeScope
+    );
+
     public static final String COMMIT_STATE_ACTION_NAME = "internal:cluster/coordination/commit_state";
 
     private final Settings settings;
@@ -140,6 +152,9 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
     private final SeedHostsResolver configuredHostsResolver;
     private final TimeValue publishTimeout;
     private final TimeValue publishInfoTimeout;
+    private final TimeValue singleNodeClusterSeedHostsCheckInterval;
+    @Nullable
+    private Scheduler.Cancellable singleNodeClusterChecker = null;
     private final PublicationTransportHandler publicationHandler;
     private final
LeaderChecker leaderChecker;
     private final FollowersChecker followersChecker;
@@ -218,6 +229,7 @@ public Coordinator(
         this.joinAccumulator = new InitialJoinAccumulator();
         this.publishTimeout = PUBLISH_TIMEOUT_SETTING.get(settings);
         this.publishInfoTimeout = PUBLISH_INFO_TIMEOUT_SETTING.get(settings);
+        this.singleNodeClusterSeedHostsCheckInterval = SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING.get(settings);
         this.random = random;
         this.electionSchedulerFactory = new ElectionSchedulerFactory(settings, random, transportService.getThreadPool());
         this.preVoteCollector = new PreVoteCollector(
@@ -739,6 +751,52 @@ private void processJoinRequest(JoinRequest joinRequest, ActionListener jo
         }
     }
 
+    /**
+     * Cancels the periodic single-node cluster check if one is scheduled, and clears the reference to it so that a
+     * later call is a no-op. The caller must hold the coordinator mutex.
+     */
+    private void cancelSingleNodeClusterChecker() {
+        assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
+        if (singleNodeClusterChecker != null) {
+            singleNodeClusterChecker.cancel();
+            singleNodeClusterChecker = null;
+        }
+    }
+
+    /**
+     * Logs a warning if the applied cluster state contains at most one node while a non-empty list of seed hosts is
+     * configured. Does nothing if there is more than one node, or if the seed hosts setting is absent or empty.
+     */
+    private void checkSingleNodeCluster() {
+        // Snapshot the applied state once so that the node-count check and the logged cluster UUID refer to the same
+        // state even if another state is applied while this check runs.
+        final var appliedState = applierState;
+        if (appliedState.nodes().size() > 1) {
+            return;
+        }
+
+        if (DISCOVERY_SEED_HOSTS_SETTING.exists(settings) == false) {
+            return;
+        }
+
+        final var seedHosts = DISCOVERY_SEED_HOSTS_SETTING.get(settings);
+        if (seedHosts.isEmpty()) {
+            // For a single-node cluster, the only acceptable setting is an empty list.
+            return;
+        }
+
+        logger.warn(
+            """
+                This node is a fully-formed single-node cluster with cluster UUID [{}], but it is configured as if to \
+                discover other nodes and form a multi-node cluster via the [{}] setting. Fully-formed clusters do not \
+                attempt to discover other nodes, and nodes with different cluster UUIDs cannot belong to the same cluster. \
+                The cluster UUID persists across restarts and can only be changed by deleting the contents of the node's \
+                data path(s). Remove the discovery configuration to suppress this message.""",
+            appliedState.metadata().clusterUUID(),
+            DISCOVERY_SEED_HOSTS_SETTING.getKey() + "=" + seedHosts
+        );
+    }
+
     void becomeCandidate(String method) {
         assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
         logger.debug(
@@ -748,6 +806,7 @@ void becomeCandidate(String method) {
             mode,
             lastKnownLeader
         );
+        cancelSingleNodeClusterChecker();
 
         if (mode != Mode.CANDIDATE) {
             final Mode prevMode = mode;
@@ -803,6 +862,13 @@ private void becomeLeader() {
 
         assert leaderChecker.leader() == null : leaderChecker.leader();
         followersChecker.updateFastResponseState(getCurrentTerm(), mode);
+
+        if (applierState.nodes().size() > 1) {
+            cancelSingleNodeClusterChecker();
+        } else if (singleNodeClusterChecker == null) {
+            singleNodeClusterChecker = transportService.getThreadPool()
+                .scheduleWithFixedDelay(this::checkSingleNodeCluster, this.singleNodeClusterSeedHostsCheckInterval, Names.SAME);
+        }
     }
 
     void becomeFollower(String method, DiscoveryNode leaderNode) {
@@ -822,6 +888,7 @@ void becomeFollower(String method, DiscoveryNode leaderNode) {
                 lastKnownLeader
             );
         }
+        cancelSingleNodeClusterChecker();
 
         final boolean restartLeaderChecker = (mode == Mode.FOLLOWER && Optional.of(leaderNode).equals(lastKnownLeader)) == false;
 
@@ -1028,6 +1095,10 @@ assert getLocalNode().equals(applierState.nodes().getMasterNode())
                     : coordinationState.get().getLastAcceptedConfiguration()
                         + " != "
                         + coordinationState.get().getLastCommittedConfiguration();
+
+                if (coordinationState.get().getLastAcceptedState().nodes().size() == 1) {
+                    assert singleNodeClusterChecker != null;
+                }
             } else if (mode == Mode.FOLLOWER) {
                 assert coordinationState.get().electionWon() == false : getLocalNode() + " is FOLLOWER so electionWon() should be false";
                 assert lastKnownLeader.isPresent() && (lastKnownLeader.get().equals(getLocalNode()) == false);
@@ -1045,6 +1116,7 @@ assert
getLocalNode().equals(applierState.nodes().getMasterNode()) assert currentPublication.map(Publication::isCommitted).orElse(true); assert preVoteCollector.getLeader().equals(lastKnownLeader.get()) : preVoteCollector; assert clusterFormationFailureHelper.isRunning() == false; + assert singleNodeClusterChecker == null; } else { assert mode == Mode.CANDIDATE; assert joinAccumulator instanceof JoinHelper.CandidateJoinAccumulator; diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index b225f6fb4ca87..89d16c01df42e 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -492,6 +492,7 @@ public void apply(Settings value, Settings current, Settings previous) { ElectionSchedulerFactory.ELECTION_DURATION_SETTING, Coordinator.PUBLISH_TIMEOUT_SETTING, Coordinator.PUBLISH_INFO_TIMEOUT_SETTING, + Coordinator.SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING, JoinValidationService.JOIN_VALIDATION_CACHE_TIMEOUT_SETTING, FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING, FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING, diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java index c481fabd83de8..275076dc5d870 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java @@ -77,6 +77,7 @@ import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES; import static org.elasticsearch.cluster.coordination.Reconfigurator.CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION; import static org.elasticsearch.discovery.PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING; +import static 
org.elasticsearch.discovery.SettingsBasedSeedHostsProvider.DISCOVERY_SEED_HOSTS_SETTING;
 import static org.elasticsearch.monitor.StatusInfo.Status.HEALTHY;
 import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY;
 import static org.elasticsearch.test.NodeRoles.nonMasterNode;
@@ -2109,6 +2110,69 @@ public void assertMatched() {
         }
     }
 
+    /**
+     * Verifies that a fully-formed single-node cluster configured with a non-empty seed hosts list logs the
+     * discovery-configuration warning within every check interval, and that the warning reports the cluster UUID of
+     * the node's last-applied cluster state.
+     */
+    @TestLogging(
+        reason = "testing warning of a single-node cluster having discovery seed hosts",
+        value = "org.elasticsearch.cluster.coordination.Coordinator:WARN"
+    )
+    public void testLogsWarningPeriodicallyIfSingleNodeClusterHasSeedHosts() throws IllegalAccessException {
+        final long warningDelayMillis;
+        final Settings settings;
+        final String fakeSeedHost = buildNewFakeTransportAddress().toString();
+        if (randomBoolean()) {
+            // Leave the check interval at its default value.
+            settings = Settings.builder().putList(DISCOVERY_SEED_HOSTS_SETTING.getKey(), fakeSeedHost).build();
+            warningDelayMillis = Coordinator.SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING.get(settings).millis();
+        } else {
+            // Configure the interval of the check under test so that the wait below matches the period of the warning.
+            warningDelayMillis = randomLongBetween(1, 100000);
+            settings = Settings.builder()
+                .put(Coordinator.SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING.getKey(), warningDelayMillis + "ms")
+                .putList(DISCOVERY_SEED_HOSTS_SETTING.getKey(), fakeSeedHost)
+                .build();
+        }
+        logger.info("--> emitting warnings every [{}ms]", warningDelayMillis);
+
+        try (Cluster cluster = new Cluster(1, true, settings)) {
+            cluster.runRandomly();
+            cluster.stabilise();
+
+            for (int i = scaledRandomIntBetween(1, 10); i >= 0; i--) {
+                final MockLogAppender mockLogAppender = new MockLogAppender();
+                try {
+                    mockLogAppender.start();
+                    Loggers.addAppender(LogManager.getLogger(Coordinator.class), mockLogAppender);
+                    mockLogAppender.addExpectation(new MockLogAppender.LoggingExpectation() {
+                        String loggedClusterUuid;
+
+                        @Override
+                        public void match(LogEvent event) {
+                            final String message = event.getMessage().getFormattedMessage();
+                            assertThat(message, startsWith("This node is a fully-formed single-node cluster with cluster UUID"));
+                            loggedClusterUuid = (String) event.getMessage().getParameters()[0];
+                        }
+
+                        @Override
+                        public void assertMatched() {
+                            final String clusterUuid = cluster.getAnyNode().getLastAppliedClusterState().metadata().clusterUUID();
+                            // Compare the UUID captured from the log event, not the expected value with itself.
+                            assertThat(loggedClusterUuid + " vs " + clusterUuid, loggedClusterUuid, equalTo(clusterUuid));
+                        }
+                    });
+                    cluster.runFor(warningDelayMillis + DEFAULT_DELAY_VARIABILITY, "waiting for warning to be emitted");
+                    mockLogAppender.assertAllExpectationsMatched();
+                } finally {
+                    Loggers.removeAppender(LogManager.getLogger(Coordinator.class), mockLogAppender);
+                    mockLogAppender.stop();
+                }
+            }
+        }
+    }
+
     @TestLogging(
         reason = "testing LagDetector and CoordinatorPublication logging",
         value = "org.elasticsearch.cluster.coordination.LagDetector:DEBUG,"