Skip to content

Commit

Permalink
Periodic warning for 1-node cluster w/ seed hosts (#88013)
Browse files Browse the repository at this point in the history
For fully-formed single-node clusters, emit a periodic warning if seed_hosts has been set to a non-empty list.

Closes #85222
  • Loading branch information
kingherc authored Jun 30, 2022
1 parent 4cdfe7e commit 50d2cf3
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 0 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/88013.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 88013
summary: Periodic warning for 1-node cluster w/ seed hosts
area: Cluster Coordination
type: enhancement
issues:
- 85222
5 changes: 5 additions & 0 deletions docs/reference/modules/discovery/discovery-settings.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,11 @@ Sets how long the master node waits for each cluster state update to be
completely published to all nodes, unless `discovery.type` is set to
`single-node`. The default value is `30s`. See <<cluster-state-publishing>>.

`cluster.discovery_configuration_check.interval`::
(<<static-cluster-setting,Static>>)
Sets the interval between periodic checks that log warnings about an
incorrect discovery configuration. The default value is `30s`.

`cluster.join_validation.cache_timeout`::
(<<static-cluster-setting,Static>>)
When a node requests to join the cluster, the elected master node sends it a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@

import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ID;
import static org.elasticsearch.core.Strings.format;
import static org.elasticsearch.discovery.SettingsBasedSeedHostsProvider.DISCOVERY_SEED_HOSTS_SETTING;
import static org.elasticsearch.gateway.ClusterStateUpdaters.hideStateIfNotRecovered;
import static org.elasticsearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK;
import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY;
Expand All @@ -116,6 +117,13 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
Setting.Property.NodeScope
);

/**
 * Node-scoped setting {@code cluster.discovery_configuration_check.interval}: the delay between successive runs of the
 * periodic check that warns when a single-node cluster has seed hosts configured. Defaults to 30000ms and may not be set
 * lower than 1ms.
 */
public static final Setting<TimeValue> SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING = Setting.timeSetting(
"cluster.discovery_configuration_check.interval",
TimeValue.timeValueMillis(30000),
TimeValue.timeValueMillis(1),
Setting.Property.NodeScope
);

public static final String COMMIT_STATE_ACTION_NAME = "internal:cluster/coordination/commit_state";

private final Settings settings;
Expand All @@ -140,6 +148,9 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
private final SeedHostsResolver configuredHostsResolver;
private final TimeValue publishTimeout;
private final TimeValue publishInfoTimeout;
// Delay between runs of the single-node-cluster seed hosts check; read once from the node settings in the constructor.
private final TimeValue singleNodeClusterSeedHostsCheckInterval;
// Handle for the scheduled periodic check. It is null whenever no check is scheduled: it is set when this node becomes
// leader of a single-node cluster and reset to null by cancelSingleNodeClusterChecker().
@Nullable
private Scheduler.Cancellable singleNodeClusterChecker = null;
private final PublicationTransportHandler publicationHandler;
private final LeaderChecker leaderChecker;
private final FollowersChecker followersChecker;
Expand Down Expand Up @@ -218,6 +229,7 @@ public Coordinator(
this.joinAccumulator = new InitialJoinAccumulator();
this.publishTimeout = PUBLISH_TIMEOUT_SETTING.get(settings);
this.publishInfoTimeout = PUBLISH_INFO_TIMEOUT_SETTING.get(settings);
this.singleNodeClusterSeedHostsCheckInterval = SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING.get(settings);
this.random = random;
this.electionSchedulerFactory = new ElectionSchedulerFactory(settings, random, transportService.getThreadPool());
this.preVoteCollector = new PreVoteCollector(
Expand Down Expand Up @@ -739,6 +751,38 @@ private void processJoinRequest(JoinRequest joinRequest, ActionListener<Void> jo
}
}

/**
 * Stops the periodic single-node cluster check if one is currently scheduled, and forgets its handle so that a new
 * check can be scheduled later. The caller must hold the coordinator mutex.
 */
private void cancelSingleNodeClusterChecker() {
    assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
    if (singleNodeClusterChecker == null) {
        // Nothing scheduled, so nothing to cancel.
        return;
    }
    singleNodeClusterChecker.cancel();
    singleNodeClusterChecker = null;
}

/**
 * Logs a warning if the last-applied cluster state contains at most one node while the seed hosts setting is explicitly
 * configured with a non-empty list. Does nothing if the cluster has more than one node, or if the setting is absent or
 * set to an empty list.
 */
private void checkSingleNodeCluster() {
    // Multi-node clusters are expected to have discovery configured.
    if (applierState.nodes().size() > 1) {
        return;
    }

    // For a single-node cluster, an absent or explicitly empty seed hosts list is acceptable.
    if (DISCOVERY_SEED_HOSTS_SETTING.exists(settings) == false || DISCOVERY_SEED_HOSTS_SETTING.get(settings).isEmpty()) {
        return;
    }

    final String clusterUuid = applierState.metadata().clusterUUID();
    final String configuredSeedHosts = DISCOVERY_SEED_HOSTS_SETTING.getKey() + "=" + DISCOVERY_SEED_HOSTS_SETTING.get(settings);
    logger.warn(
        """
            This node is a fully-formed single-node cluster with cluster UUID [{}], but it is configured as if to \
            discover other nodes and form a multi-node cluster via the [{}] setting. Fully-formed clusters do not \
            attempt to discover other nodes, and nodes with different cluster UUIDs cannot belong to the same cluster. \
            The cluster UUID persists across restarts and can only be changed by deleting the contents of the node's \
            data path(s). Remove the discovery configuration to suppress this message.""",
        clusterUuid,
        configuredSeedHosts
    );
}

void becomeCandidate(String method) {
assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
logger.debug(
Expand All @@ -748,6 +792,7 @@ void becomeCandidate(String method) {
mode,
lastKnownLeader
);
cancelSingleNodeClusterChecker();

if (mode != Mode.CANDIDATE) {
final Mode prevMode = mode;
Expand Down Expand Up @@ -803,6 +848,13 @@ private void becomeLeader() {

assert leaderChecker.leader() == null : leaderChecker.leader();
followersChecker.updateFastResponseState(getCurrentTerm(), mode);

if (applierState.nodes().size() > 1) {
cancelSingleNodeClusterChecker();
} else if (singleNodeClusterChecker == null) {
singleNodeClusterChecker = transportService.getThreadPool()
.scheduleWithFixedDelay(() -> { checkSingleNodeCluster(); }, this.singleNodeClusterSeedHostsCheckInterval, Names.SAME);
}
}

void becomeFollower(String method, DiscoveryNode leaderNode) {
Expand All @@ -822,6 +874,7 @@ void becomeFollower(String method, DiscoveryNode leaderNode) {
lastKnownLeader
);
}
cancelSingleNodeClusterChecker();

final boolean restartLeaderChecker = (mode == Mode.FOLLOWER && Optional.of(leaderNode).equals(lastKnownLeader)) == false;

Expand Down Expand Up @@ -1028,6 +1081,10 @@ assert getLocalNode().equals(applierState.nodes().getMasterNode())
: coordinationState.get().getLastAcceptedConfiguration()
+ " != "
+ coordinationState.get().getLastCommittedConfiguration();

if (coordinationState.get().getLastAcceptedState().nodes().size() == 1) {
assert singleNodeClusterChecker != null;
}
} else if (mode == Mode.FOLLOWER) {
assert coordinationState.get().electionWon() == false : getLocalNode() + " is FOLLOWER so electionWon() should be false";
assert lastKnownLeader.isPresent() && (lastKnownLeader.get().equals(getLocalNode()) == false);
Expand All @@ -1045,6 +1102,7 @@ assert getLocalNode().equals(applierState.nodes().getMasterNode())
assert currentPublication.map(Publication::isCommitted).orElse(true);
assert preVoteCollector.getLeader().equals(lastKnownLeader.get()) : preVoteCollector;
assert clusterFormationFailureHelper.isRunning() == false;
assert singleNodeClusterChecker == null;
} else {
assert mode == Mode.CANDIDATE;
assert joinAccumulator instanceof JoinHelper.CandidateJoinAccumulator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ public void apply(Settings value, Settings current, Settings previous) {
ElectionSchedulerFactory.ELECTION_DURATION_SETTING,
Coordinator.PUBLISH_TIMEOUT_SETTING,
Coordinator.PUBLISH_INFO_TIMEOUT_SETTING,
Coordinator.SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING,
JoinValidationService.JOIN_VALIDATION_CACHE_TIMEOUT_SETTING,
FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING,
FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES;
import static org.elasticsearch.cluster.coordination.Reconfigurator.CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION;
import static org.elasticsearch.discovery.PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING;
import static org.elasticsearch.discovery.SettingsBasedSeedHostsProvider.DISCOVERY_SEED_HOSTS_SETTING;
import static org.elasticsearch.monitor.StatusInfo.Status.HEALTHY;
import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY;
import static org.elasticsearch.test.NodeRoles.nonMasterNode;
Expand Down Expand Up @@ -2109,6 +2110,61 @@ public void assertMatched() {
}
}

/**
 * Verifies that a stabilised single-node cluster with a non-empty seed hosts list logs the "fully-formed single-node
 * cluster" warning at least once per check interval, and that the warning reports the cluster's actual UUID. The
 * interval is either left at its default or set to a random value via the coordinator's check-interval setting.
 */
@TestLogging(
    reason = "testing warning of a single-node cluster having discovery seed hosts",
    value = "org.elasticsearch.cluster.coordination.Coordinator:WARN"
)
public void testLogsWarningPeriodicallyIfSingleNodeClusterHasSeedHosts() throws IllegalAccessException {
    final long warningDelayMillis;
    final Settings settings;
    final String fakeSeedHost = buildNewFakeTransportAddress().toString();
    if (randomBoolean()) {
        // Rely on the default check interval.
        settings = Settings.builder().putList(DISCOVERY_SEED_HOSTS_SETTING.getKey(), fakeSeedHost).build();
        warningDelayMillis = Coordinator.SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING.get(settings).millis();
    } else {
        // Configure the interval of the check under test, so the wait below matches what the coordinator actually uses.
        warningDelayMillis = randomLongBetween(1, 100000);
        settings = Settings.builder()
            .put(Coordinator.SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING.getKey(), warningDelayMillis + "ms")
            .putList(DISCOVERY_SEED_HOSTS_SETTING.getKey(), fakeSeedHost)
            .build();
    }
    logger.info("--> emitting warnings every [{}ms]", warningDelayMillis);

    try (Cluster cluster = new Cluster(1, true, settings)) {
        cluster.runRandomly();
        cluster.stabilise();

        for (int i = scaledRandomIntBetween(1, 10); i >= 0; i--) {
            final MockLogAppender mockLogAppender = new MockLogAppender();
            try {
                mockLogAppender.start();
                Loggers.addAppender(LogManager.getLogger(Coordinator.class), mockLogAppender);
                mockLogAppender.addExpectation(new MockLogAppender.LoggingExpectation() {
                    // Stays null until a warning is observed, so assertMatched() fails if nothing was logged.
                    String loggedClusterUuid;

                    @Override
                    public void match(LogEvent event) {
                        final String message = event.getMessage().getFormattedMessage();
                        assertThat(message, startsWith("This node is a fully-formed single-node cluster with cluster UUID"));
                        loggedClusterUuid = (String) event.getMessage().getParameters()[0];
                    }

                    @Override
                    public void assertMatched() {
                        final String clusterUuid = cluster.getAnyNode().getLastAppliedClusterState().metadata().clusterUUID();
                        // Compare what was logged against the real UUID (not the real UUID against itself).
                        assertThat(loggedClusterUuid + " vs " + clusterUuid, loggedClusterUuid, equalTo(clusterUuid));
                    }
                });
                // One full interval plus the scheduling variability is enough for at least one check to run.
                cluster.runFor(warningDelayMillis + DEFAULT_DELAY_VARIABILITY, "waiting for warning to be emitted");
                mockLogAppender.assertAllExpectationsMatched();
            } finally {
                Loggers.removeAppender(LogManager.getLogger(Coordinator.class), mockLogAppender);
                mockLogAppender.stop();
            }
        }
    }
}

@TestLogging(
reason = "testing LagDetector and CoordinatorPublication logging",
value = "org.elasticsearch.cluster.coordination.LagDetector:DEBUG,"
Expand Down

0 comments on commit 50d2cf3

Please sign in to comment.