Skip to content

Commit

Permalink
Provide option to allow writes when master is down (#60605)
Browse files Browse the repository at this point in the history
Elasticsearch currently blocks writes by default when a master is unavailable. The cluster.no_master_block setting allows
a user to change this behavior to also block reads when a master is unavailable. This PR introduces a new mode that still
allows writes when a master is unavailable. Writes will continue to work as long as they require neither routing table
changes nor dynamic mapping updates, as both of these require the master for consistency.

Eventually we should switch the default of cluster.no_master_block to this new mode.
  • Loading branch information
ywelsch authored Aug 12, 2020
1 parent b2ee474 commit 0b517dd
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 6 deletions.
14 changes: 10 additions & 4 deletions docs/reference/modules/discovery/discovery-settings.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,20 @@ for `discovery.seed_hosts` is `["127.0.0.1", "[::1]"]`. See <<unicast.hosts>>.
obtains the seed node addresses from the `discovery.seed_hosts` setting.

`discovery.type`::

Specifies whether {es} should form a multiple-node cluster. By default, {es}
discovers other nodes when forming a cluster and allows other nodes to join
the cluster later. If `discovery.type` is set to `single-node`, {es} forms
a single-node cluster and suppresses the timeout set by
`cluster.publish.timeout`. For more information about when you might use
this setting, see <<single-node-discovery>>.

`cluster.initial_master_nodes`::

Sets the initial set of master-eligible nodes in a brand-new cluster. By
default this list is empty, meaning that this node expects to join a cluster
that has already been bootstrapped. See <<initial_master_nodes>>.

[discrete]
==== Expert settings

Expand Down Expand Up @@ -199,7 +199,7 @@ or may become unstable or intolerant of certain failures.

[[no-master-block]]`cluster.no_master_block`::
Specifies which operations are rejected when there is no active master in a
cluster. This setting has two valid values:
cluster. This setting has three valid values:
+
--
`all`::: All operations on the node (both read and write operations) are rejected.
Expand All @@ -211,6 +211,12 @@ based on the last known cluster configuration. This situation may result in
partial reads of stale data as this node may be isolated from the rest of the
cluster.

`metadata_write`::: Only metadata write operations (e.g. mapping updates,
routing table changes) are rejected, but regular indexing operations continue
to work. Read and write operations succeed, based on the last known cluster
configuration. This situation may result in partial reads of stale data as
this node may be isolated from the rest of the cluster.

[NOTE]
===============================
* The `cluster.no_master_block` setting doesn't apply to nodes-based APIs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,21 @@
package org.elasticsearch.cluster;

import org.elasticsearch.action.ActionRequestBuilder;
import org.elasticsearch.action.UnavailableShardsException;
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction;
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest;
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.AutoCreateIndex;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentFactory;
Expand All @@ -49,6 +54,7 @@
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertExists;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
Expand Down Expand Up @@ -261,4 +267,83 @@ public void testNoMasterActionsWriteMasterBlock() throws Exception {

internalCluster().clearDisruptionScheme(true);
}

/**
 * Checks which operations still work with {@code cluster.no_master_block} set to {@code metadata_write} once the nodes holding the
 * shard copies of {@code test1} have been disconnected from the third node and carry the no-master block: gets, searches, updates and
 * indexing of documents without new fields succeed on those nodes, while a dynamic mapping update and the creation of a new index
 * fail with {@link MasterNotDiscoveredException}. All requests sent through the disconnected node are expected to fail.
 */
public void testNoMasterActionsMetadataWriteMasterBlock() throws Exception {
// Select the "metadata_write" block and shorten the dynamic mapping update timeout to 100ms
// (presumably so the failing mapping update below gives up quickly - TODO confirm).
Settings settings = Settings.builder()
.put(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write")
.put(MappingUpdatedAction.INDICES_MAPPING_DYNAMIC_TIMEOUT_SETTING.getKey(), "100ms")
.build();

final List<String> nodes = internalCluster().startNodes(3, settings);

// One shard with one replica: the index occupies two of the three nodes.
prepareCreate("test1").setSettings(
Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)).get();
client().admin().cluster().prepareHealth("_all").setWaitForGreenStatus().get();
client().prepareIndex("test1").setId("1").setSource("field", "value1").get();
refresh();

ensureGreen("test1");

ClusterStateResponse clusterState = client().admin().cluster().prepareState().get();
logger.info("Cluster state:\n{}", clusterState.getState());

// Names of the nodes that hold an active copy of shard 0 of "test1".
final List<String> nodesWithShards = clusterState.getState().routingTable().index("test1").shard(0).activeShards().stream()
.map(shardRouting -> shardRouting.currentNodeId()).map(nodeId -> clusterState.getState().nodes().resolveNode(nodeId))
.map(DiscoveryNode::getName).collect(Collectors.toList());

// Exclude the shard-holding nodes from the voting configuration.
// NOTE(review): presumably this leaves the remaining node as the only possible master, so that isolating it
// takes the master away from the shard-holding nodes - confirm.
client().execute(AddVotingConfigExclusionsAction.INSTANCE,
new AddVotingConfigExclusionsRequest(nodesWithShards.toArray(new String[0]))).get();
ensureGreen("test1");

// The node without a shard copy is the one that gets cut off from the others.
String partitionedNode = nodes.stream().filter(n -> nodesWithShards.contains(n) == false).findFirst().get();

// Disconnect partitionedNode from the shard-holding nodes.
final NetworkDisruption disruptionScheme
= new NetworkDisruption(new NetworkDisruption.TwoPartitions(Collections.singleton(partitionedNode),
new HashSet<>(nodesWithShards)), NetworkDisruption.DISCONNECT);
internalCluster().setDisruptionScheme(disruptionScheme);
disruptionScheme.startDisrupting();

// Wait until every shard-holding node shows the no-master block in its local cluster state.
assertBusy(() -> {
for (String node : nodesWithShards) {
ClusterState state = client(node).admin().cluster().prepareState().setLocal(true).get().getState();
assertTrue(state.blocks().hasGlobalBlockWithId(NoMasterBlockService.NO_MASTER_BLOCK_ID));
}
});

// Gets succeed through a shard-holding node ...
GetResponse getResponse = client(randomFrom(nodesWithShards)).prepareGet("test1", "1").get();
assertExists(getResponse);

// ... but fail through the partitioned node.
expectThrows(Exception.class, () -> client(partitionedNode).prepareGet("test1", "1").get());

// Searches succeed through a shard-holding node ...
SearchResponse countResponse = client(randomFrom(nodesWithShards)).prepareSearch("test1")
.setAllowPartialSearchResults(true).setSize(0).get();
assertHitCount(countResponse, 1L);

// ... but fail through the partitioned node.
expectThrows(Exception.class, () -> client(partitionedNode).prepareSearch("test1")
.setAllowPartialSearchResults(true).setSize(0).get());

// Updating an existing field succeeds through a shard-holding node.
TimeValue timeout = TimeValue.timeValueMillis(200);
client(randomFrom(nodesWithShards)).prepareUpdate("test1", "1")
.setDoc(Requests.INDEX_CONTENT_TYPE, "field", "value2").setTimeout(timeout).get();

// The same update through the partitioned node fails with UnavailableShardsException.
expectThrows(UnavailableShardsException.class, () -> client(partitionedNode).prepareUpdate("test1", "1")
.setDoc(Requests.INDEX_CONTENT_TYPE, "field", "value2").setTimeout(timeout).get());

// Indexing an empty document (no new fields) succeeds through a shard-holding node.
client(randomFrom(nodesWithShards)).prepareIndex("test1").setId("1")
.setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get();

// dynamic mapping updates fail
expectThrows(MasterNotDiscoveredException.class, () -> client(randomFrom(nodesWithShards)).prepareIndex("test1").setId("1")
.setSource(XContentFactory.jsonBuilder().startObject().field("new_field", "value").endObject())
.setTimeout(timeout).get());

// dynamic index creation fails
expectThrows(MasterNotDiscoveredException.class, () -> client(randomFrom(nodesWithShards)).prepareIndex("test2").setId("1")
.setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get());

// Indexing through the partitioned node fails with UnavailableShardsException.
expectThrows(UnavailableShardsException.class, () -> client(partitionedNode).prepareIndex("test1").setId("1")
.setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get());

// Clear the network disruption set up above.
internalCluster().clearDisruptionScheme(true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ public void testIsolateMasterAndVerifyClusterStateConsensus() throws Exception {
* Verify that the proper block is applied when nodes lose their master
*/
public void testVerifyApiBlocksDuringPartition() throws Exception {
internalCluster().startNodes(3);
internalCluster().startNodes(3, Settings.builder()
.putNull(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey()).build());

// Makes sure that the get request can be executed on each node locally:
assertAcked(prepareCreate("test").setSettings(Settings.builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ public class NoMasterBlockService {
RestStatus.SERVICE_UNAVAILABLE, EnumSet.of(ClusterBlockLevel.WRITE, ClusterBlockLevel.METADATA_WRITE));
public static final ClusterBlock NO_MASTER_BLOCK_ALL = new ClusterBlock(NO_MASTER_BLOCK_ID, "no master", true, true, false,
RestStatus.SERVICE_UNAVAILABLE, ClusterBlockLevel.ALL);
/**
 * No-master block that covers only {@link ClusterBlockLevel#METADATA_WRITE}. It reuses {@code NO_MASTER_BLOCK_ID} and is the
 * block that {@code parseNoMasterBlock} returns for the setting value {@code "metadata_write"}.
 */
public static final ClusterBlock NO_MASTER_BLOCK_METADATA_WRITES = new ClusterBlock(NO_MASTER_BLOCK_ID, "no master", true, false, false,
RestStatus.SERVICE_UNAVAILABLE, EnumSet.of(ClusterBlockLevel.METADATA_WRITE));

public static final Setting<ClusterBlock> NO_MASTER_BLOCK_SETTING =
new Setting<>("cluster.no_master_block", "write", NoMasterBlockService::parseNoMasterBlock,
Expand All @@ -52,8 +54,10 @@ private static ClusterBlock parseNoMasterBlock(String value) {
return NO_MASTER_BLOCK_ALL;
case "write":
return NO_MASTER_BLOCK_WRITES;
case "metadata_write":
return NO_MASTER_BLOCK_METADATA_WRITES;
default:
throw new IllegalArgumentException("invalid no-master block [" + value + "], must be one of [all, write]");
throw new IllegalArgumentException("invalid no-master block [" + value + "], must be one of [all, write, metadata_write]");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING;
import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ALL;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_METADATA_WRITES;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_SETTING;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES;
import static org.elasticsearch.cluster.coordination.Reconfigurator.CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION;
Expand Down Expand Up @@ -1045,6 +1046,10 @@ public void testAppliesNoMasterBlockAllIfConfigured() {
testAppliesNoMasterBlock("all", NO_MASTER_BLOCK_ALL);
}

/**
 * Runs the shared no-master-block scenario with the {@code "metadata_write"} setting value and expects
 * {@code NO_MASTER_BLOCK_METADATA_WRITES} to be the block that is applied.
 */
public void testAppliesNoMasterBlockMetadataWritesIfConfigured() {
    final String configuredValue = "metadata_write";
    testAppliesNoMasterBlock(configuredValue, NO_MASTER_BLOCK_METADATA_WRITES);
}

private void testAppliesNoMasterBlock(String noMasterBlockSetting, ClusterBlock expectedBlock) {
try (Cluster cluster = new Cluster(3)) {
cluster.runRandomly();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.test.ESTestCase;

import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ALL;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_METADATA_WRITES;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_SETTING;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES;
import static org.elasticsearch.common.settings.ClusterSettings.BUILT_IN_CLUSTER_SETTINGS;
Expand Down Expand Up @@ -53,6 +54,11 @@ public void testBlocksAllIfConfiguredBySetting() {
assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_ALL));
}

/**
 * Creates the service with the no-master-block setting set to {@code "metadata_write"} and checks that it hands out the
 * {@code NO_MASTER_BLOCK_METADATA_WRITES} instance.
 */
public void testBlocksMetadataWritesIfConfiguredBySetting() {
    final String settingKey = NO_MASTER_BLOCK_SETTING.getKey();
    createService(
        Settings.builder()
            .put(settingKey, "metadata_write")
            .build());

    // The service must return the shared constant itself, not merely an equal block.
    assertThat(
        noMasterBlockService.getNoMasterBlock(),
        sameInstance(NO_MASTER_BLOCK_METADATA_WRITES));
}

public void testRejectsInvalidSetting() {
expectThrows(IllegalArgumentException.class, () ->
createService(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "unknown").build()));
Expand All @@ -64,5 +70,8 @@ public void testSettingCanBeUpdated() {

clusterSettings.applySettings(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "write").build());
assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_WRITES));

clusterSettings.applySettings(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write").build());
assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_METADATA_WRITES));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags;
import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags.Flag;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.index.IndexingPressure;
import org.elasticsearch.action.support.replication.TransportReplicationAction;
import org.elasticsearch.client.Client;
Expand Down Expand Up @@ -393,6 +394,9 @@ public InternalTestCluster(
RandomNumbers.randomIntBetween(random, 1, 5));
builder.put(RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_OPERATIONS_SETTING.getKey(),
RandomNumbers.randomIntBetween(random, 1, 4));
// TODO: currently we only randomize "cluster.no_master_block" between "write" and "metadata_write", as "all" is fragile
// and fails shards when a master abdicates, which breaks many tests.
builder.put(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey(), randomFrom(random,"write", "metadata_write"));
defaultSettings = builder.build();
executor = EsExecutors.newScaling("internal_test_cluster_executor", 0, Integer.MAX_VALUE, 0, TimeUnit.SECONDS,
EsExecutors.daemonThreadFactory("test_" + clusterName), new ThreadContext(Settings.EMPTY));
Expand Down

0 comments on commit 0b517dd

Please sign in to comment.