From 1f35be19e6eb620ac1d2fb3316211298b3182426 Mon Sep 17 00:00:00 2001 From: Henning Andersen Date: Mon, 19 Apr 2021 09:30:48 +0200 Subject: [PATCH 1/7] Frozen default cache size This commit adds a default cache size to frozen tier of the greater of 90% and total disk size minus 100 GB. Additionally, configuring a frozen cache is now warned against on nodes with multiple data paths. --- .../searchable-snapshots/index.asciidoc | 35 ++++-- .../common/unit/RelativeByteSizeValue.java | 101 +++++++++++++++++ .../unit/RelativeByteSizeValueTests.java | 68 +++++++++++ .../BaseSearchableSnapshotsIntegTestCase.java | 8 +- .../SearchableSnapshots.java | 1 + .../cache/FrozenCacheInfoNodeAction.java | 2 +- .../cache/shared/FrozenCacheService.java | 107 ++++++++++++++++-- .../AbstractSearchableSnapshotsTestCase.java | 17 ++- .../cache/shared/FrozenCacheServiceTests.java | 98 +++++++++++++++- 9 files changed, 411 insertions(+), 26 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/common/unit/RelativeByteSizeValue.java create mode 100644 server/src/test/java/org/elasticsearch/common/unit/RelativeByteSizeValueTests.java diff --git a/docs/reference/searchable-snapshots/index.asciidoc b/docs/reference/searchable-snapshots/index.asciidoc index ac4438964d571..ba5028a6bde6d 100644 --- a/docs/reference/searchable-snapshots/index.asciidoc +++ b/docs/reference/searchable-snapshots/index.asciidoc @@ -155,27 +155,46 @@ for search. Many searches will need to retrieve only a small subset of the total shard data before returning results. To mount a searchable snapshot index with the shared cache mount option, you -must configure the `xpack.searchable.snapshot.shared_cache.size` setting to -reserve space for the cache on one or more nodes. Indices mounted with the -shared cache mount option are only allocated to nodes that have this setting -configured. +must have one or more nodes with a shared cache available. 
By default, +dedicated frozen data tier nodes (nodes with the `data_frozen` role and no other +data roles) have a shared cache configured using the greater of 90% of total +disk space and total disk space minus a headroom of 100GB. + +Using a dedicated frozen tier is highly recommended for production use. If you +do not have a dedicated frozen tier, you must configure the +`xpack.searchable.snapshot.shared_cache.size` setting to reserve space for the +cache on one or more nodes. Indices mounted with the shared cache mount option +are only allocated to nodes that have a shared cache. [[searchable-snapshots-shared-cache]] `xpack.searchable.snapshot.shared_cache.size`:: +(<>) +The size of the space reserved for the shared cache, either specified as a +percentage of total disk space or an absolute <>. +Defaults to 90% of total disk space on dedicated frozen data tier nodes, +otherwise `0b`. + +`xpack.searchable.snapshot.shared_cache.size.max_headroom`:: (<>, <>) -The size of the space reserved for the shared cache. Defaults to `0b`, meaning -that the node has no shared cache. +For dedicated frozen tier nodes, the max headroom to maintain. Defaults to 100GB +on dedicated frozen tier nodes when +`xpack.searchable.snapshot.shared_cache.size` is not explicitly set, otherwise +-1 (not set). Can only be set when `xpack.searchable.snapshot.shared_cache.size` +is set as a percentage. -You can configure the setting in `elasticsearch.yml`: +You can configure the settings in `elasticsearch.yml`: [source,yaml] ---- xpack.searchable.snapshot.shared_cache.size: 4TB ---- -IMPORTANT: You can only configure this setting on nodes with the +IMPORTANT: You can only configure these settings on nodes with the <> role. +IMPORTANT: You can only configure these settings on nodes that use a single +<> + You can set `xpack.searchable.snapshot.shared_cache.size` to any size between a couple of gigabytes up to 90% of available disk space. 
We only recommend larger sizes if you use the node exclusively on a frozen tier or for searchable diff --git a/server/src/main/java/org/elasticsearch/common/unit/RelativeByteSizeValue.java b/server/src/main/java/org/elasticsearch/common/unit/RelativeByteSizeValue.java new file mode 100644 index 0000000000000..9e263864d3423 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/unit/RelativeByteSizeValue.java @@ -0,0 +1,101 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.unit; + +import org.elasticsearch.ElasticsearchParseException; + +/** + * A byte size value that allows specification using either of: + * 1. Absolute value (200GB for instance) + * 2. Relative percentage value (95%) + * 3. Relative ratio value (0.95) + */ +public class RelativeByteSizeValue { + + public static final String MAX_HEADROOM_PREFIX = "max_headroom="; + private final ByteSizeValue absolute; + private final RatioValue ratio; + + public RelativeByteSizeValue(ByteSizeValue absolute) { + this.absolute = absolute; + this.ratio = null; + } + + public RelativeByteSizeValue(RatioValue ratio) { + this.absolute = null; + this.ratio = ratio; + } + + public boolean isAbsolute() { + return absolute != null; + } + + public ByteSizeValue getAbsolute() { + return absolute; + } + + RatioValue getRatio() { + return ratio; + } + + /** + * Calculate the size to use, optionally catering for a max headroom. + * @param total the total size to use + * @param maxHeadroom the max headroom to cater for or null (or -1) to ignore. 
+ * @return the size to use + */ + public ByteSizeValue calculateValue(ByteSizeValue total, ByteSizeValue maxHeadroom) { + if (ratio != null) { + long ratioBytes = (long) Math.ceil(ratio.getAsRatio() * total.getBytes()); + if (maxHeadroom != null && maxHeadroom.getBytes() != -1) { + return ByteSizeValue.ofBytes(Math.max(ratioBytes, total.getBytes() - maxHeadroom.getBytes())); + } else { + return ByteSizeValue.ofBytes(ratioBytes); + } + } else { + return absolute; + } + } + + public boolean isNonZeroSize() { + if (ratio != null) { + return ratio.getAsRatio() > 0.0d; + } else { + return absolute.getBytes() > 0; + } + } + + public static RelativeByteSizeValue parseRelativeByteSizeValue(String value, String settingName) { + try { + RatioValue ratio = RatioValue.parseRatioValue(value); + if (ratio.getAsPercent() != 0.0d || value.endsWith("%")) { + return new RelativeByteSizeValue(ratio); + } else { + return new RelativeByteSizeValue(ByteSizeValue.ZERO); + } + } catch (ElasticsearchParseException e) { + // ignore, see if it parses as bytes + } + try { + return new RelativeByteSizeValue(ByteSizeValue.parseBytesSizeValue(value, settingName)); + // todo: fix NumberFormatException case in ByteSizeValue. + } catch (NumberFormatException | ElasticsearchParseException e) { + throw new ElasticsearchParseException("unable to parse [{}={}] as either percentage or bytes", e, + settingName, value); + } + } + + public String getStringRep() { + if (ratio != null) { + return ratio.toString(); + } else { + return absolute.getStringRep(); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/common/unit/RelativeByteSizeValueTests.java b/server/src/test/java/org/elasticsearch/common/unit/RelativeByteSizeValueTests.java new file mode 100644 index 0000000000000..35bc8a05a9258 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/common/unit/RelativeByteSizeValueTests.java @@ -0,0 +1,68 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.unit; + +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +public class RelativeByteSizeValueTests extends ESTestCase { + + public void testPercentage() { + double value = randomIntBetween(0, 100); + RelativeByteSizeValue parsed = RelativeByteSizeValue.parseRelativeByteSizeValue(value + "%", "test"); + assertThat(parsed.getRatio().getAsPercent(), equalTo(value)); + assertThat(parsed.isAbsolute(), is(false)); + assertThat(parsed.isNonZeroSize(), is(value != 0.0d)); + } + + public void testRatio() { + double value = (double) randomIntBetween(1, 100) / 100; + RelativeByteSizeValue parsed = RelativeByteSizeValue.parseRelativeByteSizeValue(Double.toString(value), "test"); + assertThat(parsed.getRatio().getAsRatio(), + equalTo(value)); + assertThat(parsed.isAbsolute(), is(false)); + assertThat(parsed.isNonZeroSize(), is(true)); + } + + public void testAbsolute() { + ByteSizeValue value = new ByteSizeValue(between(0, 100), randomFrom(ByteSizeUnit.values())); + RelativeByteSizeValue parsed = RelativeByteSizeValue.parseRelativeByteSizeValue(value.getStringRep(), "test"); + assertThat(parsed.getAbsolute(), equalTo(value)); + assertThat(parsed.isAbsolute(), is(true)); + assertThat(parsed.isNonZeroSize(), is(value.getBytes() != 0)); + } + + public void testZeroAbsolute() { + RelativeByteSizeValue parsed = RelativeByteSizeValue.parseRelativeByteSizeValue("0", "test"); + assertThat(parsed.getAbsolute(), equalTo(ByteSizeValue.ZERO)); + assertThat(parsed.isAbsolute(), is(true)); + assertThat(parsed.isNonZeroSize(), is(false)); + 
} + + public void testFail() { + assertFail("a", "unable to parse [test=a] as either percentage or bytes"); + assertFail("%", "unable to parse [test=%] as either percentage or bytes"); + assertFail("GB", "unable to parse [test=GB] as either percentage or bytes"); + assertFail("GB%", "unable to parse [test=GB%] as either percentage or bytes"); + assertFail("100 NB", "unable to parse [test=100 NB] as either percentage or bytes"); + assertFail("100 %a", "unable to parse [test=100 %a] as either percentage or bytes"); + assertFail("100 GB a", "unable to parse [test=100 GB a] as either percentage or bytes"); + assertFail("0,1 GB", "unable to parse [test=0,1 GB] as either percentage or bytes"); + assertFail("0,1", "unable to parse [test=0,1] as either percentage or bytes"); + } + + private void assertFail(String value, String failure) { + ElasticsearchParseException exception = expectThrows(ElasticsearchParseException.class, + () -> RelativeByteSizeValue.parseRelativeByteSizeValue(value, "test")); + assertThat(exception.getMessage(), equalTo(failure)); + } +} diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java index eb457a5394030..e5f67b2ddfcca 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java @@ -22,6 +22,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.unit.RatioValue; import 
org.elasticsearch.index.IndexSettings; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; @@ -84,9 +85,10 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey(), rarely() ? randomBoolean() - ? new ByteSizeValue(randomIntBetween(1, 10), ByteSizeUnit.KB) - : new ByteSizeValue(randomIntBetween(1, 1000), ByteSizeUnit.BYTES) - : new ByteSizeValue(randomIntBetween(1, 10), ByteSizeUnit.MB) + ? new ByteSizeValue(randomIntBetween(1, 10), ByteSizeUnit.KB).getStringRep() + : new ByteSizeValue(randomIntBetween(1, 1000), ByteSizeUnit.BYTES).getStringRep() + : randomBoolean() ? new ByteSizeValue(randomIntBetween(1, 10), ByteSizeUnit.MB).getStringRep() + : new RatioValue(randomDoubleBetween(0.0d, 1.0d, false)).toString() // only use up to 1% disk to be friendly. ); } builder.put( diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java index 611ccb95c7474..bec7f9c9c5841 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java @@ -298,6 +298,7 @@ public List> getSettings() { CacheService.SNAPSHOT_CACHE_SYNC_SHUTDOWN_TIMEOUT, SearchableSnapshotEnableAllocationDecider.SEARCHABLE_SNAPSHOTS_ALLOCATE_ON_ROLLING_RESTART, FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING, + FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING, FrozenCacheService.SNAPSHOT_CACHE_REGION_SIZE_SETTING, FrozenCacheService.SHARED_CACHE_RANGE_SIZE_SETTING, FrozenCacheService.FROZEN_CACHE_RECOVERY_RANGE_SIZE_SETTING, diff --git 
a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/FrozenCacheInfoNodeAction.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/FrozenCacheInfoNodeAction.java index 5a51df645f263..55b544072b044 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/FrozenCacheInfoNodeAction.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/FrozenCacheInfoNodeAction.java @@ -53,7 +53,7 @@ public static class TransportAction extends HandledTransportAction 0); + response = new FrozenCacheInfoResponse(SNAPSHOT_CACHE_SIZE_SETTING.get(settings).isNonZeroSize()); } @Override diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java index 07248c9aee163..10bdb0623416f 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java @@ -13,6 +13,7 @@ import org.elasticsearch.Assertions; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.StepListener; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lease.Releasables; @@ -21,6 +22,7 @@ import org.elasticsearch.common.settings.SettingsException; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.unit.RelativeByteSizeValue; 
import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.AbstractAsyncTask; import org.elasticsearch.common.util.concurrent.AbstractRefCounted; @@ -28,6 +30,8 @@ import org.elasticsearch.common.util.concurrent.KeyedLock; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.monitor.fs.FsInfo; +import org.elasticsearch.monitor.fs.FsProbe; import org.elasticsearch.xpack.searchablesnapshots.cache.common.CacheKey; import org.elasticsearch.xpack.searchablesnapshots.cache.common.SparseFileTracker; import org.elasticsearch.node.NodeRoleSettings; @@ -38,6 +42,8 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -89,23 +95,29 @@ private static Setting.Validator getPageSizeAlignedByteSizeValueV }; } - public static final Setting SNAPSHOT_CACHE_SIZE_SETTING = new Setting<>( - SHARED_CACHE_SETTINGS_PREFIX + "size", - ByteSizeValue.ZERO.getStringRep(), - s -> ByteSizeValue.parseBytesSizeValue(s, SHARED_CACHE_SETTINGS_PREFIX + "size"), - new Setting.Validator() { + public static final Setting SNAPSHOT_CACHE_SIZE_SETTING = new Setting<>( + new Setting.SimpleKey(SHARED_CACHE_SETTINGS_PREFIX + "size"), + (settings) -> { + if (isDedicatedFrozen(settings)) { + return "90%"; + } else { + return ByteSizeValue.ZERO.getStringRep(); + } + }, + s -> RelativeByteSizeValue.parseRelativeByteSizeValue(s, SHARED_CACHE_SETTINGS_PREFIX + "size"), + new Setting.Validator<>() { @Override - public void validate(final ByteSizeValue value) { + public void validate(final RelativeByteSizeValue value) { } @Override - public void validate(final ByteSizeValue value, final Map, Object> settings) { - if (value.getBytes() == -1) { + public void validate(final RelativeByteSizeValue value, final Map, Object> settings) { + if 
(value.isAbsolute() && value.getAbsolute().getBytes() == -1) { throw new SettingsException("setting [{}] must be non-negative", SHARED_CACHE_SETTINGS_PREFIX + "size"); } - if (value.getBytes() > 0) { + if (value.isNonZeroSize()) { @SuppressWarnings("unchecked") final List roles = (List) settings.get(NodeRoleSettings.NODE_ROLES_SETTING); if (DataTier.isFrozenNode(Set.of(roles.toArray(DiscoveryNodeRole[]::new))) == false) { @@ -129,6 +141,55 @@ public Iterator> settings() { Setting.Property.NodeScope ); + public static final Setting SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING = new Setting<>( + new Setting.SimpleKey(SHARED_CACHE_SETTINGS_PREFIX + "size.max_headroom"), + (settings) -> { + if (SNAPSHOT_CACHE_SIZE_SETTING.exists(settings) == false && isDedicatedFrozen(settings)) { + return "100GB"; + } + + return "-1"; + }, + (s) -> ByteSizeValue.parseBytesSizeValue(s, SHARED_CACHE_SETTINGS_PREFIX + "size.max_headroom"), + new Setting.Validator<>() { + private final Collection> dependencies = List.of(SNAPSHOT_CACHE_SIZE_SETTING); + + @Override + public Iterator> settings() { + return dependencies.iterator(); + } + + @Override + public void validate(ByteSizeValue value) { + // ignore + } + + @Override + public void validate(ByteSizeValue value, Map, Object> settings, boolean isPresent) { + if (isPresent && value.getBytes() != -1) { + RelativeByteSizeValue sizeValue = (RelativeByteSizeValue) settings.get(SNAPSHOT_CACHE_SIZE_SETTING); + if (sizeValue.isAbsolute()) { + throw new SettingsException( + "setting [{}] cannot be specified for absolute [{}={}]", + SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.getKey(), + SNAPSHOT_CACHE_SIZE_SETTING.getKey(), + sizeValue.getStringRep() + ); + } + } + } + }, + Setting.Property.NodeScope + ); + + private static boolean isDedicatedFrozen(Settings settings) { + Set rolesFromSettings = DiscoveryNode.getRolesFromSettings(settings); + return rolesFromSettings.contains(DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE) + && rolesFromSettings.stream() + 
.filter(DiscoveryNodeRole::canContainData) + .anyMatch(r -> r != DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE) == false; + } + public static final Setting FROZEN_CACHE_RECOVERY_RANGE_SIZE_SETTING = Setting.byteSizeSetting( SHARED_CACHE_SETTINGS_PREFIX + "recovery_range_size", new ByteSizeValue(128, ByteSizeUnit.KB), // default @@ -185,7 +246,27 @@ public Iterator> settings() { @SuppressWarnings({ "unchecked", "rawtypes" }) public FrozenCacheService(NodeEnvironment environment, Settings settings, ThreadPool threadPool) { this.currentTimeSupplier = threadPool::relativeTimeInMillis; - final long cacheSize = SNAPSHOT_CACHE_SIZE_SETTING.get(settings).getBytes(); + RelativeByteSizeValue relativeCacheSize = SNAPSHOT_CACHE_SIZE_SETTING.get(settings); + if (relativeCacheSize.isNonZeroSize() && environment.nodeDataPaths().length != 1) { + // todo: throw instead in a follow-up. + logger.warn( + "cannot specify [" + + SNAPSHOT_CACHE_SIZE_SETTING.getKey() + + "=" + + relativeCacheSize + + "] " + + "on this node with multiple data paths [" + + Arrays.toString(environment.nodePaths()) + + "]" + ); + } + FsInfo.Path pathInfo; + try { + pathInfo = FsProbe.getFSInfo(environment.nodePaths()[0]); + } catch (IOException e) { + throw new IllegalStateException("unable to probe size of filesystem [" + environment.nodePaths()[0] + "]"); + } + final long cacheSize = calculateCacheSize(settings, pathInfo); final long regionSize = SNAPSHOT_CACHE_REGION_SIZE_SETTING.get(settings).getBytes(); final int numRegions = Math.toIntExact(cacheSize / regionSize); keyMapping = new ConcurrentHashMap<>(); @@ -216,6 +297,12 @@ public FrozenCacheService(NodeEnvironment environment, Settings settings, Thread this.recoveryRangeSize = FROZEN_CACHE_RECOVERY_RANGE_SIZE_SETTING.get(settings); } + static long calculateCacheSize(Settings settings, FsInfo.Path pathInfo) { + return SNAPSHOT_CACHE_SIZE_SETTING.get(settings) + .calculateValue(pathInfo.getTotal(), SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.get(settings)) + 
.getBytes(); + } + public int getRangeSize() { return toIntBytes(rangeSize.getBytes()); } diff --git a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/AbstractSearchableSnapshotsTestCase.java b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/AbstractSearchableSnapshotsTestCase.java index d41c5487b4ed1..43f97dcb37b67 100644 --- a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/AbstractSearchableSnapshotsTestCase.java +++ b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/AbstractSearchableSnapshotsTestCase.java @@ -31,7 +31,9 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; +import org.elasticsearch.env.TestEnvironment; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.store.Store; import org.elasticsearch.indices.recovery.RecoveryState; @@ -88,6 +90,7 @@ public abstract class AbstractSearchableSnapshotsTestCase extends ESIndexInputTe protected ThreadPool threadPool; protected ClusterService clusterService; protected NodeEnvironment nodeEnvironment; + protected NodeEnvironment singlePathNodeEnvironment; @Before public void setUpTest() throws Exception { @@ -101,11 +104,13 @@ public void setUpTest() throws Exception { threadPool = new TestThreadPool(getTestName(), SearchableSnapshots.executorBuilders(Settings.EMPTY)); clusterService = ClusterServiceUtils.createClusterService(threadPool, node, CLUSTER_SETTINGS); nodeEnvironment = newNodeEnvironment(); + singlePathNodeEnvironment = newSinglePathNodeEnvironment(); } @After public void tearDownTest() throws Exception { IOUtils.close(nodeEnvironment, clusterService); + IOUtils.close(singlePathNodeEnvironment, clusterService); 
assertTrue(ThreadPool.terminate(threadPool, 30L, TimeUnit.SECONDS)); } @@ -157,7 +162,7 @@ protected FrozenCacheService randomFrozenCacheService() { if (randomBoolean()) { cacheSettings.put(FrozenCacheService.FROZEN_CACHE_RECOVERY_RANGE_SIZE_SETTING.getKey(), randomFrozenCacheRangeSize()); } - return new FrozenCacheService(nodeEnvironment, cacheSettings.build(), threadPool); + return new FrozenCacheService(singlePathNodeEnvironment, cacheSettings.build(), threadPool); } /** @@ -174,7 +179,7 @@ protected CacheService createCacheService(final ByteSizeValue cacheRangeSize) { protected FrozenCacheService createFrozenCacheService(final ByteSizeValue cacheSize, final ByteSizeValue cacheRangeSize) { return new FrozenCacheService( - nodeEnvironment, + singlePathNodeEnvironment, Settings.builder() .put(FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey(), cacheSize) .put(FrozenCacheService.SHARED_CACHE_RANGE_SIZE_SETTING.getKey(), cacheRangeSize) @@ -183,6 +188,14 @@ protected FrozenCacheService createFrozenCacheService(final ByteSizeValue cacheS ); } + private NodeEnvironment newSinglePathNodeEnvironment() throws IOException { + Settings build = Settings.builder() + .put(buildEnvSettings(Settings.EMPTY)) + .putList(Environment.PATH_DATA_SETTING.getKey(), createTempDir().toAbsolutePath().toString()) + .build(); + return new NodeEnvironment(build, TestEnvironment.newEnvironment(build)); + } + /** * Returns a random shard data path for the specified {@link ShardId}. The returned path can be located on any of the data node paths. 
*/ diff --git a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheServiceTests.java b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheServiceTests.java index 7a88b118c5f94..f7cb0eb0176af 100644 --- a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheServiceTests.java +++ b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheServiceTests.java @@ -12,9 +12,13 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.SettingsException; import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.unit.RatioValue; +import org.elasticsearch.common.unit.RelativeByteSizeValue; +import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.env.TestEnvironment; import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.monitor.fs.FsInfo; import org.elasticsearch.node.NodeRoleSettings; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -23,8 +27,12 @@ import org.elasticsearch.xpack.searchablesnapshots.cache.shared.FrozenCacheService.CacheFileRegion; import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; import static org.elasticsearch.node.Node.NODE_NAME_SETTING; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.notNullValue; @@ -199,8 +207,9 @@ public void testDecay() throws IOException { } public void testCacheSizeRejectedOnNonFrozenNodes() { + String cacheSize = randomBoolean() ? 
new ByteSizeValue(size(500)).getStringRep() : new RatioValue(between(1, 100)).toString(); final Settings settings = Settings.builder() - .put(FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey(), new ByteSizeValue(size(500)).getStringRep()) + .put(FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey(), cacheSize) .put(FrozenCacheService.SNAPSHOT_CACHE_REGION_SIZE_SETTING.getKey(), new ByteSizeValue(size(100)).getStringRep()) .putList(NodeRoleSettings.NODE_ROLES_SETTING.getKey(), DiscoveryNodeRole.DATA_HOT_NODE_ROLE.roleName()) .build(); @@ -216,12 +225,97 @@ public void testCacheSizeRejectedOnNonFrozenNodes() { "setting [" + FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey() + "] to be positive [" - + new ByteSizeValue(size(500)).getStringRep() + + cacheSize + "] is only permitted on nodes with the data_frozen role, roles are [data_hot]" ) ); } + public void testDedicateFrozenCacheSizeDefaults() { + final Settings settings = Settings.builder() + .putList(NodeRoleSettings.NODE_ROLES_SETTING.getKey(), DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE.roleName()) + .build(); + + RelativeByteSizeValue relativeCacheSize = FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.get(settings); + assertThat(relativeCacheSize.isAbsolute(), is(false)); + assertThat(relativeCacheSize.isNonZeroSize(), is(true)); + assertThat(relativeCacheSize.calculateValue(ByteSizeValue.ofBytes(10000), null), equalTo(ByteSizeValue.ofBytes(9000))); + assertThat(FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.get(settings), equalTo(ByteSizeValue.ofGb(100))); + } + + public void testNotDedicatedFrozenCacheSizeDefaults() { + final Settings settings = Settings.builder() + .putList( + NodeRoleSettings.NODE_ROLES_SETTING.getKey(), + Sets.union( + Set.of( + randomFrom( + DiscoveryNodeRole.DATA_HOT_NODE_ROLE, + DiscoveryNodeRole.DATA_COLD_NODE_ROLE, + DiscoveryNodeRole.DATA_WARM_NODE_ROLE, + DiscoveryNodeRole.DATA_CONTENT_NODE_ROLE + ) + ), + new HashSet<>( + randomSubsetOf( + between(0, 3), + 
DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE, + DiscoveryNodeRole.INGEST_ROLE, + DiscoveryNodeRole.MASTER_ROLE + ) + ) + ).stream().map(DiscoveryNodeRole::roleName).collect(Collectors.toList()) + ) + .build(); + + RelativeByteSizeValue relativeCacheSize = FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.get(settings); + assertThat(relativeCacheSize.isNonZeroSize(), is(false)); + assertThat(relativeCacheSize.isAbsolute(), is(true)); + assertThat(relativeCacheSize.getAbsolute(), equalTo(ByteSizeValue.ZERO)); + assertThat(FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.get(settings), equalTo(ByteSizeValue.ofBytes(-1))); + } + + public void testMaxHeadroomRejectedForAbsoluteCacheSize() { + String cacheSize = new ByteSizeValue(size(500)).getStringRep(); + final Settings settings = Settings.builder() + .put(FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey(), cacheSize) + .put(FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.getKey(), new ByteSizeValue(size(100)).getStringRep()) + .putList(NodeRoleSettings.NODE_ROLES_SETTING.getKey(), DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE.roleName()) + .build(); + final IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.get(settings) + ); + assertThat(e.getCause(), notNullValue()); + assertThat(e.getCause(), instanceOf(SettingsException.class)); + assertThat( + e.getCause().getMessage(), + is( + "setting [" + + FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.getKey() + + "] cannot be specified for absolute [" + + FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey() + + "=" + + cacheSize + + "]" + ) + ); + } + + public void testCalculateCacheSize() { + FsInfo.Path smallPathInfo = new FsInfo.Path("ignored", null, 10000, 0, 0); + FsInfo.Path largePathInfo = new FsInfo.Path("ignored", null, ByteSizeValue.ofTb(10).getBytes(), 0, 0); + assertThat(FrozenCacheService.calculateCacheSize(Settings.EMPTY, smallPathInfo), 
equalTo(0L)); + final Settings settings = Settings.builder() + .putList(NodeRoleSettings.NODE_ROLES_SETTING.getKey(), DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE.roleName()) + .build(); + assertThat(FrozenCacheService.calculateCacheSize(settings, smallPathInfo), equalTo(9000L)); + assertThat( + FrozenCacheService.calculateCacheSize(settings, largePathInfo), + equalTo(largePathInfo.getTotal().getBytes() - ByteSizeValue.ofGb(100).getBytes()) + ); + } + private static CacheKey generateCacheKey() { return new CacheKey( randomAlphaOfLength(10), From ced0293968ef823fa73419db231b9d094d981e12 Mon Sep 17 00:00:00 2001 From: Henning Andersen Date: Mon, 19 Apr 2021 17:33:28 +0200 Subject: [PATCH 2/7] Add separate flood stage limit for frozen Dedicated frozen nodes can survive less headroom than other data nodes. This commit introduces a separate flood stage threshold for frozen as well as an accompanying max_headroom setting that caps the amount of free space necessary on frozen. Relates #71844 --- .../modules/cluster/disk_allocator.asciidoc | 13 +++++ .../allocation/DiskThresholdMonitor.java | 24 +++++---- .../allocation/DiskThresholdSettings.java | 53 +++++++++++++++++++ .../common/settings/ClusterSettings.java | 2 + .../common/unit/RelativeByteSizeValue.java | 2 +- .../allocation/DiskThresholdMonitorTests.java | 18 ++++++- 6 files changed, 99 insertions(+), 13 deletions(-) diff --git a/docs/reference/modules/cluster/disk_allocator.asciidoc b/docs/reference/modules/cluster/disk_allocator.asciidoc index 3405167966b25..84661cbc40698 100644 --- a/docs/reference/modules/cluster/disk_allocator.asciidoc +++ b/docs/reference/modules/cluster/disk_allocator.asciidoc @@ -113,6 +113,19 @@ PUT /my-index-000001/_settings -- // end::cluster-routing-flood-stage-tag[] +[[cluster-routing-flood-stage-frozen]] +// tag::cluster-routing-flood-stage-tag[] +`cluster.routing.allocation.disk.watermark.flood_stage.frozen` {ess-icon}:: +(<>) +Controls the flood stage watermark for frozen nodes, which 
defaults to 95%. + +`cluster.routing.allocation.disk.watermark.flood_stage.frozen.max_headroom` {ess-icon}:: +(<>) +Controls the max headroom for the flood stage watermark for frozen nodes. +Defaults to 20GB when `cluster.routing.allocation.disk.watermark.flood_stage.frozen` +is not explicitly set. This caps the amount of free space required on frozen +nodes. + `cluster.info.update.interval`:: (<>) How often {es} should check on disk usage for each node in the diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitor.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitor.java index 747b1e63721c9..a81a16137639b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitor.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitor.java @@ -33,6 +33,7 @@ import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.set.Sets; import java.util.ArrayList; @@ -136,13 +137,20 @@ public void onNewInfo(ClusterInfo info) { final DiskUsage usage = entry.value; final RoutingNode routingNode = routingNodes.node(node); - if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdFloodStage().getBytes() || - usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdFloodStage()) { - if (isFrozenOnlyNode(routingNode)) { + if (isFrozenOnlyNode(routingNode)) { + ByteSizeValue total = ByteSizeValue.ofBytes(usage.getTotalBytes()); + long frozenFloodStageThreshold = diskThresholdSettings.getFreeBytesThresholdFrozenFloodStage(total).getBytes(); + if (usage.getFreeBytes() < frozenFloodStageThreshold) { logger.warn("flood stage disk watermark [{}] exceeded on {}", - 
diskThresholdSettings.describeFloodStageThreshold(), usage); - continue; + diskThresholdSettings.describeFrozenFloodStageThreshold(total), usage); } + // skip checking high/low watermarks for frozen nodes, since frozen shards have only insignificant local storage footprint + // and this allows us to use more of the local storage for cache. + continue; + } + + if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdFloodStage().getBytes() || + usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdFloodStage()) { nodesOverLowThreshold.add(node); nodesOverHighThreshold.add(node); @@ -162,12 +170,6 @@ public void onNewInfo(ClusterInfo info) { continue; } - if (isFrozenOnlyNode(routingNode)) { - // skip checking high/low watermarks for frozen nodes, since frozen shards have only insignificant local storage footprint - // and this allows us to use more of the local storage for cache. - continue; - } - if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes() || usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdHigh()) { diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdSettings.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdSettings.java index 7dea4a0c1bad9..9fb207831527d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdSettings.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdSettings.java @@ -16,6 +16,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.RatioValue; +import org.elasticsearch.common.unit.RelativeByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import java.util.Iterator; @@ -45,6 +46,21 @@ public class DiskThresholdSettings { (s) -> validWatermarkSetting(s, 
"cluster.routing.allocation.disk.watermark.flood_stage"), new FloodStageValidator(), Setting.Property.Dynamic, Setting.Property.NodeScope); + public static final Setting CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_FROZEN_SETTING = + new Setting<>("cluster.routing.allocation.disk.watermark.flood_stage.frozen", "95%", + (s) -> RelativeByteSizeValue.parseRelativeByteSizeValue(s, "cluster.routing.allocation.disk.watermark.flood_stage.frozen"), + Setting.Property.Dynamic, Setting.Property.NodeScope); + public static final Setting CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_FROZEN_MAX_HEADROOM_SETTING = + new Setting<>("cluster.routing.allocation.disk.watermark.flood_stage.frozen.max_headroom", + (settings) -> { + if (CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_FROZEN_SETTING.exists(settings)) { + return "-1"; + } else { + return "20GB"; + } + }, + (s) -> ByteSizeValue.parseBytesSizeValue(s, "cluster.routing.allocation.disk.watermark.flood_stage.frozen.max_headroom"), + Setting.Property.Dynamic, Setting.Property.NodeScope); public static final Setting CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING = Setting.positiveTimeSetting("cluster.routing.allocation.disk.reroute_interval", TimeValue.timeValueSeconds(60), Setting.Property.Dynamic, Setting.Property.NodeScope); @@ -59,6 +75,8 @@ public class DiskThresholdSettings { private volatile TimeValue rerouteInterval; private volatile Double freeDiskThresholdFloodStage; private volatile ByteSizeValue freeBytesThresholdFloodStage; + private volatile RelativeByteSizeValue frozenFloodStage; + private volatile ByteSizeValue frozenFloodStageMaxHeadroom; static { assert Version.CURRENT.major == Version.V_7_0_0.major + 1; // this check is unnecessary in v9 @@ -69,6 +87,7 @@ public class DiskThresholdSettings { } } + public DiskThresholdSettings(Settings settings, ClusterSettings clusterSettings) { final String lowWatermark = CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.get(settings); final String highWatermark = 
CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.get(settings); @@ -76,11 +95,16 @@ public DiskThresholdSettings(Settings settings, ClusterSettings clusterSettings) setHighWatermark(highWatermark); setLowWatermark(lowWatermark); setFloodStage(floodStage); + setFrozenFloodStage(CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_FROZEN_SETTING.get(settings)); + setFrozenFloodStageMaxHeadroom(CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_FROZEN_MAX_HEADROOM_SETTING.get(settings)); this.rerouteInterval = CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.get(settings); this.enabled = CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.get(settings); clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING, this::setLowWatermark); clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING, this::setHighWatermark); clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING, this::setFloodStage); + clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_FROZEN_SETTING, this::setFrozenFloodStage); + clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_FROZEN_MAX_HEADROOM_SETTING, + this::setFrozenFloodStageMaxHeadroom); clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING, this::setRerouteInterval); clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING, this::setEnabled); } @@ -242,6 +266,15 @@ private void setFloodStage(String floodStageRaw) { CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.getKey()); } + private void setFrozenFloodStage(RelativeByteSizeValue floodStage) { + this.frozenFloodStage = floodStage; + } + + private void setFrozenFloodStageMaxHeadroom(ByteSizeValue maxHeadroom) { + this.frozenFloodStageMaxHeadroom = maxHeadroom; + } + + /** * Gets the raw (uninterpreted) low 
watermark value as found in the settings. */ @@ -280,6 +313,14 @@ public ByteSizeValue getFreeBytesThresholdFloodStage() { return freeBytesThresholdFloodStage; } + public ByteSizeValue getFreeBytesThresholdFrozenFloodStage(ByteSizeValue total) { + // flood stage bytes are reversed compared to percentage, so we special handle it. + RelativeByteSizeValue frozenFloodStage = this.frozenFloodStage; + if (frozenFloodStage.isAbsolute()) { + return frozenFloodStage.getAbsolute(); + } + return ByteSizeValue.ofBytes(total.getBytes() - frozenFloodStage.calculateValue(total, frozenFloodStageMaxHeadroom).getBytes()); + } public boolean isEnabled() { return enabled; } @@ -306,6 +347,18 @@ String describeFloodStageThreshold() { : freeBytesThresholdFloodStage.toString(); } + String describeFrozenFloodStageThreshold(ByteSizeValue total) { + ByteSizeValue maxHeadroom = this.frozenFloodStageMaxHeadroom; + RelativeByteSizeValue floodStage = this.frozenFloodStage; + if (floodStage.isAbsolute()) { + return floodStage.getStringRep(); + } else if (floodStage.calculateValue(total, maxHeadroom).equals(floodStage.calculateValue(total, null))) { + return Strings.format1Decimals(floodStage.getRatio().getAsPercent(), "%"); + } else { + return "max_headroom=" + maxHeadroom; + } + } + /** * Attempts to parse the watermark into a percentage, returning 100.0% if * it cannot be parsed. 
diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index a72ea9507dcdf..01013cfbe093c 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -221,6 +221,8 @@ public void apply(Settings value, Settings current, Settings previous) { DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING, DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING, DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING, + DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_FROZEN_SETTING, + DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_FROZEN_MAX_HEADROOM_SETTING, DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING, DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING, SameShardAllocationDecider.CLUSTER_ROUTING_ALLOCATION_SAME_HOST_SETTING, diff --git a/server/src/main/java/org/elasticsearch/common/unit/RelativeByteSizeValue.java b/server/src/main/java/org/elasticsearch/common/unit/RelativeByteSizeValue.java index 9e263864d3423..bbb28958b9d3a 100644 --- a/server/src/main/java/org/elasticsearch/common/unit/RelativeByteSizeValue.java +++ b/server/src/main/java/org/elasticsearch/common/unit/RelativeByteSizeValue.java @@ -40,7 +40,7 @@ public ByteSizeValue getAbsolute() { return absolute; } - RatioValue getRatio() { + public RatioValue getRatio() { return ratio; } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorTests.java index 147db23f755b0..380446a3f0273 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorTests.java 
+++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorTests.java @@ -32,6 +32,7 @@ import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.MockLogAppender; @@ -450,7 +451,12 @@ long sizeOfRelocatingShards(RoutingNode routingNode, DiskUsage diskUsage, Cluste final ImmutableOpenMap.Builder allDisksOkBuilder; allDisksOkBuilder = ImmutableOpenMap.builder(); allDisksOkBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(15, 100))); - allDisksOkBuilder.put("frozen", new DiskUsage("frozen", "frozen", "/foo/bar", 100, between(15, 100))); + if (randomBoolean()) { + allDisksOkBuilder.put("frozen", new DiskUsage("frozen", "frozen", "/foo/bar", 100, between(15, 100))); + } else { + allDisksOkBuilder.put("frozen", new DiskUsage("frozen", "frozen", "/foo/bar", ByteSizeValue.ofGb(1000).getBytes(), + (randomBoolean() ? ByteSizeValue.ofGb(between(20, 1000)) : ByteSizeValue.ofGb(between(20, 50))).getBytes())); + } final ImmutableOpenMap allDisksOk = allDisksOkBuilder.build(); final ImmutableOpenMap.Builder aboveLowWatermarkBuilder = ImmutableOpenMap.builder(); @@ -475,6 +481,13 @@ long sizeOfRelocatingShards(RoutingNode routingNode, DiskUsage diskUsage, Cluste frozenAboveFloodStageWatermarkBuilder.put("frozen", new DiskUsage("frozen", "frozen", "/foo/bar", 100, between(0, 4))); final ImmutableOpenMap frozenAboveFloodStageWatermark = frozenAboveFloodStageWatermarkBuilder.build(); + final ImmutableOpenMap.Builder frozenAboveFloodStageMaxHeadroomBuilder = ImmutableOpenMap.builder(); + // node1 is below low watermark, so no logging from it. 
+ frozenAboveFloodStageMaxHeadroomBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(15, 100))); + frozenAboveFloodStageMaxHeadroomBuilder.put("frozen", new DiskUsage("frozen", "frozen", "/foo/bar", + ByteSizeValue.ofGb(1000).getBytes(), ByteSizeValue.ofGb(between(0, 19)).getBytes())); + final ImmutableOpenMap frozenAboveFloodStageMaxHeadroom = frozenAboveFloodStageMaxHeadroomBuilder.build(); + assertNoLogging(monitor, allDisksOk); assertSingleInfoMessage(monitor, aboveLowWatermark, @@ -546,6 +559,9 @@ long sizeOfRelocatingShards(RoutingNode routingNode, DiskUsage diskUsage, Cluste assertRepeatedWarningMessages(monitor, frozenAboveFloodStageWatermark, "flood stage disk watermark [95%] exceeded on *frozen*"); + assertRepeatedWarningMessages(monitor, frozenAboveFloodStageMaxHeadroom, + "flood stage disk watermark [max_headroom=20gb] exceeded on *frozen*"); + assertNoLogging(monitor, allDisksOk); } From 84f5a3781bd1e6849395de831c585a6d0a118288 Mon Sep 17 00:00:00 2001 From: Henning Andersen Date: Mon, 19 Apr 2021 17:49:46 +0200 Subject: [PATCH 3/7] checkstyle after merge --- .../searchablesnapshots/cache/shared/FrozenCacheService.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java index 182f48e0b31d7..10439a0c0ce2a 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java @@ -38,8 +38,6 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.core.DataTier; import org.elasticsearch.xpack.searchablesnapshots.cache.common.ByteRange; -import 
org.elasticsearch.xpack.searchablesnapshots.cache.common.CacheKey; -import org.elasticsearch.xpack.searchablesnapshots.cache.common.SparseFileTracker; import java.io.IOException; import java.io.UncheckedIOException; From c6bbdda02d293d5bb3820ce4258f2319216a959f Mon Sep 17 00:00:00 2001 From: Henning Andersen Date: Tue, 20 Apr 2021 09:40:09 +0200 Subject: [PATCH 4/7] Remove overlap to #71896 --- .../cache/shared/FrozenCacheService.java | 15 --------------- .../AbstractSearchableSnapshotsTestCase.java | 17 ++--------------- 2 files changed, 2 insertions(+), 30 deletions(-) diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java index 10439a0c0ce2a..5b037774347ed 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java @@ -42,7 +42,6 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Iterator; import java.util.List; @@ -257,20 +256,6 @@ private static boolean isDedicatedFrozen(Settings settings) { @SuppressWarnings({ "unchecked", "rawtypes" }) public FrozenCacheService(NodeEnvironment environment, Settings settings, ThreadPool threadPool) { this.currentTimeSupplier = threadPool::relativeTimeInMillis; - RelativeByteSizeValue relativeCacheSize = SNAPSHOT_CACHE_SIZE_SETTING.get(settings); - if (relativeCacheSize.isNonZeroSize() && environment.nodeDataPaths().length != 1) { - // todo: throw instead in a follow-up. 
- logger.warn( - "cannot specify [" - + SNAPSHOT_CACHE_SIZE_SETTING.getKey() - + "=" - + relativeCacheSize - + "] " - + "on this node with multiple data paths [" - + Arrays.toString(environment.nodePaths()) - + "]" - ); - } FsInfo.Path pathInfo; try { pathInfo = FsProbe.getFSInfo(environment.nodePaths()[0]); diff --git a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/AbstractSearchableSnapshotsTestCase.java b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/AbstractSearchableSnapshotsTestCase.java index 43f97dcb37b67..d41c5487b4ed1 100644 --- a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/AbstractSearchableSnapshotsTestCase.java +++ b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/AbstractSearchableSnapshotsTestCase.java @@ -31,9 +31,7 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.core.internal.io.IOUtils; -import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; -import org.elasticsearch.env.TestEnvironment; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.store.Store; import org.elasticsearch.indices.recovery.RecoveryState; @@ -90,7 +88,6 @@ public abstract class AbstractSearchableSnapshotsTestCase extends ESIndexInputTe protected ThreadPool threadPool; protected ClusterService clusterService; protected NodeEnvironment nodeEnvironment; - protected NodeEnvironment singlePathNodeEnvironment; @Before public void setUpTest() throws Exception { @@ -104,13 +101,11 @@ public void setUpTest() throws Exception { threadPool = new TestThreadPool(getTestName(), SearchableSnapshots.executorBuilders(Settings.EMPTY)); clusterService = ClusterServiceUtils.createClusterService(threadPool, node, CLUSTER_SETTINGS); nodeEnvironment = newNodeEnvironment(); - 
singlePathNodeEnvironment = newSinglePathNodeEnvironment(); } @After public void tearDownTest() throws Exception { IOUtils.close(nodeEnvironment, clusterService); - IOUtils.close(singlePathNodeEnvironment, clusterService); assertTrue(ThreadPool.terminate(threadPool, 30L, TimeUnit.SECONDS)); } @@ -162,7 +157,7 @@ protected FrozenCacheService randomFrozenCacheService() { if (randomBoolean()) { cacheSettings.put(FrozenCacheService.FROZEN_CACHE_RECOVERY_RANGE_SIZE_SETTING.getKey(), randomFrozenCacheRangeSize()); } - return new FrozenCacheService(singlePathNodeEnvironment, cacheSettings.build(), threadPool); + return new FrozenCacheService(nodeEnvironment, cacheSettings.build(), threadPool); } /** @@ -179,7 +174,7 @@ protected CacheService createCacheService(final ByteSizeValue cacheRangeSize) { protected FrozenCacheService createFrozenCacheService(final ByteSizeValue cacheSize, final ByteSizeValue cacheRangeSize) { return new FrozenCacheService( - singlePathNodeEnvironment, + nodeEnvironment, Settings.builder() .put(FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey(), cacheSize) .put(FrozenCacheService.SHARED_CACHE_RANGE_SIZE_SETTING.getKey(), cacheRangeSize) @@ -188,14 +183,6 @@ protected FrozenCacheService createFrozenCacheService(final ByteSizeValue cacheS ); } - private NodeEnvironment newSinglePathNodeEnvironment() throws IOException { - Settings build = Settings.builder() - .put(buildEnvSettings(Settings.EMPTY)) - .putList(Environment.PATH_DATA_SETTING.getKey(), createTempDir().toAbsolutePath().toString()) - .build(); - return new NodeEnvironment(build, TestEnvironment.newEnvironment(build)); - } - /** * Returns a random shard data path for the specified {@link ShardId}. The returned path can be located on any of the data node paths. */ From da33a82ca59858a0b09e8aa5842e3478250a7808 Mon Sep 17 00:00:00 2001 From: Henning Andersen Date: Tue, 20 Apr 2021 09:50:10 +0200 Subject: [PATCH 5/7] Slightly more friendly on large full disks. 
--- .../BaseSearchableSnapshotsIntegTestCase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java index e5f67b2ddfcca..6492c9fd04e5a 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java @@ -88,7 +88,7 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { ? new ByteSizeValue(randomIntBetween(1, 10), ByteSizeUnit.KB).getStringRep() : new ByteSizeValue(randomIntBetween(1, 1000), ByteSizeUnit.BYTES).getStringRep() : randomBoolean() ? new ByteSizeValue(randomIntBetween(1, 10), ByteSizeUnit.MB).getStringRep() - : new RatioValue(randomDoubleBetween(0.0d, 1.0d, false)).toString() // only use up to 1% disk to be friendly. + : new RatioValue(randomDoubleBetween(0.0d, 0.1d, false)).toString() // only use up to 0.1% disk to be friendly. 
); } builder.put( From 4d168b4473871c35f8466bc448842d2bbc2e8bea Mon Sep 17 00:00:00 2001 From: Henning Andersen Date: Tue, 20 Apr 2021 10:04:55 +0200 Subject: [PATCH 6/7] Remove most overlap to #71844 --- .../searchable-snapshots/index.asciidoc | 35 ++----- .../BaseSearchableSnapshotsIntegTestCase.java | 8 +- .../SearchableSnapshots.java | 1 - .../cache/FrozenCacheInfoNodeAction.java | 2 +- .../cache/shared/FrozenCacheService.java | 96 +++--------------- .../cache/shared/FrozenCacheServiceTests.java | 98 +------------------ 6 files changed, 26 insertions(+), 214 deletions(-) diff --git a/docs/reference/searchable-snapshots/index.asciidoc b/docs/reference/searchable-snapshots/index.asciidoc index ba5028a6bde6d..ac4438964d571 100644 --- a/docs/reference/searchable-snapshots/index.asciidoc +++ b/docs/reference/searchable-snapshots/index.asciidoc @@ -155,46 +155,27 @@ for search. Many searches will need to retrieve only a small subset of the total shard data before returning results. To mount a searchable snapshot index with the shared cache mount option, you -must have one or more nodes with a shared cache available. By default, -dedicated frozen data tier nodes (nodes with the `data_frozen` role and no other -data roles) have a shared cache configured using the greater of 90% of total -disk space and total disk space subtracted a headroom of 100GB. - -Using a dedicated frozen tier is highly recommended for production use. If you -do not have a dedicated frozen tier, you must configure the -`xpack.searchable.snapshot.shared_cache.size` setting to reserve space for the -cache on one or more nodes. Indices mounted with the shared cache mount option -are only allocated to nodes that have a shared cache. +must configure the `xpack.searchable.snapshot.shared_cache.size` setting to +reserve space for the cache on one or more nodes. Indices mounted with the +shared cache mount option are only allocated to nodes that have this setting +configured. 
[[searchable-snapshots-shared-cache]] `xpack.searchable.snapshot.shared_cache.size`:: -(<>) -The size of the space reserved for the shared cache, either specified as a -percentage of total disk space or an absolute <>. -Defaults to 90% of total disk space on dedicated frozen data tier nodes, -otherwise `0b`. - -`xpack.searchable.snapshot.shared_cache.size.max_headroom`:: (<>, <>) -For dedicated frozen tier nodes, the max headroom to maintain. Defaults to 100GB -on dedicated frozen tier nodes when -`xpack.searchable.snapshot.shared_cache.size` is not explicitly set, otherwise --1 (not set). Can only be set when `xpack.searchable.snapshot.shared_cache.size` -is set as a percentage. +The size of the space reserved for the shared cache. Defaults to `0b`, meaning +that the node has no shared cache. -You can configure the settings in `elasticsearch.yml`: +You can configure the setting in `elasticsearch.yml`: [source,yaml] ---- xpack.searchable.snapshot.shared_cache.size: 4TB ---- -IMPORTANT: You can only configure these settings on nodes with the +IMPORTANT: You can only configure this setting on nodes with the <> role. -IMPORTANT: You can only configure these settings on nodes that use a single -<> - You can set `xpack.searchable.snapshot.shared_cache.size` to any size between a couple of gigabytes up to 90% of available disk space. 
We only recommend larger sizes if you use the node exclusively on a frozen tier or for searchable diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java index 6492c9fd04e5a..eb457a5394030 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/BaseSearchableSnapshotsIntegTestCase.java @@ -22,7 +22,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.unit.RatioValue; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; @@ -85,10 +84,9 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey(), rarely() ? randomBoolean() - ? new ByteSizeValue(randomIntBetween(1, 10), ByteSizeUnit.KB).getStringRep() - : new ByteSizeValue(randomIntBetween(1, 1000), ByteSizeUnit.BYTES).getStringRep() - : randomBoolean() ? new ByteSizeValue(randomIntBetween(1, 10), ByteSizeUnit.MB).getStringRep() - : new RatioValue(randomDoubleBetween(0.0d, 0.1d, false)).toString() // only use up to 0.1% disk to be friendly. + ? 
new ByteSizeValue(randomIntBetween(1, 10), ByteSizeUnit.KB) + : new ByteSizeValue(randomIntBetween(1, 1000), ByteSizeUnit.BYTES) + : new ByteSizeValue(randomIntBetween(1, 10), ByteSizeUnit.MB) ); } builder.put( diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java index 0ed725c89dcdd..ae46f9eca4eda 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java @@ -300,7 +300,6 @@ public List> getSettings() { CacheService.SNAPSHOT_CACHE_SYNC_SHUTDOWN_TIMEOUT, SearchableSnapshotEnableAllocationDecider.SEARCHABLE_SNAPSHOTS_ALLOCATE_ON_ROLLING_RESTART, FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING, - FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING, FrozenCacheService.SNAPSHOT_CACHE_REGION_SIZE_SETTING, FrozenCacheService.SHARED_CACHE_RANGE_SIZE_SETTING, FrozenCacheService.FROZEN_CACHE_RECOVERY_RANGE_SIZE_SETTING, diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/FrozenCacheInfoNodeAction.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/FrozenCacheInfoNodeAction.java index 55b544072b044..5a51df645f263 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/FrozenCacheInfoNodeAction.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/FrozenCacheInfoNodeAction.java @@ -53,7 +53,7 @@ public static class TransportAction extends HandledTransportAction 0); } @Override diff --git 
a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java index 5b037774347ed..fe53bf7991d9f 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheService.java @@ -13,7 +13,6 @@ import org.elasticsearch.Assertions; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.StepListener; -import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lease.Releasables; @@ -22,7 +21,6 @@ import org.elasticsearch.common.settings.SettingsException; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.unit.RelativeByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.AbstractAsyncTask; import org.elasticsearch.common.util.concurrent.AbstractRefCounted; @@ -30,19 +28,16 @@ import org.elasticsearch.common.util.concurrent.KeyedLock; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.index.shard.ShardId; -import org.elasticsearch.monitor.fs.FsInfo; -import org.elasticsearch.monitor.fs.FsProbe; -import org.elasticsearch.xpack.searchablesnapshots.cache.common.CacheKey; -import org.elasticsearch.xpack.searchablesnapshots.cache.common.SparseFileTracker; import org.elasticsearch.node.NodeRoleSettings; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.core.DataTier; import 
org.elasticsearch.xpack.searchablesnapshots.cache.common.ByteRange; +import org.elasticsearch.xpack.searchablesnapshots.cache.common.CacheKey; +import org.elasticsearch.xpack.searchablesnapshots.cache.common.SparseFileTracker; import java.io.IOException; import java.io.UncheckedIOException; import java.util.ArrayList; -import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -95,29 +90,23 @@ private static Setting.Validator getPageSizeAlignedByteSizeValueV }; } - public static final Setting SNAPSHOT_CACHE_SIZE_SETTING = new Setting<>( - new Setting.SimpleKey(SHARED_CACHE_SETTINGS_PREFIX + "size"), - (settings) -> { - if (isDedicatedFrozen(settings)) { - return "90%"; - } else { - return ByteSizeValue.ZERO.getStringRep(); - } - }, - s -> RelativeByteSizeValue.parseRelativeByteSizeValue(s, SHARED_CACHE_SETTINGS_PREFIX + "size"), - new Setting.Validator<>() { + public static final Setting SNAPSHOT_CACHE_SIZE_SETTING = new Setting<>( + SHARED_CACHE_SETTINGS_PREFIX + "size", + ByteSizeValue.ZERO.getStringRep(), + s -> ByteSizeValue.parseBytesSizeValue(s, SHARED_CACHE_SETTINGS_PREFIX + "size"), + new Setting.Validator() { @Override - public void validate(final RelativeByteSizeValue value) { + public void validate(final ByteSizeValue value) { } @Override - public void validate(final RelativeByteSizeValue value, final Map, Object> settings) { - if (value.isAbsolute() && value.getAbsolute().getBytes() == -1) { + public void validate(final ByteSizeValue value, final Map, Object> settings) { + if (value.getBytes() == -1) { throw new SettingsException("setting [{}] must be non-negative", SHARED_CACHE_SETTINGS_PREFIX + "size"); } - if (value.isNonZeroSize()) { + if (value.getBytes() > 0) { @SuppressWarnings("unchecked") final List roles = (List) settings.get(NodeRoleSettings.NODE_ROLES_SETTING); if (DataTier.isFrozenNode(Set.of(roles.toArray(DiscoveryNodeRole[]::new))) == false) { @@ -141,55 +130,6 @@ public Iterator> settings() { 
Setting.Property.NodeScope ); - public static final Setting SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING = new Setting<>( - new Setting.SimpleKey(SHARED_CACHE_SETTINGS_PREFIX + "size.max_headroom"), - (settings) -> { - if (SNAPSHOT_CACHE_SIZE_SETTING.exists(settings) == false && isDedicatedFrozen(settings)) { - return "100GB"; - } - - return "-1"; - }, - (s) -> ByteSizeValue.parseBytesSizeValue(s, SHARED_CACHE_SETTINGS_PREFIX + "size.max_headroom"), - new Setting.Validator<>() { - private final Collection> dependencies = List.of(SNAPSHOT_CACHE_SIZE_SETTING); - - @Override - public Iterator> settings() { - return dependencies.iterator(); - } - - @Override - public void validate(ByteSizeValue value) { - // ignore - } - - @Override - public void validate(ByteSizeValue value, Map, Object> settings, boolean isPresent) { - if (isPresent && value.getBytes() != -1) { - RelativeByteSizeValue sizeValue = (RelativeByteSizeValue) settings.get(SNAPSHOT_CACHE_SIZE_SETTING); - if (sizeValue.isAbsolute()) { - throw new SettingsException( - "setting [{}] cannot be specified for absolute [{}={}]", - SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.getKey(), - SNAPSHOT_CACHE_SIZE_SETTING.getKey(), - sizeValue.getStringRep() - ); - } - } - } - }, - Setting.Property.NodeScope - ); - - private static boolean isDedicatedFrozen(Settings settings) { - Set rolesFromSettings = DiscoveryNode.getRolesFromSettings(settings); - return rolesFromSettings.contains(DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE) - && rolesFromSettings.stream() - .filter(DiscoveryNodeRole::canContainData) - .anyMatch(r -> r != DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE) == false; - } - public static final Setting FROZEN_CACHE_RECOVERY_RANGE_SIZE_SETTING = Setting.byteSizeSetting( SHARED_CACHE_SETTINGS_PREFIX + "recovery_range_size", new ByteSizeValue(128, ByteSizeUnit.KB), // default @@ -256,13 +196,7 @@ private static boolean isDedicatedFrozen(Settings settings) { @SuppressWarnings({ "unchecked", "rawtypes" }) public 
FrozenCacheService(NodeEnvironment environment, Settings settings, ThreadPool threadPool) { this.currentTimeSupplier = threadPool::relativeTimeInMillis; - FsInfo.Path pathInfo; - try { - pathInfo = FsProbe.getFSInfo(environment.nodePaths()[0]); - } catch (IOException e) { - throw new IllegalStateException("unable to probe size of filesystem [" + environment.nodePaths()[0] + "]"); - } - this.cacheSize = calculateCacheSize(settings, pathInfo); + this.cacheSize = SNAPSHOT_CACHE_SIZE_SETTING.get(settings).getBytes(); final long regionSize = SNAPSHOT_CACHE_REGION_SIZE_SETTING.get(settings).getBytes(); this.numRegions = Math.toIntExact(cacheSize / regionSize); keyMapping = new ConcurrentHashMap<>(); @@ -293,12 +227,6 @@ public FrozenCacheService(NodeEnvironment environment, Settings settings, Thread this.recoveryRangeSize = FROZEN_CACHE_RECOVERY_RANGE_SIZE_SETTING.get(settings); } - static long calculateCacheSize(Settings settings, FsInfo.Path pathInfo) { - return SNAPSHOT_CACHE_SIZE_SETTING.get(settings) - .calculateValue(pathInfo.getTotal(), SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.get(settings)) - .getBytes(); - } - public int getRangeSize() { return toIntBytes(rangeSize.getBytes()); } diff --git a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheServiceTests.java b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheServiceTests.java index f7cb0eb0176af..7a88b118c5f94 100644 --- a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheServiceTests.java +++ b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/shared/FrozenCacheServiceTests.java @@ -12,13 +12,9 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.SettingsException; import org.elasticsearch.common.unit.ByteSizeValue; 
-import org.elasticsearch.common.unit.RatioValue; -import org.elasticsearch.common.unit.RelativeByteSizeValue; -import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.env.TestEnvironment; import org.elasticsearch.index.shard.ShardId; -import org.elasticsearch.monitor.fs.FsInfo; import org.elasticsearch.node.NodeRoleSettings; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -27,12 +23,8 @@ import org.elasticsearch.xpack.searchablesnapshots.cache.shared.FrozenCacheService.CacheFileRegion; import java.io.IOException; -import java.util.HashSet; -import java.util.Set; -import java.util.stream.Collectors; import static org.elasticsearch.node.Node.NODE_NAME_SETTING; -import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.notNullValue; @@ -207,9 +199,8 @@ public void testDecay() throws IOException { } public void testCacheSizeRejectedOnNonFrozenNodes() { - String cacheSize = randomBoolean() ? 
new ByteSizeValue(size(500)).getStringRep() : new RatioValue(between(1, 100)).toString(); final Settings settings = Settings.builder() - .put(FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey(), cacheSize) + .put(FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey(), new ByteSizeValue(size(500)).getStringRep()) .put(FrozenCacheService.SNAPSHOT_CACHE_REGION_SIZE_SETTING.getKey(), new ByteSizeValue(size(100)).getStringRep()) .putList(NodeRoleSettings.NODE_ROLES_SETTING.getKey(), DiscoveryNodeRole.DATA_HOT_NODE_ROLE.roleName()) .build(); @@ -225,97 +216,12 @@ public void testCacheSizeRejectedOnNonFrozenNodes() { "setting [" + FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey() + "] to be positive [" - + cacheSize + + new ByteSizeValue(size(500)).getStringRep() + "] is only permitted on nodes with the data_frozen role, roles are [data_hot]" ) ); } - public void testDedicateFrozenCacheSizeDefaults() { - final Settings settings = Settings.builder() - .putList(NodeRoleSettings.NODE_ROLES_SETTING.getKey(), DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE.roleName()) - .build(); - - RelativeByteSizeValue relativeCacheSize = FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.get(settings); - assertThat(relativeCacheSize.isAbsolute(), is(false)); - assertThat(relativeCacheSize.isNonZeroSize(), is(true)); - assertThat(relativeCacheSize.calculateValue(ByteSizeValue.ofBytes(10000), null), equalTo(ByteSizeValue.ofBytes(9000))); - assertThat(FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.get(settings), equalTo(ByteSizeValue.ofGb(100))); - } - - public void testNotDedicatedFrozenCacheSizeDefaults() { - final Settings settings = Settings.builder() - .putList( - NodeRoleSettings.NODE_ROLES_SETTING.getKey(), - Sets.union( - Set.of( - randomFrom( - DiscoveryNodeRole.DATA_HOT_NODE_ROLE, - DiscoveryNodeRole.DATA_COLD_NODE_ROLE, - DiscoveryNodeRole.DATA_WARM_NODE_ROLE, - DiscoveryNodeRole.DATA_CONTENT_NODE_ROLE - ) - ), - new HashSet<>( - randomSubsetOf( - between(0, 3), - 
DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE, - DiscoveryNodeRole.INGEST_ROLE, - DiscoveryNodeRole.MASTER_ROLE - ) - ) - ).stream().map(DiscoveryNodeRole::roleName).collect(Collectors.toList()) - ) - .build(); - - RelativeByteSizeValue relativeCacheSize = FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.get(settings); - assertThat(relativeCacheSize.isNonZeroSize(), is(false)); - assertThat(relativeCacheSize.isAbsolute(), is(true)); - assertThat(relativeCacheSize.getAbsolute(), equalTo(ByteSizeValue.ZERO)); - assertThat(FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.get(settings), equalTo(ByteSizeValue.ofBytes(-1))); - } - - public void testMaxHeadroomRejectedForAbsoluteCacheSize() { - String cacheSize = new ByteSizeValue(size(500)).getStringRep(); - final Settings settings = Settings.builder() - .put(FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey(), cacheSize) - .put(FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.getKey(), new ByteSizeValue(size(100)).getStringRep()) - .putList(NodeRoleSettings.NODE_ROLES_SETTING.getKey(), DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE.roleName()) - .build(); - final IllegalArgumentException e = expectThrows( - IllegalArgumentException.class, - () -> FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.get(settings) - ); - assertThat(e.getCause(), notNullValue()); - assertThat(e.getCause(), instanceOf(SettingsException.class)); - assertThat( - e.getCause().getMessage(), - is( - "setting [" - + FrozenCacheService.SNAPSHOT_CACHE_SIZE_MAX_HEADROOM_SETTING.getKey() - + "] cannot be specified for absolute [" - + FrozenCacheService.SNAPSHOT_CACHE_SIZE_SETTING.getKey() - + "=" - + cacheSize - + "]" - ) - ); - } - - public void testCalculateCacheSize() { - FsInfo.Path smallPathInfo = new FsInfo.Path("ignored", null, 10000, 0, 0); - FsInfo.Path largePathInfo = new FsInfo.Path("ignored", null, ByteSizeValue.ofTb(10).getBytes(), 0, 0); - assertThat(FrozenCacheService.calculateCacheSize(Settings.EMPTY, smallPathInfo), 
equalTo(0L)); - final Settings settings = Settings.builder() - .putList(NodeRoleSettings.NODE_ROLES_SETTING.getKey(), DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE.roleName()) - .build(); - assertThat(FrozenCacheService.calculateCacheSize(settings, smallPathInfo), equalTo(9000L)); - assertThat( - FrozenCacheService.calculateCacheSize(settings, largePathInfo), - equalTo(largePathInfo.getTotal().getBytes() - ByteSizeValue.ofGb(100).getBytes()) - ); - } - private static CacheKey generateCacheKey() { return new CacheKey( randomAlphaOfLength(10), From 22e49ed90dcbe78f18825328af8d7521ef0c06b3 Mon Sep 17 00:00:00 2001 From: Henning Andersen Date: Tue, 20 Apr 2021 13:49:04 +0200 Subject: [PATCH 7/7] Dedicated frozen nodes --- .../reference/modules/cluster/disk_allocator.asciidoc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/reference/modules/cluster/disk_allocator.asciidoc b/docs/reference/modules/cluster/disk_allocator.asciidoc index 84661cbc40698..1da56a6de7a53 100644 --- a/docs/reference/modules/cluster/disk_allocator.asciidoc +++ b/docs/reference/modules/cluster/disk_allocator.asciidoc @@ -117,14 +117,15 @@ PUT /my-index-000001/_settings // tag::cluster-routing-flood-stage-tag[] `cluster.routing.allocation.disk.watermark.flood_stage.frozen` {ess-icon}:: (<>) -Controls the flood stage watermark for frozen nodes, which defaults to 95%. +Controls the flood stage watermark for dedicated frozen nodes, which defaults to +95%. `cluster.routing.allocation.disk.watermark.flood_stage.frozen.max_headroom` {ess-icon}:: (<>) -Controls the max headroom for the flood stage watermark for frozen nodes. -Defaults to 20GB when `cluster.routing.allocation.disk.watermark.flood_stage.frozen` -is not explicitly set. This caps the amount of free space required on frozen -nodes. +Controls the max headroom for the flood stage watermark for dedicated frozen +nodes. 
Defaults to 20GB when +`cluster.routing.allocation.disk.watermark.flood_stage.frozen` is not explicitly +set. This caps the amount of free space required on dedicated frozen nodes. `cluster.info.update.interval`:: (<>)