From 328da329b638a3410d5e114c2d53c26a504f0b22 Mon Sep 17 00:00:00 2001 From: panguixin Date: Wed, 5 Jun 2024 21:38:47 +0800 Subject: [PATCH] fix file cache initialization Signed-off-by: panguixin --- .../cluster/node/DiscoveryNode.java | 4 + .../remote/utils/cache/SegmentedCache.java | 4 +- .../org/opensearch/monitor/fs/FsProbe.java | 17 +++- .../main/java/org/opensearch/node/Node.java | 82 ++++++++++++------- .../env/NodeRepurposeCommandTests.java | 2 +- .../java/org/opensearch/node/NodeTests.java | 2 +- .../opensearch/test/InternalTestCluster.java | 10 ++- 7 files changed, 83 insertions(+), 38 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java index 5226e9570ac14..ce30994d7a16c 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java @@ -128,6 +128,10 @@ public static boolean isSearchNode(Settings settings) { return hasRole(settings, DiscoveryNodeRole.SEARCH_ROLE); } + public static boolean isDedicatedSearchNode(Settings settings) { + return getRolesFromSettings(settings).stream().allMatch(DiscoveryNodeRole.SEARCH_ROLE::equals); + } + private final String nodeName; private final String nodeId; private final String ephemeralId; diff --git a/server/src/main/java/org/opensearch/index/store/remote/utils/cache/SegmentedCache.java b/server/src/main/java/org/opensearch/index/store/remote/utils/cache/SegmentedCache.java index d3eb03df37e1b..7b85146cdb601 100644 --- a/server/src/main/java/org/opensearch/index/store/remote/utils/cache/SegmentedCache.java +++ b/server/src/main/java/org/opensearch/index/store/remote/utils/cache/SegmentedCache.java @@ -49,15 +49,15 @@ private static final int ceilingNextPowerOfTwo(int x) { private final Weigher weigher; public SegmentedCache(Builder builder) { - this.capacity = builder.capacity; final int segments = ceilingNextPowerOfTwo(builder.concurrencyLevel); this.segmentMask = segments - 1; this.table = newSegmentArray(segments); - this.perSegmentCapacity = (capacity + (segments - 1)) / segments; + this.perSegmentCapacity = (builder.capacity + (segments - 1)) / segments; this.weigher = builder.weigher; for (int i = 0; i < table.length; i++) { table[i] = new LRUCache<>(perSegmentCapacity, builder.listener, builder.weigher); } + this.capacity = perSegmentCapacity * segments; } @SuppressWarnings("unchecked") diff --git a/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java b/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java index f4731a4a34373..a5dc06325cc9d 100644 --- a/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java +++ b/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java @@ -81,7 +81,15 @@ public FsInfo stats(FsInfo previous) throws IOException { if (fileCache != null && dataLocations[i].fileCacheReservedSize != ByteSizeValue.ZERO) { paths[i].fileCacheReserved = adjustForHugeFilesystems(dataLocations[i].fileCacheReservedSize.getBytes()); paths[i].fileCacheUtilized = adjustForHugeFilesystems(fileCache.usage().usage()); - paths[i].available -= (paths[i].fileCacheReserved - paths[i].fileCacheUtilized); + // fileCacheFree will be less than zero if the cache being over-subscribed + long fileCacheFree = paths[i].fileCacheReserved - paths[i].fileCacheUtilized; + if (fileCacheFree > 0) { + paths[i].available -= fileCacheFree; + } + // If the reserved space for file caching is being used by other files, such as local indices or heap dumps. + if (paths[i].available < 0) { + paths[i].available = 0; + } } } FsInfo.IoStats ioStats = null; @@ -211,4 +219,11 @@ public static FsInfo.Path getFSInfo(NodePath nodePath) throws IOException { return fsPath; } + public static long getTotalSize(NodePath nodePath) throws IOException { + return adjustForHugeFilesystems(nodePath.fileStore.getTotalSpace()); + } + + public static long getAvailableSize(NodePath nodePath) throws IOException { + return adjustForHugeFilesystems(nodePath.fileStore.getUsableSpace()); + } } diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 76109ba10624a..b5cb2c75733d4 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -38,6 +38,7 @@ import org.opensearch.Build; import org.opensearch.ExceptionsHelper; import org.opensearch.OpenSearchException; +import org.opensearch.OpenSearchParseException; import org.opensearch.OpenSearchTimeoutException; import org.opensearch.Version; import org.opensearch.action.ActionModule; @@ -108,6 +109,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.settings.SettingsException; import org.opensearch.common.settings.SettingsModule; +import org.opensearch.common.unit.RatioValue; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.BigArrays; import org.opensearch.common.util.FeatureFlags; @@ -175,7 +177,6 @@ import org.opensearch.ingest.IngestService; import org.opensearch.monitor.MonitorService; import org.opensearch.monitor.fs.FsHealthService; -import org.opensearch.monitor.fs.FsInfo; import org.opensearch.monitor.fs.FsProbe; import org.opensearch.monitor.jvm.JvmInfo; import org.opensearch.node.remotestore.RemoteStoreNodeService; @@ -371,9 +372,12 @@ public class Node implements Closeable { } }, Setting.Property.NodeScope); - public static final Setting NODE_SEARCH_CACHE_SIZE_SETTING = Setting.byteSizeSetting( + private static final String ZERO = "0"; + + public static final Setting NODE_SEARCH_CACHE_SIZE_SETTING = new Setting<>( "node.search.cache.size", - ByteSizeValue.ZERO, + s -> DiscoveryNode.isDedicatedSearchNode(s) ? "80%" : ZERO, + Node::validateFileCacheSize, Property.NodeScope ); @@ -1995,39 +1999,57 @@ DiscoveryNode getNode() { * Initializes the search cache with a defined capacity. * The capacity of the cache is based on user configuration for {@link Node#NODE_SEARCH_CACHE_SIZE_SETTING}. * If the user doesn't configure the cache size, it fails if the node is a data + search node. - * Else it configures the size to 80% of available capacity for a dedicated search node, if not explicitly defined. + * Else it configures the size to 80% of total capacity for a dedicated search node, if not explicitly defined. */ private void initializeFileCache(Settings settings, CircuitBreaker circuitBreaker) throws IOException { - if (DiscoveryNode.isSearchNode(settings)) { - NodeEnvironment.NodePath fileCacheNodePath = nodeEnvironment.fileCacheNodePath(); - long capacity = NODE_SEARCH_CACHE_SIZE_SETTING.get(settings).getBytes(); - FsInfo.Path info = ExceptionsHelper.catchAsRuntimeException(() -> FsProbe.getFSInfo(fileCacheNodePath)); - long availableCapacity = info.getAvailable().getBytes(); - - // Initialize default values for cache if NODE_SEARCH_CACHE_SIZE_SETTING is not set. - if (capacity == 0) { - // If node is not a dedicated search node without configuration, prevent cache initialization - if (DiscoveryNode.getRolesFromSettings(settings).stream().anyMatch(role -> !DiscoveryNodeRole.SEARCH_ROLE.equals(role))) { - throw new SettingsException( - "Unable to initialize the " - + DiscoveryNodeRole.SEARCH_ROLE.roleName() - + "-" - + DiscoveryNodeRole.DATA_ROLE.roleName() - + " node: Missing value for configuration " - + NODE_SEARCH_CACHE_SIZE_SETTING.getKey() - ); - } else { - capacity = 80 * availableCapacity / 100; - } + if (DiscoveryNode.isSearchNode(settings) == false) { + return; + } + + String capacityRaw = NODE_SEARCH_CACHE_SIZE_SETTING.get(settings); + if (capacityRaw.equals(ZERO)) { + throw new SettingsException( + "Unable to initialize the " + + DiscoveryNodeRole.SEARCH_ROLE.roleName() + + "-" + + DiscoveryNodeRole.DATA_ROLE.roleName() + + " node: Missing value for configuration " + + NODE_SEARCH_CACHE_SIZE_SETTING.getKey() + ); + } + + NodeEnvironment.NodePath fileCacheNodePath = nodeEnvironment.fileCacheNodePath(); + long totalSpace = ExceptionsHelper.catchAsRuntimeException(() -> FsProbe.getTotalSize(fileCacheNodePath)); + long capacity = calculateFileCacheSize(capacityRaw, totalSpace); + if (capacity <= 0 || totalSpace <= capacity) { + throw new SettingsException("Cache size must be larger than zero and less than total capacity"); + } + + this.fileCache = FileCacheFactory.createConcurrentLRUFileCache(capacity, circuitBreaker); + fileCacheNodePath.fileCacheReservedSize = new ByteSizeValue(this.fileCache.capacity(), ByteSizeUnit.BYTES); + List fileCacheDataPaths = collectFileCacheDataPath(fileCacheNodePath); + this.fileCache.restoreFromDirectory(fileCacheDataPaths); + } + + private static long calculateFileCacheSize(String capacityRaw, long totalSpace) { + try { + RatioValue ratioValue = RatioValue.parseRatioValue(capacityRaw); + return Math.round(totalSpace * ratioValue.getAsRatio()); + } catch (OpenSearchParseException e) { + try { + return ByteSizeValue.parseBytesSizeValue(capacityRaw, NODE_SEARCH_CACHE_SIZE_SETTING.getKey()).getBytes(); + } catch (OpenSearchParseException ex) { + ex.addSuppressed(e); + throw ex; } - capacity = Math.min(capacity, availableCapacity); - fileCacheNodePath.fileCacheReservedSize = new ByteSizeValue(capacity, ByteSizeUnit.BYTES); - this.fileCache = FileCacheFactory.createConcurrentLRUFileCache(capacity, circuitBreaker); - List fileCacheDataPaths = collectFileCacheDataPath(fileCacheNodePath); - this.fileCache.restoreFromDirectory(fileCacheDataPaths); } } + private static String validateFileCacheSize(String capacityRaw) { + calculateFileCacheSize(capacityRaw, 0L); + return capacityRaw; + } + /** * Returns the {@link FileCache} instance for remote search node * Note: Visible for testing diff --git a/server/src/test/java/org/opensearch/env/NodeRepurposeCommandTests.java b/server/src/test/java/org/opensearch/env/NodeRepurposeCommandTests.java index 2a3525143c01f..d2d6fdc387dfe 100644 --- a/server/src/test/java/org/opensearch/env/NodeRepurposeCommandTests.java +++ b/server/src/test/java/org/opensearch/env/NodeRepurposeCommandTests.java @@ -95,7 +95,7 @@ public void createNodePaths() throws IOException { dataClusterManagerSettings = buildEnvSettings(Settings.EMPTY); Settings defaultSearchSettings = Settings.builder() .put(dataClusterManagerSettings) - .put(NODE_SEARCH_CACHE_SIZE_SETTING.getKey(), new ByteSizeValue(16, ByteSizeUnit.GB)) + .put(NODE_SEARCH_CACHE_SIZE_SETTING.getKey(), new ByteSizeValue(16, ByteSizeUnit.GB).toString()) .build(); searchNoDataNoClusterManagerSettings = onlyRole(dataClusterManagerSettings, DiscoveryNodeRole.SEARCH_ROLE); diff --git a/server/src/test/java/org/opensearch/node/NodeTests.java b/server/src/test/java/org/opensearch/node/NodeTests.java index f44cc352cd330..0093091f61a1c 100644 --- a/server/src/test/java/org/opensearch/node/NodeTests.java +++ b/server/src/test/java/org/opensearch/node/NodeTests.java @@ -380,7 +380,7 @@ public void testCreateWithFileCache() throws Exception { List> plugins = basePlugins(); ByteSizeValue cacheSize = new ByteSizeValue(16, ByteSizeUnit.GB); Settings searchRoleSettingsWithConfig = baseSettings().put(searchRoleSettings) - .put(Node.NODE_SEARCH_CACHE_SIZE_SETTING.getKey(), cacheSize) + .put(Node.NODE_SEARCH_CACHE_SIZE_SETTING.getKey(), cacheSize.toString()) .build(); Settings onlySearchRoleSettings = Settings.builder() .put(searchRoleSettingsWithConfig) diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index ca80c65e58522..ec88002317284 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -165,6 +165,7 @@ import static org.opensearch.test.NodeRoles.onlyRoles; import static org.opensearch.test.NodeRoles.removeRoles; import static org.opensearch.test.OpenSearchTestCase.assertBusy; +import static org.opensearch.test.OpenSearchTestCase.randomBoolean; import static org.opensearch.test.OpenSearchTestCase.randomFrom; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -216,7 +217,8 @@ public final class InternalTestCluster extends TestCluster { nodeAndClient.node.settings() ); - private static final ByteSizeValue DEFAULT_SEARCH_CACHE_SIZE = new ByteSizeValue(2, ByteSizeUnit.GB); + private static final String DEFAULT_SEARCH_CACHE_SIZE_BYTES = "2gb"; + private static final String DEFAULT_SEARCH_CACHE_SIZE_PERCENT = "5%"; public static final int DEFAULT_LOW_NUM_CLUSTER_MANAGER_NODES = 1; public static final int DEFAULT_HIGH_NUM_CLUSTER_MANAGER_NODES = 3; @@ -700,8 +702,10 @@ public synchronized void ensureAtLeastNumSearchAndDataNodes(int n) { logger.info("increasing cluster size from {} to {}", size, n); Set searchAndDataRoles = Set.of(DiscoveryNodeRole.DATA_ROLE, DiscoveryNodeRole.SEARCH_ROLE); Settings settings = Settings.builder() - .put(Settings.EMPTY) - .put(Node.NODE_SEARCH_CACHE_SIZE_SETTING.getKey(), DEFAULT_SEARCH_CACHE_SIZE) + .put( + Node.NODE_SEARCH_CACHE_SIZE_SETTING.getKey(), + randomBoolean() ? DEFAULT_SEARCH_CACHE_SIZE_PERCENT : DEFAULT_SEARCH_CACHE_SIZE_BYTES + ) .build(); startNodes(n - size, Settings.builder().put(onlyRoles(settings, searchAndDataRoles)).build()); validateClusterFormed();