-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Allow total memory to be overridden #78750
Changes from 17 commits
cd22163
0f1dd57
26c5579
431c8bd
7d09701
6acad57
0f27a97
824019a
898fec6
82c7937
d1d58e6
017263b
2a74474
36410d2
a4bc21a
795f155
42a000a
3c1eabd
8df4836
b4c646c
4e7b3c1
449cf2a
ee59b9f
2f71542
7dcd8ec
be14364
8741c98
9731c2d
71f7a2a
1cd19bf
becace8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.tools.launchers; | ||
|
||
import java.util.List; | ||
import java.util.Objects; | ||
|
||
/** | ||
* A {@link SystemMemoryInfo} which returns a user-overridden memory size if one | ||
* has been specified using the {@code es.total_memory_bytes} system property, or | ||
* else returns the value provided by a fallback provider. | ||
*/ | ||
public final class OverridableSystemMemoryInfo implements SystemMemoryInfo { | ||
|
||
private final List<String> userDefinedJvmOptions; | ||
private final SystemMemoryInfo fallbackSystemMemoryInfo; | ||
|
||
public OverridableSystemMemoryInfo(final List<String> userDefinedJvmOptions, SystemMemoryInfo fallbackSystemMemoryInfo) { | ||
this.userDefinedJvmOptions = Objects.requireNonNull(userDefinedJvmOptions); | ||
this.fallbackSystemMemoryInfo = Objects.requireNonNull(fallbackSystemMemoryInfo); | ||
} | ||
|
||
@Override | ||
public long availableSystemMemory() throws SystemMemoryInfoException { | ||
|
||
return userDefinedJvmOptions.stream() | ||
.filter(option -> option.startsWith("-Des.total_memory_bytes=")) | ||
.map(totalMemoryBytesOption -> { | ||
try { | ||
long bytes = Long.parseLong(totalMemoryBytesOption.split("=", 2)[1]); | ||
if (bytes < 0) { | ||
throw new IllegalArgumentException("Negative memory size specified in [" + totalMemoryBytesOption + "]"); | ||
} | ||
return bytes; | ||
} catch (NumberFormatException e) { | ||
throw new IllegalArgumentException("Unable to parse number of bytes from [" + totalMemoryBytesOption + "]", e); | ||
} | ||
}) | ||
.reduce((previous, current) -> current) // this is effectively findLast(), so that ES_JAVA_OPTS overrides jvm.options | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn’t we fail if the option is specified more than once? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that usually Elasticsearch fails if duplicate settings are provided. But not with those that are specified using Java system properties. For example, if I run this:
then Elasticsearch runs with maximum shards per index set to 1111. If I run this:
then Elasticsearch runs with maximum shards per index set to 1212. So with system properties, specifying them multiple times isn't an error and I am matching that behaviour here. If it's undesirable and needs to be changed then I think that should be done for all system properties rather than just this one. |
||
.orElse(fallbackSystemMemoryInfo.availableSystemMemory()); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.tools.launchers; | ||
|
||
import org.elasticsearch.tools.launchers.SystemMemoryInfo.SystemMemoryInfoException; | ||
|
||
import java.util.List; | ||
|
||
import static org.hamcrest.Matchers.is; | ||
import static org.junit.Assert.assertThat; | ||
import static org.junit.Assert.fail; | ||
|
||
public class OverridableSystemMemoryInfoTests extends LaunchersTestCase { | ||
|
||
private static final long FALLBACK = -1L; | ||
|
||
public void testNoOptions() throws SystemMemoryInfoException { | ||
final SystemMemoryInfo memoryInfo = new OverridableSystemMemoryInfo(List.of(), fallbackSystemMemoryInfo()); | ||
assertThat(memoryInfo.availableSystemMemory(), is(FALLBACK)); | ||
} | ||
|
||
public void testNoOverrides() throws SystemMemoryInfoException { | ||
final SystemMemoryInfo memoryInfo = new OverridableSystemMemoryInfo(List.of("-Da=b", "-Dx=y"), fallbackSystemMemoryInfo()); | ||
assertThat(memoryInfo.availableSystemMemory(), is(FALLBACK)); | ||
} | ||
|
||
public void testValidSingleOverride() throws SystemMemoryInfoException { | ||
final SystemMemoryInfo memoryInfo = new OverridableSystemMemoryInfo( | ||
List.of("-Des.total_memory_bytes=123456789"), | ||
fallbackSystemMemoryInfo() | ||
); | ||
assertThat(memoryInfo.availableSystemMemory(), is(123456789L)); | ||
} | ||
|
||
public void testValidOverrideInList() throws SystemMemoryInfoException { | ||
final SystemMemoryInfo memoryInfo = new OverridableSystemMemoryInfo( | ||
List.of("-Da=b", "-Des.total_memory_bytes=987654321", "-Dx=y"), | ||
fallbackSystemMemoryInfo() | ||
); | ||
assertThat(memoryInfo.availableSystemMemory(), is(987654321L)); | ||
} | ||
|
||
public void testMultipleValidOverridesInList() throws SystemMemoryInfoException { | ||
final SystemMemoryInfo memoryInfo = new OverridableSystemMemoryInfo( | ||
List.of("-Des.total_memory_bytes=123456789", "-Da=b", "-Des.total_memory_bytes=987654321", "-Dx=y"), | ||
fallbackSystemMemoryInfo() | ||
); | ||
assertThat(memoryInfo.availableSystemMemory(), is(987654321L)); | ||
} | ||
|
||
public void testNegativeOverride() throws SystemMemoryInfoException { | ||
final SystemMemoryInfo memoryInfo = new OverridableSystemMemoryInfo( | ||
List.of("-Da=b", "-Des.total_memory_bytes=-123", "-Dx=y"), | ||
fallbackSystemMemoryInfo() | ||
); | ||
try { | ||
memoryInfo.availableSystemMemory(); | ||
fail("expected to fail"); | ||
} catch (IllegalArgumentException e) { | ||
assertThat(e.getMessage(), is("Negative memory size specified in [-Des.total_memory_bytes=-123]")); | ||
} | ||
} | ||
|
||
public void testUnparsableOverride() throws SystemMemoryInfoException { | ||
final SystemMemoryInfo memoryInfo = new OverridableSystemMemoryInfo( | ||
List.of("-Da=b", "-Des.total_memory_bytes=invalid", "-Dx=y"), | ||
fallbackSystemMemoryInfo() | ||
); | ||
try { | ||
memoryInfo.availableSystemMemory(); | ||
fail("expected to fail"); | ||
} catch (IllegalArgumentException e) { | ||
assertThat(e.getMessage(), is("Unable to parse number of bytes from [-Des.total_memory_bytes=invalid]")); | ||
} | ||
} | ||
|
||
private static SystemMemoryInfo fallbackSystemMemoryInfo() { | ||
return () -> FALLBACK; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
pr: 78750 | ||
summary: Allow total memory to be overridden | ||
area: Packaging | ||
type: enhancement | ||
issues: | ||
- 65905 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -916,6 +916,18 @@ Total amount of physical memory across all selected nodes. | |
(integer) | ||
Total amount, in bytes, of physical memory across all selected nodes. | ||
|
||
`total_override`:: | ||
(<<byte-units,byte value>>) | ||
If the amount of physical memory has been overridden using the `es.total_memory_bytes` | ||
system property on all selected nodes then this reports the sum of the overridden | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if the total memory was only overridden on some of the selected nodes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this is a tricky one. I opted not to report any override at all at the cluster level if some nodes have overrides and others don't. (You can still get all the values from the node stats.) I guess the alternative would be to report the sum of overrides on the nodes that have overrides, plus un-overridden total on the nodes that don't, but only if at least one node has an override. Maybe that's better - I'd be interested to hear if subsequent reviewers have any thoughts on this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This also ties in with #78750 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we need to have any distinct concept of override in the stats at all. With available disk space, when limited through cgroups, we do not show what the real disk has available. Why not just show this as “this is the memory available”? |
||
values. Otherwise it is not present. | ||
|
||
`total_override_in_bytes`:: | ||
(integer) | ||
If the amount of physical memory has been overridden using the `es.total_memory_bytes` | ||
system property on all selected nodes then this reports the sum of the overridden | ||
values in bytes. Otherwise it is not present. | ||
|
||
`free`:: | ||
(<<byte-units, byte units>>) | ||
Amount of free physical memory across all selected nodes. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -868,22 +868,45 @@ public void test140CgroupOsStatsAreAvailable() throws Exception { | |
* logic sets the correct heap size, based on the container limits. | ||
*/ | ||
public void test150MachineDependentHeap() throws Exception { | ||
final List<String> xArgs = machineDependentHeapTest("942m", List.of()); | ||
|
||
// This is roughly 0.4 * 942 | ||
assertThat(xArgs, hasItems("-Xms376m", "-Xmx376m")); | ||
} | ||
|
||
/** | ||
* Check that when available system memory is constrained by a total memory override as well as Docker, | ||
* the machine-dependant heap sizing logic sets the correct heap size, preferring the override to the | ||
* container limits. | ||
*/ | ||
public void test151MachineDependentHeapWithSizeOverride() throws Exception { | ||
final List<String> xArgs = machineDependentHeapTest( | ||
"942m", | ||
// 799014912 = 762m | ||
List.of("-Des.total_memory_bytes=799014912") | ||
); | ||
|
||
// This is roughly 0.4 * 762, in particular it's NOT 0.4 * 942 | ||
assertThat(xArgs, hasItems("-Xms304m", "-Xmx304m")); | ||
} | ||
|
||
private List<String> machineDependentHeapTest(final String containerMemory, final List<String> extraJvmOptions) throws Exception { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should make sure we're testing this scenario in our |
||
// Start by ensuring `jvm.options` doesn't define any heap options | ||
final Path jvmOptionsPath = tempDir.resolve("jvm.options"); | ||
final Path containerJvmOptionsPath = installation.config("jvm.options"); | ||
copyFromContainer(containerJvmOptionsPath, jvmOptionsPath); | ||
|
||
final List<String> jvmOptions = Files.readAllLines(jvmOptionsPath) | ||
.stream() | ||
.filter(line -> (line.startsWith("-Xms") || line.startsWith("-Xmx")) == false) | ||
.collect(Collectors.toList()); | ||
final List<String> jvmOptions = Stream.concat( | ||
Files.readAllLines(jvmOptionsPath).stream().filter(line -> (line.startsWith("-Xms") || line.startsWith("-Xmx")) == false), | ||
extraJvmOptions.stream() | ||
).collect(Collectors.toList()); | ||
|
||
Files.writeString(jvmOptionsPath, String.join("\n", jvmOptions)); | ||
|
||
// Now run the container, being explicit about the available memory | ||
runContainer( | ||
distribution(), | ||
builder().memory("942m").volume(jvmOptionsPath, containerJvmOptionsPath).envVar("ELASTIC_PASSWORD", PASSWORD) | ||
builder().memory(containerMemory).volume(jvmOptionsPath, containerJvmOptionsPath).envVar("ELASTIC_PASSWORD", PASSWORD) | ||
); | ||
waitForElasticsearch(installation, USERNAME, PASSWORD); | ||
|
||
|
@@ -897,12 +920,9 @@ public void test150MachineDependentHeap() throws Exception { | |
final JsonNode jsonNode = new ObjectMapper().readTree(jvmArgumentsLine.get()); | ||
|
||
final String argsStr = jsonNode.get("message").textValue(); | ||
final List<String> xArgs = Arrays.stream(argsStr.substring(1, argsStr.length() - 1).split(",\\s*")) | ||
return Arrays.stream(argsStr.substring(1, argsStr.length() - 1).split(",\\s*")) | ||
.filter(arg -> arg.startsWith("-X")) | ||
.collect(Collectors.toList()); | ||
|
||
// This is roughly 0.4 * 942 | ||
assertThat(xArgs, hasItems("-Xms376m", "-Xmx376m")); | ||
} | ||
|
||
/** | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -298,6 +298,32 @@ public void test81CustomPathConfAndJvmOptions() throws Exception { | |
cleanup(); | ||
} | ||
|
||
public void test82JvmOptionsTotalMemoryOverride() throws Exception { | ||
assumeTrue(isSystemd()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why would this test only work with systemd? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I copied this bit from the test immediately above, which is the most similar one in the file. There's a comment on line 256 saying all the tests below are for systemd only. I'll rename this test and move it higher up the file so that it's not in the systemd-only section. |
||
|
||
install(); | ||
|
||
assertPathsExist(installation.envFile); | ||
stopElasticsearch(); | ||
|
||
withCustomConfig(tempConf -> { | ||
// Work as though total system memory is 850MB | ||
append(installation.envFile, "ES_JAVA_OPTS=\"-Des.total_memory_bytes=891289600\""); | ||
|
||
startElasticsearch(); | ||
|
||
final String nodesStatsResponse = makeRequest(Request.Get("http://localhost:9200/_nodes/stats")); | ||
assertThat(nodesStatsResponse, containsString("\"total_override_in_bytes\":891289600")); | ||
|
||
// 40% of 850MB | ||
assertThat(sh.run("ps auwwx").stdout, containsString("-Xmx340m -Xms340m")); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test is failing in CI - I had a look and I didn't see these options being applied. I'd probably refine this assertion, to first find the Elasticsearch invocation, then break it into arguments, then do some kind of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the command line has There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found the cause. It wasn't node roles. |
||
|
||
stopElasticsearch(); | ||
}); | ||
|
||
cleanup(); | ||
} | ||
|
||
public void test83SystemdMask() throws Exception { | ||
try { | ||
assumeTrue(isSystemd()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I like the idea of having two different memory info types here rather than always wrapping the default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That makes testing more brittle, because then the logic to determine which to use has to live in
JvmOptionsParser
, which is poor coupling IMO. The benefit of this is that neither the options parser nor machine dependent heap has to know anything about this magic system property, that's completely encapsulated inOverrideableSystemMemoryInfo
, which can then be unit tested on its own with ease.