Skip to content

Commit

Permalink
[ML] Parse and report memory usage for DF Analytics (#52778)
Browse files Browse the repository at this point in the history
Adds reporting of memory usage for data frame analytics jobs.
This commit introduces a new index pattern `.ml-stats-*` whose
first concrete index will be `.ml-stats-000001`. This index serves
to store instrumentation information for those jobs.
  • Loading branch information
dimitris-athanasiou authored Feb 28, 2020
1 parent a095115 commit dd33193
Show file tree
Hide file tree
Showing 29 changed files with 921 additions and 218 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public static DataFrameAnalyticsStats fromXContent(XContentParser parser) throws
static final ParseField STATE = new ParseField("state");
static final ParseField FAILURE_REASON = new ParseField("failure_reason");
static final ParseField PROGRESS = new ParseField("progress");
static final ParseField MEMORY_USAGE = new ParseField("memory_usage");
static final ParseField NODE = new ParseField("node");
static final ParseField ASSIGNMENT_EXPLANATION = new ParseField("assignment_explanation");

Expand All @@ -55,8 +56,9 @@ public static DataFrameAnalyticsStats fromXContent(XContentParser parser) throws
(DataFrameAnalyticsState) args[1],
(String) args[2],
(List<PhaseProgress>) args[3],
(NodeAttributes) args[4],
(String) args[5]));
(MemoryUsage) args[4],
(NodeAttributes) args[5],
(String) args[6]));

static {
PARSER.declareString(constructorArg(), ID);
Expand All @@ -68,6 +70,7 @@ public static DataFrameAnalyticsStats fromXContent(XContentParser parser) throws
}, STATE, ObjectParser.ValueType.STRING);
PARSER.declareString(optionalConstructorArg(), FAILURE_REASON);
PARSER.declareObjectArray(optionalConstructorArg(), PhaseProgress.PARSER, PROGRESS);
PARSER.declareObject(optionalConstructorArg(), MemoryUsage.PARSER, MEMORY_USAGE);
PARSER.declareObject(optionalConstructorArg(), NodeAttributes.PARSER, NODE);
PARSER.declareString(optionalConstructorArg(), ASSIGNMENT_EXPLANATION);
}
Expand All @@ -76,16 +79,18 @@ public static DataFrameAnalyticsStats fromXContent(XContentParser parser) throws
private final DataFrameAnalyticsState state;
private final String failureReason;
private final List<PhaseProgress> progress;
private final MemoryUsage memoryUsage;
private final NodeAttributes node;
private final String assignmentExplanation;

public DataFrameAnalyticsStats(String id, DataFrameAnalyticsState state, @Nullable String failureReason,
@Nullable List<PhaseProgress> progress, @Nullable NodeAttributes node,
@Nullable String assignmentExplanation) {
@Nullable List<PhaseProgress> progress, @Nullable MemoryUsage memoryUsage,
@Nullable NodeAttributes node, @Nullable String assignmentExplanation) {
this.id = id;
this.state = state;
this.failureReason = failureReason;
this.progress = progress;
this.memoryUsage = memoryUsage;
this.node = node;
this.assignmentExplanation = assignmentExplanation;
}
Expand All @@ -106,6 +111,11 @@ public List<PhaseProgress> getProgress() {
return progress;
}

/**
 * Returns the memory usage that was passed to the constructor.
 *
 * @return the memory usage, or {@code null} if none was supplied
 */
@Nullable
public MemoryUsage getMemoryUsage() {
return memoryUsage;
}

/**
 * Returns the node attributes that were passed to the constructor.
 *
 * @return the node attributes, or {@code null} if none were supplied
 */
@Nullable
public NodeAttributes getNode() {
    return node;
}
Expand All @@ -124,13 +134,14 @@ public boolean equals(Object o) {
&& Objects.equals(state, other.state)
&& Objects.equals(failureReason, other.failureReason)
&& Objects.equals(progress, other.progress)
&& Objects.equals(memoryUsage, other.memoryUsage)
&& Objects.equals(node, other.node)
&& Objects.equals(assignmentExplanation, other.assignmentExplanation);
}

@Override
public int hashCode() {
return Objects.hash(id, state, failureReason, progress, node, assignmentExplanation);
return Objects.hash(id, state, failureReason, progress, memoryUsage, node, assignmentExplanation);
}

@Override
Expand All @@ -140,6 +151,7 @@ public String toString() {
.add("state", state)
.add("failureReason", failureReason)
.add("progress", progress)
.add("memoryUsage", memoryUsage)
.add("node", node)
.add("assignmentExplanation", assignmentExplanation)
.toString();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe;

import org.elasticsearch.client.common.TimeUtil;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.inject.internal.ToStringBuilder;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;

import java.io.IOException;
import java.time.Instant;
import java.util.Objects;

/**
 * Memory usage reported for a data frame analytics job: the time at which the
 * usage was calculated and the number of bytes used at the highest peak.
 */
public class MemoryUsage implements ToXContentObject {

    static final ParseField TIMESTAMP = new ParseField("timestamp");
    static final ParseField PEAK_USAGE_BYTES = new ParseField("peak_usage_bytes");

    /**
     * Parser for the {@code memory_usage} object. Both {@code timestamp} and
     * {@code peak_usage_bytes} are declared as required constructor arguments.
     */
    public static final ConstructingObjectParser<MemoryUsage, Void> PARSER =
        new ConstructingObjectParser<>(
            "analytics_memory_usage",
            true,
            a -> new MemoryUsage((Instant) a[0], (long) a[1]));

    static {
        PARSER.declareField(ConstructingObjectParser.constructorArg(),
            p -> TimeUtil.parseTimeFieldToInstant(p, TIMESTAMP.getPreferredName()),
            TIMESTAMP,
            ObjectParser.ValueType.VALUE);
        PARSER.declareLong(ConstructingObjectParser.constructorArg(), PEAK_USAGE_BYTES);
    }

    private final Instant timestamp;
    private final long peakUsageBytes;

    /**
     * @param timestamp      when the memory usage was calculated; must not be {@code null}
     * @param peakUsageBytes the number of bytes used at the highest peak of memory usage
     * @throws NullPointerException if {@code timestamp} is {@code null}
     */
    public MemoryUsage(Instant timestamp, long peakUsageBytes) {
        // Truncate to millisecond precision, which is the precision toXContent writes out.
        this.timestamp = Instant.ofEpochMilli(Objects.requireNonNull(timestamp).toEpochMilli());
        this.peakUsageBytes = peakUsageBytes;
    }

    /**
     * @return the timestamp of this report, truncated to millisecond precision; never {@code null}
     */
    public Instant getTimestamp() {
        return timestamp;
    }

    /**
     * @return the peak memory usage in bytes
     */
    public long getPeakUsageBytes() {
        return peakUsageBytes;
    }

    /**
     * Writes this object as {@code {"timestamp": <epoch millis>, "peak_usage_bytes": <long>}};
     * the builder is also given {@code timestamp_string} as the readable field name.
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        builder.timeField(TIMESTAMP.getPreferredName(), TIMESTAMP.getPreferredName() + "_string", timestamp.toEpochMilli());
        builder.field(PEAK_USAGE_BYTES.getPreferredName(), peakUsageBytes);
        builder.endObject();
        return builder;
    }

    @Override
    public boolean equals(Object o) {
        if (o == this) return true;
        if (o == null || getClass() != o.getClass()) return false;

        MemoryUsage other = (MemoryUsage) o;
        return Objects.equals(timestamp, other.timestamp)
            && peakUsageBytes == other.peakUsageBytes;
    }

    @Override
    public int hashCode() {
        return Objects.hash(timestamp, peakUsageBytes);
    }

    @Override
    public String toString() {
        // NOTE(review): this prints epoch seconds while toXContent writes epoch millis - confirm intent.
        return new ToStringBuilder(getClass())
            .add(TIMESTAMP.getPreferredName(), timestamp.getEpochSecond())
            .add(PEAK_USAGE_BYTES.getPreferredName(), peakUsageBytes)
            .toString();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -1506,6 +1506,7 @@ public void testGetDataFrameAnalyticsStats() throws Exception {
assertThat(progress.get(1), equalTo(new PhaseProgress("loading_data", 0)));
assertThat(progress.get(2), equalTo(new PhaseProgress("analyzing", 0)));
assertThat(progress.get(3), equalTo(new PhaseProgress("writing_results", 0)));
assertThat(stats.getMemoryUsage(), is(nullValue()));
}

public void testStartDataFrameAnalyticsConfig() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public static DataFrameAnalyticsStats randomDataFrameAnalyticsStats() {
randomFrom(DataFrameAnalyticsState.values()),
randomBoolean() ? null : randomAlphaOfLength(10),
randomBoolean() ? null : createRandomProgress(),
randomBoolean() ? null : MemoryUsageTests.createRandom(),
randomBoolean() ? null : NodeAttributesTests.createRandom(),
randomBoolean() ? null : randomAlphaOfLengthBetween(1, 20));
}
Expand All @@ -70,6 +71,9 @@ public static void toXContent(DataFrameAnalyticsStats stats, XContentBuilder bui
if (stats.getProgress() != null) {
builder.field(DataFrameAnalyticsStats.PROGRESS.getPreferredName(), stats.getProgress());
}
if (stats.getMemoryUsage() != null) {
builder.field(DataFrameAnalyticsStats.MEMORY_USAGE.getPreferredName(), stats.getMemoryUsage());
}
if (stats.getNode() != null) {
builder.field(DataFrameAnalyticsStats.NODE.getPreferredName(), stats.getNode());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe;

import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;

import java.io.IOException;
import java.time.Instant;

/**
 * Exercises {@link MemoryUsage} through the {@link AbstractXContentTestCase} harness.
 */
public class MemoryUsageTests extends AbstractXContentTestCase<MemoryUsage> {

    /**
     * Builds a {@link MemoryUsage} stamped with the current time and a random
     * non-negative peak usage. Shared with other test classes.
     */
    public static MemoryUsage createRandom() {
        final Instant timestamp = Instant.now();
        final long peakUsageBytes = randomNonNegativeLong();
        return new MemoryUsage(timestamp, peakUsageBytes);
    }

    @Override
    protected MemoryUsage createTestInstance() {
        return createRandom();
    }

    @Override
    protected MemoryUsage doParseInstance(XContentParser parser) throws IOException {
        final MemoryUsage parsed = MemoryUsage.PARSER.apply(parser, null);
        return parsed;
    }

    @Override
    protected boolean supportsUnknownFields() {
        return true;
    }
}
16 changes: 14 additions & 2 deletions docs/reference/ml/ml-shared.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -451,13 +451,25 @@ sorted by the `id` value in ascending order.
`progress`:::
(array) The progress report of the {dfanalytics-job} by phase.

`phase`:::
`phase`::::
(string) Defines the phase of the {dfanalytics-job}. Possible phases:
`reindexing`, `loading_data`, `analyzing`, and `writing_results`.

`progress_percent`:::
`progress_percent`::::
(integer) The progress that the {dfanalytics-job} has made, expressed as a
percentage.

`memory_usage`:::
(Optional, object) An object describing the memory usage of the
{dfanalytics-job}. It is present only after the job has started and memory
usage has been reported.

`timestamp`::::
(date) The timestamp when memory usage was calculated.

`peak_usage_bytes`::::
(long) The number of bytes used at the highest peak of memory usage.

end::data-frame-analytics-stats[]

tag::datafeed-id[]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ private TimeUtils() {
// Do nothing
}

/**
* @deprecated Please use {@link #parseTimeFieldToInstant(XContentParser, String)} instead.
*/
@Deprecated
public static Date parseTimeField(XContentParser parser, String fieldName) throws IOException {
if (parser.currentToken() == XContentParser.Token.VALUE_NUMBER) {
return new Date(parser.longValue());
Expand All @@ -36,7 +40,7 @@ public static Instant parseTimeFieldToInstant(XContentParser parser, String fiel
if (parser.currentToken() == XContentParser.Token.VALUE_NUMBER) {
return Instant.ofEpochMilli(parser.longValue());
} else if (parser.currentToken() == XContentParser.Token.VALUE_STRING) {
return Instant.ofEpochMilli(dateStringToEpoch(parser.text()));
return Instant.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(parser.text()));
}
throw new IllegalArgumentException(
"unexpected token [" + parser.currentToken() + "] for [" + fieldName + "]");
Expand All @@ -54,6 +58,7 @@ public static Instant parseTimeFieldToInstant(XContentParser parser, String fiel
* @return The epoch time in milliseconds or -1 if the date cannot be
* parsed.
*/
@Deprecated
public static long dateStringToEpoch(String date) {
try {
long epoch = Long.parseLong(date);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml;

import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.xpack.core.ml.utils.MlIndexAndAlias;
import org.elasticsearch.xpack.core.template.TemplateUtils;

/**
 * Names and helpers for the indices in which ML stores various stats about users' jobs.
 */
public class MlStatsIndex {

    public static final String TEMPLATE_NAME = ".ml-stats";

    private static final String MAPPINGS_VERSION_VARIABLE = "xpack.ml.version";

    /** Static utility holder; not instantiable. */
    private MlStatsIndex() {}

    /**
     * Loads the stats index mappings from the bundled JSON resource, handing the
     * current version and the version variable name to {@link TemplateUtils#loadTemplate}.
     *
     * @return the mappings as returned by {@code TemplateUtils.loadTemplate}
     */
    public static String mapping() {
        final String currentVersion = Version.CURRENT.toString();
        return TemplateUtils.loadTemplate(
            "/org/elasticsearch/xpack/core/ml/stats_index_mappings.json",
            currentVersion,
            MAPPINGS_VERSION_VARIABLE);
    }

    /**
     * @return the index pattern, i.e. the template name followed by {@code -*}
     */
    public static String indexPattern() {
        final String pattern = TEMPLATE_NAME + "-*";
        return pattern;
    }

    /**
     * @return the name of the write alias
     */
    public static String writeAlias() {
        return ".ml-stats-write";
    }

    /**
     * Creates the first concrete .ml-stats-000001 index, if necessary, together with
     * the .ml-stats-write alias pointing at it.
     * The listener receives a boolean telling whether the index was created by this call;
     * unless a failure occurs after this method returns, the index and alias should be present.
     */
    public static void createStatsIndexAndAliasIfNecessary(Client client, ClusterState state, IndexNameExpressionResolver resolver,
                                                           ActionListener<Boolean> listener) {
        final String alias = writeAlias();
        MlIndexAndAlias.createIndexAndAliasIfNecessary(client, state, resolver, TEMPLATE_NAME, alias, listener);
    }
}
Loading

0 comments on commit dd33193

Please sign in to comment.