Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Add categorizer stats ML result type #57978

Merged
merged 2 commits into from
Jun 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

package org.elasticsearch.xpack.core.ml.job.process.autodetect.state;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;

import java.io.IOException;
import java.util.Locale;

/**
* The status of categorization for a job. OK is default, WARN
* means that inappropriate numbers of categories are being found
*/
public enum CategorizationStatus implements Writeable {
OK, WARN;

public static CategorizationStatus fromString(String statusName) {
return valueOf(statusName.trim().toUpperCase(Locale.ROOT));
}

public static CategorizationStatus readFromStream(StreamInput in) throws IOException {
return in.readEnum(CategorizationStatus.class);
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeEnum(this);
}

@Override
public String toString() {
return name().toLowerCase(Locale.ROOT);
}
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -115,32 +115,6 @@ public String toString() {
}
}

/**
* The status of categorization for a job. OK is default, WARN
* means that inappropriate numbers of categories are being found
*/
public enum CategorizationStatus implements Writeable {
OK, WARN;

public static CategorizationStatus fromString(String statusName) {
return valueOf(statusName.trim().toUpperCase(Locale.ROOT));
}

public static CategorizationStatus readFromStream(StreamInput in) throws IOException {
return in.readEnum(CategorizationStatus.class);
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeEnum(this);
}

@Override
public String toString() {
return name().toLowerCase(Locale.ROOT);
}
}

private final String jobId;
private final long modelBytes;
private final Long modelBytesExceeded;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

package org.elasticsearch.xpack.core.ml.job.process.autodetect.state;

import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractSerializingTestCase;

public class CategorizerStatsTests extends AbstractSerializingTestCase<CategorizerStats> {

public void testDefaultConstructor() {
CategorizerStats stats = new CategorizerStats.Builder("foo").build();
assertNull(stats.getPartitionFieldName());
assertNull(stats.getPartitionFieldValue());
assertEquals(0, stats.getCategorizedDocCount());
assertEquals(0, stats.getTotalCategoryCount());
assertEquals(0, stats.getFrequentCategoryCount());
assertEquals(0, stats.getRareCategoryCount());
assertEquals(0, stats.getDeadCategoryCount());
assertEquals(0, stats.getFailedCategoryCount());
assertEquals(CategorizationStatus.OK, stats.getCategorizationStatus());
}

@Override
protected CategorizerStats createTestInstance() {
return createRandomized("foo");
}

public static CategorizerStats createRandomized(String jobId) {
CategorizerStats.Builder stats = new CategorizerStats.Builder(jobId);
if (randomBoolean()) {
stats.setPartitionFieldName(randomAlphaOfLength(10));
stats.setPartitionFieldValue(randomAlphaOfLength(20));
}
if (randomBoolean()) {
stats.setCategorizedDocCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setTotalCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setFrequentCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setRareCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setDeadCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setFailedCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setCategorizationStatus(randomFrom(CategorizationStatus.values()));
}
return stats.build();
}

@Override
protected Writeable.Reader<CategorizerStats> instanceReader() {
return CategorizerStats::new;
}

@Override
protected CategorizerStats doParseInstance(XContentParser parser) {
// Lenient because the partitionFieldName/Value pair is added as a separate field
return CategorizerStats.LENIENT_PARSER.apply(parser, null).build();
}

@Override
protected boolean supportsUnknownFields() {
// Because the partitionFieldName/Value pair is added as a separate field
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.test.AbstractSerializingTestCase;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats.CategorizationStatus;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats.MemoryStatus;

import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.ml.job.config.JobTests;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.output.FlushAcknowledgement;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.CategorizerStats;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.CategorizerStatsTests;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.Quantiles;
Expand Down Expand Up @@ -187,6 +189,8 @@ public void testProcessResults() throws Exception {
resultsBuilder.addInfluencers(influencers);
CategoryDefinition categoryDefinition = createCategoryDefinition();
resultsBuilder.addCategoryDefinition(categoryDefinition);
CategorizerStats categorizerStats = createCategorizerStats();
resultsBuilder.addCategorizerStats(categorizerStats);
ModelPlot modelPlot = createModelPlot();
resultsBuilder.addModelPlot(modelPlot);
Annotation annotation = createAnnotation();
Expand Down Expand Up @@ -222,6 +226,10 @@ public void testProcessResults() throws Exception {
assertEquals(1, persistedDefinition.count());
assertEquals(categoryDefinition, persistedDefinition.results().get(0));

QueryPage<CategorizerStats> persistedCategorizerStats = jobResultsProvider.categorizerStats(JOB_ID, 0, 100);
assertEquals(1, persistedCategorizerStats.count());
assertEquals(categorizerStats, persistedCategorizerStats.results().get(0));

QueryPage<ModelPlot> persistedModelPlot = jobResultsProvider.modelPlot(JOB_ID, 0, 100);
assertEquals(1, persistedModelPlot.count());
assertEquals(modelPlot, persistedModelPlot.results().get(0));
Expand Down Expand Up @@ -472,7 +480,11 @@ private static List<Influencer> createInfluencers(boolean isInterim) {
}

private static CategoryDefinition createCategoryDefinition() {
return new CategoryDefinitionTests().createTestInstance(JOB_ID);
return CategoryDefinitionTests.createTestInstance(JOB_ID);
}

private static CategorizerStats createCategorizerStats() {
return CategorizerStatsTests.createRandomized(JOB_ID);
}

private static ModelPlot createModelPlot() {
Expand Down Expand Up @@ -517,53 +529,59 @@ private static class ResultsBuilder {
private final List<AutodetectResult> results = new ArrayList<>();

ResultsBuilder addBucket(Bucket bucket) {
Objects.requireNonNull(bucket);
results.add(
new AutodetectResult(Objects.requireNonNull(bucket), null, null, null, null, null, null, null, null, null, null, null));
new AutodetectResult(bucket, null, null, null, null, null, null, null, null, null, null, null, null));
return this;
}

ResultsBuilder addRecords(List<AnomalyRecord> records) {
results.add(new AutodetectResult(null, records, null, null, null, null, null, null, null, null, null, null));
results.add(new AutodetectResult(null, records, null, null, null, null, null, null, null, null, null, null, null));
return this;
}

ResultsBuilder addInfluencers(List<Influencer> influencers) {
results.add(new AutodetectResult(null, null, influencers, null, null, null, null, null, null, null, null, null));
results.add(new AutodetectResult(null, null, influencers, null, null, null, null, null, null, null, null, null, null));
return this;
}

ResultsBuilder addCategoryDefinition(CategoryDefinition categoryDefinition) {
results.add(new AutodetectResult(null, null, null, null, null, null, null, null, null, null, categoryDefinition, null));
results.add(new AutodetectResult(null, null, null, null, null, null, null, null, null, null, categoryDefinition, null, null));
return this;
}

ResultsBuilder addCategorizerStats(CategorizerStats categorizerStats) {
results.add(new AutodetectResult(null, null, null, null, null, null, null, null, null, null, null, categorizerStats, null));
return this;
}

ResultsBuilder addModelPlot(ModelPlot modelPlot) {
results.add(new AutodetectResult(null, null, null, null, null, null, modelPlot, null, null, null, null, null));
results.add(new AutodetectResult(null, null, null, null, null, null, modelPlot, null, null, null, null, null, null));
return this;
}

ResultsBuilder addAnnotation(Annotation annotation) {
results.add(new AutodetectResult(null, null, null, null, null, null, null, annotation, null, null, null, null));
results.add(new AutodetectResult(null, null, null, null, null, null, null, annotation, null, null, null, null, null));
return this;
}

ResultsBuilder addModelSizeStats(ModelSizeStats modelSizeStats) {
results.add(new AutodetectResult(null, null, null, null, null, modelSizeStats, null, null, null, null, null, null));
results.add(new AutodetectResult(null, null, null, null, null, modelSizeStats, null, null, null, null, null, null, null));
return this;
}

ResultsBuilder addModelSnapshot(ModelSnapshot modelSnapshot) {
results.add(new AutodetectResult(null, null, null, null, modelSnapshot, null, null, null, null, null, null, null));
results.add(new AutodetectResult(null, null, null, null, modelSnapshot, null, null, null, null, null, null, null, null));
return this;
}

ResultsBuilder addQuantiles(Quantiles quantiles) {
results.add(new AutodetectResult(null, null, null, quantiles, null, null, null, null, null, null, null, null));
results.add(new AutodetectResult(null, null, null, quantiles, null, null, null, null, null, null, null, null, null));
return this;
}

ResultsBuilder addFlushAcknowledgement(FlushAcknowledgement flushAcknowledgement) {
results.add(new AutodetectResult(null, null, null, null, null, null, null, null, null, null, null, flushAcknowledgement));
results.add(new AutodetectResult(null, null, null, null, null, null, null, null, null, null, null, null, flushAcknowledgement));
return this;
}

Expand All @@ -572,7 +590,6 @@ Iterable<AutodetectResult> build() {
}
}


private <T extends ToXContent & Writeable> void assertResultsAreSame(List<T> expected, QueryPage<T> actual) {
assertEquals(expected.size(), actual.count());
assertEquals(actual.results().size(), actual.count());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedTimingStats;
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.CategorizerStats;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.Quantiles;
Expand Down Expand Up @@ -196,6 +197,13 @@ public Builder persistModelPlot(ModelPlot modelPlot) {
return this;
}

public Builder persistCategorizerStats(CategorizerStats categorizerStats) {
logger.trace("[{}] ES BULK ACTION: index categorizer stats to index [{}] with ID [{}]",
jobId, indexName, categorizerStats.getId());
indexResult(categorizerStats.getId(), categorizerStats, "categorizer stats");
return this;
}

public Builder persistForecast(Forecast forecast) {
logger.trace("[{}] ES BULK ACTION: index forecast to index [{}] with ID [{}]", jobId, indexName, forecast.getId());
indexResult(forecast.getId(), forecast, Forecast.RESULT_TYPE_VALUE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.CategorizerState;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.CategorizerStats;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot;
Expand Down Expand Up @@ -1104,6 +1105,37 @@ public QueryPage<ModelPlot> modelPlot(String jobId, int from, int size) {
return new QueryPage<>(results, searchResponse.getHits().getTotalHits().value, ModelPlot.RESULTS_FIELD);
}

public QueryPage<CategorizerStats> categorizerStats(String jobId, int from, int size) {
SearchResponse searchResponse;
String indexName = AnomalyDetectorsIndex.jobResultsAliasedName(jobId);
LOGGER.trace("ES API CALL: search categorizer stats from index {} from {} size {}", indexName, from, size);

try (ThreadContext.StoredContext ignore = client.threadPool().getThreadContext().stashWithOrigin(ML_ORIGIN)) {
searchResponse = client.prepareSearch(indexName)
.setIndicesOptions(MlIndicesUtils.addIgnoreUnavailable(SearchRequest.DEFAULT_INDICES_OPTIONS))
.setQuery(new TermsQueryBuilder(Result.RESULT_TYPE.getPreferredName(), CategorizerStats.RESULT_TYPE_VALUE))
.setFrom(from).setSize(size)
.setTrackTotalHits(true)
.get();
}

List<CategorizerStats> results = new ArrayList<>();

for (SearchHit hit : searchResponse.getHits().getHits()) {
BytesReference source = hit.getSourceRef();
try (InputStream stream = source.streamInput();
XContentParser parser = XContentFactory.xContent(XContentType.JSON)
.createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, stream)) {
CategorizerStats categorizerStats = CategorizerStats.LENIENT_PARSER.apply(parser, null).build();
results.add(categorizerStats);
} catch (IOException e) {
throw new ElasticsearchParseException("failed to parse categorizerStats", e);
}
}

return new QueryPage<>(results, searchResponse.getHits().getTotalHits().value, ModelPlot.RESULTS_FIELD);
}

/**
* Get the job's model size stats.
*/
Expand Down
Loading