Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 7.8] Add FailedCategoryCount to ModelSizeStats #4825

Merged
merged 1 commit into from
Jun 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions src/Nest/XPack/MachineLearning/Job/Config/JobStats.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ public class JobStats
[DataMember(Name = "data_counts")]
public DataCounts DataCounts { get; internal set; }

/// <summary>
/// Indicates that the process of deleting the job is in progress but not yet completed.
/// It is only reported when true.
/// </summary>
/// <value>
/// <c>true</c> while deletion is in progress.
/// NOTE(review): the property is a nullable <c>bool?</c>, so it is presumably <c>null</c> when the
/// <c>deleting</c> field is absent from the response — confirm against the serializer.
/// </value>
[DataMember(Name = "deleting")]
public bool? Deleting { get; internal set; }

/// <summary>
/// Contains job statistics if job contains a forecast.
/// </summary>
Expand Down Expand Up @@ -109,10 +116,23 @@ public class TimingStats
/// <remarks>Valid in Elasticsearch 7.4.0+</remarks>
[DataMember(Name = "exponential_average_bucket_processing_time_per_hour_ms")]
public double ExponentialAverageBucketProcessingTimePerHourMilliseconds { get; internal set; }

/// <summary>
/// Sum of all bucket processing times, in milliseconds.
/// </summary>
/// <value>
/// A <c>double</c> number of milliseconds, mapped to the <c>total_bucket_processing_time_ms</c> data member.
/// </value>
[DataMember(Name = "total_bucket_processing_time_ms")]
public double TotalBucketProcessingTimeMilliseconds { get; internal set; }
}

public class JobForecastStatistics
{
/// <summary>
/// A value of 0 indicates that forecasts do not exist for this job.
/// A value of 1 indicates that at least one forecast exists.
/// </summary>
/// <value>
/// The documented values are 0 and 1; the property is typed as <c>long</c> and mapped to the
/// <c>forecasted_jobs</c> data member.
/// </value>
[DataMember(Name = "forecasted_jobs")]
public long ForecastedJobs { get; internal set; }

/// <summary>
/// Statistics about the memory usage: minimum, maximum, average and total.
/// </summary>
Expand Down
60 changes: 60 additions & 0 deletions src/Nest/XPack/MachineLearning/Job/Process/ModelSizeStats.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,42 @@ public class ModelSizeStats
[DataMember(Name = "bucket_allocation_failures_count")]
public long BucketAllocationFailuresCount { get; internal set; }

/// <summary>
/// The number of documents that have had a field categorized.
/// </summary>
/// <value>
/// A document count, mapped to the <c>categorized_doc_count</c> data member.
/// </value>
[DataMember(Name = "categorized_doc_count")]
public long CategorizedDocCount { get; internal set; }

/// <summary>
/// The status of categorization for the job.
/// </summary>
/// <value>
/// A <see cref="ModelCategorizationStatus" /> value, mapped to the <c>categorization_status</c> data member.
/// </value>
[DataMember(Name = "categorization_status")]
public ModelCategorizationStatus CategorizationStatus { get; internal set; }

/// <summary>
/// The number of categories created by categorization that will never be assigned again because the
/// definition of another category makes that other category a superset of the dead category.
/// (Dead categories are a side effect of the way categorization has no prior training.)
/// </summary>
/// <value>
/// A category count, mapped to the <c>dead_category_count</c> data member.
/// </value>
[DataMember(Name = "dead_category_count")]
public long DeadCategoryCount { get; internal set; }

/// <summary>
/// The number of times that categorization wanted to create a new category but couldn't because the job had
/// hit its <c>model_memory_limit</c>.
/// This count does not track which specific categories failed to be created. Therefore you cannot use this
/// value to determine the number of unique categories that were missed.
/// </summary>
/// <value>
/// A count of failed category creations, mapped to the <c>failed_category_count</c> data member.
/// </value>
/// <remarks>Valid in Elasticsearch 7.8.0+</remarks>
[DataMember(Name = "failed_category_count")]
public long FailedCategoryCount { get; internal set; }

/// <summary>
/// The number of categories that match more than 1% of categorized documents.
/// </summary>
/// <value>
/// A category count, mapped to the <c>frequent_category_count</c> data member.
/// </value>
[DataMember(Name = "frequent_category_count")]
public long FrequentCategoryCount { get; internal set; }

/// <summary>
/// A unique identifier for the job.
/// </summary>
Expand All @@ -47,6 +83,24 @@ public class ModelSizeStats
[DataMember(Name = "model_bytes")]
public long ModelBytes { get; internal set; }

/// <summary>
/// The number of bytes over the high limit for memory usage at the last allocation failure.
/// </summary>
/// <value>
/// A size in bytes, mapped to the <c>model_bytes_exceeded</c> data member.
/// </value>
[DataMember(Name = "model_bytes_exceeded")]
public long ModelBytesExceeded { get; internal set; }

/// <summary>
/// The upper limit for model memory usage, checked on increasing values.
/// </summary>
/// <value>
/// The limit as a <c>long</c>, mapped to the <c>model_bytes_memory_limit</c> data member.
/// NOTE(review): presumably expressed in bytes, going by the member name — confirm against the
/// Elasticsearch documentation.
/// </value>
[DataMember(Name = "model_bytes_memory_limit")]
public long ModelBytesMemoryLimit { get; internal set; }

/// <summary>
/// The number of categories that match just one categorized document.
/// </summary>
/// <value>
/// A category count, mapped to the <c>rare_category_count</c> data member.
/// </value>
[DataMember(Name = "rare_category_count")]
public long RareCategoryCount { get; internal set; }

/// <summary>
/// For internal use. The type of result.
/// </summary>
Expand All @@ -66,6 +120,12 @@ public class ModelSizeStats
[DataMember(Name = "total_by_field_count")]
public long TotalByFieldCount { get; internal set; }

/// <summary>
/// The number of categories created by categorization.
/// </summary>
/// <value>
/// A category count, mapped to the <c>total_category_count</c> data member.
/// </value>
[DataMember(Name = "total_category_count")]
public long TotalCategoryCount { get; internal set; }

/// <summary>
/// The number of over field values that were analyzed by the models.
/// </summary>
Expand Down