From 9e281d6dff59925c69b8919f042e830fd55e6b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Wed, 19 Jul 2023 14:20:50 +0200 Subject: [PATCH] [DOCS] Adds important admonition to handling delayed data page (#97753) (#97805) --- .../ml/anomaly-detection/ml-delayed-data-detection.asciidoc | 6 ++++++ .../elasticsearch/xpack/core/ml/job/messages/Messages.java | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc index 60b3b5b163667..a5a7b01f095ac 100644 --- a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc +++ b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc @@ -16,6 +16,12 @@ if it is set too high, analysis drifts farther away from real-time. The balance that is struck depends upon each use case and the environmental factors of the cluster. +IMPORTANT: If you get an error that says +`Datafeed has missed XXXX documents due to ingest latency`, consider increasing +the value of `query_delay`. If it doesn't help, investigate the ingest latency and its +cause. You can do this by comparing event and ingest timestamps. High latency +is often caused by bursts of ingested documents, misconfiguration of the ingest +pipeline, or misalignment of system clocks. == Why worry about delayed data? 
If data are delayed randomly (and consequently are missing from analysis), the diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java index 77b4dcaaac83f..5d0a4aebcca28 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java @@ -154,7 +154,7 @@ public final class Messages { public static final String JOB_AUDIT_DATAFEED_NO_DATA = "Datafeed has been retrieving no data for a while"; public static final String JOB_AUDIT_DATAFEED_MISSING_DATA = "Datafeed has missed {0} documents due to ingest latency, latest bucket with missing data is [{1}]." - + " Consider increasing query_delay"; + + " Consider increasing query_delay and investigate the cause of high latency in your ingestion process."; public static final String JOB_AUDIT_DATAFEED_RECOVERED = "Datafeed has recovered data extraction and analysis"; public static final String JOB_AUDIT_DATAFEED_STARTED_FROM_TO = "Datafeed started (from: {0} to: {1}) with frequency [{2}]"; public static final String JOB_AUDIT_DATAFEED_STARTED_REALTIME = "Datafeed started in real-time";