From 7f3421f7421e727ba8cac4ec9e3396e50908b2c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 18 Jul 2023 15:49:46 +0200 Subject: [PATCH 1/7] [DOCS] Adds important admonition to handling delayed data page. --- .../ml/anomaly-detection/ml-delayed-data-detection.asciidoc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc index 60b3b5b163667..3dca60ac37c69 100644 --- a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc +++ b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc @@ -16,6 +16,11 @@ if it is set too high, analysis drifts farther away from real-time. The balance that is struck depends upon each use case and the environmental factors of the cluster. +IMPORTANT: If you get an error that says +`Datafeed has missed XXXX documents due to ingest latency`, check whether your +ingest pipeline delays indexing. If your ingest pipeline functions well, +consider increasing the valuse of `query_delay`. + == Why worry about delayed data? If data are delayed randomly (and consequently are missing from analysis), the From 26b5a6e9766706605bda3533dd266bbf6d7cb055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 18 Jul 2023 16:04:55 +0200 Subject: [PATCH 2/7] Amends error message. 
--- .../org/elasticsearch/xpack/core/ml/job/messages/Messages.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java index 77b4dcaaac83f..f0ff07bcb21df 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java @@ -154,7 +154,7 @@ public final class Messages { public static final String JOB_AUDIT_DATAFEED_NO_DATA = "Datafeed has been retrieving no data for a while"; public static final String JOB_AUDIT_DATAFEED_MISSING_DATA = "Datafeed has missed {0} documents due to ingest latency, latest bucket with missing data is [{1}]." - + " Consider increasing query_delay"; + + " Check if your ingest pipeline delays ingesting. If the pipeline works as expected, consider increasing query_delay."; public static final String JOB_AUDIT_DATAFEED_RECOVERED = "Datafeed has recovered data extraction and analysis"; public static final String JOB_AUDIT_DATAFEED_STARTED_FROM_TO = "Datafeed started (from: {0} to: {1}) with frequency [{2}]"; public static final String JOB_AUDIT_DATAFEED_STARTED_REALTIME = "Datafeed started in real-time"; From 33d758f02713ef7c86ba1f234395e2695496e2bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 18 Jul 2023 16:16:46 +0200 Subject: [PATCH 3/7] [DOCS] Fine-tunes text. 
--- .../ml/anomaly-detection/ml-delayed-data-detection.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc index 3dca60ac37c69..34fd9a4149319 100644 --- a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc +++ b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc @@ -18,7 +18,7 @@ cluster. IMPORTANT: If you get an error that says `Datafeed has missed XXXX documents due to ingest latency`, check whether your -ingest pipeline delays indexing. If your ingest pipeline functions well, +ingest pipeline delays indexing. If your ingest pipeline works as expected, consider increasing the valuse of `query_delay`. == Why worry about delayed data? From 5df616897711b6aa65758dd66585f4ab80068602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 18 Jul 2023 16:17:30 +0200 Subject: [PATCH 4/7] [DOCS] Fixes typo. --- .../ml/anomaly-detection/ml-delayed-data-detection.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc index 34fd9a4149319..89ec625956897 100644 --- a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc +++ b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc @@ -19,7 +19,7 @@ cluster. IMPORTANT: If you get an error that says `Datafeed has missed XXXX documents due to ingest latency`, check whether your ingest pipeline delays indexing. If your ingest pipeline works as expected, -consider increasing the valuse of `query_delay`. +consider increasing the value of `query_delay`. == Why worry about delayed data? 
From af50d7f38ef8906cdce92d1550103da065545b4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 18 Jul 2023 16:47:12 +0200 Subject: [PATCH 5/7] Rephrases text. --- .../ml/anomaly-detection/ml-delayed-data-detection.asciidoc | 6 +++--- .../elasticsearch/xpack/core/ml/job/messages/Messages.java | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc index 89ec625956897..7f60748638190 100644 --- a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc +++ b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc @@ -17,9 +17,9 @@ that is struck depends upon each use case and the environmental factors of the cluster. IMPORTANT: If you get an error that says -`Datafeed has missed XXXX documents due to ingest latency`, check whether your -ingest pipeline delays indexing. If your ingest pipeline works as expected, -consider increasing the value of `query_delay`. +`Datafeed has missed XXXX documents due to ingest latency`, check your ingestion +process end to end if it delays indexing. If it works as expected, consider +increasing the value of `query_delay`. == Why worry about delayed data? 
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java index f0ff07bcb21df..531bfc2d46d75 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java @@ -154,7 +154,7 @@ public final class Messages { public static final String JOB_AUDIT_DATAFEED_NO_DATA = "Datafeed has been retrieving no data for a while"; public static final String JOB_AUDIT_DATAFEED_MISSING_DATA = "Datafeed has missed {0} documents due to ingest latency, latest bucket with missing data is [{1}]." - + " Check if your ingest pipeline delays ingesting. If the pipeline works as expected, consider increasing query_delay."; + + " Check if the end-to-end ingestion process is delayed. If it works as expected, consider increasing query_delay."; public static final String JOB_AUDIT_DATAFEED_RECOVERED = "Datafeed has recovered data extraction and analysis"; public static final String JOB_AUDIT_DATAFEED_STARTED_FROM_TO = "Datafeed started (from: {0} to: {1}) with frequency [{2}]"; public static final String JOB_AUDIT_DATAFEED_STARTED_REALTIME = "Datafeed started in real-time"; From 95715447c407cfd346697e1db567175e34377eaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Wed, 19 Jul 2023 10:51:17 +0200 Subject: [PATCH 6/7] Update x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java --- .../org/elasticsearch/xpack/core/ml/job/messages/Messages.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java index 531bfc2d46d75..5d0a4aebcca28 100644 --- 
a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java @@ -154,7 +154,7 @@ public final class Messages { public static final String JOB_AUDIT_DATAFEED_NO_DATA = "Datafeed has been retrieving no data for a while"; public static final String JOB_AUDIT_DATAFEED_MISSING_DATA = "Datafeed has missed {0} documents due to ingest latency, latest bucket with missing data is [{1}]." - + " Check if the end-to-end ingestion process is delayed. If it works as expected, consider increasing query_delay."; + + " Consider increasing query_delay and investigate the cause of high latency in your ingestion process."; public static final String JOB_AUDIT_DATAFEED_RECOVERED = "Datafeed has recovered data extraction and analysis"; public static final String JOB_AUDIT_DATAFEED_STARTED_FROM_TO = "Datafeed started (from: {0} to: {1}) with frequency [{2}]"; public static final String JOB_AUDIT_DATAFEED_STARTED_REALTIME = "Datafeed started in real-time"; From 12b4e4109d70476575958e84da8b4a542c18c26b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Wed, 19 Jul 2023 10:54:45 +0200 Subject: [PATCH 7/7] Apply suggestions from code review --- .../ml-delayed-data-detection.asciidoc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc index 7f60748638190..a5a7b01f095ac 100644 --- a/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc +++ b/docs/reference/ml/anomaly-detection/ml-delayed-data-detection.asciidoc @@ -16,11 +16,12 @@ if it is set too high, analysis drifts farther away from real-time. The balance that is struck depends upon each use case and the environmental factors of the cluster. 
-IMPORTANT: If you get an error that says -`Datafeed has missed XXXX documents due to ingest latency`, check your ingestion -process end to end if it delays indexing. If it works as expected, consider -increasing the value of `query_delay`. - +IMPORTANT: If you get an error that says +`Datafeed has missed XXXX documents due to ingest latency`, consider increasing +the value of `query_delay`. If it doesn't help, investigate the ingest latency and its +cause. You can do this by comparing event and ingest timestamps. High latency +is often caused by bursts of ingested documents, misconfiguration of the ingest +pipeline, or misalignment of system clocks. == Why worry about delayed data? If data are delayed randomly (and consequently are missing from analysis), the