From 249332baa3560ee342432c5041bd71015e086d7f Mon Sep 17 00:00:00 2001
From: Craig Taverner
Date: Mon, 9 Oct 2023 16:15:16 +0200
Subject: [PATCH] Added some ESQL queries to `elastic/logs` (#466)

* Added some ESQL queries to `elastic/logs`

This dataset is of interest to ESQL, particularly as we're targeting
observability use cases. Currently ESQL is not mature enough to replace the
workflows themselves, but it can be used in the Discover dashboard, and the
queries chosen reflect possible usage in that dashboard, as well as
investigating the impact of multiple grouping keys on similar aggregations.

* Change test parameters to actually generate data

The original parameters resulted in all indices being completely empty
(zero docs). Changing `start_date` and `end_date` to `bulk_start_date` and
`bulk_end_date` resulted in only two indices getting data, the redis and
k8s indices. Adding `clients` settings and increasing the end date and
`max_generated_corpus_size` results in all indices getting data, and
reducing `raw_data_volume_per_day` increases data generation performance.
These settings were chosen through trial and error to get the ESQL queries
to actually run. Any smaller data sizes result in a `ValueSource mismatch`
exception, likely due to some shards missing data.

* Added one more ESQL query from the observability set

* Partial revert of index setup

The fact that the tests actually use a different challenge for index setup
and querying allows for parameters much closer to the original. All we
really needed was to index a full minute instead of just 2s.

* Minimise changes to logging-querying.json

Some of the changes were useful only for local testing, so removing them.
---
 elastic/logs/challenges/logging-querying.json | 49 ++++++++++++++
 elastic/logs/operations/esql.json             | 66 +++++++++++++++++++
 elastic/logs/track.json                       |  3 +
 it/test_logs.py                               |  4 +-
 4 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 elastic/logs/operations/esql.json

diff --git a/elastic/logs/challenges/logging-querying.json b/elastic/logs/challenges/logging-querying.json
index 169bd31c..51d4d6a7 100644
--- a/elastic/logs/challenges/logging-querying.json
+++ b/elastic/logs/challenges/logging-querying.json
@@ -54,6 +54,55 @@
           {% endfor %}
         ]
       }
+    },
+    {
+      "operation": "esql_basic_count_group_1",
+      "clients": 1,
+      "warmup-iterations": 10,
+      "iterations": 50,
+      "tags": ["esql"]
+    },
+    {
+      "operation": "esql_basic_count_group_2",
+      "clients": 1,
+      "warmup-iterations": 5,
+      "iterations": 20,
+      "tags": ["esql"]
+    },
+    {
+      "operation": "esql_basic_count_group_3",
+      "clients": 1,
+      "warmup-iterations": 5,
+      "iterations": 10,
+      "tags": ["esql"]
+    },
+    {
+      "operation": "esql_basic_count_group_4",
+      "clients": 1,
+      "warmup-iterations": 5,
+      "iterations": 10,
+      "tags": ["esql"]
+    },
+    {
+      "operation": "esql_time_range_and_date_histogram_two_groups_pre_filter",
+      "clients": 1,
+      "warmup-iterations": 5,
+      "iterations": 20,
+      "tags": ["esql"]
+    },
+    {
+      "operation": "esql_time_range_and_date_histogram_two_groups_post_filter",
+      "clients": 1,
+      "warmup-iterations": 5,
+      "iterations": 20,
+      "tags": ["esql"]
+    },
+    {
+      "operation": "esql_dissect_duration_and_stats",
+      "clients": 1,
+      "warmup-iterations": 5,
+      "iterations": 20,
+      "tags": ["esql"]
     }
   ]
 }
diff --git a/elastic/logs/operations/esql.json b/elastic/logs/operations/esql.json
new file mode 100644
index 00000000..bb27b68f
--- /dev/null
+++ b/elastic/logs/operations/esql.json
@@ -0,0 +1,66 @@
+  {
+    "name": "esql_basic_count_group_1",
+    "operation-type": "raw-request",
+    "method": "POST",
"path": "/_query", + "body": { + "query": "FROM logs-* | STATS count=count(*) BY agent.version | SORT count DESC | LIMIT 20" + } + }, + { + "name": "esql_basic_count_group_2", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-* | STATS count=count(*) BY agent.version, agent.type | SORT count DESC | LIMIT 20" + } + }, + { + "name": "esql_basic_count_group_3", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-* | STATS count=count(*) BY agent.version, agent.type, agent.hostname | SORT count DESC | LIMIT 20" + } + }, + { + "name": "esql_basic_count_group_4", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-* | STATS count=count(*) BY agent.version, agent.type, agent.hostname, agent.id | SORT count DESC | LIMIT 20" + } + }, + { + "name": "esql_time_range_and_date_histogram_two_groups_pre_filter", + "description": "Based on observability queries for average CPU over date histogram", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-* | EVAL start_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-01\"), end_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-02\") | WHERE @timestamp >= start_time AND @timestamp <= end_time AND http.response.body.bytes IS NOT NULL | EVAL bucket = DATE_TRUNC(1 hour, @timestamp) | STATS avg=AVG(http.response.body.bytes), min=MIN(http.response.body.bytes), max=MAX(http.response.body.bytes) BY data_stream.dataset, bucket | KEEP data_stream.dataset, bucket, min, avg, max" + } + }, + { + "name": "esql_time_range_and_date_histogram_two_groups_post_filter", + "description": "Based on observability queries for average CPU over date histogram", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-* | EVAL start_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-01\"), end_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-02\") | WHERE @timestamp >= start_time AND @timestamp <= end_time | EVAL bucket = DATE_TRUNC(1 hour, @timestamp) | STATS avg=AVG(http.response.body.bytes), min=MIN(http.response.body.bytes), max=MAX(http.response.body.bytes) BY data_stream.dataset, bucket | WHERE min IS NOT NULL | KEEP data_stream.dataset, bucket, min, avg, max" + } + }, + { + "name": "esql_dissect_duration_and_stats", + "description": "Based on observability queries for duration average", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-postgres* | DISSECT message \"duration: %{query_duration} ms\" | EVAL query_duration_num = TO_DOUBLE(query_duration) | STATS avg_duration = AVG(query_duration_num)" + } + } diff --git a/elastic/logs/track.json b/elastic/logs/track.json index 772aed3e..73739161 100644 --- a/elastic/logs/track.json +++ b/elastic/logs/track.json @@ -646,5 +646,8 @@ ], "challenges": [ {{ rally.collect(parts="challenges/*.json") }} + ], + "operations": [ + {{ rally.collect(parts="operations/*.json") }} ] } diff --git a/it/test_logs.py b/it/test_logs.py index 2f10ab49..7d09cd9a 100644 --- a/it/test_logs.py +++ b/it/test_logs.py @@ -23,7 +23,7 @@ BASE_PARAMS = { "start_date": "2021-01-01T00-00-00Z", - "end_date": "2021-01-01T00-00-02Z", + "end_date": "2021-01-01T00-01-00Z", "max_total_download_gb": "18", "raw_data_volume_per_day": "72GB", "max_generated_corpus_size": "1GB", @@ -129,7 +129,7 @@ def test_logs_indexing_querying_throttled(self, es_cluster, 
     def test_logs_querying_with_preloaded_data(self, es_cluster, rally):
         custom = {
             "bulk_start_date": "2020-09-30T00-00-00Z",
-            "bulk_end_date": "2020-09-30T00-00-02Z",
+            "bulk_end_date": "2020-09-30T00-01-00Z",
             "query_warmup_time_period": "1",
             "query_time_period": "1",
             "workflow_time_interval": "1",
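
As a quick illustration of what the new raw-request operations do at the HTTP level: each task POSTs an ES|QL query body to the Elasticsearch `_query` endpoint. The sketch below shows the equivalent request for `esql_basic_count_group_1`; the cluster URL and the use of the `requests` library are assumptions for illustration only, not part of the patch or the track.

    import json

    import requests  # assumed HTTP client; any client that can POST JSON works

    ES_URL = "http://localhost:9200"  # hypothetical local cluster, adjust as needed

    # Same body shape as the "esql_basic_count_group_1" operation in operations/esql.json
    body = {"query": "FROM logs-* | STATS count=count(*) BY agent.version | SORT count DESC | LIMIT 20"}

    # POST to the ES|QL endpoint, mirroring the raw-request operation in the track
    resp = requests.post(f"{ES_URL}/_query", json=body, timeout=60)
    resp.raise_for_status()
    print(json.dumps(resp.json(), indent=2))

Since the new schedule entries are tagged "esql", they can presumably also be run on their own via Rally's task filtering (for example `--include-tasks="tag:esql"`) against the logging-querying challenge.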