From 3dbc8e4f71a3587a08a6966d6a08f3e830392f87 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Fri, 6 Oct 2023 15:07:44 +0200 Subject: [PATCH 1/5] Added some ESQL queries to `elastic/logs` This dataset is of interest to ESQL particularly as we're targeting observability use cases. Currently ESQL is not mature enough to replace the workflows themselves, but can be used in the discover dashboard, and the queries chosen reflect possible usage in that dashboard, as well as investigating the impact of multiple grouping keys on similar aggregations. --- elastic/logs/challenges/esql.json | 48 +++++++++++++++++ elastic/logs/challenges/logging-querying.json | 44 +++++++++++++++ elastic/logs/operations/esql.json | 54 +++++++++++++++++++ elastic/logs/track.json | 3 ++ 4 files changed, 149 insertions(+) create mode 100644 elastic/logs/challenges/esql.json create mode 100644 elastic/logs/operations/esql.json diff --git a/elastic/logs/challenges/esql.json b/elastic/logs/challenges/esql.json new file mode 100644 index 00000000..db68bbd5 --- /dev/null +++ b/elastic/logs/challenges/esql.json @@ -0,0 +1,48 @@ +{ + "name": "esql", + "description": "Performance benchmarks for ESQL queries on elastic/logs data. 
This is work in progress", + "schedule": [ + { + "operation": "esql_basic_count_group_1", + "clients": 1, + "warmup-iterations": 10, + "iterations": 50, + "tags": ["esql"] + }, + { + "operation": "esql_basic_count_group_2", + "clients": 1, + "warmup-iterations": 5, + "iterations": 20, + "tags": ["esql"] + }, + { + "operation": "esql_basic_count_group_3", + "clients": 1, + "warmup-iterations": 5, + "iterations": 10, + "tags": ["esql"] + }, + { + "operation": "esql_basic_count_group_4", + "clients": 1, + "warmup-iterations": 5, + "iterations": 10, + "tags": ["esql"] + }, + { + "operation": "esql_time_range_and_date_histogram_two_groups_pre_filter", + "clients": 1, + "warmup-iterations": 5, + "iterations": 20, + "tags": ["esql"] + }, + { + "operation": "esql_time_range_and_date_histogram_two_groups_post_filter", + "clients": 1, + "warmup-iterations": 5, + "iterations": 20, + "tags": ["esql"] + } + ] +} \ No newline at end of file diff --git a/elastic/logs/challenges/logging-querying.json b/elastic/logs/challenges/logging-querying.json index 169bd31c..986bcc08 100644 --- a/elastic/logs/challenges/logging-querying.json +++ b/elastic/logs/challenges/logging-querying.json @@ -19,11 +19,13 @@ "bulk-size": {{ p_bulk_size }}, "detailed-results": true }, + "tags": ["setup"], "clients": {{ p_bulk_indexing_clients }}, "ignore-response-error-level": "{{error_level | default('non-fatal')}}" }, { "name": "compression-stats", + "tags": ["setup"], "operation": { "operation-type": "compression-statistics", "param-source": "create-datastream-source" @@ -54,6 +56,48 @@ {% endfor %} ] } + }, + { + "operation": "esql_basic_count_group_1", + "clients": 1, + "warmup-iterations": 10, + "iterations": 50, + "tags": ["esql"] + }, + { + "operation": "esql_basic_count_group_2", + "clients": 1, + "warmup-iterations": 5, + "iterations": 20, + "tags": ["esql"] + }, + { + "operation": "esql_basic_count_group_3", + "clients": 1, + "warmup-iterations": 5, + "iterations": 10, + "tags": ["esql"] + }, + 
{ + "operation": "esql_basic_count_group_4", + "clients": 1, + "warmup-iterations": 5, + "iterations": 10, + "tags": ["esql"] + }, + { + "operation": "esql_time_range_and_date_histogram_two_groups_pre_filter", + "clients": 1, + "warmup-iterations": 5, + "iterations": 20, + "tags": ["esql"] + }, + { + "operation": "esql_time_range_and_date_histogram_two_groups_post_filter", + "clients": 1, + "warmup-iterations": 5, + "iterations": 20, + "tags": ["esql"] } ] } diff --git a/elastic/logs/operations/esql.json b/elastic/logs/operations/esql.json new file mode 100644 index 00000000..1bd9f41b --- /dev/null +++ b/elastic/logs/operations/esql.json @@ -0,0 +1,54 @@ + { + "name": "esql_basic_count_group_1", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-* | STATS count=count(*) BY agent.version | SORT count DESC | LIMIT 20" + } + }, + { + "name": "esql_basic_count_group_2", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-* | STATS count=count(*) BY agent.version, agent.type | SORT count DESC | LIMIT 20" + } + }, + { + "name": "esql_basic_count_group_3", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-* | STATS count=count(*) BY agent.version, agent.type, agent.hostname | SORT count DESC | LIMIT 20" + } + }, + { + "name": "esql_basic_count_group_4", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-* | STATS count=count(*) BY agent.version, agent.type, agent.hostname, agent.id | SORT count DESC | LIMIT 20" + } + }, + { + "name": "esql_time_range_and_date_histogram_two_groups_pre_filter", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-* | EVAL start_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-01\"), end_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-02\") | WHERE @timestamp >= 
start_time AND @timestamp <= end_time AND http.response.body.bytes IS NOT NULL | EVAL bucket = DATE_TRUNC(1 hour, @timestamp) | STATS avg=AVG(http.response.body.bytes), min=MIN(http.response.body.bytes), max=MAX(http.response.body.bytes) BY data_stream.dataset, bucket | KEEP data_stream.dataset, bucket, min, avg, max" + } + }, + { + "name": "esql_time_range_and_date_histogram_two_groups_post_filter", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-* | EVAL start_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-01\"), end_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-02\") | WHERE @timestamp >= start_time AND @timestamp <= end_time | EVAL bucket = DATE_TRUNC(1 hour, @timestamp) | STATS avg=AVG(http.response.body.bytes), min=MIN(http.response.body.bytes), max=MAX(http.response.body.bytes) BY data_stream.dataset, bucket | WHERE min IS NOT NULL | KEEP data_stream.dataset, bucket, min, avg, max" + } + } diff --git a/elastic/logs/track.json b/elastic/logs/track.json index 772aed3e..73739161 100644 --- a/elastic/logs/track.json +++ b/elastic/logs/track.json @@ -646,5 +646,8 @@ ], "challenges": [ {{ rally.collect(parts="challenges/*.json") }} + ], + "operations": [ + {{ rally.collect(parts="operations/*.json") }} ] } From 3fb3c53f8b01a990b6fe511e01dcd2661bb81e92 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Mon, 9 Oct 2023 11:25:22 +0200 Subject: [PATCH 2/5] Change test parameters to actually generate data The original parameters resulted in all indices completely empty (zero docs). Changing `start_date` and `end_date` to `bulk_start_date` and `bulk_end_date` resulted in only two indices getting data, the redis and k8s indices. Adding clients settings and increasing end date and max_generated_corpus_size results in all indices getting data, and reducing raw_data_volume_per_day increases data generation performance. These settings were chosen through trial and error to get the ESQL queries to actually run. 
Any smaller data sizes result in a `ValueSource mismatch` exception, likely due to some shards missing data. --- it/test_logs.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/it/test_logs.py b/it/test_logs.py index 2f10ab49..05831bba 100644 --- a/it/test_logs.py +++ b/it/test_logs.py @@ -22,11 +22,13 @@ pytest_rally = pytest.importorskip("pytest_rally") BASE_PARAMS = { - "start_date": "2021-01-01T00-00-00Z", - "end_date": "2021-01-01T00-00-02Z", + "bulk_start_date": "2021-01-01T00-00-00Z", + "bulk_end_date": "2021-01-01T00-13-00Z", + "bulk_indexing_clients": 12, + "data_generation_clients": 16, "max_total_download_gb": "18", - "raw_data_volume_per_day": "72GB", - "max_generated_corpus_size": "1GB", + "raw_data_volume_per_day": "10GB", + "max_generated_corpus_size": "4GB", "wait_for_status": "green", "force_data_generation": "true", "number_of_shards": "2", From 0906d0388328c1bd1cb757617cedaff28dce4d55 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Mon, 9 Oct 2023 12:11:20 +0200 Subject: [PATCH 3/5] Added one more ESQL query from observability set --- elastic/logs/challenges/logging-querying.json | 8 ++++++++ elastic/logs/operations/esql.json | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/elastic/logs/challenges/logging-querying.json b/elastic/logs/challenges/logging-querying.json index 986bcc08..1145c6f2 100644 --- a/elastic/logs/challenges/logging-querying.json +++ b/elastic/logs/challenges/logging-querying.json @@ -48,6 +48,7 @@ "task-offset": {{ loop.index }}, "request-params": {{ p_query_request_params | tojson(indent=2) }} }, + "tags": ["logging-queries"], "think-time-interval": {{ p_user_think_time }}, "workflow-interval": {{ p_user_workflow_time }}, "clients": 1, @@ -98,6 +99,13 @@ "warmup-iterations": 5, "iterations": 20, "tags": ["esql"] + }, + { + "operation": "esql_dissect_duration_and_stats", + "clients": 1, + "warmup-iterations": 5, + "iterations": 20, + "tags": ["esql"] } ] } diff --git 
a/elastic/logs/operations/esql.json b/elastic/logs/operations/esql.json index 1bd9f41b..bb27b68f 100644 --- a/elastic/logs/operations/esql.json +++ b/elastic/logs/operations/esql.json @@ -36,6 +36,7 @@ }, { "name": "esql_time_range_and_date_histogram_two_groups_pre_filter", + "description": "Based on observability queries for average CPU over date histogram", "operation-type": "raw-request", "method": "POST", "path": "/_query", @@ -45,10 +46,21 @@ }, { "name": "esql_time_range_and_date_histogram_two_groups_post_filter", + "description": "Based on observability queries for average CPU over date histogram", "operation-type": "raw-request", "method": "POST", "path": "/_query", "body": { "query": "FROM logs-* | EVAL start_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-01\"), end_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-02\") | WHERE @timestamp >= start_time AND @timestamp <= end_time | EVAL bucket = DATE_TRUNC(1 hour, @timestamp) | STATS avg=AVG(http.response.body.bytes), min=MIN(http.response.body.bytes), max=MAX(http.response.body.bytes) BY data_stream.dataset, bucket | WHERE min IS NOT NULL | KEEP data_stream.dataset, bucket, min, avg, max" } + }, + { + "name": "esql_dissect_duration_and_stats", + "description": "Based on observability queries for duration average", + "operation-type": "raw-request", + "method": "POST", + "path": "/_query", + "body": { + "query": "FROM logs-postgres* | DISSECT message \"duration: %{query_duration} ms\" | EVAL query_duration_num = TO_DOUBLE(query_duration) | STATS avg_duration = AVG(query_duration_num)" + } } From abc868251ba253e10461acb188edc906ae5c1e3a Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Mon, 9 Oct 2023 14:21:47 +0200 Subject: [PATCH 4/5] Partial revert of index setup The fact that the tests actually use a different challenge for index setup and querying allows for parameters much closer to the original. All we really needed was to index a full minute instead of just 2s. 
--- it/test_logs.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/it/test_logs.py b/it/test_logs.py index 05831bba..7d09cd9a 100644 --- a/it/test_logs.py +++ b/it/test_logs.py @@ -22,13 +22,11 @@ pytest_rally = pytest.importorskip("pytest_rally") BASE_PARAMS = { - "bulk_start_date": "2021-01-01T00-00-00Z", - "bulk_end_date": "2021-01-01T00-13-00Z", - "bulk_indexing_clients": 12, - "data_generation_clients": 16, + "start_date": "2021-01-01T00-00-00Z", + "end_date": "2021-01-01T00-01-00Z", "max_total_download_gb": "18", - "raw_data_volume_per_day": "10GB", - "max_generated_corpus_size": "4GB", + "raw_data_volume_per_day": "72GB", + "max_generated_corpus_size": "1GB", "wait_for_status": "green", "force_data_generation": "true", "number_of_shards": "2", @@ -131,7 +129,7 @@ def test_logs_indexing_querying_throttled(self, es_cluster, rally): def test_logs_querying_with_preloaded_data(self, es_cluster, rally): custom = { "bulk_start_date": "2020-09-30T00-00-00Z", - "bulk_end_date": "2020-09-30T00-00-02Z", + "bulk_end_date": "2020-09-30T00-01-00Z", "query_warmup_time_period": "1", "query_time_period": "1", "workflow_time_interval": "1", From 63d62cbf28614e0d093683df084f9276f6d87ab5 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Mon, 9 Oct 2023 14:33:10 +0200 Subject: [PATCH 5/5] Minimise changes to logging-querying.json Some of the changes were useful only for local testing, so removing them. --- elastic/logs/challenges/esql.json | 48 ------------------- elastic/logs/challenges/logging-querying.json | 3 -- 2 files changed, 51 deletions(-) delete mode 100644 elastic/logs/challenges/esql.json diff --git a/elastic/logs/challenges/esql.json b/elastic/logs/challenges/esql.json deleted file mode 100644 index db68bbd5..00000000 --- a/elastic/logs/challenges/esql.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "name": "esql", - "description": "Performance benchmarks for ESQL queries on elastic/logs data. 
This is work in progress", - "schedule": [ - { - "operation": "esql_basic_count_group_1", - "clients": 1, - "warmup-iterations": 10, - "iterations": 50, - "tags": ["esql"] - }, - { - "operation": "esql_basic_count_group_2", - "clients": 1, - "warmup-iterations": 5, - "iterations": 20, - "tags": ["esql"] - }, - { - "operation": "esql_basic_count_group_3", - "clients": 1, - "warmup-iterations": 5, - "iterations": 10, - "tags": ["esql"] - }, - { - "operation": "esql_basic_count_group_4", - "clients": 1, - "warmup-iterations": 5, - "iterations": 10, - "tags": ["esql"] - }, - { - "operation": "esql_time_range_and_date_histogram_two_groups_pre_filter", - "clients": 1, - "warmup-iterations": 5, - "iterations": 20, - "tags": ["esql"] - }, - { - "operation": "esql_time_range_and_date_histogram_two_groups_post_filter", - "clients": 1, - "warmup-iterations": 5, - "iterations": 20, - "tags": ["esql"] - } - ] -} \ No newline at end of file diff --git a/elastic/logs/challenges/logging-querying.json b/elastic/logs/challenges/logging-querying.json index 1145c6f2..51d4d6a7 100644 --- a/elastic/logs/challenges/logging-querying.json +++ b/elastic/logs/challenges/logging-querying.json @@ -19,13 +19,11 @@ "bulk-size": {{ p_bulk_size }}, "detailed-results": true }, - "tags": ["setup"], "clients": {{ p_bulk_indexing_clients }}, "ignore-response-error-level": "{{error_level | default('non-fatal')}}" }, { "name": "compression-stats", - "tags": ["setup"], "operation": { "operation-type": "compression-statistics", "param-source": "create-datastream-source" @@ -48,7 +46,6 @@ "task-offset": {{ loop.index }}, "request-params": {{ p_query_request_params | tojson(indent=2) }} }, - "tags": ["logging-queries"], "think-time-interval": {{ p_user_think_time }}, "workflow-interval": {{ p_user_workflow_time }}, "clients": 1,