Skip to content

Commit

Permalink
Added some ESQL queries to elastic/logs (elastic#466)
Browse files Browse the repository at this point in the history
* Added some ESQL queries to `elastic/logs`

This dataset is of interest to ESQL particularly as we're targeting observability use cases.
Currently ESQL is not mature enough to replace the workflows themselves, but can be used in the Discover dashboard, and the queries chosen reflect possible usage in that dashboard, as well as investigating the impact of multiple grouping keys on similar aggregations.

* Change test parameters to actually generate data

The original parameters resulted in all indices completely empty (zero docs).
Changing `start_date` and `end_date` to `bulk_start_date` and `bulk_end_date` resulted in only two indices getting data, the redis and k8s indices.
Adding clients settings and increasing end date and max_generated_corpus_size results in all indices getting data, and reducing raw_data_volume_per_day increases data generation performance.

These settings were chosen through trial and error to get the ESQL queries to actually run. Any smaller data sizes result in a `ValueSource mismatch` exception, likely due to some shards missing data.

* Added one more ESQL query from observability set

* Partial revert of index setup

The fact that the tests actually use a different challenge for index setup and querying allows for parameters much closer to the original.
All we really needed was to index a full minute instead of just 2s.

* Minimise changes to logging-querying.json

Some of the changes were useful only for local testing, so removing them.
  • Loading branch information
craigtaverner authored and inqueue committed Dec 6, 2023
1 parent a030b3e commit f22a9a2
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 2 deletions.
49 changes: 49 additions & 0 deletions elastic/logs/challenges/logging-querying.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,55 @@
{% endfor %}
]
}
},
{
"operation": "esql_basic_count_group_1",
"clients": 1,
"warmup-iterations": 10,
"iterations": 50,
"tags": ["esql"]
},
{
"operation": "esql_basic_count_group_2",
"clients": 1,
"warmup-iterations": 5,
"iterations": 20,
"tags": ["esql"]
},
{
"operation": "esql_basic_count_group_3",
"clients": 1,
"warmup-iterations": 5,
"iterations": 10,
"tags": ["esql"]
},
{
"operation": "esql_basic_count_group_4",
"clients": 1,
"warmup-iterations": 5,
"iterations": 10,
"tags": ["esql"]
},
{
"operation": "esql_time_range_and_date_histogram_two_groups_pre_filter",
"clients": 1,
"warmup-iterations": 5,
"iterations": 20,
"tags": ["esql"]
},
{
"operation": "esql_time_range_and_date_histogram_two_groups_post_filter",
"clients": 1,
"warmup-iterations": 5,
"iterations": 20,
"tags": ["esql"]
},
{
"operation": "esql_dissect_duration_and_stats",
"clients": 1,
"warmup-iterations": 5,
"iterations": 20,
"tags": ["esql"]
}
]
}
66 changes: 66 additions & 0 deletions elastic/logs/operations/esql.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
{
"name": "esql_basic_count_group_1",
"operation-type": "raw-request",
"method": "POST",
"path": "/_query",
"body": {
"query": "FROM logs-* | STATS count=count(*) BY agent.version | SORT count DESC | LIMIT 20"
}
},
{
"name": "esql_basic_count_group_2",
"operation-type": "raw-request",
"method": "POST",
"path": "/_query",
"body": {
"query": "FROM logs-* | STATS count=count(*) BY agent.version, agent.type | SORT count DESC | LIMIT 20"
}
},
{
"name": "esql_basic_count_group_3",
"operation-type": "raw-request",
"method": "POST",
"path": "/_query",
"body": {
"query": "FROM logs-* | STATS count=count(*) BY agent.version, agent.type, agent.hostname | SORT count DESC | LIMIT 20"
}
},
{
"name": "esql_basic_count_group_4",
"operation-type": "raw-request",
"method": "POST",
"path": "/_query",
"body": {
"query": "FROM logs-* | STATS count=count(*) BY agent.version, agent.type, agent.hostname, agent.id | SORT count DESC | LIMIT 20"
}
},
{
"name": "esql_time_range_and_date_histogram_two_groups_pre_filter",
"description": "Based on observability queries for average CPU over date histogram",
"operation-type": "raw-request",
"method": "POST",
"path": "/_query",
"body": {
"query": "FROM logs-* | EVAL start_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-01\"), end_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-02\") | WHERE @timestamp >= start_time AND @timestamp <= end_time AND http.response.body.bytes IS NOT NULL | EVAL bucket = DATE_TRUNC(1 hour, @timestamp) | STATS avg=AVG(http.response.body.bytes), min=MIN(http.response.body.bytes), max=MAX(http.response.body.bytes) BY data_stream.dataset, bucket | KEEP data_stream.dataset, bucket, min, avg, max"
}
},
{
"name": "esql_time_range_and_date_histogram_two_groups_post_filter",
"description": "Based on observability queries for average CPU over date histogram",
"operation-type": "raw-request",
"method": "POST",
"path": "/_query",
"body": {
"query": "FROM logs-* | EVAL start_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-01\"), end_time = DATE_PARSE(\"yyyy-MM-dd\",\"2020-01-02\") | WHERE @timestamp >= start_time AND @timestamp <= end_time | EVAL bucket = DATE_TRUNC(1 hour, @timestamp) | STATS avg=AVG(http.response.body.bytes), min=MIN(http.response.body.bytes), max=MAX(http.response.body.bytes) BY data_stream.dataset, bucket | WHERE min IS NOT NULL | KEEP data_stream.dataset, bucket, min, avg, max"
}
},
{
"name": "esql_dissect_duration_and_stats",
"description": "Based on observability queries for duration average",
"operation-type": "raw-request",
"method": "POST",
"path": "/_query",
"body": {
"query": "FROM logs-postgres* | DISSECT message \"duration: %{query_duration} ms\" | EVAL query_duration_num = TO_DOUBLE(query_duration) | STATS avg_duration = AVG(query_duration_num)"
}
}
3 changes: 3 additions & 0 deletions elastic/logs/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -646,5 +646,8 @@
],
"challenges": [
{{ rally.collect(parts="challenges/*.json") }}
],
"operations": [
{{ rally.collect(parts="operations/*.json") }}
]
}
4 changes: 2 additions & 2 deletions it/test_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

BASE_PARAMS = {
"start_date": "2021-01-01T00-00-00Z",
"end_date": "2021-01-01T00-00-02Z",
"end_date": "2021-01-01T00-01-00Z",
"max_total_download_gb": "18",
"raw_data_volume_per_day": "72GB",
"max_generated_corpus_size": "1GB",
Expand Down Expand Up @@ -129,7 +129,7 @@ def test_logs_indexing_querying_throttled(self, es_cluster, rally):
def test_logs_querying_with_preloaded_data(self, es_cluster, rally):
custom = {
"bulk_start_date": "2020-09-30T00-00-00Z",
"bulk_end_date": "2020-09-30T00-00-02Z",
"bulk_end_date": "2020-09-30T00-01-00Z",
"query_warmup_time_period": "1",
"query_time_period": "1",
"workflow_time_interval": "1",
Expand Down

0 comments on commit f22a9a2

Please sign in to comment.