From cac6c3333127fd5007d93663664b5b5da1538e86 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Tue, 19 Sep 2023 21:08:57 +0200 Subject: [PATCH] Refined ESQL benchmarks, fixed NaN and removed segment (#457) * Fixed NaN results in `avg_tip_percent_esql` query * Removed all `segment` partitioning operations since they were unused * Added `setup` tags to the re-indexing to allow benchmark-only runs with `--exclude-tasks="tag:setup"` --- nyc_taxis/challenges/default.json | 15 ++++-- nyc_taxis/operations/default.json | 84 ++++--------------------------- 2 files changed, 19 insertions(+), 80 deletions(-) diff --git a/nyc_taxis/challenges/default.json b/nyc_taxis/challenges/default.json index e16ee3f3..3f9c6ee7 100644 --- a/nyc_taxis/challenges/default.json +++ b/nyc_taxis/challenges/default.json @@ -710,7 +710,8 @@ "default": false, "schedule": [ { - "operation": "delete-index" + "operation": "delete-index", + "tags": ["setup"] }, { "operation": { @@ -720,7 +721,8 @@ "index.refresh_interval": "30s", "index.translog.flush_threshold_size": "4g" }{%- endif %} - } + }, + "tags": ["setup"] }, { "name": "check-cluster-health", @@ -732,17 +734,20 @@ "wait_for_no_relocating_shards": "true" }, "retry-until-success": true - } + }, + "tags": ["setup"] }, { "operation": "index", "warmup-time-period": 240, "clients": {{bulk_indexing_clients | default(8)}}, - "ignore-response-error-level": "{{error_level | default('non-fatal')}}" + "ignore-response-error-level": "{{error_level | default('non-fatal')}}", + "tags": ["setup"] }, { "name": "refresh-after-index", - "operation": "refresh" + "operation": "refresh", + "tags": ["setup"] }, { "operation": "avg_passenger_count_aggregation", diff --git a/nyc_taxis/operations/default.json b/nyc_taxis/operations/default.json index 89c13231..aa3c16f0 100644 --- a/nyc_taxis/operations/default.json +++ b/nyc_taxis/operations/default.json @@ -695,18 +695,6 @@ } } }, - { - "name": "avg_passenger_count_esql_segment_partitioning", - "operation-type": 
"raw-request", - "method": "POST", - "path": "/_esql", - "body": { - "query" : "from nyc_taxis | stats avg(passenger_count)", - "pragma" : { - "data_partitioning" : "segment" - } - } - }, { "name": "avg_passenger_count_esql_doc_partitioning", "operation-type": "raw-request", @@ -725,6 +713,13 @@ "request-timeout": 120, "body": { "size": 0, + "query": { + "range": { + "fare_amount": { + "gt": 0 + } + } + }, "runtime_mappings": { "tip_percent": { "type": "double", @@ -748,31 +743,19 @@ "method": "POST", "path": "/_esql", "body": { - "query" : "from nyc_taxis | eval tip_percent = tip_amount / fare_amount | stats avg(tip_percent)", + "query" : "from nyc_taxis | where fare_amount > 0 | eval tip_percent = tip_amount / fare_amount | stats avg(tip_percent)", "pragma" : { "data_partitioning" : "shard" } } }, - { - "name": "avg_tip_percent_esql_segment_partitioning", - "operation-type": "raw-request", - "method": "POST", - "path": "/_esql", - "body": { - "query" : "from nyc_taxis | eval tip_percent = tip_amount / fare_amount | stats avg(tip_percent)", - "pragma" : { - "data_partitioning" : "segment" - } - } - }, { "name": "avg_tip_percent_esql_doc_partitioning", "operation-type": "raw-request", "method": "POST", "path": "/_esql", "body": { - "query" : "from nyc_taxis | eval tip_percent = tip_amount / fare_amount | stats avg(tip_percent)", + "query" : "from nyc_taxis | where fare_amount > 0 | eval tip_percent = tip_amount / fare_amount | stats avg(tip_percent)", "pragma" : { "data_partitioning" : "doc" } @@ -815,18 +798,6 @@ } } }, - { - "name": "avg_amount_group_by_integer_esql_segment_partitioning", - "operation-type": "raw-request", - "method": "POST", - "path": "/_esql", - "body": { - "query" : "from nyc_taxis | stats avg(total_amount) by passenger_count | sort passenger_count", - "pragma" : { - "data_partitioning" : "segment" - } - } - }, { "name": "avg_amount_group_by_integer_esql_doc_partitioning", "operation-type": "raw-request", @@ -876,18 +847,6 @@ } } }, - { - 
"name": "avg_amount_group_by_keyword_esql_segment_partitioning", - "operation-type": "raw-request", - "method": "POST", - "path": "/_esql", - "body": { - "query" : "from nyc_taxis | stats avg(total_amount) by rate_code_id | sort rate_code_id", - "pragma" : { - "data_partitioning" : "segment" - } - } - }, { "name": "avg_amount_group_by_keyword_esql_doc_partitioning", "operation-type": "raw-request", @@ -944,18 +903,6 @@ } } }, - { - "name": "avg_passenger_count_filtered_esql_segment_partitioning", - "operation-type": "raw-request", - "method": "POST", - "path": "/_esql", - "body": { - "query" : "from nyc_taxis | where total_amount > 60 and rate_code_id==\"2\"| stats avg(passenger_count)", - "pragma" : { - "data_partitioning" : "segment" - } - } - }, { "name": "avg_passenger_count_filtered_esql_doc_partitioning", "operation-type": "raw-request", @@ -993,19 +940,6 @@ } } }, - { - "name": "sort_by_ts_esql_segment_partitioning", - "operation-type": "raw-request", - "method": "POST", - "path": "/_esql", - "request-timeout": 120, - "body": { - "query" : "from nyc_taxis | sort pickup_datetime desc | project pickup_datetime, dropoff_datetime, trip_distance| limit 1000", - "pragma" : { - "data_partitioning" : "segment" - } - } - }, { "name": "sort_by_ts_esql_doc_partitioning", "operation-type": "raw-request",