Merge branch 'main' into carlosdelest/semantic-text-field-mapping-specifics
carlosdelest · Apr 30, 2024 · b382b3b · b382b3b
2 parents 0417268 + a2d9cc6
commit b382b3b
Show file tree

Hide file tree

Showing 110 changed files with 2,164 additions and 825 deletions.
diff --git a/.buildkite/pipelines/intake.template.yml b/.buildkite/pipelines/intake.template.yml
@@ -40,6 +40,14 @@ steps:
       image: family/elasticsearch-ubuntu-2004
       machineType: n1-standard-32
       buildDirectory: /dev/shm/bk
+  - label: part5
+    command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5
+    timeout_in_minutes: 300
+    agents:
+      provider: gcp
+      image: family/elasticsearch-ubuntu-2004
+      machineType: n1-standard-32
+      buildDirectory: /dev/shm/bk
   - group: bwc-snapshots
     steps:
       - label: "{{matrix.BWC_VERSION}} / bwc-snapshots"

diff --git a/.buildkite/pipelines/intake.yml b/.buildkite/pipelines/intake.yml
@@ -41,6 +41,14 @@ steps:
       image: family/elasticsearch-ubuntu-2004
       machineType: n1-standard-32
       buildDirectory: /dev/shm/bk
+  - label: part5
+    command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5
+    timeout_in_minutes: 300
+    agents:
+      provider: gcp
+      image: family/elasticsearch-ubuntu-2004
+      machineType: n1-standard-32
+      buildDirectory: /dev/shm/bk
   - group: bwc-snapshots
     steps:
       - label: "{{matrix.BWC_VERSION}} / bwc-snapshots"

diff --git a/.buildkite/pipelines/lucene-snapshot/run-tests.yml b/.buildkite/pipelines/lucene-snapshot/run-tests.yml
@@ -40,6 +40,14 @@ steps:
       image: family/elasticsearch-ubuntu-2004
       machineType: custom-32-98304
       buildDirectory: /dev/shm/bk
+  - label: part5
+    command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5
+    timeout_in_minutes: 300
+    agents:
+      provider: gcp
+      image: family/elasticsearch-ubuntu-2004
+      machineType: custom-32-98304
+      buildDirectory: /dev/shm/bk
   - group: bwc-snapshots
     steps:
       - label: "{{matrix.BWC_VERSION}} / bwc-snapshots"

diff --git a/.buildkite/pipelines/periodic-platform-support.yml b/.buildkite/pipelines/periodic-platform-support.yml
@@ -48,6 +48,7 @@ steps:
               - checkPart2
               - checkPart3
               - checkPart4
+              - checkPart5
               - checkRestCompat
         agents:
           provider: gcp
@@ -72,6 +73,7 @@ steps:
               - checkPart2
               - checkPart3
               - checkPart4
+              - checkPart5
               - checkRestCompat
         agents:
           provider: aws

diff --git a/.buildkite/pipelines/periodic.template.yml b/.buildkite/pipelines/periodic.template.yml
@@ -50,6 +50,7 @@ steps:
               - checkPart2
               - checkPart3
               - checkPart4
+              - checkPart5
               - checkRestCompat
         agents:
           provider: gcp
@@ -92,6 +93,7 @@ steps:
               - checkPart2
               - checkPart3
               - checkPart4
+              - checkPart5
               - checkRestCompat
         agents:
           provider: gcp

diff --git a/.buildkite/pipelines/periodic.yml b/.buildkite/pipelines/periodic.yml
@@ -391,6 +391,7 @@ steps:
               - checkPart2
               - checkPart3
               - checkPart4
+              - checkPart5
               - checkRestCompat
         agents:
           provider: gcp
@@ -433,6 +434,7 @@ steps:
               - checkPart2
               - checkPart3
               - checkPart4
+              - checkPart5
               - checkRestCompat
         agents:
           provider: gcp

diff --git a/.buildkite/pipelines/pull-request/part-5-arm.yml b/.buildkite/pipelines/pull-request/part-5-arm.yml
@@ -0,0 +1,13 @@
+config:
+  allow-labels: "test-arm"
+steps:
+  - label: part-5-arm
+    command: .ci/scripts/run-gradle.sh -Dignore.tests.seed checkPart5
+    timeout_in_minutes: 300
+    agents:
+      provider: aws
+      imagePrefix: elasticsearch-ubuntu-2004-aarch64
+      instanceType: m6g.8xlarge
+      diskSizeGb: 350
+      diskType: gp3
+      diskName: /dev/sda1
diff --git a/.buildkite/pipelines/pull-request/part-5-fips.yml b/.buildkite/pipelines/pull-request/part-5-fips.yml
@@ -0,0 +1,11 @@
+config:
+  allow-labels: "Team:Security"
+steps:
+  - label: part-5-fips
+    command: .ci/scripts/run-gradle.sh -Dignore.tests.seed -Dtests.fips.enabled=true checkPart5
+    timeout_in_minutes: 300
+    agents:
+      provider: gcp
+      image: family/elasticsearch-ubuntu-2004
+      machineType: custom-32-98304
+      buildDirectory: /dev/shm/bk
diff --git a/.buildkite/pipelines/pull-request/part-5-windows.yml b/.buildkite/pipelines/pull-request/part-5-windows.yml
@@ -0,0 +1,14 @@
+config:
+  allow-labels: "test-windows"
+steps:
+  - label: part-5-windows
+    command: .\.buildkite\scripts\run-script.ps1 bash .buildkite/scripts/windows-run-gradle.sh
+    timeout_in_minutes: 300
+    agents:
+      provider: gcp
+      image: family/elasticsearch-windows-2022
+      machineType: custom-32-98304
+      diskType: pd-ssd
+      diskSizeGb: 350
+    env:
+      GRADLE_TASK: checkPart5
diff --git a/.buildkite/pipelines/pull-request/part-5.yml b/.buildkite/pipelines/pull-request/part-5.yml
@@ -0,0 +1,11 @@
+config:
+  skip-target-branches: "7.17"
+steps:
+  - label: part-5
+    command: .ci/scripts/run-gradle.sh -Dignore.tests.seed checkPart5
+    timeout_in_minutes: 300
+    agents:
+      provider: gcp
+      image: family/elasticsearch-ubuntu-2004
+      machineType: custom-32-98304
+      buildDirectory: /dev/shm/bk
diff --git a/build.gradle b/build.gradle
@@ -287,6 +287,8 @@ allprojects {
         tasks.register('checkPart4') { dependsOn 'check' }
       } else if (project.path == ":x-pack:plugin" || project.path.contains("ql") ||  project.path.contains("smoke-test")) {
         tasks.register('checkPart3') { dependsOn 'check' }
+      } else if (project.path.contains("multi-node")) {
+        tasks.register('checkPart5') { dependsOn 'check' }
       } else {
         tasks.register('checkPart2') { dependsOn 'check' }
       }

diff --git a/docs/changelog/101373.yaml b/docs/changelog/101373.yaml
@@ -0,0 +1,6 @@
+pr: 101373
+summary: Adding aggregations support for the `_ignored` field
+area: Search
+type: feature
+issues:
+ - 59946
diff --git a/docs/changelog/105792.yaml b/docs/changelog/105792.yaml
@@ -0,0 +1,18 @@
+pr: 105792
+summary: "Change `skip_unavailable` remote cluster setting default value to true"
+area: Search
+type: breaking
+issues: []
+breaking:
+  title: "Change `skip_unavailable` remote cluster setting default value to true"
+  area: Cluster and node setting
+  details: The default value of the `skip_unavailable` setting is now set to true.
+    All existing and future remote clusters that do not define this setting will use the new default.
+    This setting only affects cross-cluster searches using the _search or _async_search API.
+  impact: Unavailable remote clusters in a cross-cluster search will no longer cause the search to fail unless
+    skip_unavailable is configured to be `false` in elasticsearch.yml or via the `_cluster/settings` API. 
+    Unavailable clusters with `skip_unavailable`=`true` (either explicitly or by using the new default) are marked 
+    as SKIPPED in the search response metadata section and do not fail the entire search. If users want to ensure that a
+    search returns a failure when a particular remote cluster is not available, `skip_unavailable` must now be
+    set explicitly to `false`.
+  notable: false
diff --git a/docs/changelog/108016.yaml b/docs/changelog/108016.yaml
@@ -0,0 +1,5 @@
+pr: 108016
+summary: Optimise `BinaryRangeAggregator` for single value fields
+area: Aggregations
+type: enhancement
+issues: []
diff --git a/docs/reference/ccr/getting-started.asciidoc b/docs/reference/ccr/getting-started.asciidoc
@@ -147,7 +147,7 @@ cluster with cluster alias `leader`.
     "num_nodes_connected" : 1, <1>
     "max_connections_per_cluster" : 3,
     "initial_connect_timeout" : "30s",
-    "skip_unavailable" : false,
+    "skip_unavailable" : true,
     "mode" : "sniff"
   }
 }

diff --git a/docs/reference/esql/functions/examples/bucket.asciidoc b/docs/reference/esql/functions/examples/bucket.asciidoc
@@ -108,7 +108,6 @@ include::{esql-specs}/bucket.csv-spec[tag=bucket_in_agg]
 |===
 include::{esql-specs}/bucket.csv-spec[tag=bucket_in_agg-result]
 |===
-
 `BUCKET` may be used in both the aggregating and grouping part of the
 <<esql-stats-by, STATS ... BY ...>> command provided that in the aggregating
 part the function is referenced by an alias defined in the
@@ -121,3 +120,4 @@ include::{esql-specs}/bucket.csv-spec[tag=reuseGroupingFunctionWithExpression]
 |===
 include::{esql-specs}/bucket.csv-spec[tag=reuseGroupingFunctionWithExpression-result]
 |===
+
diff --git a/docs/reference/esql/functions/kibana/definition/bucket.json b/docs/reference/esql/functions/kibana/definition/bucket.json
@@ -943,6 +943,7 @@
     "FROM employees\n| STATS COUNT(*) by bs = BUCKET(salary, 20, 25324, 74999)\n| SORT bs",
     "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS c = COUNT(1) BY b = BUCKET(salary, 5000.)\n| SORT b",
     "FROM sample_data \n| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()\n| STATS COUNT(*) BY bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW())",
-    "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS AVG(salary) BY bucket = BUCKET(hire_date, 20, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT bucket"
+    "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS AVG(salary) BY bucket = BUCKET(hire_date, 20, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT bucket",
+    "FROM employees\n| STATS s1 = b1 + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)\n| SORT b1, b2\n| KEEP s1, b1, s2, b2"
   ]
 }
diff --git a/docs/reference/mapping/fields/ignored-field.asciidoc b/docs/reference/mapping/fields/ignored-field.asciidoc
@@ -43,3 +43,20 @@ GET _search
   }
 }
 --------------------------------------------------
+
+Since 8.15.0, the `_ignored` field supports aggregations as well.
+For example, the following query finds all fields that were ignored:
+
+[source,console]
+--------------------------------------------------
+GET _search
+{
+  "aggs": {
+    "ignored_fields": {
+      "terms": {
+         "field": "_ignored"
+      }
+    }
+  }
+}
+--------------------------------------------------
diff --git a/docs/reference/modules/cluster/remote-clusters-connect.asciidoc b/docs/reference/modules/cluster/remote-clusters-connect.asciidoc
@@ -37,7 +37,7 @@ clusters on individual nodes in the local cluster, define static settings in
 `elasticsearch.yml` for each node.
 
 The following request adds a remote cluster with an alias of `cluster_one`. This
-_cluster alias_ is a unique identifier that represents the connection to the 
+_cluster alias_ is a unique identifier that represents the connection to the
 remote cluster and is used to distinguish between local and remote indices.
 
 [source,console,subs=attributes+]
@@ -60,7 +60,7 @@ PUT /_cluster/settings
 // TEST[setup:host]
 // TEST[s/127.0.0.1:\{remote-interface-default-port\}/\${transport_host}/]
 <1> The cluster alias of this remote cluster is `cluster_one`.
-<2> Specifies the hostname and {remote-interface} port of a seed node in the 
+<2> Specifies the hostname and {remote-interface} port of a seed node in the
 remote cluster.
 
 You can use the <<cluster-remote-info,remote cluster info API>> to verify that
@@ -86,7 +86,7 @@ cluster with the cluster alias `cluster_one`:
     "num_nodes_connected" : 1,  <1>
     "max_connections_per_cluster" : 3,
     "initial_connect_timeout" : "30s",
-    "skip_unavailable" : false, <2>
+    "skip_unavailable" : true, <2>
 ifeval::["{trust-mechanism}"=="api-key"]
     "cluster_credentials": "::es_redacted::", <3>
 endif::[]
@@ -103,7 +103,7 @@ connected to.
 <2> Indicates whether to skip the remote cluster if searched through {ccs} but
 no nodes are available.
 ifeval::["{trust-mechanism}"=="api-key"]
-<3> If present, indicates the remote cluster has connected using API key 
+<3> If present, indicates the remote cluster has connected using API key
 authentication.
 endif::[]
 
@@ -187,7 +187,7 @@ PUT _cluster/settings
 
 You can delete a remote cluster from the cluster settings by passing `null`
 values for each remote cluster setting. The following request removes
-`cluster_two` from the cluster settings, leaving `cluster_one` and 
+`cluster_two` from the cluster settings, leaving `cluster_one` and
 `cluster_three` intact:
 
 [source,console]
@@ -212,15 +212,15 @@ PUT _cluster/settings
 
 ===== Statically configure remote clusters
 If you specify settings in `elasticsearch.yml`, only the nodes with
-those settings can connect to the remote cluster and serve remote cluster 
+those settings can connect to the remote cluster and serve remote cluster
 requests.
 
-NOTE: Remote cluster settings that are specified using the 
+NOTE: Remote cluster settings that are specified using the
 <<cluster-update-settings,cluster update settings API>> take precedence over
 settings that you specify in `elasticsearch.yml` for individual nodes.
 
-In the following example, `cluster_one`, `cluster_two`, and `cluster_three` are 
-arbitrary cluster aliases representing the connection to each cluster. These 
+In the following example, `cluster_one`, `cluster_two`, and `cluster_three` are
+arbitrary cluster aliases representing the connection to each cluster. These
 names are subsequently used to distinguish between local and remote indices.
 
 [source,yaml,subs=attributes+]

diff --git a/docs/reference/modules/cluster/remote-clusters-settings.asciidoc b/docs/reference/modules/cluster/remote-clusters-settings.asciidoc
@@ -28,9 +28,20 @@ mode are described separately.
 
   Per cluster boolean setting that allows to skip specific clusters when no
   nodes belonging to them are available and they are the target of a remote
-  cluster request. Default is `false`, meaning that all clusters are mandatory
-  by default, but they can selectively be made optional by setting this setting
-  to `true`.
+  cluster request.
+
+IMPORTANT: In Elasticsearch 8.15, the default value for `skip_unavailable` was
+changed from `false` to `true`. Before Elasticsearch 8.15, if you wanted a cluster
+to be treated as optional for a {ccs}, you had to set `skip_unavailable` to `true`.
+From Elasticsearch 8.15 onward, you need to set `skip_unavailable` to `false` to
+make a cluster required for the {ccs}. Once you upgrade the local ("querying")
+cluster search coordinator node (the node you send CCS requests to) to 8.15 or later,
+any remote clusters that do not have an explicit setting for `skip_unavailable` will
+immediately change over to using the new default of `true`. This applies regardless of
+whether you have upgraded the remote clusters to 8.15, as the `skip_unavailable`
+search behavior is entirely determined by the setting on the local cluster where
+you configure the remotes.
+
 
 `cluster.remote.<cluster_alias>.transport.ping_schedule`::
 

diff --git a/docs/reference/search/profile.asciidoc b/docs/reference/search/profile.asciidoc
@@ -194,7 +194,7 @@ The API returns the following result:
             "load_source_count": 5
           },
           "debug": {
-            "stored_fields": ["_id", "_ignored", "_routing", "_source"]
+            "stored_fields": ["_id", "_routing", "_source"]
           },
           "children": [
             {
@@ -1051,7 +1051,7 @@ And here is the fetch profile:
             "load_source_count": 5
           },
           "debug": {
-            "stored_fields": ["_id", "_ignored", "_routing", "_source"]
+            "stored_fields": ["_id", "_routing", "_source"]
           },
           "children": [
             {